Skip to content

Commit 1ea328d

Browse files
authored
chore: leave specialised bench helpers (#21810)
## Which issue does this PR close?

- Follow-up to #21544

## Rationale for this change

@Jefffrey [noted](#21544 (comment)) that some bench helpers created a special distribution of values, so their usage should be revisited.

## What changes are included in this PR?

Some of the helpers were created specifically to produce such a distribution. For this reason, I initially avoided migrating most candidate helpers from other files (`array_has`, `array_sort`, etc.).

- `array_remove`'s helpers (`create_f64_list_array` and `create_decimal64_list_array`) should be brought back, because the probability of finding a needle depends on the value distribution, which the helpers set explicitly instead of relying on the range of a template parameter. This PR brings them back.
- `array_min_max` is safe, since it locates the min/max element, which isn't affected by the value distribution.
- `array_repeat` doesn't check contents, so it is safe.
- `array_to_string` is not content-aware.

## Are these changes tested?

Yes, by running the changed benchmarks.

## Are there any user-facing changes?

No
1 parent cc67c13 commit 1ea328d

1 file changed

Lines changed: 100 additions & 33 deletions

File tree

datafusion/functions-nested/benches/array_remove.rs

Lines changed: 100 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,11 @@
1616
// under the License.
1717

1818
use arrow::array::{
19-
Array, ArrayRef, BinaryArray, BooleanArray, FixedSizeBinaryArray, ListArray,
20-
StringArray,
19+
Array, ArrayRef, BinaryArray, BooleanArray, Decimal128Array, FixedSizeBinaryArray,
20+
Float64Array, Int64Array, ListArray, StringArray,
2121
};
2222
use arrow::buffer::OffsetBuffer;
23-
use arrow::datatypes::{DataType, Decimal128Type, Field, Float64Type, Int64Type};
24-
use arrow::util::bench_util::create_primitive_list_array_with_seed;
23+
use arrow::datatypes::{DataType, Field};
2524
use criterion::{
2625
criterion_group, criterion_main, {BenchmarkId, Criterion},
2726
};
@@ -56,15 +55,7 @@ fn bench_array_remove_int64(c: &mut Criterion) {
5655
let mut group = c.benchmark_group("array_remove_int64");
5756

5857
for &array_size in ARRAY_SIZES {
59-
let list_array: ArrayRef =
60-
Arc::new(create_primitive_list_array_with_seed::<i32, Int64Type>(
61-
NUM_ROWS,
62-
0.0,
63-
NULL_DENSITY as f32,
64-
array_size,
65-
SEED,
66-
));
67-
58+
let list_array = create_int64_list_array(NUM_ROWS, array_size, NULL_DENSITY);
6859
let element_to_remove = ScalarValue::Int64(Some(1));
6960
let args = create_args(list_array.clone(), element_to_remove.clone());
7061

@@ -105,14 +96,7 @@ fn bench_array_remove_f64(c: &mut Criterion) {
10596
let mut group = c.benchmark_group("array_remove_f64");
10697

10798
for &array_size in ARRAY_SIZES {
108-
let list_array: ArrayRef =
109-
Arc::new(create_primitive_list_array_with_seed::<i32, Float64Type>(
110-
NUM_ROWS,
111-
0.0,
112-
NULL_DENSITY as f32,
113-
array_size,
114-
SEED,
115-
));
99+
let list_array = create_f64_list_array(NUM_ROWS, array_size, NULL_DENSITY);
116100
let element_to_remove = ScalarValue::Float64(Some(1.0));
117101
let args = create_args(list_array.clone(), element_to_remove.clone());
118102

@@ -276,17 +260,8 @@ fn bench_array_remove_decimal64(c: &mut Criterion) {
276260
let mut group = c.benchmark_group("array_remove_decimal64");
277261

278262
for &array_size in ARRAY_SIZES {
279-
let list_array: ArrayRef = Arc::new(create_primitive_list_array_with_seed::<
280-
i32,
281-
Decimal128Type,
282-
>(
283-
NUM_ROWS,
284-
0.0,
285-
NULL_DENSITY as f32,
286-
array_size,
287-
SEED,
288-
));
289-
let element_to_remove = ScalarValue::Decimal128(Some(100_i128), 38, 10);
263+
let list_array = create_decimal64_list_array(NUM_ROWS, array_size, NULL_DENSITY);
264+
let element_to_remove = ScalarValue::Decimal128(Some(100_i128), 10, 2);
290265
let args = create_args(list_array.clone(), element_to_remove.clone());
291266

292267
group.bench_with_input(
@@ -301,7 +276,7 @@ fn bench_array_remove_decimal64(c: &mut Criterion) {
301276
arg_fields: vec![
302277
Field::new("arr", list_array.data_type().clone(), false)
303278
.into(),
304-
Field::new("el", DataType::Decimal128(38, 10), false)
279+
Field::new("el", DataType::Decimal128(10, 2), false)
305280
.into(),
306281
],
307282
number_rows: NUM_ROWS,
@@ -373,6 +348,66 @@ fn create_args(list_array: ArrayRef, element: ScalarValue) -> Vec<ColumnarValue>
373348
]
374349
}
375350

351+
fn create_int64_list_array(
352+
num_rows: usize,
353+
array_size: usize,
354+
null_density: f64,
355+
) -> ArrayRef {
356+
let mut rng = StdRng::seed_from_u64(SEED);
357+
let values = (0..num_rows * array_size)
358+
.map(|_| {
359+
if rng.random::<f64>() < null_density {
360+
None
361+
} else {
362+
Some(rng.random_range(0..array_size as i64))
363+
}
364+
})
365+
.collect::<Int64Array>();
366+
let offsets = (0..=num_rows)
367+
.map(|i| (i * array_size) as i32)
368+
.collect::<Vec<i32>>();
369+
370+
Arc::new(
371+
ListArray::try_new(
372+
Arc::new(Field::new("item", DataType::Int64, true)),
373+
OffsetBuffer::new(offsets.into()),
374+
Arc::new(values),
375+
None,
376+
)
377+
.unwrap(),
378+
)
379+
}
380+
381+
fn create_f64_list_array(
382+
num_rows: usize,
383+
array_size: usize,
384+
null_density: f64,
385+
) -> ArrayRef {
386+
let mut rng = StdRng::seed_from_u64(SEED);
387+
let values = (0..num_rows * array_size)
388+
.map(|_| {
389+
if rng.random::<f64>() < null_density {
390+
None
391+
} else {
392+
Some(rng.random_range(0..array_size as i64) as f64)
393+
}
394+
})
395+
.collect::<Float64Array>();
396+
let offsets = (0..=num_rows)
397+
.map(|i| (i * array_size) as i32)
398+
.collect::<Vec<i32>>();
399+
400+
Arc::new(
401+
ListArray::try_new(
402+
Arc::new(Field::new("item", DataType::Float64, true)),
403+
OffsetBuffer::new(offsets.into()),
404+
Arc::new(values),
405+
None,
406+
)
407+
.unwrap(),
408+
)
409+
}
410+
376411
fn create_string_list_array(
377412
num_rows: usize,
378413
array_size: usize,
@@ -465,6 +500,38 @@ fn create_boolean_list_array(
465500
)
466501
}
467502

503+
fn create_decimal64_list_array(
504+
num_rows: usize,
505+
array_size: usize,
506+
null_density: f64,
507+
) -> ArrayRef {
508+
let mut rng = StdRng::seed_from_u64(SEED);
509+
let values = (0..num_rows * array_size)
510+
.map(|_| {
511+
if rng.random::<f64>() < null_density {
512+
None
513+
} else {
514+
Some(rng.random_range(0..array_size) as i128 * 100)
515+
}
516+
})
517+
.collect::<Decimal128Array>()
518+
.with_precision_and_scale(10, 2)
519+
.unwrap();
520+
let offsets = (0..=num_rows)
521+
.map(|i| (i * array_size) as i32)
522+
.collect::<Vec<i32>>();
523+
524+
Arc::new(
525+
ListArray::try_new(
526+
Arc::new(Field::new("item", DataType::Decimal128(10, 2), true)),
527+
OffsetBuffer::new(offsets.into()),
528+
Arc::new(values),
529+
None,
530+
)
531+
.unwrap(),
532+
)
533+
}
534+
468535
fn create_fixed_size_binary_list_array(
469536
num_rows: usize,
470537
array_size: usize,

0 commit comments

Comments
 (0)