|
18 | 18 | use std::sync::Arc; |
19 | 19 |
|
20 | 20 | use arrow::array::{ |
21 | | - ArrayRef, Int8Array, Int16Array, Int64Array, UInt8Array, UInt16Array, |
| 21 | + Array, ArrayRef, Int8Array, Int16Array, Int64Array, UInt8Array, UInt16Array, |
22 | 22 | }; |
23 | 23 | use arrow::datatypes::{DataType, Field, Schema}; |
24 | 24 | use criterion::{Criterion, criterion_group, criterion_main}; |
@@ -239,19 +239,28 @@ fn count_distinct_groups_benchmark(c: &mut Criterion) { |
239 | 239 | }) |
240 | 240 | }); |
241 | 241 | } else { |
| 242 | + let arr = values.as_any().downcast_ref::<Int64Array>().unwrap(); |
| 243 | + let mut group_rows: Vec<Vec<i64>> = vec![Vec::new(); num_groups]; |
| 244 | + for (idx, &group_idx) in group_indices.iter().enumerate() { |
| 245 | + if arr.is_valid(idx) { |
| 246 | + group_rows[group_idx].push(arr.value(idx)); |
| 247 | + } |
| 248 | + } |
| 249 | + let group_arrays: Vec<ArrayRef> = group_rows |
| 250 | + .iter() |
| 251 | + .map(|rows| Arc::new(Int64Array::from(rows.clone())) as ArrayRef) |
| 252 | + .collect(); |
| 253 | + |
242 | 254 | c.bench_function(&format!("count_distinct_groups {name}"), |b| { |
243 | 255 | b.iter(|| { |
244 | 256 | let mut accumulators: Vec<_> = (0..num_groups) |
245 | 257 | .map(|_| prepare_accumulator(DataType::Int64)) |
246 | 258 | .collect(); |
247 | 259 |
|
248 | | - let arr = values.as_any().downcast_ref::<Int64Array>().unwrap(); |
249 | | - for (idx, group_idx) in group_indices.iter().enumerate() { |
250 | | - if let Some(val) = arr.value(idx).into() { |
251 | | - let single_val = |
252 | | - Arc::new(Int64Array::from(vec![Some(val)])) as ArrayRef; |
253 | | - accumulators[*group_idx] |
254 | | - .update_batch(std::slice::from_ref(&single_val)) |
| 260 | + for (group_idx, batch) in group_arrays.iter().enumerate() { |
| 261 | + if !batch.is_empty() { |
| 262 | + accumulators[group_idx] |
| 263 | + .update_batch(std::slice::from_ref(batch)) |
255 | 264 | .unwrap(); |
256 | 265 | } |
257 | 266 | } |
|
0 commit comments