|
18 | 18 | use std::sync::Arc; |
19 | 19 |
|
20 | 20 | use arrow::array::{ |
21 | | - ArrayRef, Int8Array, Int16Array, Int64Array, StringArray, StringViewArray, |
22 | | - UInt8Array, UInt16Array, |
| 21 | + ArrayRef, BooleanArray, Int8Array, Int16Array, Int64Array, StringArray, |
| 22 | + StringViewArray, UInt8Array, UInt16Array, |
23 | 23 | }; |
24 | 24 | use arrow::datatypes::{DataType, Field, Schema}; |
25 | 25 | use criterion::{Criterion, criterion_group, criterion_main}; |
@@ -91,6 +91,13 @@ fn create_i16_array(n_distinct: usize) -> Int16Array { |
91 | 91 | .collect() |
92 | 92 | } |
93 | 93 |
|
| 94 | +fn create_bool_array() -> BooleanArray { |
| 95 | + let mut rng = StdRng::seed_from_u64(42); |
| 96 | + (0..BATCH_SIZE) |
| 97 | + .map(|_| Some(rng.random_bool(0.5))) |
| 98 | + .collect() |
| 99 | +} |
| 100 | + |
94 | 101 | /// Creates a pool of `n_distinct` random strings of the given length. |
95 | 102 | fn create_string_pool(n_distinct: usize, string_length: usize) -> Vec<String> { |
96 | 103 | let mut rng = StdRng::seed_from_u64(42); |
@@ -169,7 +176,7 @@ fn approx_distinct_benchmark(c: &mut Criterion) { |
169 | 176 | } |
170 | 177 | } |
171 | 178 |
|
172 | | - // Small integer types |
| 179 | + // --- Small integer and boolean types (the "bitmap" benchmark cases) --- |
173 | 180 |
|
174 | 181 | // UInt8 |
175 | 182 | let values = Arc::new(create_u8_array(200)) as ArrayRef; |
@@ -214,6 +221,17 @@ fn approx_distinct_benchmark(c: &mut Criterion) { |
214 | 221 | .unwrap() |
215 | 222 | }) |
216 | 223 | }); |
| 224 | + |
| 225 | + // Boolean |
| 226 | + let values = Arc::new(create_bool_array()) as ArrayRef; |
| 227 | + c.bench_function("approx_distinct bool bitmap", |b| { |
| 228 | + b.iter(|| { |
| 229 | + let mut accumulator = prepare_accumulator(DataType::Boolean); |
| 230 | + accumulator |
| 231 | + .update_batch(std::slice::from_ref(&values)) |
| 232 | + .unwrap() |
| 233 | + }) |
| 234 | + }); |
217 | 235 | } |
218 | 236 |
|
219 | 237 | criterion_group!(benches, approx_distinct_benchmark); |
|
0 commit comments