Skip to content

Commit 8efed09

Browse files
committed
Revert "feat: use bitmasks for multiple column aggregation"
This reverts commit 31556f6.
1 parent 31556f6 commit 8efed09

5 files changed

Lines changed: 197 additions & 333 deletions

File tree

datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs

Lines changed: 47 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,11 @@
1818
use std::sync::Arc;
1919

2020
use crate::aggregates::group_values::multi_group_by::Nulls;
21-
use crate::aggregates::group_values::multi_group_by::{EqualToResults, GroupColumn, nulls_equal_to};
21+
use crate::aggregates::group_values::multi_group_by::{GroupColumn, nulls_equal_to};
2222
use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder;
2323
use arrow::array::{Array as _, ArrayRef, AsArray, BooleanArray, BooleanBufferBuilder};
2424
use datafusion_common::Result;
25+
use itertools::izip;
2526

2627
/// An implementation of [`GroupColumn`] for booleans
2728
///
@@ -80,34 +81,32 @@ impl<const NULLABLE: bool> GroupColumn for BooleanGroupValueBuilder<NULLABLE> {
8081
lhs_rows: &[usize],
8182
array: &ArrayRef,
8283
rhs_rows: &[usize],
83-
equal_to_results: &mut EqualToResults,
84+
equal_to_results: &mut [bool],
8485
) {
8586
let array = array.as_boolean();
8687

87-
for (idx, (&lhs_row, &rhs_row)) in
88-
lhs_rows.iter().zip(rhs_rows.iter()).enumerate()
89-
{
90-
if !equal_to_results.get_bit(idx) {
88+
let iter = izip!(
89+
lhs_rows.iter(),
90+
rhs_rows.iter(),
91+
equal_to_results.iter_mut(),
92+
);
93+
94+
for (&lhs_row, &rhs_row, equal_to_result) in iter {
95+
// Has found not equal to in previous column, don't need to check
96+
if !*equal_to_result {
9197
continue;
9298
}
9399

94-
let chunk_idx = idx / 64;
95-
let bit_pos = idx % 64;
96-
97100
if NULLABLE {
98101
let exist_null = self.nulls.is_null(lhs_row);
99102
let input_null = array.is_null(rhs_row);
100103
if let Some(result) = nulls_equal_to(exist_null, input_null) {
101-
if !result {
102-
equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos));
103-
}
104+
*equal_to_result = result;
104105
continue;
105106
}
106107
}
107108

108-
if self.buffer.get_bit(lhs_row) != array.value(rhs_row) {
109-
equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos));
110-
}
109+
*equal_to_result = self.buffer.get_bit(lhs_row) == array.value(rhs_row);
111110
}
112111
}
113112

@@ -196,7 +195,6 @@ impl<const NULLABLE: bool> GroupColumn for BooleanGroupValueBuilder<NULLABLE> {
196195

197196
#[cfg(test)]
198197
mod tests {
199-
use crate::aggregates::group_values::multi_group_by::EqualToResults;
200198
use arrow::array::NullBufferBuilder;
201199

202200
use super::*;
@@ -215,10 +213,10 @@ mod tests {
215213
lhs_rows: &[usize],
216214
input_array: &ArrayRef,
217215
rhs_rows: &[usize],
218-
equal_to_results: &mut EqualToResults| {
216+
equal_to_results: &mut Vec<bool>| {
219217
let iter = lhs_rows.iter().zip(rhs_rows.iter());
220218
for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() {
221-
equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row));
219+
equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row);
222220
}
223221
};
224222

@@ -239,7 +237,7 @@ mod tests {
239237
lhs_rows: &[usize],
240238
input_array: &ArrayRef,
241239
rhs_rows: &[usize],
242-
equal_to_results: &mut EqualToResults| {
240+
equal_to_results: &mut Vec<bool>| {
243241
builder.vectorized_equal_to(
244242
lhs_rows,
245243
input_array,
@@ -259,7 +257,7 @@ mod tests {
259257
&[usize],
260258
&ArrayRef,
261259
&[usize],
262-
&mut EqualToResults,
260+
&mut Vec<bool>,
263261
),
264262
{
265263
// Will cover such cases:
@@ -304,23 +302,21 @@ mod tests {
304302
let input_array = Arc::new(BooleanArray::new(values, nulls.finish())) as ArrayRef;
305303

306304
// Check
307-
let mut equal_to_results = EqualToResults::new();
308-
equal_to_results.reset(builder.len());
305+
let mut equal_to_results = vec![true; builder.len()];
309306
equal_to(
310307
&builder,
311308
&[0, 1, 2, 3, 4, 5],
312309
&input_array,
313310
&[0, 1, 2, 3, 4, 5],
314311
&mut equal_to_results,
315312
);
316-
let results = equal_to_results.to_vec();
317-
318-
assert!(!results[0]);
319-
assert!(results[1]);
320-
assert!(results[2]);
321-
assert!(!results[3]);
322-
assert!(!results[4]);
323-
assert!(results[5]);
313+
314+
assert!(!equal_to_results[0]);
315+
assert!(equal_to_results[1]);
316+
assert!(equal_to_results[2]);
317+
assert!(!equal_to_results[3]);
318+
assert!(!equal_to_results[4]);
319+
assert!(equal_to_results[5]);
324320
}
325321

326322
#[test]
@@ -337,10 +333,10 @@ mod tests {
337333
lhs_rows: &[usize],
338334
input_array: &ArrayRef,
339335
rhs_rows: &[usize],
340-
equal_to_results: &mut EqualToResults| {
336+
equal_to_results: &mut Vec<bool>| {
341337
let iter = lhs_rows.iter().zip(rhs_rows.iter());
342338
for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() {
343-
equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row));
339+
equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row);
344340
}
345341
};
346342

@@ -361,7 +357,7 @@ mod tests {
361357
lhs_rows: &[usize],
362358
input_array: &ArrayRef,
363359
rhs_rows: &[usize],
364-
equal_to_results: &mut EqualToResults| {
360+
equal_to_results: &mut Vec<bool>| {
365361
builder.vectorized_equal_to(
366362
lhs_rows,
367363
input_array,
@@ -381,7 +377,7 @@ mod tests {
381377
&[usize],
382378
&ArrayRef,
383379
&[usize],
384-
&mut EqualToResults,
380+
&mut Vec<bool>,
385381
),
386382
{
387383
// Will cover such cases:
@@ -407,21 +403,19 @@ mod tests {
407403
])) as ArrayRef;
408404

409405
// Check
410-
let mut equal_to_results = EqualToResults::new();
411-
equal_to_results.reset(builder.len());
406+
let mut equal_to_results = vec![true; builder.len()];
412407
equal_to(
413408
&builder,
414409
&[0, 1, 2, 3],
415410
&input_array,
416411
&[0, 1, 2, 3],
417412
&mut equal_to_results,
418413
);
419-
let results = equal_to_results.to_vec();
420414

421-
assert!(results[0]);
422-
assert!(!results[1]);
423-
assert!(!results[2]);
424-
assert!(results[3]);
415+
assert!(equal_to_results[0]);
416+
assert!(!equal_to_results[1]);
417+
assert!(!equal_to_results[2]);
418+
assert!(equal_to_results[3]);
425419
}
426420

427421
#[test]
@@ -438,21 +432,19 @@ mod tests {
438432
.vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4])
439433
.unwrap();
440434

441-
let mut equal_to_results = EqualToResults::new();
442-
equal_to_results.reset(all_nulls_input_array.len());
435+
let mut equal_to_results = vec![true; all_nulls_input_array.len()];
443436
builder.vectorized_equal_to(
444437
&[0, 1, 2, 3, 4],
445438
&all_nulls_input_array,
446439
&[0, 1, 2, 3, 4],
447440
&mut equal_to_results,
448441
);
449-
let results = equal_to_results.to_vec();
450442

451-
assert!(results[0]);
452-
assert!(results[1]);
453-
assert!(results[2]);
454-
assert!(results[3]);
455-
assert!(results[4]);
443+
assert!(equal_to_results[0]);
444+
assert!(equal_to_results[1]);
445+
assert!(equal_to_results[2]);
446+
assert!(equal_to_results[3]);
447+
assert!(equal_to_results[4]);
456448

457449
// All not nulls input array
458450
let all_not_nulls_input_array = Arc::new(BooleanArray::from(vec![
@@ -466,20 +458,18 @@ mod tests {
466458
.vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4])
467459
.unwrap();
468460

469-
let mut equal_to_results = EqualToResults::new();
470-
equal_to_results.reset(all_not_nulls_input_array.len());
461+
let mut equal_to_results = vec![true; all_not_nulls_input_array.len()];
471462
builder.vectorized_equal_to(
472463
&[5, 6, 7, 8, 9],
473464
&all_not_nulls_input_array,
474465
&[0, 1, 2, 3, 4],
475466
&mut equal_to_results,
476467
);
477-
let results = equal_to_results.to_vec();
478468

479-
assert!(results[0]);
480-
assert!(results[1]);
481-
assert!(results[2]);
482-
assert!(results[3]);
483-
assert!(results[4]);
469+
assert!(equal_to_results[0]);
470+
assert!(equal_to_results[1]);
471+
assert!(equal_to_results[2]);
472+
assert!(equal_to_results[3]);
473+
assert!(equal_to_results[4]);
484474
}
485475
}

0 commit comments

Comments
 (0)