From 31556f66e8d00bf2010cd642b194ebc18cc4f0de Mon Sep 17 00:00:00 2001 From: Huy Mac Date: Tue, 28 Apr 2026 17:08:19 +0900 Subject: [PATCH 1/8] feat: use bitmasks for multiple column aggregation --- .../group_values/multi_group_by/boolean.rs | 104 +++++++------ .../group_values/multi_group_by/bytes.rs | 81 +++++----- .../group_values/multi_group_by/bytes_view.rs | 94 +++++------ .../group_values/multi_group_by/mod.rs | 105 +++++++++++-- .../group_values/multi_group_by/primitive.rs | 146 +++++++++++------- 5 files changed, 333 insertions(+), 197 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs index 91a39f28f33c1..2cbe4f6365b85 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs @@ -18,11 +18,10 @@ use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::Nulls; -use crate::aggregates::group_values::multi_group_by::{GroupColumn, nulls_equal_to}; +use crate::aggregates::group_values::multi_group_by::{EqualToResults, GroupColumn, nulls_equal_to}; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{Array as _, ArrayRef, AsArray, BooleanArray, BooleanBufferBuilder}; use datafusion_common::Result; -use itertools::izip; /// An implementation of [`GroupColumn`] for booleans /// @@ -81,32 +80,34 @@ impl GroupColumn for BooleanGroupValueBuilder { lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { let array = array.as_boolean(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to in previous column, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } + let chunk_idx = idx / 64; + let bit_pos = idx % 64; + if NULLABLE { let exist_null = self.nulls.is_null(lhs_row); let input_null = array.is_null(rhs_row); if let Some(result) = nulls_equal_to(exist_null, input_null) { - *equal_to_result = result; + if !result { + equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); + } continue; } } - *equal_to_result = self.buffer.get_bit(lhs_row) == array.value(rhs_row); + if self.buffer.get_bit(lhs_row) != array.value(rhs_row) { + equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); + } } } @@ -195,6 +196,7 @@ impl GroupColumn for BooleanGroupValueBuilder { #[cfg(test)] mod tests { + use crate::aggregates::group_values::multi_group_by::EqualToResults; use arrow::array::NullBufferBuilder; use super::*; @@ -213,10 +215,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -237,7 +239,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -257,7 +259,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut EqualToResults, ), { // Will cover such cases: @@ -302,7 +304,8 @@ mod tests { let input_array = Arc::new(BooleanArray::new(values, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(builder.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5], @@ -310,13 +313,14 @@ mod tests { &[0, 1, 2, 3, 4, 5], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(!equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(equal_to_results[5]); + let results = equal_to_results.to_vec(); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(!results[3]); + assert!(!results[4]); + assert!(results[5]); } #[test] @@ -333,10 +337,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -357,7 +361,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -377,7 +381,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut EqualToResults, ), { // Will cover such cases: @@ -403,7 +407,8 @@ mod tests { ])) as ArrayRef; // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(builder.len()); equal_to( &builder, &[0, 1, 2, 3], @@ -411,11 +416,12 @@ mod tests { &[0, 1, 2, 3], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(!equal_to_results[1]); - assert!(!equal_to_results[2]); - assert!(equal_to_results[3]); + assert!(results[0]); + assert!(!results[1]); + assert!(!results[2]); + assert!(results[3]); } #[test] @@ -432,19 +438,21 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(BooleanArray::from(vec![ @@ -458,18 +466,20 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index cd173741b6464..e716ebf8eb713 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -16,7 +16,7 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - GroupColumn, Nulls, nulls_equal_to, + EqualToResults, GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{ @@ -28,7 +28,6 @@ use arrow::datatypes::{ByteArrayType, DataType, GenericBinaryType}; use datafusion_common::utils::proxy::VecAllocExt; use datafusion_common::{Result, exec_datafusion_err}; use datafusion_physical_expr_common::binary_map::{INITIAL_BUFFER_CAPACITY, OutputType}; -use itertools::izip; use std::mem::size_of; use std::sync::Arc; use std::vec; @@ -106,25 +105,24 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) where B: ByteArrayType, { let array = array.as_bytes::(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } - *equal_to_result = self.do_equal_to_inner(lhs_row, array, rhs_row); + if !self.do_equal_to_inner(lhs_row, array, rhs_row) { + let chunk_idx = idx / 64; + let bit_pos = idx % 64; + equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); + } } } @@ -275,7 +273,7 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { // Sanity array type match self.output_type { @@ -432,6 +430,7 @@ where mod tests { use std::sync::Arc; + use crate::aggregates::group_values::multi_group_by::EqualToResults; use crate::aggregates::group_values::multi_group_by::bytes::ByteGroupValueBuilder; use arrow::array::{ArrayRef, NullBufferBuilder, StringArray}; use datafusion_common::DataFusionError; @@ -520,10 +519,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -544,7 +543,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -575,19 +574,21 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(StringArray::from(vec![ @@ -601,19 +602,21 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } fn test_byte_equal_to_internal(mut append: A, mut equal_to: E) @@ -624,7 +627,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut EqualToResults, ), { // Will cover such cases: @@ -670,7 +673,8 @@ mod tests { Arc::new(StringArray::new(offsets, buffer, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(builder.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5], @@ -678,12 +682,13 @@ mod tests { &[0, 1, 2, 3, 4, 5], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(!equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(equal_to_results[5]); + let results = equal_to_results.to_vec(); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(!results[3]); + assert!(!results[4]); + assert!(results[5]); } } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index a91dd3115d879..bdecc2bfdd177 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -16,14 +16,13 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - GroupColumn, Nulls, nulls_equal_to, + EqualToResults, GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{Array, ArrayRef, AsArray, ByteView, GenericByteViewArray, make_view}; use arrow::buffer::{Buffer, ScalarBuffer}; use arrow::datatypes::ByteViewType; use datafusion_common::Result; -use itertools::izip; use std::marker::PhantomData; use std::mem::{replace, size_of}; use std::sync::Arc; @@ -126,22 +125,20 @@ impl ByteViewGroupValueBuilder { lhs_rows: &[usize], array: &GenericByteViewArray, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } - *equal_to_result = - self.do_equal_to_inner::(lhs_row, array, rhs_row); + if !self.do_equal_to_inner::(lhs_row, array, rhs_row) { + let chunk_idx = idx / 64; + let bit_pos = idx % 64; + equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); + } } } @@ -513,7 +510,7 @@ impl GroupColumn for ByteViewGroupValueBuilder { group_indices: &[usize], array: &ArrayRef, rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { let has_nulls = array.null_count() != 0; let array = array.as_byte_view::(); @@ -583,6 +580,7 @@ impl GroupColumn for ByteViewGroupValueBuilder { mod tests { use std::sync::Arc; + use crate::aggregates::group_values::multi_group_by::EqualToResults; use crate::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder; use arrow::array::{ArrayRef, AsArray, NullBufferBuilder, StringViewArray}; use arrow::datatypes::StringViewType; @@ -627,10 +625,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -651,7 +649,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -683,19 +681,21 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(StringViewArray::from(vec![ @@ -709,19 +709,21 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } fn test_byte_view_equal_to_internal(mut append: A, mut equal_to: E) @@ -732,7 +734,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut EqualToResults, ), { // Will cover such cases: @@ -811,7 +813,8 @@ mod tests { Arc::new(StringViewArray::new(views, buffer, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = vec![true; input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(input_array.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 8], @@ -819,19 +822,20 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(!equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(!equal_to_results[5]); - assert!(equal_to_results[6]); - assert!(!equal_to_results[7]); - assert!(!equal_to_results[8]); - assert!(equal_to_results[9]); - assert!(!equal_to_results[10]); - assert!(equal_to_results[11]); + let results = equal_to_results.to_vec(); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(!results[3]); + assert!(!results[4]); + assert!(!results[5]); + assert!(results[6]); + assert!(!results[7]); + assert!(!results[8]); + assert!(results[9]); + assert!(!results[10]); + assert!(results[11]); } #[test] diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs index cc4576eabddbd..0a41d7207d219 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs @@ -82,7 +82,7 @@ pub trait GroupColumn: Send + Sync { lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ); /// The vectorized version `append_val` @@ -235,8 +235,8 @@ struct VectorizedOperationBuffers { /// The `vectorized_equal_to` group indices buffer equal_to_group_indices: Vec, - /// The `vectorized_equal_to` result buffer - equal_to_results: Vec, + /// The `vectorized_equal_to` result buffer (bitmask) + equal_to_results: EqualToResults, /// The buffer for storing row indices found not equal to /// exist groups in `group_values` in `vectorized_equal_to`. @@ -249,7 +249,6 @@ impl VectorizedOperationBuffers { self.append_row_indices.clear(); self.equal_to_row_indices.clear(); self.equal_to_group_indices.clear(); - self.equal_to_results.clear(); self.remaining_row_indices.clear(); } } @@ -615,15 +614,13 @@ impl GroupValuesColumn { // 1. Perform `vectorized_equal_to` for `rows` in `vectorized_equal_to_group_indices` // and `group_indices` in `vectorized_equal_to_group_indices` + let n = self + .vectorized_operation_buffers + .equal_to_group_indices + .len(); let mut equal_to_results = mem::take(&mut self.vectorized_operation_buffers.equal_to_results); - equal_to_results.clear(); - equal_to_results.resize( - self.vectorized_operation_buffers - .equal_to_group_indices - .len(), - true, - ); + equal_to_results.reset(n); for (col_idx, group_col) in self.group_values.iter().enumerate() { group_col.vectorized_equal_to( @@ -643,7 +640,7 @@ impl GroupValuesColumn { .iter() .enumerate() { - let equal_to_result = equal_to_results[idx]; + let equal_to_result = equal_to_results.get_bit(idx); // Equal to case, set the `group_indices` to `rows` in `groups` if equal_to_result { @@ -1238,6 +1235,90 @@ fn supported_type(data_type: &DataType) -> bool { ) } +/// Bitmask tracking which rows are still potentially equal across column comparisons: +/// - Bit `i` = 1 means row `i` has not yet been determined unequal +/// - Bit `i` = 0 means row `i` is already known to be unequal +pub struct EqualToResults { + chunks: Vec, + len: usize, +} + +impl EqualToResults { + pub fn new() -> Self { + Self { + chunks: Vec::new(), + len: 0, + } + } + + /// Reset to all-true (`1` bits) for `n` rows + pub fn reset(&mut self, n: usize) { + self.len = n; + let n_full = n / 64; + let remainder = n % 64; + let total = n_full + usize::from(remainder > 0); + + self.chunks.resize(total, 0); + self.chunks[..n_full].fill(!0u64); + if remainder > 0 { + self.chunks[n_full] = (1u64 << remainder) - 1; + } + } + + #[inline] + pub fn get_bit(&self, i: usize) -> bool { + debug_assert!(i < self.len); + (self.chunks[i / 64] >> (i % 64)) & 1 == 1 + } + + #[inline] + pub fn get_chunk(&self, chunk_idx: usize) -> u64 { + self.chunks[chunk_idx] + } + + #[inline] + pub fn and_chunk(&mut self, chunk_idx: usize, mask: u64) { + self.chunks[chunk_idx] &= mask; + } + + #[inline] + pub fn n_chunks(&self) -> usize { + self.chunks.len() + } + + #[inline] + pub fn len(&self) -> usize { + self.len + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + #[inline] + pub fn set_bit(&mut self, i: usize, value: bool) { + debug_assert!(i < self.len); + if value { + self.chunks[i / 64] |= 1u64 << (i % 64); + } else { + self.chunks[i / 64] &= !(1u64 << (i % 64)); + } + } + + /// Convert to `Vec` for testing. + #[cfg(test)] + pub fn to_vec(&self) -> Vec { + (0..self.len).map(|i| self.get_bit(i)).collect() + } +} + +impl Default for EqualToResults { + fn default() -> Self { + Self::new() + } +} + ///Shows how many `null`s there are in an array enum Nulls { /// All array items are `null`s diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index 31126348b3fd4..691c82d5af68d 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -16,7 +16,7 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - GroupColumn, Nulls, nulls_equal_to, + EqualToResults, GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::ArrowNativeTypeOp; @@ -25,7 +25,6 @@ use arrow::buffer::ScalarBuffer; use arrow::datatypes::DataType; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; -use itertools::izip; use std::iter; use std::sync::Arc; @@ -62,42 +61,67 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { assert!( !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()), "called with nullable input" ); let array_values = array.as_primitive::().values(); + let n = lhs_rows.len(); + let n_full = n / 64; - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); + // Process 64 rows per chunk via bitmask AND + for chunk_idx in 0..n_full { + // Skip entirely if all bits already false + if equal_to_results.get_chunk(chunk_idx) == 0 { + continue; + } - for (&lhs_row, &rhs_row, equal_to_result) in iter { - let result = { - // Getting unchecked not only for bound checks but because the bound checks are - // what prevents auto-vectorization + let base = chunk_idx * 64; + let mut eq_mask: u64 = 0; + + // Build a 64-bit mask: bit k=1 if rows[base+k] are equal + for k in 0..64usize { + let i = base + k; let left = if cfg!(debug_assertions) { - self.group_values[lhs_row] + self.group_values[lhs_rows[i]] } else { // SAFETY: indices are guaranteed to be in bounds - unsafe { *self.group_values.get_unchecked(lhs_row) } + unsafe { *self.group_values.get_unchecked(*lhs_rows.get_unchecked(i)) } }; let right = if cfg!(debug_assertions) { - array_values[rhs_row] + array_values[rhs_rows[i]] } else { // SAFETY: indices are guaranteed to be in bounds - unsafe { *array_values.get_unchecked(rhs_row) } + unsafe { *array_values.get_unchecked(*rhs_rows.get_unchecked(i)) } }; + eq_mask |= (left.is_eq(right) as u64) << k; + } + equal_to_results.and_chunk(chunk_idx, eq_mask); + } - // Always evaluate, to allow for auto-vectorization - left.is_eq(right) - }; - - *equal_to_result = result && *equal_to_result; + // Handle the remaining rows (< 64) + let remainder_start = n_full * 64; + if remainder_start < n { + let chunk_idx = n_full; + if equal_to_results.get_chunk(chunk_idx) != 0 { + let mut eq_mask: u64 = 0; + for (k, i) in (remainder_start..n).enumerate() { + let left = if cfg!(debug_assertions) { + self.group_values[lhs_rows[i]] + } else { + unsafe { *self.group_values.get_unchecked(*lhs_rows.get_unchecked(i)) } + }; + let right = if cfg!(debug_assertions) { + array_values[rhs_rows[i]] + } else { + unsafe { *array_values.get_unchecked(*rhs_rows.get_unchecked(i)) } + }; + eq_mask |= (left.is_eq(right) as u64) << k; + } + equal_to_results.and_chunk(chunk_idx, eq_mask); + } } } @@ -106,33 +130,36 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { assert!(NULLABLE, "called with non-nullable input"); let array = array.as_primitive::(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { // Has found not equal to in previous column, don't need to check - if !*equal_to_result { + if !equal_to_results.get_bit(idx) { continue; } + let chunk_idx = idx / 64; + let bit_pos = idx % 64; + // Perf: skip null check (by short circuit) if input is not nullable let exist_null = self.nulls.is_null(lhs_row); let input_null = array.is_null(rhs_row); if let Some(result) = nulls_equal_to(exist_null, input_null) { - *equal_to_result = result; + if !result { + equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); + } continue; } // Otherwise, we need to check their values - *equal_to_result = self.group_values[lhs_row].is_eq(array.value(rhs_row)); + if !self.group_values[lhs_row].is_eq(array.value(rhs_row)) { + equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); + } } } } @@ -176,7 +203,7 @@ impl GroupColumn lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { if !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()) { self.vectorized_equal_to_non_nullable( @@ -281,6 +308,7 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder; + use crate::aggregates::group_values::multi_group_by::EqualToResults; use arrow::array::{ArrayRef, Float32Array, Int64Array, NullBufferBuilder}; use arrow::datatypes::{DataType, Float32Type, Int64Type}; @@ -300,10 +328,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -324,7 +352,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -344,7 +372,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut EqualToResults, ), { // Will cover such cases: @@ -393,7 +421,8 @@ mod tests { let input_array = Arc::new(Float32Array::new(values, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(builder.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5, 6], @@ -401,6 +430,7 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6], &mut equal_to_results, ); + let equal_to_results = equal_to_results.to_vec(); assert!(!equal_to_results[0]); assert!(equal_to_results[1]); @@ -425,10 +455,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -449,7 +479,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -469,7 +499,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut EqualToResults, ), { // Will cover such cases: @@ -487,7 +517,8 @@ mod tests { let input_array = Arc::new(Int64Array::from(vec![Some(0), Some(2)])) as ArrayRef; // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(builder.len()); equal_to( &builder, &[0, 1], @@ -495,6 +526,7 @@ mod tests { &[0, 1], &mut equal_to_results, ); + let equal_to_results = equal_to_results.to_vec(); assert!(equal_to_results[0]); assert!(!equal_to_results[1]); @@ -520,19 +552,21 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(Int64Array::from(vec![ @@ -546,18 +580,20 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } } From 8efed098a86c240b1c79ede9c5599950272eaa19 Mon Sep 17 00:00:00 2001 From: Huy Mac Date: Tue, 28 Apr 2026 17:50:24 +0900 Subject: [PATCH 2/8] Revert "feat: use bitmasks for multiple column aggregation" This reverts commit 31556f66e8d00bf2010cd642b194ebc18cc4f0de. --- .../group_values/multi_group_by/boolean.rs | 104 ++++++------- .../group_values/multi_group_by/bytes.rs | 81 +++++----- .../group_values/multi_group_by/bytes_view.rs | 94 ++++++----- .../group_values/multi_group_by/mod.rs | 105 ++----------- .../group_values/multi_group_by/primitive.rs | 146 +++++++----------- 5 files changed, 197 insertions(+), 333 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs index 2cbe4f6365b85..91a39f28f33c1 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs @@ -18,10 +18,11 @@ use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::Nulls; -use crate::aggregates::group_values::multi_group_by::{EqualToResults, GroupColumn, nulls_equal_to}; +use crate::aggregates::group_values::multi_group_by::{GroupColumn, nulls_equal_to}; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{Array as _, ArrayRef, AsArray, BooleanArray, BooleanBufferBuilder}; use datafusion_common::Result; +use itertools::izip; /// An implementation of [`GroupColumn`] for booleans /// @@ -80,34 +81,32 @@ impl GroupColumn for BooleanGroupValueBuilder { lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { let array = array.as_boolean(); - for (idx, (&lhs_row, &rhs_row)) in - lhs_rows.iter().zip(rhs_rows.iter()).enumerate() - { - if !equal_to_results.get_bit(idx) { + let iter = izip!( + lhs_rows.iter(), + rhs_rows.iter(), + equal_to_results.iter_mut(), + ); + + for (&lhs_row, &rhs_row, equal_to_result) in iter { + // Has found not equal to in previous column, don't need to check + if !*equal_to_result { continue; } - let chunk_idx = idx / 64; - let bit_pos = idx % 64; - if NULLABLE { let exist_null = self.nulls.is_null(lhs_row); let input_null = array.is_null(rhs_row); if let Some(result) = nulls_equal_to(exist_null, input_null) { - if !result { - equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); - } + *equal_to_result = result; continue; } } - if self.buffer.get_bit(lhs_row) != array.value(rhs_row) { - equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); - } + *equal_to_result = self.buffer.get_bit(lhs_row) == array.value(rhs_row); } } @@ -196,7 +195,6 @@ impl GroupColumn for BooleanGroupValueBuilder { #[cfg(test)] mod tests { - use crate::aggregates::group_values::multi_group_by::EqualToResults; use arrow::array::NullBufferBuilder; use super::*; @@ -215,10 +213,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); } }; @@ -239,7 +237,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -259,7 +257,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut EqualToResults, + &mut Vec, ), { // Will cover such cases: @@ -304,8 +302,7 @@ mod tests { let input_array = Arc::new(BooleanArray::new(values, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(builder.len()); + let mut equal_to_results = vec![true; builder.len()]; equal_to( &builder, &[0, 1, 2, 3, 4, 5], @@ -313,14 +310,13 @@ mod tests { &[0, 1, 2, 3, 4, 5], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - - assert!(!results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(!results[3]); - assert!(!results[4]); - assert!(results[5]); + + assert!(!equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(!equal_to_results[3]); + assert!(!equal_to_results[4]); + assert!(equal_to_results[5]); } #[test] @@ -337,10 +333,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); } }; @@ -361,7 +357,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -381,7 +377,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut EqualToResults, + &mut Vec, ), { // Will cover such cases: @@ -407,8 +403,7 @@ mod tests { ])) as ArrayRef; // Check - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(builder.len()); + let mut equal_to_results = vec![true; builder.len()]; equal_to( &builder, &[0, 1, 2, 3], @@ -416,12 +411,11 @@ mod tests { &[0, 1, 2, 3], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(!results[1]); - assert!(!results[2]); - assert!(results[3]); + assert!(equal_to_results[0]); + assert!(!equal_to_results[1]); + assert!(!equal_to_results[2]); + assert!(equal_to_results[3]); } #[test] @@ -438,21 +432,19 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_nulls_input_array.len()]; builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(BooleanArray::from(vec![ @@ -466,20 +458,18 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_not_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); } } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index e716ebf8eb713..cd173741b6464 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -16,7 +16,7 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - EqualToResults, GroupColumn, Nulls, nulls_equal_to, + GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{ @@ -28,6 +28,7 @@ use arrow::datatypes::{ByteArrayType, DataType, GenericBinaryType}; use datafusion_common::utils::proxy::VecAllocExt; use datafusion_common::{Result, exec_datafusion_err}; use datafusion_physical_expr_common::binary_map::{INITIAL_BUFFER_CAPACITY, OutputType}; +use itertools::izip; use std::mem::size_of; use std::sync::Arc; use std::vec; @@ -105,24 +106,25 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) where B: ByteArrayType, { let array = array.as_bytes::(); - for (idx, (&lhs_row, &rhs_row)) in - lhs_rows.iter().zip(rhs_rows.iter()).enumerate() - { - if !equal_to_results.get_bit(idx) { + let iter = izip!( + lhs_rows.iter(), + rhs_rows.iter(), + equal_to_results.iter_mut(), + ); + + for (&lhs_row, &rhs_row, equal_to_result) in iter { + // Has found not equal to, don't need to check + if !*equal_to_result { continue; } - if !self.do_equal_to_inner(lhs_row, array, rhs_row) { - let chunk_idx = idx / 64; - let bit_pos = idx % 64; - equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); - } + *equal_to_result = self.do_equal_to_inner(lhs_row, array, rhs_row); } } @@ -273,7 +275,7 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { // Sanity array type match self.output_type { @@ -430,7 +432,6 @@ where mod tests { use std::sync::Arc; - use crate::aggregates::group_values::multi_group_by::EqualToResults; use crate::aggregates::group_values::multi_group_by::bytes::ByteGroupValueBuilder; use arrow::array::{ArrayRef, NullBufferBuilder, StringArray}; use datafusion_common::DataFusionError; @@ -519,10 +520,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); } }; @@ -543,7 +544,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -574,21 +575,19 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_nulls_input_array.len()]; builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(StringArray::from(vec![ @@ -602,21 +601,19 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_not_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); } fn test_byte_equal_to_internal(mut append: A, mut equal_to: E) @@ -627,7 +624,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut EqualToResults, + &mut Vec, ), { // Will cover such cases: @@ -673,8 +670,7 @@ mod tests { Arc::new(StringArray::new(offsets, buffer, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(builder.len()); + let mut equal_to_results = vec![true; builder.len()]; equal_to( &builder, &[0, 1, 2, 3, 4, 5], @@ -682,13 +678,12 @@ mod tests { &[0, 1, 2, 3, 4, 5], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - - assert!(!results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(!results[3]); - assert!(!results[4]); - assert!(results[5]); + + assert!(!equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(!equal_to_results[3]); + assert!(!equal_to_results[4]); + assert!(equal_to_results[5]); } } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index bdecc2bfdd177..a91dd3115d879 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -16,13 +16,14 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - EqualToResults, GroupColumn, Nulls, nulls_equal_to, + GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{Array, ArrayRef, AsArray, ByteView, GenericByteViewArray, make_view}; use arrow::buffer::{Buffer, ScalarBuffer}; use arrow::datatypes::ByteViewType; use datafusion_common::Result; +use itertools::izip; use std::marker::PhantomData; use std::mem::{replace, size_of}; use std::sync::Arc; @@ -125,20 +126,22 @@ impl ByteViewGroupValueBuilder { lhs_rows: &[usize], array: &GenericByteViewArray, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { - for (idx, (&lhs_row, &rhs_row)) in - lhs_rows.iter().zip(rhs_rows.iter()).enumerate() - { - if !equal_to_results.get_bit(idx) { + let iter = izip!( + lhs_rows.iter(), + rhs_rows.iter(), + equal_to_results.iter_mut(), + ); + + for (&lhs_row, &rhs_row, equal_to_result) in iter { + // Has found not equal to, don't need to check + if !*equal_to_result { continue; } - if !self.do_equal_to_inner::(lhs_row, array, rhs_row) { - let chunk_idx = idx / 64; - let bit_pos = idx % 64; - equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); - } + *equal_to_result = + self.do_equal_to_inner::(lhs_row, array, rhs_row); } } @@ -510,7 +513,7 @@ impl GroupColumn for ByteViewGroupValueBuilder { group_indices: &[usize], array: &ArrayRef, rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { let has_nulls = array.null_count() != 0; let array = array.as_byte_view::(); @@ -580,7 +583,6 @@ impl GroupColumn for ByteViewGroupValueBuilder { mod tests { use std::sync::Arc; - use crate::aggregates::group_values::multi_group_by::EqualToResults; use crate::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder; use arrow::array::{ArrayRef, AsArray, NullBufferBuilder, StringViewArray}; use arrow::datatypes::StringViewType; @@ -625,10 +627,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); } }; @@ -649,7 +651,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -681,21 +683,19 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_nulls_input_array.len()]; builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(StringViewArray::from(vec![ @@ -709,21 +709,19 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_not_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); } fn test_byte_view_equal_to_internal(mut append: A, mut equal_to: E) @@ -734,7 +732,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut EqualToResults, + &mut Vec, ), { // Will cover such cases: @@ -813,8 +811,7 @@ mod tests { Arc::new(StringViewArray::new(views, buffer, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(input_array.len()); + let mut equal_to_results = vec![true; input_array.len()]; equal_to( &builder, &[0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 8], @@ -822,20 +819,19 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - - assert!(!results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(!results[3]); - assert!(!results[4]); - assert!(!results[5]); - assert!(results[6]); - assert!(!results[7]); - assert!(!results[8]); - assert!(results[9]); - assert!(!results[10]); - assert!(results[11]); + + assert!(!equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(!equal_to_results[3]); + assert!(!equal_to_results[4]); + assert!(!equal_to_results[5]); + assert!(equal_to_results[6]); + assert!(!equal_to_results[7]); + assert!(!equal_to_results[8]); + assert!(equal_to_results[9]); + assert!(!equal_to_results[10]); + assert!(equal_to_results[11]); } #[test] diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs index 0a41d7207d219..cc4576eabddbd 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs @@ -82,7 +82,7 @@ pub trait GroupColumn: Send + Sync { lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ); /// The vectorized version `append_val` @@ -235,8 +235,8 @@ struct VectorizedOperationBuffers { /// The `vectorized_equal_to` group indices buffer equal_to_group_indices: Vec, - /// The `vectorized_equal_to` result buffer (bitmask) - equal_to_results: EqualToResults, + /// The `vectorized_equal_to` result buffer + equal_to_results: Vec, /// The buffer for storing row indices found not equal to /// exist groups in `group_values` in `vectorized_equal_to`. @@ -249,6 +249,7 @@ impl VectorizedOperationBuffers { self.append_row_indices.clear(); self.equal_to_row_indices.clear(); self.equal_to_group_indices.clear(); + self.equal_to_results.clear(); self.remaining_row_indices.clear(); } } @@ -614,13 +615,15 @@ impl GroupValuesColumn { // 1. Perform `vectorized_equal_to` for `rows` in `vectorized_equal_to_group_indices` // and `group_indices` in `vectorized_equal_to_group_indices` - let n = self - .vectorized_operation_buffers - .equal_to_group_indices - .len(); let mut equal_to_results = mem::take(&mut self.vectorized_operation_buffers.equal_to_results); - equal_to_results.reset(n); + equal_to_results.clear(); + equal_to_results.resize( + self.vectorized_operation_buffers + .equal_to_group_indices + .len(), + true, + ); for (col_idx, group_col) in self.group_values.iter().enumerate() { group_col.vectorized_equal_to( @@ -640,7 +643,7 @@ impl GroupValuesColumn { .iter() .enumerate() { - let equal_to_result = equal_to_results.get_bit(idx); + let equal_to_result = equal_to_results[idx]; // Equal to case, set the `group_indices` to `rows` in `groups` if equal_to_result { @@ -1235,90 +1238,6 @@ fn supported_type(data_type: &DataType) -> bool { ) } -/// Bitmask tracking which rows are still potentially equal across column comparisons: -/// - Bit `i` = 1 means row `i` has not yet been determined unequal -/// - Bit `i` = 0 means row `i` is already known to be unequal -pub struct EqualToResults { - chunks: Vec, - len: usize, -} - -impl EqualToResults { - pub fn new() -> Self { - Self { - chunks: Vec::new(), - len: 0, - } - } - - /// Reset to all-true (`1` bits) for `n` rows - pub fn reset(&mut self, n: usize) { - self.len = n; - let n_full = n / 64; - let remainder = n % 64; - let total = n_full + usize::from(remainder > 0); - - self.chunks.resize(total, 0); - self.chunks[..n_full].fill(!0u64); - if remainder > 0 { - self.chunks[n_full] = (1u64 << remainder) - 1; - } - } - - #[inline] - pub fn get_bit(&self, i: usize) -> bool { - debug_assert!(i < self.len); - (self.chunks[i / 64] >> (i % 64)) & 1 == 1 - } - - #[inline] - pub fn get_chunk(&self, chunk_idx: usize) -> u64 { - self.chunks[chunk_idx] - } - - #[inline] - pub fn and_chunk(&mut self, chunk_idx: usize, mask: u64) { - self.chunks[chunk_idx] &= mask; - } - - #[inline] - pub fn n_chunks(&self) -> usize { - self.chunks.len() - } - - #[inline] - pub fn len(&self) -> usize { - self.len - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - #[inline] - pub fn set_bit(&mut self, i: usize, value: bool) { - debug_assert!(i < self.len); - if value { - self.chunks[i / 64] |= 1u64 << (i % 64); - } else { - self.chunks[i / 64] &= !(1u64 << (i % 64)); - } - } - - /// Convert to `Vec` for testing. - #[cfg(test)] - pub fn to_vec(&self) -> Vec { - (0..self.len).map(|i| self.get_bit(i)).collect() - } -} - -impl Default for EqualToResults { - fn default() -> Self { - Self::new() - } -} - ///Shows how many `null`s there are in an array enum Nulls { /// All array items are `null`s diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index 691c82d5af68d..31126348b3fd4 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -16,7 +16,7 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - EqualToResults, GroupColumn, Nulls, nulls_equal_to, + GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::ArrowNativeTypeOp; @@ -25,6 +25,7 @@ use arrow::buffer::ScalarBuffer; use arrow::datatypes::DataType; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; +use itertools::izip; use std::iter; use std::sync::Arc; @@ -61,67 +62,42 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { assert!( !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()), "called with nullable input" ); let array_values = array.as_primitive::().values(); - let n = lhs_rows.len(); - let n_full = n / 64; - // Process 64 rows per chunk via bitmask AND - for chunk_idx in 0..n_full { - // Skip entirely if all bits already false - if equal_to_results.get_chunk(chunk_idx) == 0 { - continue; - } - - let base = chunk_idx * 64; - let mut eq_mask: u64 = 0; + let iter = izip!( + lhs_rows.iter(), + rhs_rows.iter(), + equal_to_results.iter_mut(), + ); - // Build a 64-bit mask: bit k=1 if rows[base+k] are equal - for k in 0..64usize { - let i = base + k; + for (&lhs_row, &rhs_row, equal_to_result) in iter { + let result = { + // Getting unchecked not only for bound checks but because the bound checks are + // what prevents auto-vectorization let left = if cfg!(debug_assertions) { - self.group_values[lhs_rows[i]] + self.group_values[lhs_row] } else { // SAFETY: indices are guaranteed to be in bounds - unsafe { *self.group_values.get_unchecked(*lhs_rows.get_unchecked(i)) } + unsafe { *self.group_values.get_unchecked(lhs_row) } }; let right = if cfg!(debug_assertions) { - array_values[rhs_rows[i]] + array_values[rhs_row] } else { // SAFETY: indices are guaranteed to be in bounds - unsafe { *array_values.get_unchecked(*rhs_rows.get_unchecked(i)) } + unsafe { *array_values.get_unchecked(rhs_row) } }; - eq_mask |= (left.is_eq(right) as u64) << k; - } - equal_to_results.and_chunk(chunk_idx, eq_mask); - } - // Handle the remaining rows (< 64) - let remainder_start = n_full * 64; - if remainder_start < n { - let chunk_idx = n_full; - if equal_to_results.get_chunk(chunk_idx) != 0 { - let mut eq_mask: u64 = 0; - for (k, i) in (remainder_start..n).enumerate() { - let left = if cfg!(debug_assertions) { - self.group_values[lhs_rows[i]] - } else { - unsafe { *self.group_values.get_unchecked(*lhs_rows.get_unchecked(i)) } - }; - let right = if cfg!(debug_assertions) { - array_values[rhs_rows[i]] - } else { - unsafe { *array_values.get_unchecked(*rhs_rows.get_unchecked(i)) } - }; - eq_mask |= (left.is_eq(right) as u64) << k; - } - equal_to_results.and_chunk(chunk_idx, eq_mask); - } + // Always evaluate, to allow for auto-vectorization + left.is_eq(right) + }; + + *equal_to_result = result && *equal_to_result; } } @@ -130,36 +106,33 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { assert!(NULLABLE, "called with non-nullable input"); let array = array.as_primitive::(); - for (idx, (&lhs_row, &rhs_row)) in - lhs_rows.iter().zip(rhs_rows.iter()).enumerate() - { + let iter = izip!( + lhs_rows.iter(), + rhs_rows.iter(), + equal_to_results.iter_mut(), + ); + + for (&lhs_row, &rhs_row, equal_to_result) in iter { // Has found not equal to in previous column, don't need to check - if !equal_to_results.get_bit(idx) { + if !*equal_to_result { continue; } - let chunk_idx = idx / 64; - let bit_pos = idx % 64; - // Perf: skip null check (by short circuit) if input is not nullable let exist_null = self.nulls.is_null(lhs_row); let input_null = array.is_null(rhs_row); if let Some(result) = nulls_equal_to(exist_null, input_null) { - if !result { - equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); - } + *equal_to_result = result; continue; } // Otherwise, we need to check their values - if !self.group_values[lhs_row].is_eq(array.value(rhs_row)) { - equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); - } + *equal_to_result = self.group_values[lhs_row].is_eq(array.value(rhs_row)); } } } @@ -203,7 +176,7 @@ impl GroupColumn lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { if !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()) { self.vectorized_equal_to_non_nullable( @@ -308,7 +281,6 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder; - use crate::aggregates::group_values::multi_group_by::EqualToResults; use arrow::array::{ArrayRef, Float32Array, Int64Array, NullBufferBuilder}; use arrow::datatypes::{DataType, Float32Type, Int64Type}; @@ -328,10 +300,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); } }; @@ -352,7 +324,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -372,7 +344,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut EqualToResults, + &mut Vec, ), { // Will cover such cases: @@ -421,8 +393,7 @@ mod tests { let input_array = Arc::new(Float32Array::new(values, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(builder.len()); + let mut equal_to_results = vec![true; builder.len()]; equal_to( &builder, &[0, 1, 2, 3, 4, 5, 6], @@ -430,7 +401,6 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6], &mut equal_to_results, ); - let equal_to_results = equal_to_results.to_vec(); assert!(!equal_to_results[0]); assert!(equal_to_results[1]); @@ -455,10 +425,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); } }; @@ -479,7 +449,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -499,7 +469,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut EqualToResults, + &mut Vec, ), { // Will cover such cases: @@ -517,8 +487,7 @@ mod tests { let input_array = Arc::new(Int64Array::from(vec![Some(0), Some(2)])) as ArrayRef; // Check - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(builder.len()); + let mut equal_to_results = vec![true; builder.len()]; equal_to( &builder, &[0, 1], @@ -526,7 +495,6 @@ mod tests { &[0, 1], &mut equal_to_results, ); - let equal_to_results = equal_to_results.to_vec(); assert!(equal_to_results[0]); assert!(!equal_to_results[1]); @@ -552,21 +520,19 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_nulls_input_array.len()]; builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(Int64Array::from(vec![ @@ -580,20 +546,18 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_not_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); } } From 28b5b3720cdfdd1eacfa566721d7a21b9ad87b2d Mon Sep 17 00:00:00 2001 From: Huy Mac Date: Tue, 28 Apr 2026 18:31:18 +0900 Subject: [PATCH 3/8] feat: use bitmasks for multiple column aggregation use `BooleanBufferBuilder` from https://github.com/apache/arrow-rs/blob/d6168e526aae79d6fbafe8c11062b5f834021052/arrow-buffer/src/util/bit_util.rs --- .../group_values/multi_group_by/boolean.rs | 132 ++++++------ .../group_values/multi_group_by/bytes.rs | 107 +++++----- .../group_values/multi_group_by/bytes_view.rs | 144 ++++++------- .../group_values/multi_group_by/mod.rs | 41 ++-- .../group_values/multi_group_by/primitive.rs | 198 +++++++++--------- 5 files changed, 300 insertions(+), 322 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs index 91a39f28f33c1..6da6d7085384d 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs @@ -22,7 +22,6 @@ use crate::aggregates::group_values::multi_group_by::{GroupColumn, nulls_equal_t use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{Array as _, ArrayRef, AsArray, BooleanArray, BooleanBufferBuilder}; use datafusion_common::Result; -use itertools::izip; /// An implementation of [`GroupColumn`] for booleans /// @@ -81,19 +80,14 @@ impl GroupColumn for BooleanGroupValueBuilder { lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { let array = array.as_boolean(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to in previous column, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } @@ -101,12 +95,16 @@ impl GroupColumn for BooleanGroupValueBuilder { let exist_null = self.nulls.is_null(lhs_row); let input_null = array.is_null(rhs_row); if let Some(result) = nulls_equal_to(exist_null, input_null) { - *equal_to_result = result; + if !result { + equal_to_results.set_bit(idx, false); + } continue; } } - *equal_to_result = self.buffer.get_bit(lhs_row) == array.value(rhs_row); + if self.buffer.get_bit(lhs_row) != array.value(rhs_row) { + equal_to_results.set_bit(idx, false); + } } } @@ -195,10 +193,20 @@ impl GroupColumn for BooleanGroupValueBuilder { #[cfg(test)] mod tests { - use arrow::array::NullBufferBuilder; + use arrow::array::{BooleanBufferBuilder, NullBufferBuilder}; use super::*; + fn make_true_buffer(n: usize) -> BooleanBufferBuilder { + let mut buf = BooleanBufferBuilder::new(n); + buf.append_n(n, true); + buf + } + + fn to_vec(buf: &BooleanBufferBuilder) -> Vec { + (0..buf.len()).map(|i| buf.get_bit(i)).collect() + } + #[test] fn test_nullable_boolean_equal_to() { let append = |builder: &mut BooleanGroupValueBuilder, @@ -213,10 +221,11 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -237,7 +246,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -257,18 +266,9 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut BooleanBufferBuilder, ), { - // Will cover such cases: - // - exist null, input not null - // - exist null, input null; values not equal - // - exist null, input null; values equal - // - exist not null, input null - // - exist not null, input not null; values not equal - // - exist not null, input not null; values equal - - // Define PrimitiveGroupValueBuilder let mut builder = BooleanGroupValueBuilder::::new(); let builder_array = Arc::new(BooleanArray::from(vec![ None, @@ -280,7 +280,6 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5]); - // Define input array let (values, _nulls) = BooleanArray::from(vec![ Some(true), Some(false), @@ -291,18 +290,16 @@ mod tests { ]) .into_parts(); - // explicitly build a null buffer where one of the null values also happens to match let mut nulls = NullBufferBuilder::new(6); nulls.append_non_null(); - nulls.append_null(); // this sets Some(false) to null above + nulls.append_null(); nulls.append_null(); nulls.append_null(); nulls.append_non_null(); nulls.append_non_null(); let input_array = Arc::new(BooleanArray::new(values, nulls.finish())) as ArrayRef; - // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5], @@ -310,13 +307,14 @@ mod tests { &[0, 1, 2, 3, 4, 5], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(!equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(equal_to_results[5]); + let results = to_vec(&equal_to_results); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(!results[3]); + assert!(!results[4]); + assert!(results[5]); } #[test] @@ -333,10 +331,11 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -357,7 +356,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -377,14 +376,9 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut BooleanBufferBuilder, ), { - // Will cover such cases: - // - values equal - // - values not equal - - // Define PrimitiveGroupValueBuilder let mut builder = BooleanGroupValueBuilder::::new(); let builder_array = Arc::new(BooleanArray::from(vec![ Some(false), @@ -394,7 +388,6 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3]); - // Define input array let input_array = Arc::new(BooleanArray::from(vec![ Some(false), Some(false), @@ -402,8 +395,7 @@ mod tests { Some(true), ])) as ArrayRef; - // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, &[0, 1, 2, 3], @@ -411,18 +403,16 @@ mod tests { &[0, 1, 2, 3], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(!equal_to_results[1]); - assert!(!equal_to_results[2]); - assert!(equal_to_results[3]); + assert!(results[0]); + assert!(!results[1]); + assert!(!results[2]); + assert!(results[3]); } #[test] fn test_nullable_boolean_vectorized_operation_special_case() { - // Test the special `all nulls` or `not nulls` input array case - // for vectorized append and equal to - let mut builder = BooleanGroupValueBuilder::::new(); // All nulls input array @@ -432,19 +422,20 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(BooleanArray::from(vec![ @@ -458,18 +449,19 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index cd173741b6464..ef99ef37b472d 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -20,15 +20,14 @@ use crate::aggregates::group_values::multi_group_by::{ }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{ - Array, ArrayRef, AsArray, BufferBuilder, GenericBinaryArray, GenericByteArray, - GenericStringArray, OffsetSizeTrait, types::GenericStringType, + Array, ArrayRef, AsArray, BooleanBufferBuilder, BufferBuilder, GenericBinaryArray, + GenericByteArray, GenericStringArray, OffsetSizeTrait, types::GenericStringType, }; use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{ByteArrayType, DataType, GenericBinaryType}; use datafusion_common::utils::proxy::VecAllocExt; use datafusion_common::{Result, exec_datafusion_err}; use datafusion_physical_expr_common::binary_map::{INITIAL_BUFFER_CAPACITY, OutputType}; -use itertools::izip; use std::mem::size_of; use std::sync::Arc; use std::vec; @@ -106,25 +105,22 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) where B: ByteArrayType, { let array = array.as_bytes::(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } - *equal_to_result = self.do_equal_to_inner(lhs_row, array, rhs_row); + if !self.do_equal_to_inner(lhs_row, array, rhs_row) { + equal_to_results.set_bit(idx, false); + } } } @@ -275,7 +271,7 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { // Sanity array type match self.output_type { @@ -433,12 +429,22 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::bytes::ByteGroupValueBuilder; - use arrow::array::{ArrayRef, NullBufferBuilder, StringArray}; + use arrow::array::{ArrayRef, BooleanBufferBuilder, NullBufferBuilder, StringArray}; use datafusion_common::DataFusionError; use datafusion_physical_expr::binary_map::OutputType; use super::GroupColumn; + fn make_true_buffer(n: usize) -> BooleanBufferBuilder { + let mut buf = BooleanBufferBuilder::new(n); + buf.append_n(n, true); + buf + } + + fn to_vec(buf: &BooleanBufferBuilder) -> Vec { + (0..buf.len()).map(|i| buf.get_bit(i)).collect() + } + #[test] fn test_byte_group_value_builder_overflow() { let mut builder = ByteGroupValueBuilder::::new(OutputType::Utf8); @@ -520,10 +526,11 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -544,7 +551,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -558,9 +565,6 @@ mod tests { #[test] fn test_byte_vectorized_operation_special_case() { - // Test the special `all nulls` or `not nulls` input array case - // for vectorized append and equal to - let mut builder = ByteGroupValueBuilder::::new(OutputType::Utf8); // All nulls input array @@ -575,19 +579,20 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(StringArray::from(vec![ @@ -601,19 +606,20 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } fn test_byte_equal_to_internal(mut append: A, mut equal_to: E) @@ -624,18 +630,9 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut BooleanBufferBuilder, ), { - // Will cover such cases: - // - exist null, input not null - // - exist null, input null; values not equal - // - exist null, input null; values equal - // - exist not null, input null - // - exist not null, input not null; values not equal - // - exist not null, input not null; values equal - - // Define ByteGroupValueBuilder let mut builder = ByteGroupValueBuilder::::new(OutputType::Utf8); let builder_array = Arc::new(StringArray::from(vec![ None, @@ -647,7 +644,6 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5]); - // Define input array let (offsets, buffer, _nulls) = StringArray::from(vec![ Some("foo"), Some("bar"), @@ -658,10 +654,9 @@ mod tests { ]) .into_parts(); - // explicitly build a boolean buffer where one of the null values also happens to match let mut nulls = NullBufferBuilder::new(6); nulls.append_non_null(); - nulls.append_null(); // this sets Some("bar") to null above + nulls.append_null(); nulls.append_null(); nulls.append_null(); nulls.append_non_null(); @@ -669,8 +664,7 @@ mod tests { let input_array = Arc::new(StringArray::new(offsets, buffer, nulls.finish())) as ArrayRef; - // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5], @@ -678,12 +672,13 @@ mod tests { &[0, 1, 2, 3, 4, 5], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(!equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(equal_to_results[5]); + let results = to_vec(&equal_to_results); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(!results[3]); + assert!(!results[4]); + assert!(results[5]); } } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index a91dd3115d879..f82aaad0dcc29 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -19,11 +19,13 @@ use crate::aggregates::group_values::multi_group_by::{ GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; -use arrow::array::{Array, ArrayRef, AsArray, ByteView, GenericByteViewArray, make_view}; +use arrow::array::{ + Array, ArrayRef, AsArray, BooleanBufferBuilder, ByteView, GenericByteViewArray, + make_view, +}; use arrow::buffer::{Buffer, ScalarBuffer}; use arrow::datatypes::ByteViewType; use datafusion_common::Result; -use itertools::izip; use std::marker::PhantomData; use std::mem::{replace, size_of}; use std::sync::Arc; @@ -126,22 +128,19 @@ impl ByteViewGroupValueBuilder { lhs_rows: &[usize], array: &GenericByteViewArray, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } - *equal_to_result = - self.do_equal_to_inner::(lhs_row, array, rhs_row); + if !self.do_equal_to_inner::(lhs_row, array, rhs_row) + { + equal_to_results.set_bit(idx, false); + } } } @@ -513,12 +512,11 @@ impl GroupColumn for ByteViewGroupValueBuilder { group_indices: &[usize], array: &ArrayRef, rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { let has_nulls = array.null_count() != 0; let array = array.as_byte_view::(); let has_buffers = !array.data_buffers().is_empty(); - // call specialized version based on nulls and buffers presence match (has_nulls, has_buffers) { (true, true) => self.vectorized_equal_to_inner::( group_indices, @@ -584,11 +582,23 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder; - use arrow::array::{ArrayRef, AsArray, NullBufferBuilder, StringViewArray}; + use arrow::array::{ + ArrayRef, AsArray, BooleanBufferBuilder, NullBufferBuilder, StringViewArray, + }; use arrow::datatypes::StringViewType; use super::GroupColumn; + fn make_true_buffer(n: usize) -> BooleanBufferBuilder { + let mut buf = BooleanBufferBuilder::new(n); + buf.append_n(n, true); + buf + } + + fn to_vec(buf: &BooleanBufferBuilder) -> Vec { + (0..buf.len()).map(|i| buf.get_bit(i)).collect() + } + #[test] fn test_byte_view_append_val() { let mut builder = @@ -627,10 +637,11 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -651,7 +662,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -665,9 +676,6 @@ mod tests { #[test] fn test_byte_view_vectorized_operation_special_case() { - // Test the special `all nulls` or `not nulls` input array case - // for vectorized append and equal to - let mut builder = ByteViewGroupValueBuilder::::new().with_max_block_size(60); @@ -683,19 +691,20 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(StringViewArray::from(vec![ @@ -709,19 +718,20 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } fn test_byte_view_equal_to_internal(mut append: A, mut equal_to: E) @@ -732,35 +742,9 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut BooleanBufferBuilder, ), { - // Will cover such cases: - // - exist null, input not null - // - exist null, input null; values not equal - // - exist null, input null; values equal - // - exist not null, input null - // - exist not null, input not null; value lens not equal - // - exist not null, input not null; value not equal(inlined case) - // - exist not null, input not null; value equal(inlined case) - // - // - exist not null, input not null; value not equal - // (non-inlined case + prefix not equal) - // - // - exist not null, input not null; value not equal - // (non-inlined case + value in `completed`) - // - // - exist not null, input not null; value equal - // (non-inlined case + value in `completed`) - // - // - exist not null, input not null; value not equal - // (non-inlined case + value in `in_progress`) - // - // - exist not null, input not null; value equal - // (non-inlined case + value in `in_progress`) - - // Set the block size to 40 for ensuring some unlined values are in `in_progress`, - // and some are in `completed`, so both two branches in `value` function can be covered. let mut builder = ByteViewGroupValueBuilder::::new().with_max_block_size(60); let builder_array = Arc::new(StringViewArray::from(vec![ @@ -776,10 +760,9 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5, 6, 7, 8]); - // Define input array let (views, buffer, _nulls) = StringViewArray::from(vec![ Some("foo"), - Some("bar"), // set to null + Some("bar"), None, None, Some("baz"), @@ -793,10 +776,9 @@ mod tests { ]) .into_parts(); - // explicitly build a boolean buffer where one of the null values also happens to match let mut nulls = NullBufferBuilder::new(9); nulls.append_non_null(); - nulls.append_null(); // this sets Some("bar") to null above + nulls.append_null(); nulls.append_null(); nulls.append_null(); nulls.append_non_null(); @@ -810,8 +792,7 @@ mod tests { let input_array = Arc::new(StringViewArray::new(views, buffer, nulls.finish())) as ArrayRef; - // Check - let mut equal_to_results = vec![true; input_array.len()]; + let mut equal_to_results = make_true_buffer(input_array.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 8], @@ -819,19 +800,20 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(!equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(!equal_to_results[5]); - assert!(equal_to_results[6]); - assert!(!equal_to_results[7]); - assert!(!equal_to_results[8]); - assert!(equal_to_results[9]); - assert!(!equal_to_results[10]); - assert!(equal_to_results[11]); + let results = to_vec(&equal_to_results); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(!results[3]); + assert!(!results[4]); + assert!(!results[5]); + assert!(results[6]); + assert!(!results[7]); + assert!(!results[8]); + assert!(results[9]); + assert!(!results[10]); + assert!(results[11]); } #[test] diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs index cc4576eabddbd..2115e9a34da64 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs @@ -29,7 +29,7 @@ use crate::aggregates::group_values::multi_group_by::{ boolean::BooleanGroupValueBuilder, bytes::ByteGroupValueBuilder, bytes_view::ByteViewGroupValueBuilder, primitive::PrimitiveGroupValueBuilder, }; -use arrow::array::{Array, ArrayRef}; +use arrow::array::{Array, ArrayRef, BooleanBufferBuilder}; use arrow::compute::cast; use arrow::datatypes::{ BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type, @@ -82,7 +82,7 @@ pub trait GroupColumn: Send + Sync { lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ); /// The vectorized version `append_val` @@ -224,7 +224,6 @@ pub struct GroupValuesColumn { /// Buffers to store intermediate results in `vectorized_append` /// and `vectorized_equal_to`, for reducing memory allocation -#[derive(Default)] struct VectorizedOperationBuffers { /// The `vectorized append` row indices buffer append_row_indices: Vec, @@ -235,8 +234,8 @@ struct VectorizedOperationBuffers { /// The `vectorized_equal_to` group indices buffer equal_to_group_indices: Vec, - /// The `vectorized_equal_to` result buffer - equal_to_results: Vec, + /// The `vectorized_equal_to` result buffer (bitmask) + equal_to_results: BooleanBufferBuilder, /// The buffer for storing row indices found not equal to /// exist groups in `group_values` in `vectorized_equal_to`. @@ -244,12 +243,23 @@ struct VectorizedOperationBuffers { remaining_row_indices: Vec, } +impl Default for VectorizedOperationBuffers { + fn default() -> Self { + Self { + append_row_indices: Vec::new(), + equal_to_row_indices: Vec::new(), + equal_to_group_indices: Vec::new(), + equal_to_results: BooleanBufferBuilder::new(0), + remaining_row_indices: Vec::new(), + } + } +} + impl VectorizedOperationBuffers { fn clear(&mut self) { self.append_row_indices.clear(); self.equal_to_row_indices.clear(); self.equal_to_group_indices.clear(); - self.equal_to_results.clear(); self.remaining_row_indices.clear(); } } @@ -615,15 +625,16 @@ impl GroupValuesColumn { // 1. Perform `vectorized_equal_to` for `rows` in `vectorized_equal_to_group_indices` // and `group_indices` in `vectorized_equal_to_group_indices` - let mut equal_to_results = - mem::take(&mut self.vectorized_operation_buffers.equal_to_results); - equal_to_results.clear(); - equal_to_results.resize( - self.vectorized_operation_buffers - .equal_to_group_indices - .len(), - true, + let n = self + .vectorized_operation_buffers + .equal_to_group_indices + .len(); + let mut equal_to_results = mem::replace( + &mut self.vectorized_operation_buffers.equal_to_results, + BooleanBufferBuilder::new(0), ); + equal_to_results.truncate(0); + equal_to_results.append_n(n, true); for (col_idx, group_col) in self.group_values.iter().enumerate() { group_col.vectorized_equal_to( @@ -643,7 +654,7 @@ impl GroupValuesColumn { .iter() .enumerate() { - let equal_to_result = equal_to_results[idx]; + let equal_to_result = equal_to_results.get_bit(idx); // Equal to case, set the `group_indices` to `rows` in `groups` if equal_to_result { diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index 31126348b3fd4..be92027f58a81 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -20,12 +20,15 @@ use crate::aggregates::group_values::multi_group_by::{ }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::ArrowNativeTypeOp; -use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray, cast::AsArray}; +use arrow::array::{ + Array, ArrayRef, ArrowPrimitiveType, BooleanBufferBuilder, PrimitiveArray, + cast::AsArray, +}; use arrow::buffer::ScalarBuffer; use arrow::datatypes::DataType; +use arrow::util::bit_util::apply_bitwise_binary_op; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; -use itertools::izip; use std::iter; use std::sync::Arc; @@ -62,43 +65,45 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { assert!( !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()), "called with nullable input" ); let array_values = array.as_primitive::().values(); + let n = lhs_rows.len(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); + // Build a packed comparison bitmask, then AND it into equal_to_results + let num_bytes = n.div_ceil(8); + let mut cmp_buf = vec![0u8; num_bytes]; - for (&lhs_row, &rhs_row, equal_to_result) in iter { - let result = { - // Getting unchecked not only for bound checks but because the bound checks are - // what prevents auto-vectorization - let left = if cfg!(debug_assertions) { - self.group_values[lhs_row] - } else { - // SAFETY: indices are guaranteed to be in bounds - unsafe { *self.group_values.get_unchecked(lhs_row) } - }; - let right = if cfg!(debug_assertions) { - array_values[rhs_row] - } else { - // SAFETY: indices are guaranteed to be in bounds - unsafe { *array_values.get_unchecked(rhs_row) } - }; - - // Always evaluate, to allow for auto-vectorization - left.is_eq(right) + for (i, (&lhs_row, &rhs_row)) in lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + let left = if cfg!(debug_assertions) { + self.group_values[lhs_row] + } else { + unsafe { *self.group_values.get_unchecked(lhs_row) } }; - - *equal_to_result = result && *equal_to_result; + let right = if cfg!(debug_assertions) { + array_values[rhs_row] + } else { + unsafe { *array_values.get_unchecked(rhs_row) } + }; + if left.is_eq(right) { + cmp_buf[i / 8] |= 1 << (i % 8); + } } + + // AND the comparison result into the existing equal_to_results bitmask + apply_bitwise_binary_op( + equal_to_results.as_slice_mut(), + 0, + &cmp_buf, + 0, + n, + |a, b| a & b, + ); } pub fn vectorized_equal_nullable( @@ -106,33 +111,30 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { assert!(NULLABLE, "called with non-nullable input"); let array = array.as_primitive::(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to in previous column, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } - // Perf: skip null check (by short circuit) if input is not nullable let exist_null = self.nulls.is_null(lhs_row); let input_null = array.is_null(rhs_row); if let Some(result) = nulls_equal_to(exist_null, input_null) { - *equal_to_result = result; + if !result { + equal_to_results.set_bit(idx, false); + } continue; } - // Otherwise, we need to check their values - *equal_to_result = self.group_values[lhs_row].is_eq(array.value(rhs_row)); + if !self.group_values[lhs_row].is_eq(array.value(rhs_row)) { + equal_to_results.set_bit(idx, false); + } } } } @@ -176,7 +178,7 @@ impl GroupColumn lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { if !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()) { self.vectorized_equal_to_non_nullable( @@ -281,11 +283,23 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder; - use arrow::array::{ArrayRef, Float32Array, Int64Array, NullBufferBuilder}; + use arrow::array::{ + ArrayRef, BooleanBufferBuilder, Float32Array, Int64Array, NullBufferBuilder, + }; use arrow::datatypes::{DataType, Float32Type, Int64Type}; use super::GroupColumn; + fn make_true_buffer(n: usize) -> BooleanBufferBuilder { + let mut buf = BooleanBufferBuilder::new(n); + buf.append_n(n, true); + buf + } + + fn to_vec(buf: &BooleanBufferBuilder) -> Vec { + (0..buf.len()).map(|i| buf.get_bit(i)).collect() + } + #[test] fn test_nullable_primitive_equal_to() { let append = |builder: &mut PrimitiveGroupValueBuilder, @@ -300,10 +314,11 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -324,7 +339,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -344,18 +359,9 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut BooleanBufferBuilder, ), { - // Will cover such cases: - // - exist null, input not null - // - exist null, input null; values not equal - // - exist null, input null; values equal - // - exist not null, input null - // - exist not null, input not null; values not equal - // - exist not null, input not null; values equal - - // Define PrimitiveGroupValueBuilder let mut builder = PrimitiveGroupValueBuilder::::new(DataType::Float32); let builder_array = Arc::new(Float32Array::from(vec![ @@ -369,7 +375,6 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5, 6]); - // Define input array let (_, values, _nulls) = Float32Array::from(vec![ Some(1.0), Some(2.0), @@ -381,10 +386,9 @@ mod tests { ]) .into_parts(); - // explicitly build a null buffer where one of the null values also happens to match let mut nulls = NullBufferBuilder::new(6); nulls.append_non_null(); - nulls.append_null(); // this sets Some(2) to null above + nulls.append_null(); nulls.append_null(); nulls.append_non_null(); nulls.append_null(); @@ -392,8 +396,7 @@ mod tests { nulls.append_null(); let input_array = Arc::new(Float32Array::new(values, nulls.finish())) as ArrayRef; - // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5, 6], @@ -401,14 +404,15 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(equal_to_results[5]); - assert!(!equal_to_results[6]); + let results = to_vec(&equal_to_results); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(!results[4]); + assert!(results[5]); + assert!(!results[6]); } #[test] @@ -425,10 +429,11 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -449,7 +454,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -469,25 +474,18 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut BooleanBufferBuilder, ), { - // Will cover such cases: - // - values equal - // - values not equal - - // Define PrimitiveGroupValueBuilder let mut builder = PrimitiveGroupValueBuilder::::new(DataType::Int64); let builder_array = Arc::new(Int64Array::from(vec![Some(0), Some(1)])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1]); - // Define input array let input_array = Arc::new(Int64Array::from(vec![Some(0), Some(2)])) as ArrayRef; - // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, &[0, 1], @@ -495,16 +493,14 @@ mod tests { &[0, 1], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(!equal_to_results[1]); + assert!(results[0]); + assert!(!results[1]); } #[test] fn test_nullable_primitive_vectorized_operation_special_case() { - // Test the special `all nulls` or `not nulls` input array case - // for vectorized append and equal to - let mut builder = PrimitiveGroupValueBuilder::::new(DataType::Int64); @@ -520,19 +516,20 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(Int64Array::from(vec![ @@ -546,18 +543,19 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } } From 4162bb64d68aaaf01985ad48c770a4695b67bf51 Mon Sep 17 00:00:00 2001 From: Huy Mac Date: Tue, 28 Apr 2026 17:08:19 +0900 Subject: [PATCH 4/8] feat: use bitmasks for multiple column aggregation --- .../group_values/multi_group_by/boolean.rs | 104 +++++++------ .../group_values/multi_group_by/bytes.rs | 81 +++++----- .../group_values/multi_group_by/bytes_view.rs | 94 +++++------ .../group_values/multi_group_by/mod.rs | 105 +++++++++++-- .../group_values/multi_group_by/primitive.rs | 146 +++++++++++------- 5 files changed, 333 insertions(+), 197 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs index 91a39f28f33c1..2cbe4f6365b85 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs @@ -18,11 +18,10 @@ use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::Nulls; -use crate::aggregates::group_values::multi_group_by::{GroupColumn, nulls_equal_to}; +use crate::aggregates::group_values::multi_group_by::{EqualToResults, GroupColumn, nulls_equal_to}; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{Array as _, ArrayRef, AsArray, BooleanArray, BooleanBufferBuilder}; use datafusion_common::Result; -use itertools::izip; /// An implementation of [`GroupColumn`] for booleans /// @@ -81,32 +80,34 @@ impl GroupColumn for BooleanGroupValueBuilder { lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { let array = array.as_boolean(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to in previous column, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } + let chunk_idx = idx / 64; + let bit_pos = idx % 64; + if NULLABLE { let exist_null = self.nulls.is_null(lhs_row); let input_null = array.is_null(rhs_row); if let Some(result) = nulls_equal_to(exist_null, input_null) { - *equal_to_result = result; + if !result { + equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); + } continue; } } - *equal_to_result = self.buffer.get_bit(lhs_row) == array.value(rhs_row); + if self.buffer.get_bit(lhs_row) != array.value(rhs_row) { + equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); + } } } @@ -195,6 +196,7 @@ impl GroupColumn for BooleanGroupValueBuilder { #[cfg(test)] mod tests { + use crate::aggregates::group_values::multi_group_by::EqualToResults; use arrow::array::NullBufferBuilder; use super::*; @@ -213,10 +215,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -237,7 +239,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -257,7 +259,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut EqualToResults, ), { // Will cover such cases: @@ -302,7 +304,8 @@ mod tests { let input_array = Arc::new(BooleanArray::new(values, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(builder.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5], @@ -310,13 +313,14 @@ mod tests { &[0, 1, 2, 3, 4, 5], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(!equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(equal_to_results[5]); + let results = equal_to_results.to_vec(); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(!results[3]); + assert!(!results[4]); + assert!(results[5]); } #[test] @@ -333,10 +337,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -357,7 +361,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -377,7 +381,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut EqualToResults, ), { // Will cover such cases: @@ -403,7 +407,8 @@ mod tests { ])) as ArrayRef; // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(builder.len()); equal_to( &builder, &[0, 1, 2, 3], @@ -411,11 +416,12 @@ mod tests { &[0, 1, 2, 3], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(!equal_to_results[1]); - assert!(!equal_to_results[2]); - assert!(equal_to_results[3]); + assert!(results[0]); + assert!(!results[1]); + assert!(!results[2]); + assert!(results[3]); } #[test] @@ -432,19 +438,21 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(BooleanArray::from(vec![ @@ -458,18 +466,20 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index cd173741b6464..e716ebf8eb713 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -16,7 +16,7 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - GroupColumn, Nulls, nulls_equal_to, + EqualToResults, GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{ @@ -28,7 +28,6 @@ use arrow::datatypes::{ByteArrayType, DataType, GenericBinaryType}; use datafusion_common::utils::proxy::VecAllocExt; use datafusion_common::{Result, exec_datafusion_err}; use datafusion_physical_expr_common::binary_map::{INITIAL_BUFFER_CAPACITY, OutputType}; -use itertools::izip; use std::mem::size_of; use std::sync::Arc; use std::vec; @@ -106,25 +105,24 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) where B: ByteArrayType, { let array = array.as_bytes::(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } - *equal_to_result = self.do_equal_to_inner(lhs_row, array, rhs_row); + if !self.do_equal_to_inner(lhs_row, array, rhs_row) { + let chunk_idx = idx / 64; + let bit_pos = idx % 64; + equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); + } } } @@ -275,7 +273,7 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { // Sanity array type match self.output_type { @@ -432,6 +430,7 @@ where mod tests { use std::sync::Arc; + use crate::aggregates::group_values::multi_group_by::EqualToResults; use crate::aggregates::group_values::multi_group_by::bytes::ByteGroupValueBuilder; use arrow::array::{ArrayRef, NullBufferBuilder, StringArray}; use datafusion_common::DataFusionError; @@ -520,10 +519,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -544,7 +543,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -575,19 +574,21 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(StringArray::from(vec![ @@ -601,19 +602,21 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } fn test_byte_equal_to_internal(mut append: A, mut equal_to: E) @@ -624,7 +627,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut EqualToResults, ), { // Will cover such cases: @@ -670,7 +673,8 @@ mod tests { Arc::new(StringArray::new(offsets, buffer, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(builder.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5], @@ -678,12 +682,13 @@ mod tests { &[0, 1, 2, 3, 4, 5], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(!equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(equal_to_results[5]); + let results = equal_to_results.to_vec(); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(!results[3]); + assert!(!results[4]); + assert!(results[5]); } } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index a91dd3115d879..bdecc2bfdd177 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -16,14 +16,13 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - GroupColumn, Nulls, nulls_equal_to, + EqualToResults, GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{Array, ArrayRef, AsArray, ByteView, GenericByteViewArray, make_view}; use arrow::buffer::{Buffer, ScalarBuffer}; use arrow::datatypes::ByteViewType; use datafusion_common::Result; -use itertools::izip; use std::marker::PhantomData; use std::mem::{replace, size_of}; use std::sync::Arc; @@ -126,22 +125,20 @@ impl ByteViewGroupValueBuilder { lhs_rows: &[usize], array: &GenericByteViewArray, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } - *equal_to_result = - self.do_equal_to_inner::(lhs_row, array, rhs_row); + if !self.do_equal_to_inner::(lhs_row, array, rhs_row) { + let chunk_idx = idx / 64; + let bit_pos = idx % 64; + equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); + } } } @@ -513,7 +510,7 @@ impl GroupColumn for ByteViewGroupValueBuilder { group_indices: &[usize], array: &ArrayRef, rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { let has_nulls = array.null_count() != 0; let array = array.as_byte_view::(); @@ -583,6 +580,7 @@ impl GroupColumn for ByteViewGroupValueBuilder { mod tests { use std::sync::Arc; + use crate::aggregates::group_values::multi_group_by::EqualToResults; use crate::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder; use arrow::array::{ArrayRef, AsArray, NullBufferBuilder, StringViewArray}; use arrow::datatypes::StringViewType; @@ -627,10 +625,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -651,7 +649,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -683,19 +681,21 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(StringViewArray::from(vec![ @@ -709,19 +709,21 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } fn test_byte_view_equal_to_internal(mut append: A, mut equal_to: E) @@ -732,7 +734,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut EqualToResults, ), { // Will cover such cases: @@ -811,7 +813,8 @@ mod tests { Arc::new(StringViewArray::new(views, buffer, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = vec![true; input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(input_array.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 8], @@ -819,19 +822,20 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(!equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(!equal_to_results[5]); - assert!(equal_to_results[6]); - assert!(!equal_to_results[7]); - assert!(!equal_to_results[8]); - assert!(equal_to_results[9]); - assert!(!equal_to_results[10]); - assert!(equal_to_results[11]); + let results = equal_to_results.to_vec(); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(!results[3]); + assert!(!results[4]); + assert!(!results[5]); + assert!(results[6]); + assert!(!results[7]); + assert!(!results[8]); + assert!(results[9]); + assert!(!results[10]); + assert!(results[11]); } #[test] diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs index cc4576eabddbd..0a41d7207d219 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs @@ -82,7 +82,7 @@ pub trait GroupColumn: Send + Sync { lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ); /// The vectorized version `append_val` @@ -235,8 +235,8 @@ struct VectorizedOperationBuffers { /// The `vectorized_equal_to` group indices buffer equal_to_group_indices: Vec, - /// The `vectorized_equal_to` result buffer - equal_to_results: Vec, + /// The `vectorized_equal_to` result buffer (bitmask) + equal_to_results: EqualToResults, /// The buffer for storing row indices found not equal to /// exist groups in `group_values` in `vectorized_equal_to`. @@ -249,7 +249,6 @@ impl VectorizedOperationBuffers { self.append_row_indices.clear(); self.equal_to_row_indices.clear(); self.equal_to_group_indices.clear(); - self.equal_to_results.clear(); self.remaining_row_indices.clear(); } } @@ -615,15 +614,13 @@ impl GroupValuesColumn { // 1. Perform `vectorized_equal_to` for `rows` in `vectorized_equal_to_group_indices` // and `group_indices` in `vectorized_equal_to_group_indices` + let n = self + .vectorized_operation_buffers + .equal_to_group_indices + .len(); let mut equal_to_results = mem::take(&mut self.vectorized_operation_buffers.equal_to_results); - equal_to_results.clear(); - equal_to_results.resize( - self.vectorized_operation_buffers - .equal_to_group_indices - .len(), - true, - ); + equal_to_results.reset(n); for (col_idx, group_col) in self.group_values.iter().enumerate() { group_col.vectorized_equal_to( @@ -643,7 +640,7 @@ impl GroupValuesColumn { .iter() .enumerate() { - let equal_to_result = equal_to_results[idx]; + let equal_to_result = equal_to_results.get_bit(idx); // Equal to case, set the `group_indices` to `rows` in `groups` if equal_to_result { @@ -1238,6 +1235,90 @@ fn supported_type(data_type: &DataType) -> bool { ) } +/// Bitmask tracking which rows are still potentially equal across column comparisons: +/// - Bit `i` = 1 means row `i` has not yet been determined unequal +/// - Bit `i` = 0 means row `i` is already known to be unequal +pub struct EqualToResults { + chunks: Vec, + len: usize, +} + +impl EqualToResults { + pub fn new() -> Self { + Self { + chunks: Vec::new(), + len: 0, + } + } + + /// Reset to all-true (`1` bits) for `n` rows + pub fn reset(&mut self, n: usize) { + self.len = n; + let n_full = n / 64; + let remainder = n % 64; + let total = n_full + usize::from(remainder > 0); + + self.chunks.resize(total, 0); + self.chunks[..n_full].fill(!0u64); + if remainder > 0 { + self.chunks[n_full] = (1u64 << remainder) - 1; + } + } + + #[inline] + pub fn get_bit(&self, i: usize) -> bool { + debug_assert!(i < self.len); + (self.chunks[i / 64] >> (i % 64)) & 1 == 1 + } + + #[inline] + pub fn get_chunk(&self, chunk_idx: usize) -> u64 { + self.chunks[chunk_idx] + } + + #[inline] + pub fn and_chunk(&mut self, chunk_idx: usize, mask: u64) { + self.chunks[chunk_idx] &= mask; + } + + #[inline] + pub fn n_chunks(&self) -> usize { + self.chunks.len() + } + + #[inline] + pub fn len(&self) -> usize { + self.len + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + #[inline] + pub fn set_bit(&mut self, i: usize, value: bool) { + debug_assert!(i < self.len); + if value { + self.chunks[i / 64] |= 1u64 << (i % 64); + } else { + self.chunks[i / 64] &= !(1u64 << (i % 64)); + } + } + + /// Convert to `Vec` for testing. + #[cfg(test)] + pub fn to_vec(&self) -> Vec { + (0..self.len).map(|i| self.get_bit(i)).collect() + } +} + +impl Default for EqualToResults { + fn default() -> Self { + Self::new() + } +} + ///Shows how many `null`s there are in an array enum Nulls { /// All array items are `null`s diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index 31126348b3fd4..691c82d5af68d 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -16,7 +16,7 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - GroupColumn, Nulls, nulls_equal_to, + EqualToResults, GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::ArrowNativeTypeOp; @@ -25,7 +25,6 @@ use arrow::buffer::ScalarBuffer; use arrow::datatypes::DataType; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; -use itertools::izip; use std::iter; use std::sync::Arc; @@ -62,42 +61,67 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { assert!( !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()), "called with nullable input" ); let array_values = array.as_primitive::().values(); + let n = lhs_rows.len(); + let n_full = n / 64; - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); + // Process 64 rows per chunk via bitmask AND + for chunk_idx in 0..n_full { + // Skip entirely if all bits already false + if equal_to_results.get_chunk(chunk_idx) == 0 { + continue; + } - for (&lhs_row, &rhs_row, equal_to_result) in iter { - let result = { - // Getting unchecked not only for bound checks but because the bound checks are - // what prevents auto-vectorization + let base = chunk_idx * 64; + let mut eq_mask: u64 = 0; + + // Build a 64-bit mask: bit k=1 if rows[base+k] are equal + for k in 0..64usize { + let i = base + k; let left = if cfg!(debug_assertions) { - self.group_values[lhs_row] + self.group_values[lhs_rows[i]] } else { // SAFETY: indices are guaranteed to be in bounds - unsafe { *self.group_values.get_unchecked(lhs_row) } + unsafe { *self.group_values.get_unchecked(*lhs_rows.get_unchecked(i)) } }; let right = if cfg!(debug_assertions) { - array_values[rhs_row] + array_values[rhs_rows[i]] } else { // SAFETY: indices are guaranteed to be in bounds - unsafe { *array_values.get_unchecked(rhs_row) } + unsafe { *array_values.get_unchecked(*rhs_rows.get_unchecked(i)) } }; + eq_mask |= (left.is_eq(right) as u64) << k; + } + equal_to_results.and_chunk(chunk_idx, eq_mask); + } - // Always evaluate, to allow for auto-vectorization - left.is_eq(right) - }; - - *equal_to_result = result && *equal_to_result; + // Handle the remaining rows (< 64) + let remainder_start = n_full * 64; + if remainder_start < n { + let chunk_idx = n_full; + if equal_to_results.get_chunk(chunk_idx) != 0 { + let mut eq_mask: u64 = 0; + for (k, i) in (remainder_start..n).enumerate() { + let left = if cfg!(debug_assertions) { + self.group_values[lhs_rows[i]] + } else { + unsafe { *self.group_values.get_unchecked(*lhs_rows.get_unchecked(i)) } + }; + let right = if cfg!(debug_assertions) { + array_values[rhs_rows[i]] + } else { + unsafe { *array_values.get_unchecked(*rhs_rows.get_unchecked(i)) } + }; + eq_mask |= (left.is_eq(right) as u64) << k; + } + equal_to_results.and_chunk(chunk_idx, eq_mask); + } } } @@ -106,33 +130,36 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { assert!(NULLABLE, "called with non-nullable input"); let array = array.as_primitive::(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { // Has found not equal to in previous column, don't need to check - if !*equal_to_result { + if !equal_to_results.get_bit(idx) { continue; } + let chunk_idx = idx / 64; + let bit_pos = idx % 64; + // Perf: skip null check (by short circuit) if input is not nullable let exist_null = self.nulls.is_null(lhs_row); let input_null = array.is_null(rhs_row); if let Some(result) = nulls_equal_to(exist_null, input_null) { - *equal_to_result = result; + if !result { + equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); + } continue; } // Otherwise, we need to check their values - *equal_to_result = self.group_values[lhs_row].is_eq(array.value(rhs_row)); + if !self.group_values[lhs_row].is_eq(array.value(rhs_row)) { + equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); + } } } } @@ -176,7 +203,7 @@ impl GroupColumn lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut EqualToResults, ) { if !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()) { self.vectorized_equal_to_non_nullable( @@ -281,6 +308,7 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder; + use crate::aggregates::group_values::multi_group_by::EqualToResults; use arrow::array::{ArrayRef, Float32Array, Int64Array, NullBufferBuilder}; use arrow::datatypes::{DataType, Float32Type, Int64Type}; @@ -300,10 +328,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -324,7 +352,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -344,7 +372,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut EqualToResults, ), { // Will cover such cases: @@ -393,7 +421,8 @@ mod tests { let input_array = Arc::new(Float32Array::new(values, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(builder.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5, 6], @@ -401,6 +430,7 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6], &mut equal_to_results, ); + let equal_to_results = equal_to_results.to_vec(); assert!(!equal_to_results[0]); assert!(equal_to_results[1]); @@ -425,10 +455,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -449,7 +479,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut EqualToResults| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -469,7 +499,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut EqualToResults, ), { // Will cover such cases: @@ -487,7 +517,8 @@ mod tests { let input_array = Arc::new(Int64Array::from(vec![Some(0), Some(2)])) as ArrayRef; // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(builder.len()); equal_to( &builder, &[0, 1], @@ -495,6 +526,7 @@ mod tests { &[0, 1], &mut equal_to_results, ); + let equal_to_results = equal_to_results.to_vec(); assert!(equal_to_results[0]); assert!(!equal_to_results[1]); @@ -520,19 +552,21 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(Int64Array::from(vec![ @@ -546,18 +580,20 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = EqualToResults::new(); + equal_to_results.reset(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = equal_to_results.to_vec(); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } } From 063a26b2b9c15e77cdbd783b1176551b84a45cf6 Mon Sep 17 00:00:00 2001 From: Huy Mac Date: Tue, 28 Apr 2026 17:50:24 +0900 Subject: [PATCH 5/8] Revert "feat: use bitmasks for multiple column aggregation" This reverts commit 31556f66e8d00bf2010cd642b194ebc18cc4f0de. --- .../group_values/multi_group_by/boolean.rs | 104 ++++++------- .../group_values/multi_group_by/bytes.rs | 81 +++++----- .../group_values/multi_group_by/bytes_view.rs | 94 ++++++----- .../group_values/multi_group_by/mod.rs | 105 ++----------- .../group_values/multi_group_by/primitive.rs | 146 +++++++----------- 5 files changed, 197 insertions(+), 333 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs index 2cbe4f6365b85..91a39f28f33c1 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs @@ -18,10 +18,11 @@ use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::Nulls; -use crate::aggregates::group_values::multi_group_by::{EqualToResults, GroupColumn, nulls_equal_to}; +use crate::aggregates::group_values::multi_group_by::{GroupColumn, nulls_equal_to}; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{Array as _, ArrayRef, AsArray, BooleanArray, BooleanBufferBuilder}; use datafusion_common::Result; +use itertools::izip; /// An implementation of [`GroupColumn`] for booleans /// @@ -80,34 +81,32 @@ impl GroupColumn for BooleanGroupValueBuilder { lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { let array = array.as_boolean(); - for (idx, (&lhs_row, &rhs_row)) in - lhs_rows.iter().zip(rhs_rows.iter()).enumerate() - { - if !equal_to_results.get_bit(idx) { + let iter = izip!( + lhs_rows.iter(), + rhs_rows.iter(), + equal_to_results.iter_mut(), + ); + + for (&lhs_row, &rhs_row, equal_to_result) in iter { + // Has found not equal to in previous column, don't need to check + if !*equal_to_result { continue; } - let chunk_idx = idx / 64; - let bit_pos = idx % 64; - if NULLABLE { let exist_null = self.nulls.is_null(lhs_row); let input_null = array.is_null(rhs_row); if let Some(result) = nulls_equal_to(exist_null, input_null) { - if !result { - equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); - } + *equal_to_result = result; continue; } } - if self.buffer.get_bit(lhs_row) != array.value(rhs_row) { - equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); - } + *equal_to_result = self.buffer.get_bit(lhs_row) == array.value(rhs_row); } } @@ -196,7 +195,6 @@ impl GroupColumn for BooleanGroupValueBuilder { #[cfg(test)] mod tests { - use crate::aggregates::group_values::multi_group_by::EqualToResults; use arrow::array::NullBufferBuilder; use super::*; @@ -215,10 +213,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); } }; @@ -239,7 +237,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -259,7 +257,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut EqualToResults, + &mut Vec, ), { // Will cover such cases: @@ -304,8 +302,7 @@ mod tests { let input_array = Arc::new(BooleanArray::new(values, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(builder.len()); + let mut equal_to_results = vec![true; builder.len()]; equal_to( &builder, &[0, 1, 2, 3, 4, 5], @@ -313,14 +310,13 @@ mod tests { &[0, 1, 2, 3, 4, 5], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - - assert!(!results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(!results[3]); - assert!(!results[4]); - assert!(results[5]); + + assert!(!equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(!equal_to_results[3]); + assert!(!equal_to_results[4]); + assert!(equal_to_results[5]); } #[test] @@ -337,10 +333,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); } }; @@ -361,7 +357,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -381,7 +377,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut EqualToResults, + &mut Vec, ), { // Will cover such cases: @@ -407,8 +403,7 @@ mod tests { ])) as ArrayRef; // Check - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(builder.len()); + let mut equal_to_results = vec![true; builder.len()]; equal_to( &builder, &[0, 1, 2, 3], @@ -416,12 +411,11 @@ mod tests { &[0, 1, 2, 3], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(!results[1]); - assert!(!results[2]); - assert!(results[3]); + assert!(equal_to_results[0]); + assert!(!equal_to_results[1]); + assert!(!equal_to_results[2]); + assert!(equal_to_results[3]); } #[test] @@ -438,21 +432,19 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_nulls_input_array.len()]; builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(BooleanArray::from(vec![ @@ -466,20 +458,18 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_not_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); } } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index e716ebf8eb713..cd173741b6464 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -16,7 +16,7 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - EqualToResults, GroupColumn, Nulls, nulls_equal_to, + GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{ @@ -28,6 +28,7 @@ use arrow::datatypes::{ByteArrayType, DataType, GenericBinaryType}; use datafusion_common::utils::proxy::VecAllocExt; use datafusion_common::{Result, exec_datafusion_err}; use datafusion_physical_expr_common::binary_map::{INITIAL_BUFFER_CAPACITY, OutputType}; +use itertools::izip; use std::mem::size_of; use std::sync::Arc; use std::vec; @@ -105,24 +106,25 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) where B: ByteArrayType, { let array = array.as_bytes::(); - for (idx, (&lhs_row, &rhs_row)) in - lhs_rows.iter().zip(rhs_rows.iter()).enumerate() - { - if !equal_to_results.get_bit(idx) { + let iter = izip!( + lhs_rows.iter(), + rhs_rows.iter(), + equal_to_results.iter_mut(), + ); + + for (&lhs_row, &rhs_row, equal_to_result) in iter { + // Has found not equal to, don't need to check + if !*equal_to_result { continue; } - if !self.do_equal_to_inner(lhs_row, array, rhs_row) { - let chunk_idx = idx / 64; - let bit_pos = idx % 64; - equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); - } + *equal_to_result = self.do_equal_to_inner(lhs_row, array, rhs_row); } } @@ -273,7 +275,7 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { // Sanity array type match self.output_type { @@ -430,7 +432,6 @@ where mod tests { use std::sync::Arc; - use crate::aggregates::group_values::multi_group_by::EqualToResults; use crate::aggregates::group_values::multi_group_by::bytes::ByteGroupValueBuilder; use arrow::array::{ArrayRef, NullBufferBuilder, StringArray}; use datafusion_common::DataFusionError; @@ -519,10 +520,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); } }; @@ -543,7 +544,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -574,21 +575,19 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_nulls_input_array.len()]; builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(StringArray::from(vec![ @@ -602,21 +601,19 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_not_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); } fn test_byte_equal_to_internal(mut append: A, mut equal_to: E) @@ -627,7 +624,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut EqualToResults, + &mut Vec, ), { // Will cover such cases: @@ -673,8 +670,7 @@ mod tests { Arc::new(StringArray::new(offsets, buffer, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(builder.len()); + let mut equal_to_results = vec![true; builder.len()]; equal_to( &builder, &[0, 1, 2, 3, 4, 5], @@ -682,13 +678,12 @@ mod tests { &[0, 1, 2, 3, 4, 5], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - - assert!(!results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(!results[3]); - assert!(!results[4]); - assert!(results[5]); + + assert!(!equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(!equal_to_results[3]); + assert!(!equal_to_results[4]); + assert!(equal_to_results[5]); } } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index bdecc2bfdd177..a91dd3115d879 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -16,13 +16,14 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - EqualToResults, GroupColumn, Nulls, nulls_equal_to, + GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{Array, ArrayRef, AsArray, ByteView, GenericByteViewArray, make_view}; use arrow::buffer::{Buffer, ScalarBuffer}; use arrow::datatypes::ByteViewType; use datafusion_common::Result; +use itertools::izip; use std::marker::PhantomData; use std::mem::{replace, size_of}; use std::sync::Arc; @@ -125,20 +126,22 @@ impl ByteViewGroupValueBuilder { lhs_rows: &[usize], array: &GenericByteViewArray, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { - for (idx, (&lhs_row, &rhs_row)) in - lhs_rows.iter().zip(rhs_rows.iter()).enumerate() - { - if !equal_to_results.get_bit(idx) { + let iter = izip!( + lhs_rows.iter(), + rhs_rows.iter(), + equal_to_results.iter_mut(), + ); + + for (&lhs_row, &rhs_row, equal_to_result) in iter { + // Has found not equal to, don't need to check + if !*equal_to_result { continue; } - if !self.do_equal_to_inner::(lhs_row, array, rhs_row) { - let chunk_idx = idx / 64; - let bit_pos = idx % 64; - equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); - } + *equal_to_result = + self.do_equal_to_inner::(lhs_row, array, rhs_row); } } @@ -510,7 +513,7 @@ impl GroupColumn for ByteViewGroupValueBuilder { group_indices: &[usize], array: &ArrayRef, rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { let has_nulls = array.null_count() != 0; let array = array.as_byte_view::(); @@ -580,7 +583,6 @@ impl GroupColumn for ByteViewGroupValueBuilder { mod tests { use std::sync::Arc; - use crate::aggregates::group_values::multi_group_by::EqualToResults; use crate::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder; use arrow::array::{ArrayRef, AsArray, NullBufferBuilder, StringViewArray}; use arrow::datatypes::StringViewType; @@ -625,10 +627,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); } }; @@ -649,7 +651,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -681,21 +683,19 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_nulls_input_array.len()]; builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(StringViewArray::from(vec![ @@ -709,21 +709,19 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_not_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); } fn test_byte_view_equal_to_internal(mut append: A, mut equal_to: E) @@ -734,7 +732,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut EqualToResults, + &mut Vec, ), { // Will cover such cases: @@ -813,8 +811,7 @@ mod tests { Arc::new(StringViewArray::new(views, buffer, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(input_array.len()); + let mut equal_to_results = vec![true; input_array.len()]; equal_to( &builder, &[0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 8], @@ -822,20 +819,19 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - - assert!(!results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(!results[3]); - assert!(!results[4]); - assert!(!results[5]); - assert!(results[6]); - assert!(!results[7]); - assert!(!results[8]); - assert!(results[9]); - assert!(!results[10]); - assert!(results[11]); + + assert!(!equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(!equal_to_results[3]); + assert!(!equal_to_results[4]); + assert!(!equal_to_results[5]); + assert!(equal_to_results[6]); + assert!(!equal_to_results[7]); + assert!(!equal_to_results[8]); + assert!(equal_to_results[9]); + assert!(!equal_to_results[10]); + assert!(equal_to_results[11]); } #[test] diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs index 0a41d7207d219..cc4576eabddbd 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs @@ -82,7 +82,7 @@ pub trait GroupColumn: Send + Sync { lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ); /// The vectorized version `append_val` @@ -235,8 +235,8 @@ struct VectorizedOperationBuffers { /// The `vectorized_equal_to` group indices buffer equal_to_group_indices: Vec, - /// The `vectorized_equal_to` result buffer (bitmask) - equal_to_results: EqualToResults, + /// The `vectorized_equal_to` result buffer + equal_to_results: Vec, /// The buffer for storing row indices found not equal to /// exist groups in `group_values` in `vectorized_equal_to`. @@ -249,6 +249,7 @@ impl VectorizedOperationBuffers { self.append_row_indices.clear(); self.equal_to_row_indices.clear(); self.equal_to_group_indices.clear(); + self.equal_to_results.clear(); self.remaining_row_indices.clear(); } } @@ -614,13 +615,15 @@ impl GroupValuesColumn { // 1. Perform `vectorized_equal_to` for `rows` in `vectorized_equal_to_group_indices` // and `group_indices` in `vectorized_equal_to_group_indices` - let n = self - .vectorized_operation_buffers - .equal_to_group_indices - .len(); let mut equal_to_results = mem::take(&mut self.vectorized_operation_buffers.equal_to_results); - equal_to_results.reset(n); + equal_to_results.clear(); + equal_to_results.resize( + self.vectorized_operation_buffers + .equal_to_group_indices + .len(), + true, + ); for (col_idx, group_col) in self.group_values.iter().enumerate() { group_col.vectorized_equal_to( @@ -640,7 +643,7 @@ impl GroupValuesColumn { .iter() .enumerate() { - let equal_to_result = equal_to_results.get_bit(idx); + let equal_to_result = equal_to_results[idx]; // Equal to case, set the `group_indices` to `rows` in `groups` if equal_to_result { @@ -1235,90 +1238,6 @@ fn supported_type(data_type: &DataType) -> bool { ) } -/// Bitmask tracking which rows are still potentially equal across column comparisons: -/// - Bit `i` = 1 means row `i` has not yet been determined unequal -/// - Bit `i` = 0 means row `i` is already known to be unequal -pub struct EqualToResults { - chunks: Vec, - len: usize, -} - -impl EqualToResults { - pub fn new() -> Self { - Self { - chunks: Vec::new(), - len: 0, - } - } - - /// Reset to all-true (`1` bits) for `n` rows - pub fn reset(&mut self, n: usize) { - self.len = n; - let n_full = n / 64; - let remainder = n % 64; - let total = n_full + usize::from(remainder > 0); - - self.chunks.resize(total, 0); - self.chunks[..n_full].fill(!0u64); - if remainder > 0 { - self.chunks[n_full] = (1u64 << remainder) - 1; - } - } - - #[inline] - pub fn get_bit(&self, i: usize) -> bool { - debug_assert!(i < self.len); - (self.chunks[i / 64] >> (i % 64)) & 1 == 1 - } - - #[inline] - pub fn get_chunk(&self, chunk_idx: usize) -> u64 { - self.chunks[chunk_idx] - } - - #[inline] - pub fn and_chunk(&mut self, chunk_idx: usize, mask: u64) { - self.chunks[chunk_idx] &= mask; - } - - #[inline] - pub fn n_chunks(&self) -> usize { - self.chunks.len() - } - - #[inline] - pub fn len(&self) -> usize { - self.len - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - #[inline] - pub fn set_bit(&mut self, i: usize, value: bool) { - debug_assert!(i < self.len); - if value { - self.chunks[i / 64] |= 1u64 << (i % 64); - } else { - self.chunks[i / 64] &= !(1u64 << (i % 64)); - } - } - - /// Convert to `Vec` for testing. - #[cfg(test)] - pub fn to_vec(&self) -> Vec { - (0..self.len).map(|i| self.get_bit(i)).collect() - } -} - -impl Default for EqualToResults { - fn default() -> Self { - Self::new() - } -} - ///Shows how many `null`s there are in an array enum Nulls { /// All array items are `null`s diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index 691c82d5af68d..31126348b3fd4 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -16,7 +16,7 @@ // under the License. use crate::aggregates::group_values::multi_group_by::{ - EqualToResults, GroupColumn, Nulls, nulls_equal_to, + GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::ArrowNativeTypeOp; @@ -25,6 +25,7 @@ use arrow::buffer::ScalarBuffer; use arrow::datatypes::DataType; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; +use itertools::izip; use std::iter; use std::sync::Arc; @@ -61,67 +62,42 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { assert!( !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()), "called with nullable input" ); let array_values = array.as_primitive::().values(); - let n = lhs_rows.len(); - let n_full = n / 64; - // Process 64 rows per chunk via bitmask AND - for chunk_idx in 0..n_full { - // Skip entirely if all bits already false - if equal_to_results.get_chunk(chunk_idx) == 0 { - continue; - } - - let base = chunk_idx * 64; - let mut eq_mask: u64 = 0; + let iter = izip!( + lhs_rows.iter(), + rhs_rows.iter(), + equal_to_results.iter_mut(), + ); - // Build a 64-bit mask: bit k=1 if rows[base+k] are equal - for k in 0..64usize { - let i = base + k; + for (&lhs_row, &rhs_row, equal_to_result) in iter { + let result = { + // Getting unchecked not only for bound checks but because the bound checks are + // what prevents auto-vectorization let left = if cfg!(debug_assertions) { - self.group_values[lhs_rows[i]] + self.group_values[lhs_row] } else { // SAFETY: indices are guaranteed to be in bounds - unsafe { *self.group_values.get_unchecked(*lhs_rows.get_unchecked(i)) } + unsafe { *self.group_values.get_unchecked(lhs_row) } }; let right = if cfg!(debug_assertions) { - array_values[rhs_rows[i]] + array_values[rhs_row] } else { // SAFETY: indices are guaranteed to be in bounds - unsafe { *array_values.get_unchecked(*rhs_rows.get_unchecked(i)) } + unsafe { *array_values.get_unchecked(rhs_row) } }; - eq_mask |= (left.is_eq(right) as u64) << k; - } - equal_to_results.and_chunk(chunk_idx, eq_mask); - } - // Handle the remaining rows (< 64) - let remainder_start = n_full * 64; - if remainder_start < n { - let chunk_idx = n_full; - if equal_to_results.get_chunk(chunk_idx) != 0 { - let mut eq_mask: u64 = 0; - for (k, i) in (remainder_start..n).enumerate() { - let left = if cfg!(debug_assertions) { - self.group_values[lhs_rows[i]] - } else { - unsafe { *self.group_values.get_unchecked(*lhs_rows.get_unchecked(i)) } - }; - let right = if cfg!(debug_assertions) { - array_values[rhs_rows[i]] - } else { - unsafe { *array_values.get_unchecked(*rhs_rows.get_unchecked(i)) } - }; - eq_mask |= (left.is_eq(right) as u64) << k; - } - equal_to_results.and_chunk(chunk_idx, eq_mask); - } + // Always evaluate, to allow for auto-vectorization + left.is_eq(right) + }; + + *equal_to_result = result && *equal_to_result; } } @@ -130,36 +106,33 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { assert!(NULLABLE, "called with non-nullable input"); let array = array.as_primitive::(); - for (idx, (&lhs_row, &rhs_row)) in - lhs_rows.iter().zip(rhs_rows.iter()).enumerate() - { + let iter = izip!( + lhs_rows.iter(), + rhs_rows.iter(), + equal_to_results.iter_mut(), + ); + + for (&lhs_row, &rhs_row, equal_to_result) in iter { // Has found not equal to in previous column, don't need to check - if !equal_to_results.get_bit(idx) { + if !*equal_to_result { continue; } - let chunk_idx = idx / 64; - let bit_pos = idx % 64; - // Perf: skip null check (by short circuit) if input is not nullable let exist_null = self.nulls.is_null(lhs_row); let input_null = array.is_null(rhs_row); if let Some(result) = nulls_equal_to(exist_null, input_null) { - if !result { - equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); - } + *equal_to_result = result; continue; } // Otherwise, we need to check their values - if !self.group_values[lhs_row].is_eq(array.value(rhs_row)) { - equal_to_results.and_chunk(chunk_idx, !(1u64 << bit_pos)); - } + *equal_to_result = self.group_values[lhs_row].is_eq(array.value(rhs_row)); } } } @@ -203,7 +176,7 @@ impl GroupColumn lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults, + equal_to_results: &mut [bool], ) { if !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()) { self.vectorized_equal_to_non_nullable( @@ -308,7 +281,6 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder; - use crate::aggregates::group_values::multi_group_by::EqualToResults; use arrow::array::{ArrayRef, Float32Array, Int64Array, NullBufferBuilder}; use arrow::datatypes::{DataType, Float32Type, Int64Type}; @@ -328,10 +300,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); } }; @@ -352,7 +324,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -372,7 +344,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut EqualToResults, + &mut Vec, ), { // Will cover such cases: @@ -421,8 +393,7 @@ mod tests { let input_array = Arc::new(Float32Array::new(values, nulls.finish())) as ArrayRef; // Check - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(builder.len()); + let mut equal_to_results = vec![true; builder.len()]; equal_to( &builder, &[0, 1, 2, 3, 4, 5, 6], @@ -430,7 +401,6 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6], &mut equal_to_results, ); - let equal_to_results = equal_to_results.to_vec(); assert!(!equal_to_results[0]); assert!(equal_to_results[1]); @@ -455,10 +425,10 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results.set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); } }; @@ -479,7 +449,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut EqualToResults| { + equal_to_results: &mut Vec| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -499,7 +469,7 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut EqualToResults, + &mut Vec, ), { // Will cover such cases: @@ -517,8 +487,7 @@ mod tests { let input_array = Arc::new(Int64Array::from(vec![Some(0), Some(2)])) as ArrayRef; // Check - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(builder.len()); + let mut equal_to_results = vec![true; builder.len()]; equal_to( &builder, &[0, 1], @@ -526,7 +495,6 @@ mod tests { &[0, 1], &mut equal_to_results, ); - let equal_to_results = equal_to_results.to_vec(); assert!(equal_to_results[0]); assert!(!equal_to_results[1]); @@ -552,21 +520,19 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_nulls_input_array.len()]; builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(Int64Array::from(vec![ @@ -580,20 +546,18 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = EqualToResults::new(); - equal_to_results.reset(all_not_nulls_input_array.len()); + let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); - let results = equal_to_results.to_vec(); - assert!(results[0]); - assert!(results[1]); - assert!(results[2]); - assert!(results[3]); - assert!(results[4]); + assert!(equal_to_results[0]); + assert!(equal_to_results[1]); + assert!(equal_to_results[2]); + assert!(equal_to_results[3]); + assert!(equal_to_results[4]); } } From 27dfdb7953020c22d129a06d4d7cdcaeefdedc43 Mon Sep 17 00:00:00 2001 From: Huy Mac Date: Tue, 28 Apr 2026 18:31:18 +0900 Subject: [PATCH 6/8] feat: use bitmasks for multiple column aggregation use `BooleanBufferBuilder` from https://github.com/apache/arrow-rs/blob/d6168e526aae79d6fbafe8c11062b5f834021052/arrow-buffer/src/util/bit_util.rs --- .../group_values/multi_group_by/boolean.rs | 132 ++++++------ .../group_values/multi_group_by/bytes.rs | 107 +++++----- .../group_values/multi_group_by/bytes_view.rs | 144 ++++++------- .../group_values/multi_group_by/mod.rs | 41 ++-- .../group_values/multi_group_by/primitive.rs | 198 +++++++++--------- 5 files changed, 300 insertions(+), 322 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs index 91a39f28f33c1..6da6d7085384d 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs @@ -22,7 +22,6 @@ use crate::aggregates::group_values::multi_group_by::{GroupColumn, nulls_equal_t use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{Array as _, ArrayRef, AsArray, BooleanArray, BooleanBufferBuilder}; use datafusion_common::Result; -use itertools::izip; /// An implementation of [`GroupColumn`] for booleans /// @@ -81,19 +80,14 @@ impl GroupColumn for BooleanGroupValueBuilder { lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { let array = array.as_boolean(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to in previous column, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } @@ -101,12 +95,16 @@ impl GroupColumn for BooleanGroupValueBuilder { let exist_null = self.nulls.is_null(lhs_row); let input_null = array.is_null(rhs_row); if let Some(result) = nulls_equal_to(exist_null, input_null) { - *equal_to_result = result; + if !result { + equal_to_results.set_bit(idx, false); + } continue; } } - *equal_to_result = self.buffer.get_bit(lhs_row) == array.value(rhs_row); + if self.buffer.get_bit(lhs_row) != array.value(rhs_row) { + equal_to_results.set_bit(idx, false); + } } } @@ -195,10 +193,20 @@ impl GroupColumn for BooleanGroupValueBuilder { #[cfg(test)] mod tests { - use arrow::array::NullBufferBuilder; + use arrow::array::{BooleanBufferBuilder, NullBufferBuilder}; use super::*; + fn make_true_buffer(n: usize) -> BooleanBufferBuilder { + let mut buf = BooleanBufferBuilder::new(n); + buf.append_n(n, true); + buf + } + + fn to_vec(buf: &BooleanBufferBuilder) -> Vec { + (0..buf.len()).map(|i| buf.get_bit(i)).collect() + } + #[test] fn test_nullable_boolean_equal_to() { let append = |builder: &mut BooleanGroupValueBuilder, @@ -213,10 +221,11 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -237,7 +246,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -257,18 +266,9 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut BooleanBufferBuilder, ), { - // Will cover such cases: - // - exist null, input not null - // - exist null, input null; values not equal - // - exist null, input null; values equal - // - exist not null, input null - // - exist not null, input not null; values not equal - // - exist not null, input not null; values equal - - // Define PrimitiveGroupValueBuilder let mut builder = BooleanGroupValueBuilder::::new(); let builder_array = Arc::new(BooleanArray::from(vec![ None, @@ -280,7 +280,6 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5]); - // Define input array let (values, _nulls) = BooleanArray::from(vec![ Some(true), Some(false), @@ -291,18 +290,16 @@ mod tests { ]) .into_parts(); - // explicitly build a null buffer where one of the null values also happens to match let mut nulls = NullBufferBuilder::new(6); nulls.append_non_null(); - nulls.append_null(); // this sets Some(false) to null above + nulls.append_null(); nulls.append_null(); nulls.append_null(); nulls.append_non_null(); nulls.append_non_null(); let input_array = Arc::new(BooleanArray::new(values, nulls.finish())) as ArrayRef; - // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5], @@ -310,13 +307,14 @@ mod tests { &[0, 1, 2, 3, 4, 5], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(!equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(equal_to_results[5]); + let results = to_vec(&equal_to_results); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(!results[3]); + assert!(!results[4]); + assert!(results[5]); } #[test] @@ -333,10 +331,11 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -357,7 +356,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -377,14 +376,9 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut BooleanBufferBuilder, ), { - // Will cover such cases: - // - values equal - // - values not equal - - // Define PrimitiveGroupValueBuilder let mut builder = BooleanGroupValueBuilder::::new(); let builder_array = Arc::new(BooleanArray::from(vec![ Some(false), @@ -394,7 +388,6 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3]); - // Define input array let input_array = Arc::new(BooleanArray::from(vec![ Some(false), Some(false), @@ -402,8 +395,7 @@ mod tests { Some(true), ])) as ArrayRef; - // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, &[0, 1, 2, 3], @@ -411,18 +403,16 @@ mod tests { &[0, 1, 2, 3], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(!equal_to_results[1]); - assert!(!equal_to_results[2]); - assert!(equal_to_results[3]); + assert!(results[0]); + assert!(!results[1]); + assert!(!results[2]); + assert!(results[3]); } #[test] fn test_nullable_boolean_vectorized_operation_special_case() { - // Test the special `all nulls` or `not nulls` input array case - // for vectorized append and equal to - let mut builder = BooleanGroupValueBuilder::::new(); // All nulls input array @@ -432,19 +422,20 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(BooleanArray::from(vec![ @@ -458,18 +449,19 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index cd173741b6464..ef99ef37b472d 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -20,15 +20,14 @@ use crate::aggregates::group_values::multi_group_by::{ }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{ - Array, ArrayRef, AsArray, BufferBuilder, GenericBinaryArray, GenericByteArray, - GenericStringArray, OffsetSizeTrait, types::GenericStringType, + Array, ArrayRef, AsArray, BooleanBufferBuilder, BufferBuilder, GenericBinaryArray, + GenericByteArray, GenericStringArray, OffsetSizeTrait, types::GenericStringType, }; use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{ByteArrayType, DataType, GenericBinaryType}; use datafusion_common::utils::proxy::VecAllocExt; use datafusion_common::{Result, exec_datafusion_err}; use datafusion_physical_expr_common::binary_map::{INITIAL_BUFFER_CAPACITY, OutputType}; -use itertools::izip; use std::mem::size_of; use std::sync::Arc; use std::vec; @@ -106,25 +105,22 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) where B: ByteArrayType, { let array = array.as_bytes::(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } - *equal_to_result = self.do_equal_to_inner(lhs_row, array, rhs_row); + if !self.do_equal_to_inner(lhs_row, array, rhs_row) { + equal_to_results.set_bit(idx, false); + } } } @@ -275,7 +271,7 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { // Sanity array type match self.output_type { @@ -433,12 +429,22 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::bytes::ByteGroupValueBuilder; - use arrow::array::{ArrayRef, NullBufferBuilder, StringArray}; + use arrow::array::{ArrayRef, BooleanBufferBuilder, NullBufferBuilder, StringArray}; use datafusion_common::DataFusionError; use datafusion_physical_expr::binary_map::OutputType; use super::GroupColumn; + fn make_true_buffer(n: usize) -> BooleanBufferBuilder { + let mut buf = BooleanBufferBuilder::new(n); + buf.append_n(n, true); + buf + } + + fn to_vec(buf: &BooleanBufferBuilder) -> Vec { + (0..buf.len()).map(|i| buf.get_bit(i)).collect() + } + #[test] fn test_byte_group_value_builder_overflow() { let mut builder = ByteGroupValueBuilder::::new(OutputType::Utf8); @@ -520,10 +526,11 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -544,7 +551,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -558,9 +565,6 @@ mod tests { #[test] fn test_byte_vectorized_operation_special_case() { - // Test the special `all nulls` or `not nulls` input array case - // for vectorized append and equal to - let mut builder = ByteGroupValueBuilder::::new(OutputType::Utf8); // All nulls input array @@ -575,19 +579,20 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(StringArray::from(vec![ @@ -601,19 +606,20 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } fn test_byte_equal_to_internal(mut append: A, mut equal_to: E) @@ -624,18 +630,9 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut BooleanBufferBuilder, ), { - // Will cover such cases: - // - exist null, input not null - // - exist null, input null; values not equal - // - exist null, input null; values equal - // - exist not null, input null - // - exist not null, input not null; values not equal - // - exist not null, input not null; values equal - - // Define ByteGroupValueBuilder let mut builder = ByteGroupValueBuilder::::new(OutputType::Utf8); let builder_array = Arc::new(StringArray::from(vec![ None, @@ -647,7 +644,6 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5]); - // Define input array let (offsets, buffer, _nulls) = StringArray::from(vec![ Some("foo"), Some("bar"), @@ -658,10 +654,9 @@ mod tests { ]) .into_parts(); - // explicitly build a boolean buffer where one of the null values also happens to match let mut nulls = NullBufferBuilder::new(6); nulls.append_non_null(); - nulls.append_null(); // this sets Some("bar") to null above + nulls.append_null(); nulls.append_null(); nulls.append_null(); nulls.append_non_null(); @@ -669,8 +664,7 @@ mod tests { let input_array = Arc::new(StringArray::new(offsets, buffer, nulls.finish())) as ArrayRef; - // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5], @@ -678,12 +672,13 @@ mod tests { &[0, 1, 2, 3, 4, 5], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(!equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(equal_to_results[5]); + let results = to_vec(&equal_to_results); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(!results[3]); + assert!(!results[4]); + assert!(results[5]); } } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index a91dd3115d879..f82aaad0dcc29 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -19,11 +19,13 @@ use crate::aggregates::group_values::multi_group_by::{ GroupColumn, Nulls, nulls_equal_to, }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; -use arrow::array::{Array, ArrayRef, AsArray, ByteView, GenericByteViewArray, make_view}; +use arrow::array::{ + Array, ArrayRef, AsArray, BooleanBufferBuilder, ByteView, GenericByteViewArray, + make_view, +}; use arrow::buffer::{Buffer, ScalarBuffer}; use arrow::datatypes::ByteViewType; use datafusion_common::Result; -use itertools::izip; use std::marker::PhantomData; use std::mem::{replace, size_of}; use std::sync::Arc; @@ -126,22 +128,19 @@ impl ByteViewGroupValueBuilder { lhs_rows: &[usize], array: &GenericByteViewArray, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } - *equal_to_result = - self.do_equal_to_inner::(lhs_row, array, rhs_row); + if !self.do_equal_to_inner::(lhs_row, array, rhs_row) + { + equal_to_results.set_bit(idx, false); + } } } @@ -513,12 +512,11 @@ impl GroupColumn for ByteViewGroupValueBuilder { group_indices: &[usize], array: &ArrayRef, rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { let has_nulls = array.null_count() != 0; let array = array.as_byte_view::(); let has_buffers = !array.data_buffers().is_empty(); - // call specialized version based on nulls and buffers presence match (has_nulls, has_buffers) { (true, true) => self.vectorized_equal_to_inner::( group_indices, @@ -584,11 +582,23 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder; - use arrow::array::{ArrayRef, AsArray, NullBufferBuilder, StringViewArray}; + use arrow::array::{ + ArrayRef, AsArray, BooleanBufferBuilder, NullBufferBuilder, StringViewArray, + }; use arrow::datatypes::StringViewType; use super::GroupColumn; + fn make_true_buffer(n: usize) -> BooleanBufferBuilder { + let mut buf = BooleanBufferBuilder::new(n); + buf.append_n(n, true); + buf + } + + fn to_vec(buf: &BooleanBufferBuilder) -> Vec { + (0..buf.len()).map(|i| buf.get_bit(i)).collect() + } + #[test] fn test_byte_view_append_val() { let mut builder = @@ -627,10 +637,11 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -651,7 +662,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -665,9 +676,6 @@ mod tests { #[test] fn test_byte_view_vectorized_operation_special_case() { - // Test the special `all nulls` or `not nulls` input array case - // for vectorized append and equal to - let mut builder = ByteViewGroupValueBuilder::::new().with_max_block_size(60); @@ -683,19 +691,20 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(StringViewArray::from(vec![ @@ -709,19 +718,20 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } fn test_byte_view_equal_to_internal(mut append: A, mut equal_to: E) @@ -732,35 +742,9 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut BooleanBufferBuilder, ), { - // Will cover such cases: - // - exist null, input not null - // - exist null, input null; values not equal - // - exist null, input null; values equal - // - exist not null, input null - // - exist not null, input not null; value lens not equal - // - exist not null, input not null; value not equal(inlined case) - // - exist not null, input not null; value equal(inlined case) - // - // - exist not null, input not null; value not equal - // (non-inlined case + prefix not equal) - // - // - exist not null, input not null; value not equal - // (non-inlined case + value in `completed`) - // - // - exist not null, input not null; value equal - // (non-inlined case + value in `completed`) - // - // - exist not null, input not null; value not equal - // (non-inlined case + value in `in_progress`) - // - // - exist not null, input not null; value equal - // (non-inlined case + value in `in_progress`) - - // Set the block size to 40 for ensuring some unlined values are in `in_progress`, - // and some are in `completed`, so both two branches in `value` function can be covered. let mut builder = ByteViewGroupValueBuilder::::new().with_max_block_size(60); let builder_array = Arc::new(StringViewArray::from(vec![ @@ -776,10 +760,9 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5, 6, 7, 8]); - // Define input array let (views, buffer, _nulls) = StringViewArray::from(vec![ Some("foo"), - Some("bar"), // set to null + Some("bar"), None, None, Some("baz"), @@ -793,10 +776,9 @@ mod tests { ]) .into_parts(); - // explicitly build a boolean buffer where one of the null values also happens to match let mut nulls = NullBufferBuilder::new(9); nulls.append_non_null(); - nulls.append_null(); // this sets Some("bar") to null above + nulls.append_null(); nulls.append_null(); nulls.append_null(); nulls.append_non_null(); @@ -810,8 +792,7 @@ mod tests { let input_array = Arc::new(StringViewArray::new(views, buffer, nulls.finish())) as ArrayRef; - // Check - let mut equal_to_results = vec![true; input_array.len()]; + let mut equal_to_results = make_true_buffer(input_array.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 8], @@ -819,19 +800,20 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(!equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(!equal_to_results[5]); - assert!(equal_to_results[6]); - assert!(!equal_to_results[7]); - assert!(!equal_to_results[8]); - assert!(equal_to_results[9]); - assert!(!equal_to_results[10]); - assert!(equal_to_results[11]); + let results = to_vec(&equal_to_results); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(!results[3]); + assert!(!results[4]); + assert!(!results[5]); + assert!(results[6]); + assert!(!results[7]); + assert!(!results[8]); + assert!(results[9]); + assert!(!results[10]); + assert!(results[11]); } #[test] diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs index cc4576eabddbd..2115e9a34da64 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs @@ -29,7 +29,7 @@ use crate::aggregates::group_values::multi_group_by::{ boolean::BooleanGroupValueBuilder, bytes::ByteGroupValueBuilder, bytes_view::ByteViewGroupValueBuilder, primitive::PrimitiveGroupValueBuilder, }; -use arrow::array::{Array, ArrayRef}; +use arrow::array::{Array, ArrayRef, BooleanBufferBuilder}; use arrow::compute::cast; use arrow::datatypes::{ BinaryViewType, DataType, Date32Type, Date64Type, Decimal128Type, Float32Type, @@ -82,7 +82,7 @@ pub trait GroupColumn: Send + Sync { lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ); /// The vectorized version `append_val` @@ -224,7 +224,6 @@ pub struct GroupValuesColumn { /// Buffers to store intermediate results in `vectorized_append` /// and `vectorized_equal_to`, for reducing memory allocation -#[derive(Default)] struct VectorizedOperationBuffers { /// The `vectorized append` row indices buffer append_row_indices: Vec, @@ -235,8 +234,8 @@ struct VectorizedOperationBuffers { /// The `vectorized_equal_to` group indices buffer equal_to_group_indices: Vec, - /// The `vectorized_equal_to` result buffer - equal_to_results: Vec, + /// The `vectorized_equal_to` result buffer (bitmask) + equal_to_results: BooleanBufferBuilder, /// The buffer for storing row indices found not equal to /// exist groups in `group_values` in `vectorized_equal_to`. @@ -244,12 +243,23 @@ struct VectorizedOperationBuffers { remaining_row_indices: Vec, } +impl Default for VectorizedOperationBuffers { + fn default() -> Self { + Self { + append_row_indices: Vec::new(), + equal_to_row_indices: Vec::new(), + equal_to_group_indices: Vec::new(), + equal_to_results: BooleanBufferBuilder::new(0), + remaining_row_indices: Vec::new(), + } + } +} + impl VectorizedOperationBuffers { fn clear(&mut self) { self.append_row_indices.clear(); self.equal_to_row_indices.clear(); self.equal_to_group_indices.clear(); - self.equal_to_results.clear(); self.remaining_row_indices.clear(); } } @@ -615,15 +625,16 @@ impl GroupValuesColumn { // 1. Perform `vectorized_equal_to` for `rows` in `vectorized_equal_to_group_indices` // and `group_indices` in `vectorized_equal_to_group_indices` - let mut equal_to_results = - mem::take(&mut self.vectorized_operation_buffers.equal_to_results); - equal_to_results.clear(); - equal_to_results.resize( - self.vectorized_operation_buffers - .equal_to_group_indices - .len(), - true, + let n = self + .vectorized_operation_buffers + .equal_to_group_indices + .len(); + let mut equal_to_results = mem::replace( + &mut self.vectorized_operation_buffers.equal_to_results, + BooleanBufferBuilder::new(0), ); + equal_to_results.truncate(0); + equal_to_results.append_n(n, true); for (col_idx, group_col) in self.group_values.iter().enumerate() { group_col.vectorized_equal_to( @@ -643,7 +654,7 @@ impl GroupValuesColumn { .iter() .enumerate() { - let equal_to_result = equal_to_results[idx]; + let equal_to_result = equal_to_results.get_bit(idx); // Equal to case, set the `group_indices` to `rows` in `groups` if equal_to_result { diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index 31126348b3fd4..be92027f58a81 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -20,12 +20,15 @@ use crate::aggregates::group_values::multi_group_by::{ }; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::ArrowNativeTypeOp; -use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray, cast::AsArray}; +use arrow::array::{ + Array, ArrayRef, ArrowPrimitiveType, BooleanBufferBuilder, PrimitiveArray, + cast::AsArray, +}; use arrow::buffer::ScalarBuffer; use arrow::datatypes::DataType; +use arrow::util::bit_util::apply_bitwise_binary_op; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; -use itertools::izip; use std::iter; use std::sync::Arc; @@ -62,43 +65,45 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { assert!( !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()), "called with nullable input" ); let array_values = array.as_primitive::().values(); + let n = lhs_rows.len(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); + // Build a packed comparison bitmask, then AND it into equal_to_results + let num_bytes = n.div_ceil(8); + let mut cmp_buf = vec![0u8; num_bytes]; - for (&lhs_row, &rhs_row, equal_to_result) in iter { - let result = { - // Getting unchecked not only for bound checks but because the bound checks are - // what prevents auto-vectorization - let left = if cfg!(debug_assertions) { - self.group_values[lhs_row] - } else { - // SAFETY: indices are guaranteed to be in bounds - unsafe { *self.group_values.get_unchecked(lhs_row) } - }; - let right = if cfg!(debug_assertions) { - array_values[rhs_row] - } else { - // SAFETY: indices are guaranteed to be in bounds - unsafe { *array_values.get_unchecked(rhs_row) } - }; - - // Always evaluate, to allow for auto-vectorization - left.is_eq(right) + for (i, (&lhs_row, &rhs_row)) in lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + let left = if cfg!(debug_assertions) { + self.group_values[lhs_row] + } else { + unsafe { *self.group_values.get_unchecked(lhs_row) } }; - - *equal_to_result = result && *equal_to_result; + let right = if cfg!(debug_assertions) { + array_values[rhs_row] + } else { + unsafe { *array_values.get_unchecked(rhs_row) } + }; + if left.is_eq(right) { + cmp_buf[i / 8] |= 1 << (i % 8); + } } + + // AND the comparison result into the existing equal_to_results bitmask + apply_bitwise_binary_op( + equal_to_results.as_slice_mut(), + 0, + &cmp_buf, + 0, + n, + |a, b| a & b, + ); } pub fn vectorized_equal_nullable( @@ -106,33 +111,30 @@ where lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { assert!(NULLABLE, "called with non-nullable input"); let array = array.as_primitive::(); - let iter = izip!( - lhs_rows.iter(), - rhs_rows.iter(), - equal_to_results.iter_mut(), - ); - - for (&lhs_row, &rhs_row, equal_to_result) in iter { - // Has found not equal to in previous column, don't need to check - if !*equal_to_result { + for (idx, (&lhs_row, &rhs_row)) in + lhs_rows.iter().zip(rhs_rows.iter()).enumerate() + { + if !equal_to_results.get_bit(idx) { continue; } - // Perf: skip null check (by short circuit) if input is not nullable let exist_null = self.nulls.is_null(lhs_row); let input_null = array.is_null(rhs_row); if let Some(result) = nulls_equal_to(exist_null, input_null) { - *equal_to_result = result; + if !result { + equal_to_results.set_bit(idx, false); + } continue; } - // Otherwise, we need to check their values - *equal_to_result = self.group_values[lhs_row].is_eq(array.value(rhs_row)); + if !self.group_values[lhs_row].is_eq(array.value(rhs_row)) { + equal_to_results.set_bit(idx, false); + } } } } @@ -176,7 +178,7 @@ impl GroupColumn lhs_rows: &[usize], array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut [bool], + equal_to_results: &mut BooleanBufferBuilder, ) { if !NULLABLE || (array.null_count() == 0 && !self.nulls.might_have_nulls()) { self.vectorized_equal_to_non_nullable( @@ -281,11 +283,23 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder; - use arrow::array::{ArrayRef, Float32Array, Int64Array, NullBufferBuilder}; + use arrow::array::{ + ArrayRef, BooleanBufferBuilder, Float32Array, Int64Array, NullBufferBuilder, + }; use arrow::datatypes::{DataType, Float32Type, Int64Type}; use super::GroupColumn; + fn make_true_buffer(n: usize) -> BooleanBufferBuilder { + let mut buf = BooleanBufferBuilder::new(n); + buf.append_n(n, true); + buf + } + + fn to_vec(buf: &BooleanBufferBuilder) -> Vec { + (0..buf.len()).map(|i| buf.get_bit(i)).collect() + } + #[test] fn test_nullable_primitive_equal_to() { let append = |builder: &mut PrimitiveGroupValueBuilder, @@ -300,10 +314,11 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -324,7 +339,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -344,18 +359,9 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut BooleanBufferBuilder, ), { - // Will cover such cases: - // - exist null, input not null - // - exist null, input null; values not equal - // - exist null, input null; values equal - // - exist not null, input null - // - exist not null, input not null; values not equal - // - exist not null, input not null; values equal - - // Define PrimitiveGroupValueBuilder let mut builder = PrimitiveGroupValueBuilder::::new(DataType::Float32); let builder_array = Arc::new(Float32Array::from(vec![ @@ -369,7 +375,6 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5, 6]); - // Define input array let (_, values, _nulls) = Float32Array::from(vec![ Some(1.0), Some(2.0), @@ -381,10 +386,9 @@ mod tests { ]) .into_parts(); - // explicitly build a null buffer where one of the null values also happens to match let mut nulls = NullBufferBuilder::new(6); nulls.append_non_null(); - nulls.append_null(); // this sets Some(2) to null above + nulls.append_null(); nulls.append_null(); nulls.append_non_null(); nulls.append_null(); @@ -392,8 +396,7 @@ mod tests { nulls.append_null(); let input_array = Arc::new(Float32Array::new(values, nulls.finish())) as ArrayRef; - // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, &[0, 1, 2, 3, 4, 5, 6], @@ -401,14 +404,15 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6], &mut equal_to_results, ); - - assert!(!equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(!equal_to_results[4]); - assert!(equal_to_results[5]); - assert!(!equal_to_results[6]); + let results = to_vec(&equal_to_results); + + assert!(!results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(!results[4]); + assert!(results[5]); + assert!(!results[6]); } #[test] @@ -425,10 +429,11 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { let iter = lhs_rows.iter().zip(rhs_rows.iter()); for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results[idx] = builder.equal_to(lhs_row, input_array, rhs_row); + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); } }; @@ -449,7 +454,7 @@ mod tests { lhs_rows: &[usize], input_array: &ArrayRef, rhs_rows: &[usize], - equal_to_results: &mut Vec| { + equal_to_results: &mut BooleanBufferBuilder| { builder.vectorized_equal_to( lhs_rows, input_array, @@ -469,25 +474,18 @@ mod tests { &[usize], &ArrayRef, &[usize], - &mut Vec, + &mut BooleanBufferBuilder, ), { - // Will cover such cases: - // - values equal - // - values not equal - - // Define PrimitiveGroupValueBuilder let mut builder = PrimitiveGroupValueBuilder::::new(DataType::Int64); let builder_array = Arc::new(Int64Array::from(vec![Some(0), Some(1)])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1]); - // Define input array let input_array = Arc::new(Int64Array::from(vec![Some(0), Some(2)])) as ArrayRef; - // Check - let mut equal_to_results = vec![true; builder.len()]; + let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, &[0, 1], @@ -495,16 +493,14 @@ mod tests { &[0, 1], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(!equal_to_results[1]); + assert!(results[0]); + assert!(!results[1]); } #[test] fn test_nullable_primitive_vectorized_operation_special_case() { - // Test the special `all nulls` or `not nulls` input array case - // for vectorized append and equal to - let mut builder = PrimitiveGroupValueBuilder::::new(DataType::Int64); @@ -520,19 +516,20 @@ mod tests { .vectorized_append(&all_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_nulls_input_array.len()); builder.vectorized_equal_to( &[0, 1, 2, 3, 4], &all_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); // All not nulls input array let all_not_nulls_input_array = Arc::new(Int64Array::from(vec![ @@ -546,18 +543,19 @@ mod tests { .vectorized_append(&all_not_nulls_input_array, &[0, 1, 2, 3, 4]) .unwrap(); - let mut equal_to_results = vec![true; all_not_nulls_input_array.len()]; + let mut equal_to_results = make_true_buffer(all_not_nulls_input_array.len()); builder.vectorized_equal_to( &[5, 6, 7, 8, 9], &all_not_nulls_input_array, &[0, 1, 2, 3, 4], &mut equal_to_results, ); + let results = to_vec(&equal_to_results); - assert!(equal_to_results[0]); - assert!(equal_to_results[1]); - assert!(equal_to_results[2]); - assert!(equal_to_results[3]); - assert!(equal_to_results[4]); + assert!(results[0]); + assert!(results[1]); + assert!(results[2]); + assert!(results[3]); + assert!(results[4]); } } From f3f0deff99d640f6c3d0897d78f06bbba9de1db2 Mon Sep 17 00:00:00 2001 From: Huy Mac Date: Tue, 28 Apr 2026 18:43:00 +0900 Subject: [PATCH 7/8] chore: add comments --- .../group_values/multi_group_by/boolean.rs | 22 +++++++++++++ .../group_values/multi_group_by/bytes.rs | 15 +++++++++ .../group_values/multi_group_by/bytes_view.rs | 33 +++++++++++++++++++ .../group_values/multi_group_by/primitive.rs | 22 +++++++++++++ 4 files changed, 92 insertions(+) diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs index 6da6d7085384d..858459fbaa4fd 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs @@ -269,6 +269,15 @@ mod tests { &mut BooleanBufferBuilder, ), { + // Will cover such cases: + // - exist null, input not null + // - exist null, input null; values not equal + // - exist null, input null; values equal + // - exist not null, input null + // - exist not null, input not null; values not equal + // - exist not null, input not null; values equal + + // Define BooleanGroupValueBuilder let mut builder = BooleanGroupValueBuilder::::new(); let builder_array = Arc::new(BooleanArray::from(vec![ None, @@ -280,6 +289,7 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5]); + // Define input array let (values, _nulls) = BooleanArray::from(vec![ Some(true), Some(false), @@ -290,6 +300,7 @@ mod tests { ]) .into_parts(); + // explicitly build a null buffer where one of the null values also happens to match let mut nulls = NullBufferBuilder::new(6); nulls.append_non_null(); nulls.append_null(); @@ -299,6 +310,7 @@ mod tests { nulls.append_non_null(); let input_array = Arc::new(BooleanArray::new(values, nulls.finish())) as ArrayRef; + // Check let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, @@ -379,6 +391,11 @@ mod tests { &mut BooleanBufferBuilder, ), { + // Will cover such cases: + // - values equal + // - values not equal + + // Define BooleanGroupValueBuilder let mut builder = BooleanGroupValueBuilder::::new(); let builder_array = Arc::new(BooleanArray::from(vec![ Some(false), @@ -388,6 +405,7 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3]); + // Define input array let input_array = Arc::new(BooleanArray::from(vec![ Some(false), Some(false), @@ -395,6 +413,7 @@ mod tests { Some(true), ])) as ArrayRef; + // Check let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, @@ -413,6 +432,9 @@ mod tests { #[test] fn test_nullable_boolean_vectorized_operation_special_case() { + // Test the special `all nulls` or `not nulls` input array case + // for vectorized append and equal to + let mut builder = BooleanGroupValueBuilder::::new(); // All nulls input array diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index ef99ef37b472d..93684764dc285 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -565,6 +565,9 @@ mod tests { #[test] fn test_byte_vectorized_operation_special_case() { + // Test the special `all nulls` or `not nulls` input array case + // for vectorized append and equal to + let mut builder = ByteGroupValueBuilder::::new(OutputType::Utf8); // All nulls input array @@ -633,6 +636,15 @@ mod tests { &mut BooleanBufferBuilder, ), { + // Will cover such cases: + // - exist null, input not null + // - exist null, input null; values not equal + // - exist null, input null; values equal + // - exist not null, input null + // - exist not null, input not null; values not equal + // - exist not null, input not null; values equal + + // Define ByteGroupValueBuilder let mut builder = ByteGroupValueBuilder::::new(OutputType::Utf8); let builder_array = Arc::new(StringArray::from(vec![ None, @@ -644,6 +656,7 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5]); + // Define input array let (offsets, buffer, _nulls) = StringArray::from(vec![ Some("foo"), Some("bar"), @@ -654,6 +667,7 @@ mod tests { ]) .into_parts(); + // explicitly build a null buffer where one of the null values also happens to match let mut nulls = NullBufferBuilder::new(6); nulls.append_non_null(); nulls.append_null(); @@ -664,6 +678,7 @@ mod tests { let input_array = Arc::new(StringArray::new(offsets, buffer, nulls.finish())) as ArrayRef; + // Check let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index f82aaad0dcc29..85b57123f5915 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -517,6 +517,7 @@ impl GroupColumn for ByteViewGroupValueBuilder { let has_nulls = array.null_count() != 0; let array = array.as_byte_view::(); let has_buffers = !array.data_buffers().is_empty(); + // call specialized version based on nulls and buffers presence match (has_nulls, has_buffers) { (true, true) => self.vectorized_equal_to_inner::( group_indices, @@ -676,6 +677,9 @@ mod tests { #[test] fn test_byte_view_vectorized_operation_special_case() { + // Test the special `all nulls` or `not nulls` input array case + // for vectorized append and equal to + let mut builder = ByteViewGroupValueBuilder::::new().with_max_block_size(60); @@ -745,6 +749,32 @@ mod tests { &mut BooleanBufferBuilder, ), { + // Will cover such cases: + // - exist null, input not null + // - exist null, input null; values not equal + // - exist null, input null; values equal + // - exist not null, input null + // - exist not null, input not null; value lens not equal + // - exist not null, input not null; value not equal(inlined case) + // - exist not null, input not null; value equal(inlined case) + // + // - exist not null, input not null; value not equal + // (non-inlined case + prefix not equal) + // + // - exist not null, input not null; value not equal + // (non-inlined case + value in `completed`) + // + // - exist not null, input not null; value equal + // (non-inlined case + value in `completed`) + // + // - exist not null, input not null; value not equal + // (non-inlined case + value in `in_progress`) + // + // - exist not null, input not null; value equal + // (non-inlined case + value in `in_progress`) + + // Set the block size to 40 for ensuring some unlined values are in `in_progress`, + // and some are in `completed`, so both two branches in `value` function can be covered. let mut builder = ByteViewGroupValueBuilder::::new().with_max_block_size(60); let builder_array = Arc::new(StringViewArray::from(vec![ @@ -760,6 +790,7 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5, 6, 7, 8]); + // Define input array let (views, buffer, _nulls) = StringViewArray::from(vec![ Some("foo"), Some("bar"), @@ -776,6 +807,7 @@ mod tests { ]) .into_parts(); + // explicitly build a null buffer where one of the null values also happens to match let mut nulls = NullBufferBuilder::new(9); nulls.append_non_null(); nulls.append_null(); @@ -792,6 +824,7 @@ mod tests { let input_array = Arc::new(StringViewArray::new(views, buffer, nulls.finish())) as ArrayRef; + // Check let mut equal_to_results = make_true_buffer(input_array.len()); equal_to( &builder, diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index be92027f58a81..1b5466f8f96ee 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -362,6 +362,15 @@ mod tests { &mut BooleanBufferBuilder, ), { + // Will cover such cases: + // - exist null, input not null + // - exist null, input null; values not equal + // - exist null, input null; values equal + // - exist not null, input null + // - exist not null, input not null; values not equal + // - exist not null, input not null; values equal + + // Define PrimitiveGroupValueBuilder let mut builder = PrimitiveGroupValueBuilder::::new(DataType::Float32); let builder_array = Arc::new(Float32Array::from(vec![ @@ -375,6 +384,7 @@ mod tests { ])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1, 2, 3, 4, 5, 6]); + // Define input array let (_, values, _nulls) = Float32Array::from(vec![ Some(1.0), Some(2.0), @@ -386,6 +396,7 @@ mod tests { ]) .into_parts(); + // explicitly build a null buffer where one of the null values also happens to match let mut nulls = NullBufferBuilder::new(6); nulls.append_non_null(); nulls.append_null(); @@ -396,6 +407,7 @@ mod tests { nulls.append_null(); let input_array = Arc::new(Float32Array::new(values, nulls.finish())) as ArrayRef; + // Check let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, @@ -477,14 +489,21 @@ mod tests { &mut BooleanBufferBuilder, ), { + // Will cover such cases: + // - values equal + // - values not equal + + // Define PrimitiveGroupValueBuilder let mut builder = PrimitiveGroupValueBuilder::::new(DataType::Int64); let builder_array = Arc::new(Int64Array::from(vec![Some(0), Some(1)])) as ArrayRef; append(&mut builder, &builder_array, &[0, 1]); + // Define input array let input_array = Arc::new(Int64Array::from(vec![Some(0), Some(2)])) as ArrayRef; + // Check let mut equal_to_results = make_true_buffer(builder.len()); equal_to( &builder, @@ -501,6 +520,9 @@ mod tests { #[test] fn test_nullable_primitive_vectorized_operation_special_case() { + // Test the special `all nulls` or `not nulls` input array case + // for vectorized append and equal to + let mut builder = PrimitiveGroupValueBuilder::::new(DataType::Int64); From f878431cb9c5be3911b8d204efdfc63d826d2c7a Mon Sep 17 00:00:00 2001 From: Huy Mac Date: Tue, 28 Apr 2026 19:33:37 +0900 Subject: [PATCH 8/8] chore: run cargo fmt --- .../group_values/multi_group_by/boolean.rs | 96 ++++++++++--------- .../group_values/multi_group_by/bytes.rs | 48 +++++----- .../group_values/multi_group_by/bytes_view.rs | 48 +++++----- .../group_values/multi_group_by/primitive.rs | 96 ++++++++++--------- 4 files changed, 150 insertions(+), 138 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs index 858459fbaa4fd..5fdbe434f9f30 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs @@ -217,17 +217,18 @@ mod tests { } }; - let equal_to = |builder: &BooleanGroupValueBuilder, - lhs_rows: &[usize], - input_array: &ArrayRef, - rhs_rows: &[usize], - equal_to_results: &mut BooleanBufferBuilder| { - let iter = lhs_rows.iter().zip(rhs_rows.iter()); - for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results - .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); - } - }; + let equal_to = + |builder: &BooleanGroupValueBuilder, + lhs_rows: &[usize], + input_array: &ArrayRef, + rhs_rows: &[usize], + equal_to_results: &mut BooleanBufferBuilder| { + let iter = lhs_rows.iter().zip(rhs_rows.iter()); + for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + } + }; test_nullable_boolean_equal_to_internal(append, equal_to); } @@ -242,18 +243,19 @@ mod tests { .unwrap(); }; - let equal_to = |builder: &BooleanGroupValueBuilder, - lhs_rows: &[usize], - input_array: &ArrayRef, - rhs_rows: &[usize], - equal_to_results: &mut BooleanBufferBuilder| { - builder.vectorized_equal_to( - lhs_rows, - input_array, - rhs_rows, - equal_to_results, - ); - }; + let equal_to = + |builder: &BooleanGroupValueBuilder, + lhs_rows: &[usize], + input_array: &ArrayRef, + rhs_rows: &[usize], + equal_to_results: &mut BooleanBufferBuilder| { + builder.vectorized_equal_to( + lhs_rows, + input_array, + rhs_rows, + equal_to_results, + ); + }; test_nullable_boolean_equal_to_internal(append, equal_to); } @@ -339,17 +341,18 @@ mod tests { } }; - let equal_to = |builder: &BooleanGroupValueBuilder, - lhs_rows: &[usize], - input_array: &ArrayRef, - rhs_rows: &[usize], - equal_to_results: &mut BooleanBufferBuilder| { - let iter = lhs_rows.iter().zip(rhs_rows.iter()); - for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results - .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); - } - }; + let equal_to = + |builder: &BooleanGroupValueBuilder, + lhs_rows: &[usize], + input_array: &ArrayRef, + rhs_rows: &[usize], + equal_to_results: &mut BooleanBufferBuilder| { + let iter = lhs_rows.iter().zip(rhs_rows.iter()); + for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + } + }; test_not_nullable_boolean_equal_to_internal(append, equal_to); } @@ -364,18 +367,19 @@ mod tests { .unwrap(); }; - let equal_to = |builder: &BooleanGroupValueBuilder, - lhs_rows: &[usize], - input_array: &ArrayRef, - rhs_rows: &[usize], - equal_to_results: &mut BooleanBufferBuilder| { - builder.vectorized_equal_to( - lhs_rows, - input_array, - rhs_rows, - equal_to_results, - ); - }; + let equal_to = + |builder: &BooleanGroupValueBuilder, + lhs_rows: &[usize], + input_array: &ArrayRef, + rhs_rows: &[usize], + equal_to_results: &mut BooleanBufferBuilder| { + builder.vectorized_equal_to( + lhs_rows, + input_array, + rhs_rows, + equal_to_results, + ); + }; test_not_nullable_boolean_equal_to_internal(append, equal_to); } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index 93684764dc285..e407be5e390dc 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -522,17 +522,18 @@ mod tests { } }; - let equal_to = |builder: &ByteGroupValueBuilder, - lhs_rows: &[usize], - input_array: &ArrayRef, - rhs_rows: &[usize], - equal_to_results: &mut BooleanBufferBuilder| { - let iter = lhs_rows.iter().zip(rhs_rows.iter()); - for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results - .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); - } - }; + let equal_to = + |builder: &ByteGroupValueBuilder, + lhs_rows: &[usize], + input_array: &ArrayRef, + rhs_rows: &[usize], + equal_to_results: &mut BooleanBufferBuilder| { + let iter = lhs_rows.iter().zip(rhs_rows.iter()); + for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + } + }; test_byte_equal_to_internal(append, equal_to); } @@ -547,18 +548,19 @@ mod tests { .unwrap(); }; - let equal_to = |builder: &ByteGroupValueBuilder, - lhs_rows: &[usize], - input_array: &ArrayRef, - rhs_rows: &[usize], - equal_to_results: &mut BooleanBufferBuilder| { - builder.vectorized_equal_to( - lhs_rows, - input_array, - rhs_rows, - equal_to_results, - ); - }; + let equal_to = + |builder: &ByteGroupValueBuilder, + lhs_rows: &[usize], + input_array: &ArrayRef, + rhs_rows: &[usize], + equal_to_results: &mut BooleanBufferBuilder| { + builder.vectorized_equal_to( + lhs_rows, + input_array, + rhs_rows, + equal_to_results, + ); + }; test_byte_equal_to_internal(append, equal_to); } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index 85b57123f5915..9267cf4f27f35 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -634,17 +634,18 @@ mod tests { } }; - let equal_to = |builder: &ByteViewGroupValueBuilder, - lhs_rows: &[usize], - input_array: &ArrayRef, - rhs_rows: &[usize], - equal_to_results: &mut BooleanBufferBuilder| { - let iter = lhs_rows.iter().zip(rhs_rows.iter()); - for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results - .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); - } - }; + let equal_to = + |builder: &ByteViewGroupValueBuilder, + lhs_rows: &[usize], + input_array: &ArrayRef, + rhs_rows: &[usize], + equal_to_results: &mut BooleanBufferBuilder| { + let iter = lhs_rows.iter().zip(rhs_rows.iter()); + for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + } + }; test_byte_view_equal_to_internal(append, equal_to); } @@ -659,18 +660,19 @@ mod tests { .unwrap(); }; - let equal_to = |builder: &ByteViewGroupValueBuilder, - lhs_rows: &[usize], - input_array: &ArrayRef, - rhs_rows: &[usize], - equal_to_results: &mut BooleanBufferBuilder| { - builder.vectorized_equal_to( - lhs_rows, - input_array, - rhs_rows, - equal_to_results, - ); - }; + let equal_to = + |builder: &ByteViewGroupValueBuilder, + lhs_rows: &[usize], + input_array: &ArrayRef, + rhs_rows: &[usize], + equal_to_results: &mut BooleanBufferBuilder| { + builder.vectorized_equal_to( + lhs_rows, + input_array, + rhs_rows, + equal_to_results, + ); + }; test_byte_view_equal_to_internal(append, equal_to); } diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index 1b5466f8f96ee..bdc06fa553de5 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -310,17 +310,18 @@ mod tests { } }; - let equal_to = |builder: &PrimitiveGroupValueBuilder, - lhs_rows: &[usize], - input_array: &ArrayRef, - rhs_rows: &[usize], - equal_to_results: &mut BooleanBufferBuilder| { - let iter = lhs_rows.iter().zip(rhs_rows.iter()); - for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results - .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); - } - }; + let equal_to = + |builder: &PrimitiveGroupValueBuilder, + lhs_rows: &[usize], + input_array: &ArrayRef, + rhs_rows: &[usize], + equal_to_results: &mut BooleanBufferBuilder| { + let iter = lhs_rows.iter().zip(rhs_rows.iter()); + for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + } + }; test_nullable_primitive_equal_to_internal(append, equal_to); } @@ -335,18 +336,19 @@ mod tests { .unwrap(); }; - let equal_to = |builder: &PrimitiveGroupValueBuilder, - lhs_rows: &[usize], - input_array: &ArrayRef, - rhs_rows: &[usize], - equal_to_results: &mut BooleanBufferBuilder| { - builder.vectorized_equal_to( - lhs_rows, - input_array, - rhs_rows, - equal_to_results, - ); - }; + let equal_to = + |builder: &PrimitiveGroupValueBuilder, + lhs_rows: &[usize], + input_array: &ArrayRef, + rhs_rows: &[usize], + equal_to_results: &mut BooleanBufferBuilder| { + builder.vectorized_equal_to( + lhs_rows, + input_array, + rhs_rows, + equal_to_results, + ); + }; test_nullable_primitive_equal_to_internal(append, equal_to); } @@ -437,17 +439,18 @@ mod tests { } }; - let equal_to = |builder: &PrimitiveGroupValueBuilder, - lhs_rows: &[usize], - input_array: &ArrayRef, - rhs_rows: &[usize], - equal_to_results: &mut BooleanBufferBuilder| { - let iter = lhs_rows.iter().zip(rhs_rows.iter()); - for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { - equal_to_results - .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); - } - }; + let equal_to = + |builder: &PrimitiveGroupValueBuilder, + lhs_rows: &[usize], + input_array: &ArrayRef, + rhs_rows: &[usize], + equal_to_results: &mut BooleanBufferBuilder| { + let iter = lhs_rows.iter().zip(rhs_rows.iter()); + for (idx, (&lhs_row, &rhs_row)) in iter.enumerate() { + equal_to_results + .set_bit(idx, builder.equal_to(lhs_row, input_array, rhs_row)); + } + }; test_not_nullable_primitive_equal_to_internal(append, equal_to); } @@ -462,18 +465,19 @@ mod tests { .unwrap(); }; - let equal_to = |builder: &PrimitiveGroupValueBuilder, - lhs_rows: &[usize], - input_array: &ArrayRef, - rhs_rows: &[usize], - equal_to_results: &mut BooleanBufferBuilder| { - builder.vectorized_equal_to( - lhs_rows, - input_array, - rhs_rows, - equal_to_results, - ); - }; + let equal_to = + |builder: &PrimitiveGroupValueBuilder, + lhs_rows: &[usize], + input_array: &ArrayRef, + rhs_rows: &[usize], + equal_to_results: &mut BooleanBufferBuilder| { + builder.vectorized_equal_to( + lhs_rows, + input_array, + rhs_rows, + equal_to_results, + ); + }; test_not_nullable_primitive_equal_to_internal(append, equal_to); }