1818use std:: sync:: Arc ;
1919
2020use crate :: aggregates:: group_values:: multi_group_by:: Nulls ;
21- use crate :: aggregates:: group_values:: multi_group_by:: { GroupColumn , nulls_equal_to} ;
21+ use crate :: aggregates:: group_values:: multi_group_by:: { EqualToResults , GroupColumn , nulls_equal_to} ;
2222use crate :: aggregates:: group_values:: null_builder:: MaybeNullBufferBuilder ;
2323use arrow:: array:: { Array as _, ArrayRef , AsArray , BooleanArray , BooleanBufferBuilder } ;
2424use datafusion_common:: Result ;
25- use itertools:: izip;
2625
2726/// An implementation of [`GroupColumn`] for booleans
2827///
@@ -81,32 +80,34 @@ impl<const NULLABLE: bool> GroupColumn for BooleanGroupValueBuilder<NULLABLE> {
8180 lhs_rows : & [ usize ] ,
8281 array : & ArrayRef ,
8382 rhs_rows : & [ usize ] ,
84- equal_to_results : & mut [ bool ] ,
83+ equal_to_results : & mut EqualToResults ,
8584 ) {
8685 let array = array. as_boolean ( ) ;
8786
88- let iter = izip ! (
89- lhs_rows. iter( ) ,
90- rhs_rows. iter( ) ,
91- equal_to_results. iter_mut( ) ,
92- ) ;
93-
94- for ( & lhs_row, & rhs_row, equal_to_result) in iter {
95- // Has found not equal to in previous column, don't need to check
96- if !* equal_to_result {
87+ for ( idx, ( & lhs_row, & rhs_row) ) in
88+ lhs_rows. iter ( ) . zip ( rhs_rows. iter ( ) ) . enumerate ( )
89+ {
90+ if !equal_to_results. get_bit ( idx) {
9791 continue ;
9892 }
9993
94+ let chunk_idx = idx / 64 ;
95+ let bit_pos = idx % 64 ;
96+
10097 if NULLABLE {
10198 let exist_null = self . nulls . is_null ( lhs_row) ;
10299 let input_null = array. is_null ( rhs_row) ;
103100 if let Some ( result) = nulls_equal_to ( exist_null, input_null) {
104- * equal_to_result = result;
101+ if !result {
102+ equal_to_results. and_chunk ( chunk_idx, !( 1u64 << bit_pos) ) ;
103+ }
105104 continue ;
106105 }
107106 }
108107
109- * equal_to_result = self . buffer . get_bit ( lhs_row) == array. value ( rhs_row) ;
108+ if self . buffer . get_bit ( lhs_row) != array. value ( rhs_row) {
109+ equal_to_results. and_chunk ( chunk_idx, !( 1u64 << bit_pos) ) ;
110+ }
110111 }
111112 }
112113
@@ -195,6 +196,7 @@ impl<const NULLABLE: bool> GroupColumn for BooleanGroupValueBuilder<NULLABLE> {
195196
196197#[ cfg( test) ]
197198mod tests {
199+ use crate :: aggregates:: group_values:: multi_group_by:: EqualToResults ;
198200 use arrow:: array:: NullBufferBuilder ;
199201
200202 use super :: * ;
@@ -213,10 +215,10 @@ mod tests {
213215 lhs_rows : & [ usize ] ,
214216 input_array : & ArrayRef ,
215217 rhs_rows : & [ usize ] ,
216- equal_to_results : & mut Vec < bool > | {
218+ equal_to_results : & mut EqualToResults | {
217219 let iter = lhs_rows. iter ( ) . zip ( rhs_rows. iter ( ) ) ;
218220 for ( idx, ( & lhs_row, & rhs_row) ) in iter. enumerate ( ) {
219- equal_to_results[ idx] = builder. equal_to ( lhs_row, input_array, rhs_row) ;
221+ equal_to_results. set_bit ( idx, builder. equal_to ( lhs_row, input_array, rhs_row) ) ;
220222 }
221223 } ;
222224
@@ -237,7 +239,7 @@ mod tests {
237239 lhs_rows : & [ usize ] ,
238240 input_array : & ArrayRef ,
239241 rhs_rows : & [ usize ] ,
240- equal_to_results : & mut Vec < bool > | {
242+ equal_to_results : & mut EqualToResults | {
241243 builder. vectorized_equal_to (
242244 lhs_rows,
243245 input_array,
@@ -257,7 +259,7 @@ mod tests {
257259 & [ usize ] ,
258260 & ArrayRef ,
259261 & [ usize ] ,
260- & mut Vec < bool > ,
262+ & mut EqualToResults ,
261263 ) ,
262264 {
263265 // Will cover such cases:
@@ -302,21 +304,23 @@ mod tests {
302304 let input_array = Arc :: new ( BooleanArray :: new ( values, nulls. finish ( ) ) ) as ArrayRef ;
303305
304306 // Check
305- let mut equal_to_results = vec ! [ true ; builder. len( ) ] ;
307+ let mut equal_to_results = EqualToResults :: new ( ) ;
308+ equal_to_results. reset ( builder. len ( ) ) ;
306309 equal_to (
307310 & builder,
308311 & [ 0 , 1 , 2 , 3 , 4 , 5 ] ,
309312 & input_array,
310313 & [ 0 , 1 , 2 , 3 , 4 , 5 ] ,
311314 & mut equal_to_results,
312315 ) ;
313-
314- assert ! ( !equal_to_results[ 0 ] ) ;
315- assert ! ( equal_to_results[ 1 ] ) ;
316- assert ! ( equal_to_results[ 2 ] ) ;
317- assert ! ( !equal_to_results[ 3 ] ) ;
318- assert ! ( !equal_to_results[ 4 ] ) ;
319- assert ! ( equal_to_results[ 5 ] ) ;
316+ let results = equal_to_results. to_vec ( ) ;
317+
318+ assert ! ( !results[ 0 ] ) ;
319+ assert ! ( results[ 1 ] ) ;
320+ assert ! ( results[ 2 ] ) ;
321+ assert ! ( !results[ 3 ] ) ;
322+ assert ! ( !results[ 4 ] ) ;
323+ assert ! ( results[ 5 ] ) ;
320324 }
321325
322326 #[ test]
@@ -333,10 +337,10 @@ mod tests {
333337 lhs_rows : & [ usize ] ,
334338 input_array : & ArrayRef ,
335339 rhs_rows : & [ usize ] ,
336- equal_to_results : & mut Vec < bool > | {
340+ equal_to_results : & mut EqualToResults | {
337341 let iter = lhs_rows. iter ( ) . zip ( rhs_rows. iter ( ) ) ;
338342 for ( idx, ( & lhs_row, & rhs_row) ) in iter. enumerate ( ) {
339- equal_to_results[ idx] = builder. equal_to ( lhs_row, input_array, rhs_row) ;
343+ equal_to_results. set_bit ( idx, builder. equal_to ( lhs_row, input_array, rhs_row) ) ;
340344 }
341345 } ;
342346
@@ -357,7 +361,7 @@ mod tests {
357361 lhs_rows : & [ usize ] ,
358362 input_array : & ArrayRef ,
359363 rhs_rows : & [ usize ] ,
360- equal_to_results : & mut Vec < bool > | {
364+ equal_to_results : & mut EqualToResults | {
361365 builder. vectorized_equal_to (
362366 lhs_rows,
363367 input_array,
@@ -377,7 +381,7 @@ mod tests {
377381 & [ usize ] ,
378382 & ArrayRef ,
379383 & [ usize ] ,
380- & mut Vec < bool > ,
384+ & mut EqualToResults ,
381385 ) ,
382386 {
383387 // Will cover such cases:
@@ -403,19 +407,21 @@ mod tests {
403407 ] ) ) as ArrayRef ;
404408
405409 // Check
406- let mut equal_to_results = vec ! [ true ; builder. len( ) ] ;
410+ let mut equal_to_results = EqualToResults :: new ( ) ;
411+ equal_to_results. reset ( builder. len ( ) ) ;
407412 equal_to (
408413 & builder,
409414 & [ 0 , 1 , 2 , 3 ] ,
410415 & input_array,
411416 & [ 0 , 1 , 2 , 3 ] ,
412417 & mut equal_to_results,
413418 ) ;
419+ let results = equal_to_results. to_vec ( ) ;
414420
415- assert ! ( equal_to_results [ 0 ] ) ;
416- assert ! ( !equal_to_results [ 1 ] ) ;
417- assert ! ( !equal_to_results [ 2 ] ) ;
418- assert ! ( equal_to_results [ 3 ] ) ;
421+ assert ! ( results [ 0 ] ) ;
422+ assert ! ( !results [ 1 ] ) ;
423+ assert ! ( !results [ 2 ] ) ;
424+ assert ! ( results [ 3 ] ) ;
419425 }
420426
421427 #[ test]
@@ -432,19 +438,21 @@ mod tests {
432438 . vectorized_append ( & all_nulls_input_array, & [ 0 , 1 , 2 , 3 , 4 ] )
433439 . unwrap ( ) ;
434440
435- let mut equal_to_results = vec ! [ true ; all_nulls_input_array. len( ) ] ;
441+ let mut equal_to_results = EqualToResults :: new ( ) ;
442+ equal_to_results. reset ( all_nulls_input_array. len ( ) ) ;
436443 builder. vectorized_equal_to (
437444 & [ 0 , 1 , 2 , 3 , 4 ] ,
438445 & all_nulls_input_array,
439446 & [ 0 , 1 , 2 , 3 , 4 ] ,
440447 & mut equal_to_results,
441448 ) ;
449+ let results = equal_to_results. to_vec ( ) ;
442450
443- assert ! ( equal_to_results [ 0 ] ) ;
444- assert ! ( equal_to_results [ 1 ] ) ;
445- assert ! ( equal_to_results [ 2 ] ) ;
446- assert ! ( equal_to_results [ 3 ] ) ;
447- assert ! ( equal_to_results [ 4 ] ) ;
451+ assert ! ( results [ 0 ] ) ;
452+ assert ! ( results [ 1 ] ) ;
453+ assert ! ( results [ 2 ] ) ;
454+ assert ! ( results [ 3 ] ) ;
455+ assert ! ( results [ 4 ] ) ;
448456
449457 // All not nulls input array
450458 let all_not_nulls_input_array = Arc :: new ( BooleanArray :: from ( vec ! [
@@ -458,18 +466,20 @@ mod tests {
458466 . vectorized_append ( & all_not_nulls_input_array, & [ 0 , 1 , 2 , 3 , 4 ] )
459467 . unwrap ( ) ;
460468
461- let mut equal_to_results = vec ! [ true ; all_not_nulls_input_array. len( ) ] ;
469+ let mut equal_to_results = EqualToResults :: new ( ) ;
470+ equal_to_results. reset ( all_not_nulls_input_array. len ( ) ) ;
462471 builder. vectorized_equal_to (
463472 & [ 5 , 6 , 7 , 8 , 9 ] ,
464473 & all_not_nulls_input_array,
465474 & [ 0 , 1 , 2 , 3 , 4 ] ,
466475 & mut equal_to_results,
467476 ) ;
477+ let results = equal_to_results. to_vec ( ) ;
468478
469- assert ! ( equal_to_results [ 0 ] ) ;
470- assert ! ( equal_to_results [ 1 ] ) ;
471- assert ! ( equal_to_results [ 2 ] ) ;
472- assert ! ( equal_to_results [ 3 ] ) ;
473- assert ! ( equal_to_results [ 4 ] ) ;
479+ assert ! ( results [ 0 ] ) ;
480+ assert ! ( results [ 1 ] ) ;
481+ assert ! ( results [ 2 ] ) ;
482+ assert ! ( results [ 3 ] ) ;
483+ assert ! ( results [ 4 ] ) ;
474484 }
475485}
0 commit comments