@@ -24,6 +24,7 @@ use datafusion_expr::EmitTo;
2424use std:: collections:: HashMap ;
2525use std:: marker:: PhantomData ;
2626use std:: mem;
27+ use std:: sync:: Arc ;
2728pub struct GroupValuesDictionary < K : ArrowDictionaryKeyType + Send > {
2829 /*
2930 We know that every single &[ArrayRef] that is passed in is a dictionary array
@@ -91,7 +92,7 @@ impl<K: ArrowDictionaryKeyType + Send> GroupValues for GroupValuesDictionary<K>
9192 "GroupValuesDictionary only supports single column group by" . to_string ( ) ,
9293 ) ) ;
9394 }
94- let array = cols[ 0 ] . clone ( ) ;
95+ let array = Arc :: clone ( & cols[ 0 ] ) ;
9596 groups. clear ( ) ; // zero out buffer
9697 let dict_array = array
9798 . as_any ( )
@@ -169,7 +170,7 @@ impl<K: ArrowDictionaryKeyType + Send> GroupValues for GroupValuesDictionary<K>
169170 let values = ScalarValue :: iter_to_array ( columns. into_iter ( ) ) ?;
170171
171172 let dict_array = DictionaryArray :: < K > :: try_new ( keys, values) ?;
172- Ok ( vec ! [ std :: sync :: Arc :: new( dict_array) ] )
173+ Ok ( vec ! [ Arc :: new( dict_array) ] )
173174 }
174175 fn clear_shrink ( & mut self , num_rows : usize ) {
175176 self . seen_elements . clear ( ) ;
@@ -214,26 +215,21 @@ mod group_values_trait_test {
214215 assert_eq ! ( result. len( ) , 1 , "Expected exactly one array in emit result" ) ;
215216 let array = & result[ 0 ] ;
216217
217- // Check the data type explicitly
218218 match array. data_type ( ) {
219219 DataType :: Dictionary ( key_type, value_type) => {
220220 // Verify it's the expected key type (UInt8 in our tests)
221221 match key_type. as_ref ( ) {
222- DataType :: UInt8 => {
223- println ! ( "Dictionary key type is UInt8" ) ;
224- }
225- other => panic ! ( "Expected UInt8 key type, got {:?}" , other) ,
222+ DataType :: UInt8 => { }
223+ other => panic ! ( "Expected UInt8 key type, got {other:?}" ) ,
226224 }
227225
228226 // Verify it's the expected value type (Utf8 in our tests)
229227 match value_type. as_ref ( ) {
230- DataType :: Utf8 => {
231- println ! ( "Dictionary value type is Utf8" ) ;
232- }
233- other => panic ! ( "Expected Utf8 value type, got {:?}" , other) ,
228+ DataType :: Utf8 => { }
229+ other => panic ! ( "Expected Utf8 value type, got {other:?}" ) ,
234230 }
235231 }
236- other => panic ! ( "Expected DictionaryArray, got {:?}" , other ) ,
232+ other => panic ! ( "Expected DictionaryArray, got {other :?}" ) ,
237233 }
238234
239235 // Now verify we can actually downcast to the expected types
@@ -247,47 +243,7 @@ mod group_values_trait_test {
247243 . as_any ( )
248244 . downcast_ref :: < StringArray > ( )
249245 . expect ( "Dictionary values should be StringArray" ) ;
250-
251- println ! (
252- "Emitted array has correct composite type: Dictionary<UInt8, Utf8(StringArray)>"
253- ) ;
254- }
255-
256- /*
257- cargo test --package datafusion-physical-plan --lib -- aggregates::group_values::single_group_by::dictionary::group_values_trait_test::test_group_values_dictionary --exact --nocapture --include-ignored
258-
259- fn run_groupvalue_test_suite() -> Result<()> {
260- let tests: Vec<(&str, fn(&mut dyn GroupValues))> = vec![
261- ("test_single_group_all_same_values", basic_functionality::test_single_group_all_same_values),
262- ("test_multiple_groups", basic_functionality::test_multiple_groups),
263- ("test_all_different_values", basic_functionality::test_all_different_values),
264- ("test_empty_batch", edge_cases::test_empty_batch),
265- ("test_single_row", edge_cases::test_single_row),
266- ("test_repeated_pattern", edge_cases::test_repeated_pattern),
267- ("test_multiple_columns_passed", multi_column::test_multiple_columns_passed),
268- ("test_consecutive_batches_then_emit", consecutive_batches::test_consecutive_batches_then_emit),
269- ("test_three_consecutive_batches_with_partial_emit", consecutive_batches::test_three_consecutive_batches_with_partial_emit),
270- ("test_size_grows_after_intern", state_management::test_size_grows_after_intern),
271- ("test_complex_emit_flow_with_multiple_internS", state_management::test_complex_emit_flow_with_multiple_internS),
272- ("test_clear_shrink_resets_state", state_management::test_clear_shrink_resets_state),
273- ("test_clear_shrink_with_zero", state_management::test_clear_shrink_with_zero),
274- ("test_emit_all_clears_state", state_management::test_emit_all_clears_state),
275- ("test_emit_first_n", state_management::test_emit_first_n),
276- ("test_group_assignment_order", data_correctness::test_group_assignment_order),
277- ("test_groups_vector_correctness_first_appearance", data_correctness::test_groups_vector_correctness_first_appearance),
278- ("test_groups_vector_sequential_assignment", data_correctness::test_groups_vector_sequential_assignment),
279- ("test_emit_partial_preserves_state", data_correctness::test_emit_partial_preserves_state),
280- ("test_emit_restores_intern_ability", data_correctness::test_emit_restores_intern_ability),
281- ];
282- for (name, test_function) in tests {
283- let mut group_values = GroupValuesDictionary::<arrow::datatypes::UInt8Type>::new(&DataType::Utf8);
284- println!("Running test: {name}");
285- test_function(&mut group_values);
286- }
287-
288- Ok(())
289246 }
290- */
291247
292248 mod basic_functionality {
293249 use super :: * ;
@@ -547,8 +503,7 @@ mod group_values_trait_test {
547503 . collect ( ) ;
548504 assert ! (
549505 unexpected_values. is_empty( ) ,
550- "Emitted unexpected values: {:#?}" ,
551- unexpected_values
506+ "Emitted unexpected values: {unexpected_values:#?}"
552507 ) ;
553508 } ) ;
554509 }
@@ -761,7 +716,7 @@ mod group_values_trait_test {
761716 assert_eq ! (
762717 group_values_trait_obj. len( ) ,
763718 2 ,
764- "After emitting 2, should have 2 left"
719+ "After emitting 2, should have 2 left (c, d) "
765720 ) ;
766721
767722 let batch2 = create_dict_array ( vec ! [ 0 , 1 , 2 ] , vec ! [ "a" , "b" , "e" ] ) ;
@@ -771,29 +726,31 @@ mod group_values_trait_test {
771726 . unwrap ( ) ;
772727 assert_eq ! (
773728 group_values_trait_obj. len( ) ,
774- 3 ,
775- "After second intern, should have 3 groups"
729+ 5 ,
730+ "After second intern: 2 remaining (c,d) + 3 new from batch2 (a,b,e) = 5 groups"
776731 ) ;
777732
778- let _result = group_values_trait_obj. emit ( EmitTo :: First ( 1 ) ) . unwrap ( ) ;
733+ let result = group_values_trait_obj. emit ( EmitTo :: First ( 1 ) ) . unwrap ( ) ;
734+ assert_emitted_is_dict_array ( & result) ;
779735 assert_eq ! (
780736 group_values_trait_obj. len( ) ,
781- 2 ,
782- "After emitting 1 more, should have 2 left"
737+ 4 ,
738+ "After emitting 1 more (c) , should have 4 left (d,a,b,e) "
783739 ) ;
784740
785- let batch3 = create_dict_array ( vec ! [ 2 , 5 , 6 ] , vec ! [ "a" , "f" , "g" ] ) ;
741+ let batch3 = create_dict_array ( vec ! [ 0 , 1 , 2 ] , vec ! [ "a" , "f" , "g" ] ) ;
786742 let mut groups_vector3 = Vec :: new ( ) ;
787743 group_values_trait_obj
788744 . intern ( & [ batch3] , & mut groups_vector3)
789745 . unwrap ( ) ;
790746 assert_eq ! (
791747 group_values_trait_obj. len( ) ,
792- 4 ,
793- "After third intern, should have 4 groups"
748+ 6 ,
749+ "After third intern: 4 remaining (d,a,b,e) + 2 new from batch3 (f,g) = 6 groups (a already exists) "
794750 ) ;
795751
796- let _result = group_values_trait_obj. emit ( EmitTo :: All ) . unwrap ( ) ;
752+ let result = group_values_trait_obj. emit ( EmitTo :: All ) . unwrap ( ) ;
753+ assert_emitted_is_dict_array ( & result) ;
797754 assert ! (
798755 group_values_trait_obj. is_empty( ) ,
799756 "After emitting all, should be empty"
0 commit comments