Skip to content

Commit aa69892

Browse files
committed
fix-ci
1 parent cc18a8f commit aa69892

4 files changed

Lines changed: 28 additions & 70 deletions

File tree

datafusion/physical-plan/src/aggregates/group_values/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ pub fn new_group_values(
242242
>::new(value_type)))
243243
}
244244
_ => Err(datafusion_common::DataFusionError::NotImplemented(
245-
format!("Unsupported dictionary key type: {:?}", key_type),
245+
format!("Unsupported dictionary key type: {key_type:?}",),
246246
)),
247247
};
248248
}

datafusion/physical-plan/src/aggregates/group_values/row.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ use arrow::array::{
2323
use arrow::compute::cast;
2424
use arrow::datatypes::{DataType, SchemaRef};
2525
use arrow::row::{RowConverter, Rows, SortField};
26+
use datafusion_common::Result;
2627
use datafusion_common::hash_utils::RandomState;
2728
use datafusion_common::hash_utils::create_hashes;
28-
use datafusion_common::{Result};
2929
use datafusion_execution::memory_pool::proxy::{HashTableAllocExt, VecAllocExt};
3030
use datafusion_expr::EmitTo;
3131
use hashbrown::hash_table::HashTable;

datafusion/physical-plan/src/aggregates/group_values/single_group_by/dictionary.rs

Lines changed: 21 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use datafusion_expr::EmitTo;
2424
use std::collections::HashMap;
2525
use std::marker::PhantomData;
2626
use std::mem;
27+
use std::sync::Arc;
2728
pub struct GroupValuesDictionary<K: ArrowDictionaryKeyType + Send> {
2829
/*
2930
We know that every single &[ArrayRef] that is passed in is a dictionary array
@@ -91,7 +92,7 @@ impl<K: ArrowDictionaryKeyType + Send> GroupValues for GroupValuesDictionary<K>
9192
"GroupValuesDictionary only supports single column group by".to_string(),
9293
));
9394
}
94-
let array = cols[0].clone();
95+
let array = Arc::clone(&cols[0]);
9596
groups.clear(); // zero out buffer
9697
let dict_array = array
9798
.as_any()
@@ -169,7 +170,7 @@ impl<K: ArrowDictionaryKeyType + Send> GroupValues for GroupValuesDictionary<K>
169170
let values = ScalarValue::iter_to_array(columns.into_iter())?;
170171

171172
let dict_array = DictionaryArray::<K>::try_new(keys, values)?;
172-
Ok(vec![std::sync::Arc::new(dict_array)])
173+
Ok(vec![Arc::new(dict_array)])
173174
}
174175
fn clear_shrink(&mut self, num_rows: usize) {
175176
self.seen_elements.clear();
@@ -214,26 +215,21 @@ mod group_values_trait_test {
214215
assert_eq!(result.len(), 1, "Expected exactly one array in emit result");
215216
let array = &result[0];
216217

217-
// Check the data type explicitly
218218
match array.data_type() {
219219
DataType::Dictionary(key_type, value_type) => {
220220
// Verify it's the expected key type (UInt8 in our tests)
221221
match key_type.as_ref() {
222-
DataType::UInt8 => {
223-
println!("Dictionary key type is UInt8");
224-
}
225-
other => panic!("Expected UInt8 key type, got {:?}", other),
222+
DataType::UInt8 => {}
223+
other => panic!("Expected UInt8 key type, got {other:?}"),
226224
}
227225

228226
// Verify it's the expected value type (Utf8 in our tests)
229227
match value_type.as_ref() {
230-
DataType::Utf8 => {
231-
println!("Dictionary value type is Utf8");
232-
}
233-
other => panic!("Expected Utf8 value type, got {:?}", other),
228+
DataType::Utf8 => {}
229+
other => panic!("Expected Utf8 value type, got {other:?}"),
234230
}
235231
}
236-
other => panic!("Expected DictionaryArray, got {:?}", other),
232+
other => panic!("Expected DictionaryArray, got {other:?}"),
237233
}
238234

239235
// Now verify we can actually downcast to the expected types
@@ -247,47 +243,7 @@ mod group_values_trait_test {
247243
.as_any()
248244
.downcast_ref::<StringArray>()
249245
.expect("Dictionary values should be StringArray");
250-
251-
println!(
252-
"Emitted array has correct composite type: Dictionary<UInt8, Utf8(StringArray)>"
253-
);
254-
}
255-
256-
/*
257-
cargo test --package datafusion-physical-plan --lib -- aggregates::group_values::single_group_by::dictionary::group_values_trait_test::test_group_values_dictionary --exact --nocapture --include-ignored
258-
259-
fn run_groupvalue_test_suite() -> Result<()> {
260-
let tests: Vec<(&str, fn(&mut dyn GroupValues))> = vec![
261-
("test_single_group_all_same_values", basic_functionality::test_single_group_all_same_values),
262-
("test_multiple_groups", basic_functionality::test_multiple_groups),
263-
("test_all_different_values", basic_functionality::test_all_different_values),
264-
("test_empty_batch", edge_cases::test_empty_batch),
265-
("test_single_row", edge_cases::test_single_row),
266-
("test_repeated_pattern", edge_cases::test_repeated_pattern),
267-
("test_multiple_columns_passed", multi_column::test_multiple_columns_passed),
268-
("test_consecutive_batches_then_emit", consecutive_batches::test_consecutive_batches_then_emit),
269-
("test_three_consecutive_batches_with_partial_emit", consecutive_batches::test_three_consecutive_batches_with_partial_emit),
270-
("test_size_grows_after_intern", state_management::test_size_grows_after_intern),
271-
("test_complex_emit_flow_with_multiple_internS", state_management::test_complex_emit_flow_with_multiple_internS),
272-
("test_clear_shrink_resets_state", state_management::test_clear_shrink_resets_state),
273-
("test_clear_shrink_with_zero", state_management::test_clear_shrink_with_zero),
274-
("test_emit_all_clears_state", state_management::test_emit_all_clears_state),
275-
("test_emit_first_n", state_management::test_emit_first_n),
276-
("test_group_assignment_order", data_correctness::test_group_assignment_order),
277-
("test_groups_vector_correctness_first_appearance", data_correctness::test_groups_vector_correctness_first_appearance),
278-
("test_groups_vector_sequential_assignment", data_correctness::test_groups_vector_sequential_assignment),
279-
("test_emit_partial_preserves_state", data_correctness::test_emit_partial_preserves_state),
280-
("test_emit_restores_intern_ability", data_correctness::test_emit_restores_intern_ability),
281-
];
282-
for (name, test_function) in tests {
283-
let mut group_values = GroupValuesDictionary::<arrow::datatypes::UInt8Type>::new(&DataType::Utf8);
284-
println!("Running test: {name}");
285-
test_function(&mut group_values);
286-
}
287-
288-
Ok(())
289246
}
290-
*/
291247

292248
mod basic_functionality {
293249
use super::*;
@@ -547,8 +503,7 @@ mod group_values_trait_test {
547503
.collect();
548504
assert!(
549505
unexpected_values.is_empty(),
550-
"Emitted unexpected values: {:#?}",
551-
unexpected_values
506+
"Emitted unexpected values: {unexpected_values:#?}"
552507
);
553508
});
554509
}
@@ -761,7 +716,7 @@ mod group_values_trait_test {
761716
assert_eq!(
762717
group_values_trait_obj.len(),
763718
2,
764-
"After emitting 2, should have 2 left"
719+
"After emitting 2, should have 2 left (c, d)"
765720
);
766721

767722
let batch2 = create_dict_array(vec![0, 1, 2], vec!["a", "b", "e"]);
@@ -771,29 +726,31 @@ mod group_values_trait_test {
771726
.unwrap();
772727
assert_eq!(
773728
group_values_trait_obj.len(),
774-
3,
775-
"After second intern, should have 3 groups"
729+
5,
730+
"After second intern: 2 remaining (c,d) + 3 new from batch2 (a,b,e) = 5 groups"
776731
);
777732

778-
let _result = group_values_trait_obj.emit(EmitTo::First(1)).unwrap();
733+
let result = group_values_trait_obj.emit(EmitTo::First(1)).unwrap();
734+
assert_emitted_is_dict_array(&result);
779735
assert_eq!(
780736
group_values_trait_obj.len(),
781-
2,
782-
"After emitting 1 more, should have 2 left"
737+
4,
738+
"After emitting 1 more (c), should have 4 left (d,a,b,e)"
783739
);
784740

785-
let batch3 = create_dict_array(vec![2, 5, 6], vec!["a", "f", "g"]);
741+
let batch3 = create_dict_array(vec![0, 1, 2], vec!["a", "f", "g"]);
786742
let mut groups_vector3 = Vec::new();
787743
group_values_trait_obj
788744
.intern(&[batch3], &mut groups_vector3)
789745
.unwrap();
790746
assert_eq!(
791747
group_values_trait_obj.len(),
792-
4,
793-
"After third intern, should have 4 groups"
748+
6,
749+
"After third intern: 4 remaining (d,a,b,e) + 2 new from batch3 (f,g) = 6 groups (a already exists)"
794750
);
795751

796-
let _result = group_values_trait_obj.emit(EmitTo::All).unwrap();
752+
let result = group_values_trait_obj.emit(EmitTo::All).unwrap();
753+
assert_emitted_is_dict_array(&result);
797754
assert!(
798755
group_values_trait_obj.is_empty(),
799756
"After emitting all, should be empty"

datafusion/physical-plan/src/aggregates/row_hash.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1744,11 +1744,12 @@ mod dictionary_aggregation {
17441744
Field::new("event_id", DataType::Int64, false),
17451745
]));
17461746

1747-
let batch = RecordBatch::try_new(schema.clone(), vec![region_col, event_id_col])?;
1747+
let batch =
1748+
RecordBatch::try_new(Arc::clone(&schema), vec![region_col, event_id_col])?;
17481749

17491750
let exec = Arc::new(TestMemoryExec::try_new(
17501751
&[vec![batch]],
1751-
schema.clone(),
1752+
Arc::clone(&schema),
17521753
None,
17531754
)?);
17541755

@@ -1766,7 +1767,7 @@ mod dictionary_aggregation {
17661767
)],
17671768
vec![None],
17681769
exec,
1769-
schema,
1770+
Arc::clone(&schema),
17701771
)?;
17711772

17721773
let task_ctx = Arc::new(TaskContext::default());
@@ -1782,7 +1783,7 @@ mod dictionary_aggregation {
17821783
// verify we got 3 groups - one per distinct region
17831784
let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
17841785
assert_eq!(total_rows, 3);
1785-
dbg!("record batches: {:#?}", batches);
1786+
dbg!("record batches: {batches:#?}");
17861787

17871788
Ok(())
17881789
}

0 commit comments

Comments
 (0)