Skip to content

Commit 680d0e6

Browse files
committed
Merge branch 'pr-21589' into rich-t-kid/Dictionary-encoding-Hash-optmize
2 parents bb33fe6 + 82edbdf commit 680d0e6

File tree

8 files changed

+143
-546
lines changed

8 files changed

+143
-546
lines changed

datafusion/physical-plan/Cargo.toml

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -106,11 +106,3 @@ required-features = ["test_utils"]
106106
harness = false
107107
name = "aggregate_vectorized"
108108
required-features = ["test_utils"]
109-
110-
[[bench]]
111-
name = "single_column_aggr"
112-
harness = false
113-
114-
[profile.profiling]
115-
inherits = "release"
116-
debug = true

datafusion/physical-plan/benches/single_column_aggr.rs

Lines changed: 0 additions & 266 deletions
This file was deleted.
-159 KB
Binary file not shown.

datafusion/physical-plan/src/aggregates/group_values/mod.rs

Lines changed: 18 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,10 @@
1818
//! [`GroupValues`] trait for storing and interning group keys
1919
2020
use arrow::array::types::{
21-
Date32Type, Date64Type, Decimal128Type, Time32MillisecondType, Time32SecondType,
22-
Time64MicrosecondType, Time64NanosecondType, TimestampMicrosecondType,
23-
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
21+
Date32Type, Date64Type, Decimal128Type, Int8Type, Int16Type, Int32Type, Int64Type,
22+
Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType,
23+
TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
24+
TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
2425
};
2526
use arrow::array::{ArrayRef, downcast_primitive};
2627
use arrow::datatypes::{DataType, SchemaRef, TimeUnit};
@@ -51,6 +52,11 @@ mod metrics;
5152
mod null_builder;
5253

5354
pub(crate) use metrics::GroupByMetrics;
55+
macro_rules! make_dict {
56+
($t:ty, $value_type:expr) => {
57+
Ok(Box::new(GroupValuesDictionary::<$t>::new($value_type)))
58+
};
59+
}
5460

5561
/// Stores the group values during hash aggregation.
5662
///
@@ -200,49 +206,16 @@ pub fn new_group_values(
200206
DataType::Dictionary(key_type, value_type) => {
201207
if supported_single_dictionary_value(value_type) {
202208
return match key_type.as_ref() {
203-
// TODO: turn this into a macro
204-
DataType::Int8 => {
205-
Ok(Box::new(GroupValuesDictionary::<
206-
arrow::datatypes::Int8Type,
207-
>::new(value_type)))
208-
}
209-
DataType::Int16 => {
210-
Ok(Box::new(GroupValuesDictionary::<
211-
arrow::datatypes::Int16Type,
212-
>::new(value_type)))
213-
}
214-
DataType::Int32 => {
215-
Ok(Box::new(GroupValuesDictionary::<
216-
arrow::datatypes::Int32Type,
217-
>::new(value_type)))
218-
}
219-
DataType::Int64 => {
220-
Ok(Box::new(GroupValuesDictionary::<
221-
arrow::datatypes::Int64Type,
222-
>::new(value_type)))
223-
}
224-
DataType::UInt8 => {
225-
Ok(Box::new(GroupValuesDictionary::<
226-
arrow::datatypes::UInt8Type,
227-
>::new(value_type)))
228-
}
229-
DataType::UInt16 => {
230-
Ok(Box::new(GroupValuesDictionary::<
231-
arrow::datatypes::UInt16Type,
232-
>::new(value_type)))
233-
}
234-
DataType::UInt32 => {
235-
Ok(Box::new(GroupValuesDictionary::<
236-
arrow::datatypes::UInt32Type,
237-
>::new(value_type)))
238-
}
239-
DataType::UInt64 => {
240-
Ok(Box::new(GroupValuesDictionary::<
241-
arrow::datatypes::UInt64Type,
242-
>::new(value_type)))
243-
}
209+
DataType::Int8 => make_dict!(Int8Type, value_type),
210+
DataType::Int16 => make_dict!(Int16Type, value_type),
211+
DataType::Int32 => make_dict!(Int32Type, value_type),
212+
DataType::Int64 => make_dict!(Int64Type, value_type),
213+
DataType::UInt8 => make_dict!(UInt8Type, value_type),
214+
DataType::UInt16 => make_dict!(UInt16Type, value_type),
215+
DataType::UInt32 => make_dict!(UInt32Type, value_type),
216+
DataType::UInt64 => make_dict!(UInt64Type, value_type),
244217
_ => Err(datafusion_common::DataFusionError::NotImplemented(
245-
format!("Unsupported dictionary key type: {key_type:?}",),
218+
format!("Unsupported dictionary key type: {key_type:?}"),
246219
)),
247220
};
248221
}

0 commit comments

Comments
 (0)