Skip to content

Commit 40a43c6

Browse files
committed
transitioned from ScalarValue to raw hashes
1 parent 203efc4 commit 40a43c6

4 files changed

Lines changed: 441 additions & 226 deletions

File tree

datafusion/physical-plan/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,4 +113,4 @@ harness = false
113113

114114
[profile.profiling]
115115
inherits = "release"
116-
debug = true
116+
debug = true

datafusion/physical-plan/benches/single_column_aggr.rs

Lines changed: 11 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use arrow::array::{ArrayRef, StringArray, StringDictionaryBuilder};
1+
use arrow::array::{ArrayRef, StringDictionaryBuilder};
22
use arrow::datatypes::{DataType, Field, Schema, UInt8Type};
33
use criterion::{Criterion, criterion_group, criterion_main};
44
use datafusion_expr::EmitTo;
@@ -30,7 +30,6 @@ enum NullRate {
3030
enum GroupType {
3131
Dictionary,
3232
GroupValueRows,
33-
Utf8,
3433
}
3534
fn create_string_values(cardinality: &Cardinality) -> Vec<String> {
3635
let num_values = match cardinality {
@@ -100,53 +99,37 @@ fn generate_group_values(kind: GroupType) -> Box<dyn GroupValues> {
10099
// call custom path directly
101100
Box::new(GroupValuesDictionary::<UInt8Type>::new(&DataType::Utf8))
102101
}
103-
GroupType::Utf8 => {
104-
//let batch = create_batch(batch_size, cardinality);
105-
//let array = StringArray::from(batch);
106-
// Create GroupValues implementation for Utf8 type
107-
let schema = Arc::new(Schema::new(vec![Field::new(
108-
"group_col",
109-
DataType::Utf8,
110-
false,
111-
)]));
112-
new_group_values(schema, &GroupOrdering::None).unwrap()
113-
}
114102
}
115103
}
116104

117105
fn bench_single_column_group_values(c: &mut Criterion) {
118-
let group_types = [GroupType::GroupValueRows, GroupType::Dictionary];
106+
let group_types = [GroupType::GroupValueRows, GroupType::Dictionary];
119107
let cardinalities = [
120108
Cardinality::Xsmall,
121-
/*
122109
Cardinality::Small,
123-
Cardinality::Medium,*/
110+
Cardinality::Medium,
124111
Cardinality::Large,
125112
];
126-
let batch_sizes = [
127-
/*BatchSize::Small, BatchSize::Medium, */ BatchSize::Large,
128-
];
113+
let batch_sizes = [BatchSize::Small, BatchSize::Medium, BatchSize::Large];
129114
let null_rates = [
130115
NullRate::Zero,
131-
/*NullRate::Low, NullRate::Medium,*/ NullRate::High,
116+
NullRate::Low,
117+
NullRate::Medium,
118+
NullRate::High,
132119
];
133120

134121
for cardinality in &cardinalities {
135122
for batch_size in &batch_sizes {
136123
for null_rate in &null_rates {
137124
for group_type in &group_types {
138125
let group_name = format!(
139-
"{:?}_cardinality_{:?}_batch_{:?}_null_rate_{:?}",
126+
"t1_{:?}_cardinality_{:?}_batch_{:?}_null_rate_{:?}",
140127
group_type, cardinality, batch_size, null_rate
141128
);
142129

143130
let string_vec = create_batch(batch_size, cardinality);
144131
let nullable_values = introduce_nulls(string_vec, null_rate);
145132
let col_ref = match group_type {
146-
GroupType::Utf8 => {
147-
Arc::new(StringArray::from(nullable_values.clone()))
148-
as ArrayRef
149-
}
150133
GroupType::Dictionary | GroupType::GroupValueRows => {
151134
strings_to_dict_array(nullable_values.clone())
152135
}
@@ -168,7 +151,7 @@ fn bench_single_column_group_values(c: &mut Criterion) {
168151
);
169152
});
170153

171-
/* Second benchmark that alternates between intern and emit to simulate more realistic usage patterns where the same group values is used across multiple batches of the same grouping column
154+
// Second benchmark: alternates between intern and emit to simulate a more realistic usage pattern, where the same `GroupValues` instance is reused across multiple batches of the same grouping column
172155
let multi_batch_name = format!(
173156
"multi_batch/{:?}_cardinality_{:?}_batch_{:?}_null_rate_{:?}",
174157
group_type, cardinality, batch_size, null_rate
@@ -200,7 +183,7 @@ fn bench_single_column_group_values(c: &mut Criterion) {
200183
},
201184
criterion::BatchSize::SmallInput,
202185
);
203-
});*/
186+
});
204187
}
205188
}
206189
}
@@ -209,7 +192,7 @@ fn bench_single_column_group_values(c: &mut Criterion) {
209192

210193
fn bench_repeated_intern_prefab_cols(c: &mut Criterion) {
211194
let cardinality = Cardinality::Small;
212-
let batch_size = BatchSize::Small;
195+
let batch_size = BatchSize::Large;
213196
let null_rate = NullRate::Low;
214197
let group_types = [GroupType::GroupValueRows, GroupType::Dictionary];
215198

@@ -218,9 +201,6 @@ fn bench_repeated_intern_prefab_cols(c: &mut Criterion) {
218201
let string_vec = create_batch(&batch_size, &cardinality);
219202
let nullable_values = introduce_nulls(string_vec, &null_rate);
220203
let col_ref = match group_type {
221-
GroupType::Utf8 => {
222-
Arc::new(StringArray::from(nullable_values.clone())) as ArrayRef
223-
}
224204
GroupType::Dictionary | GroupType::GroupValueRows => {
225205
strings_to_dict_array(nullable_values.clone())
226206
}

datafusion/physical-plan/src/aggregates/group_values/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,6 @@ pub fn new_group_values(
197197
DataType::Boolean => {
198198
return Ok(Box::new(GroupValuesBoolean::new()));
199199
}
200-
/*
201200
DataType::Dictionary(key_type, value_type) => {
202201
if supported_single_dictionary_value(value_type) {
203202
return match key_type.as_ref() {
@@ -247,7 +246,7 @@ pub fn new_group_values(
247246
)),
248247
};
249248
}
250-
}*/
249+
}
251250
_ => {}
252251
}
253252
}

0 commit comments

Comments
 (0)