Skip to content

Commit deec858

Browse files
committed
fixed regressions & added test
1 parent 40a43c6 commit deec858

2 files changed

Lines changed: 438 additions & 264 deletions

File tree

datafusion/physical-plan/benches/single_column_aggr.rs

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
118
use arrow::array::{ArrayRef, StringDictionaryBuilder};
219
use arrow::datatypes::{DataType, Field, Schema, UInt8Type};
320
use criterion::{Criterion, criterion_group, criterion_main};
@@ -39,7 +56,7 @@ fn create_string_values(cardinality: &Cardinality) -> Vec<String> {
3956
Cardinality::Large => 200,
4057
};
4158
(0..num_values)
42-
.map(|i| format!("group_value_{:06}", i))
59+
.map(|i| format!("group_value_{i:06}"))
4360
.collect()
4461
}
4562
fn create_batch(batch_size: &BatchSize, cardinality: &Cardinality) -> Vec<String> {
@@ -84,7 +101,7 @@ fn introduce_nulls(values: Vec<String>, null_rate: &NullRate) -> Vec<Option<Stri
84101
.collect()
85102
}
86103

87-
fn generate_group_values(kind: GroupType) -> Box<dyn GroupValues> {
104+
fn generate_group_values(kind: &GroupType) -> Box<dyn GroupValues> {
88105
match kind {
89106
GroupType::GroupValueRows => {
90107
// We know this will hit the fallback path (i.e. GroupValueRows), but to avoid making private items public we go through the public API.
@@ -123,8 +140,7 @@ fn bench_single_column_group_values(c: &mut Criterion) {
123140
for null_rate in &null_rates {
124141
for group_type in &group_types {
125142
let group_name = format!(
126-
"t1_{:?}_cardinality_{:?}_batch_{:?}_null_rate_{:?}",
127-
group_type, cardinality, batch_size, null_rate
143+
"t1_{group_type:?}_cardinality_{cardinality:?}_batch_{batch_size:?}_null_rate_{null_rate:?}"
128144
);
129145

130146
let string_vec = create_batch(batch_size, cardinality);
@@ -138,7 +154,7 @@ fn bench_single_column_group_values(c: &mut Criterion) {
138154
b.iter_batched(
139155
|| {
140156
// Create fresh group values for each iteration.
141-
let gv = generate_group_values(group_type.clone());
157+
let gv = generate_group_values(group_type);
142158
let col = col_ref.clone();
143159
(gv, col)
144160
},
@@ -153,14 +169,13 @@ fn bench_single_column_group_values(c: &mut Criterion) {
153169

154170
// Second benchmark: alternates between intern and emit to simulate a more realistic usage pattern, where the same group values instance is reused across multiple batches of the same grouping column.
155171
let multi_batch_name = format!(
156-
"multi_batch/{:?}_cardinality_{:?}_batch_{:?}_null_rate_{:?}",
157-
group_type, cardinality, batch_size, null_rate
172+
"multi_batch/{group_type:?}_cardinality_{cardinality:?}_batch_{batch_size:?}_null_rate_{null_rate:?}"
158173
);
159174
c.bench_function(&multi_batch_name, |b| {
160175
b.iter_batched(
161176
|| {
162177
// setup - create 3 batches to simulate multiple record batches
163-
let gv = generate_group_values(group_type.clone());
178+
let gv = generate_group_values(group_type);
164179
let batch1 = col_ref.clone();
165180
let batch2 = col_ref.clone();
166181
let batch3 = col_ref.clone();
@@ -213,12 +228,11 @@ fn bench_repeated_intern_prefab_cols(c: &mut Criterion) {
213228
let arr4 = col_ref.clone();
214229

215230
let group_name = format!(
216-
"repeated_intern/{:?}_cardinality_{:?}_batch_{:?}_null_rate_{:?}",
217-
group_type, cardinality, batch_size, null_rate
231+
"repeated_intern/{group_type:?}_cardinality_{cardinality:?}_batch_{batch_size:?}_null_rate_{null_rate:?}"
218232
);
219233
c.bench_function(&group_name, |b| {
220234
b.iter_batched(
221-
|| generate_group_values(group_type.clone()),
235+
|| generate_group_values(&group_type),
222236
|mut group_values| {
223237
let mut groups = Vec::new();
224238

0 commit comments

Comments
 (0)