1- use arrow:: array:: { ArrayRef , StringArray , StringDictionaryBuilder } ;
1+ use arrow:: array:: { ArrayRef , StringDictionaryBuilder } ;
22use arrow:: datatypes:: { DataType , Field , Schema , UInt8Type } ;
33use criterion:: { Criterion , criterion_group, criterion_main} ;
44use datafusion_expr:: EmitTo ;
@@ -30,7 +30,6 @@ enum NullRate {
3030enum GroupType {
3131 Dictionary ,
3232 GroupValueRows ,
33- Utf8 ,
3433}
3534fn create_string_values ( cardinality : & Cardinality ) -> Vec < String > {
3635 let num_values = match cardinality {
@@ -100,53 +99,37 @@ fn generate_group_values(kind: GroupType) -> Box<dyn GroupValues> {
10099 // call custom path directly
101100 Box :: new ( GroupValuesDictionary :: < UInt8Type > :: new ( & DataType :: Utf8 ) )
102101 }
103- GroupType :: Utf8 => {
104- //let batch = create_batch(batch_size, cardinality);
105- //let array = StringArray::from(batch);
106- // Create GroupValues implementation for Utf8 type
107- let schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new(
108- "group_col" ,
109- DataType :: Utf8 ,
110- false ,
111- ) ] ) ) ;
112- new_group_values ( schema, & GroupOrdering :: None ) . unwrap ( )
113- }
114102 }
115103}
116104
117105fn bench_single_column_group_values ( c : & mut Criterion ) {
118- let group_types = [ GroupType :: GroupValueRows , GroupType :: Dictionary ] ;
106+ let group_types = [ GroupType :: GroupValueRows , GroupType :: Dictionary ] ;
119107 let cardinalities = [
120108 Cardinality :: Xsmall ,
121- /*
122109 Cardinality :: Small ,
123- Cardinality::Medium,*/
110+ Cardinality :: Medium ,
124111 Cardinality :: Large ,
125112 ] ;
126- let batch_sizes = [
127- /*BatchSize::Small, BatchSize::Medium, */ BatchSize :: Large ,
128- ] ;
113+ let batch_sizes = [ BatchSize :: Small , BatchSize :: Medium , BatchSize :: Large ] ;
129114 let null_rates = [
130115 NullRate :: Zero ,
131- /*NullRate::Low, NullRate::Medium,*/ NullRate :: High ,
116+ NullRate :: Low ,
117+ NullRate :: Medium ,
118+ NullRate :: High ,
132119 ] ;
133120
134121 for cardinality in & cardinalities {
135122 for batch_size in & batch_sizes {
136123 for null_rate in & null_rates {
137124 for group_type in & group_types {
138125 let group_name = format ! (
139- "{:?}_cardinality_{:?}_batch_{:?}_null_rate_{:?}" ,
126+ "t1_ {:?}_cardinality_{:?}_batch_{:?}_null_rate_{:?}" ,
140127 group_type, cardinality, batch_size, null_rate
141128 ) ;
142129
143130 let string_vec = create_batch ( batch_size, cardinality) ;
144131 let nullable_values = introduce_nulls ( string_vec, null_rate) ;
145132 let col_ref = match group_type {
146- GroupType :: Utf8 => {
147- Arc :: new ( StringArray :: from ( nullable_values. clone ( ) ) )
148- as ArrayRef
149- }
150133 GroupType :: Dictionary | GroupType :: GroupValueRows => {
151134 strings_to_dict_array ( nullable_values. clone ( ) )
152135 }
@@ -168,7 +151,7 @@ fn bench_single_column_group_values(c: &mut Criterion) {
168151 ) ;
169152 } ) ;
170153
171- /* Second benchmark that alternates between intern and emit to simulate more realistic usage patterns where the same group values is used across multiple batches of the same grouping column
154+ // Second benchmark: alternates between intern and emit to simulate a more realistic usage pattern, in which the same GroupValues instance is used across multiple batches of the same grouping column
172155 let multi_batch_name = format ! (
173156 "multi_batch/{:?}_cardinality_{:?}_batch_{:?}_null_rate_{:?}" ,
174157 group_type, cardinality, batch_size, null_rate
@@ -200,7 +183,7 @@ fn bench_single_column_group_values(c: &mut Criterion) {
200183 } ,
201184 criterion:: BatchSize :: SmallInput ,
202185 ) ;
203- });*/
186+ } ) ;
204187 }
205188 }
206189 }
@@ -209,7 +192,7 @@ fn bench_single_column_group_values(c: &mut Criterion) {
209192
210193fn bench_repeated_intern_prefab_cols ( c : & mut Criterion ) {
211194 let cardinality = Cardinality :: Small ;
212- let batch_size = BatchSize :: Small ;
195+ let batch_size = BatchSize :: Large ;
213196 let null_rate = NullRate :: Low ;
214197 let group_types = [ GroupType :: GroupValueRows , GroupType :: Dictionary ] ;
215198
@@ -218,9 +201,6 @@ fn bench_repeated_intern_prefab_cols(c: &mut Criterion) {
218201 let string_vec = create_batch ( & batch_size, & cardinality) ;
219202 let nullable_values = introduce_nulls ( string_vec, & null_rate) ;
220203 let col_ref = match group_type {
221- GroupType :: Utf8 => {
222- Arc :: new ( StringArray :: from ( nullable_values. clone ( ) ) ) as ArrayRef
223- }
224204 GroupType :: Dictionary | GroupType :: GroupValueRows => {
225205 strings_to_dict_array ( nullable_values. clone ( ) )
226206 }
0 commit comments