@@ -21,80 +21,62 @@ use arrow::util::bench_util::{
2121 create_string_array_with_len, create_string_view_array_with_len,
2222} ;
2323use criterion:: { Criterion , SamplingMode , criterion_group, criterion_main} ;
24- use datafusion_common:: DataFusionError ;
2524use datafusion_common:: config:: ConfigOptions ;
25+ use datafusion_common:: { DataFusionError , ScalarValue } ;
2626use datafusion_expr:: { ColumnarValue , ScalarFunctionArgs } ;
2727use datafusion_functions:: unicode;
2828use std:: hint:: black_box;
2929use std:: sync:: Arc ;
3030
31+ fn make_i64_arg ( value : i64 , size : usize , as_scalar : bool ) -> ColumnarValue {
32+ if as_scalar {
33+ ColumnarValue :: Scalar ( ScalarValue :: from ( value) )
34+ } else {
35+ ColumnarValue :: Array ( Arc :: new ( Int64Array :: from ( vec ! [ value; size] ) ) )
36+ }
37+ }
38+
3139fn create_args_without_count < O : OffsetSizeTrait > (
3240 size : usize ,
3341 str_len : usize ,
3442 start_half_way : bool ,
3543 force_view_types : bool ,
44+ scalar_start : bool ,
3645) -> Vec < ColumnarValue > {
37- let start_array = Arc :: new ( Int64Array :: from (
38- ( 0 ..size)
39- . map ( |_| {
40- if start_half_way {
41- ( str_len / 2 ) as i64
42- } else {
43- 1i64
44- }
45- } )
46- . collect :: < Vec < _ > > ( ) ,
47- ) ) ;
48-
49- if force_view_types {
50- let string_array =
51- Arc :: new ( create_string_view_array_with_len ( size, 0.1 , str_len, false ) ) ;
52- vec ! [
53- ColumnarValue :: Array ( string_array) ,
54- ColumnarValue :: Array ( start_array) ,
55- ]
46+ let start_val = if start_half_way {
47+ ( str_len / 2 ) as i64
5648 } else {
57- let string_array =
58- Arc :: new ( create_string_array_with_len :: < O > ( size, 0.1 , str_len) ) ;
49+ 1i64
50+ } ;
51+ let start = make_i64_arg ( start_val, size, scalar_start) ;
5952
60- vec ! [
61- ColumnarValue :: Array ( string_array) ,
62- ColumnarValue :: Array ( Arc :: clone( & start_array) as ArrayRef ) ,
63- ]
64- }
53+ let string_array: ArrayRef = if force_view_types {
54+ Arc :: new ( create_string_view_array_with_len ( size, 0.1 , str_len, false ) )
55+ } else {
56+ Arc :: new ( create_string_array_with_len :: < O > ( size, 0.1 , str_len) )
57+ } ;
58+
59+ vec ! [ ColumnarValue :: Array ( string_array) , start]
6560}
6661
6762fn create_args_with_count < O : OffsetSizeTrait > (
6863 size : usize ,
6964 str_len : usize ,
7065 count_max : usize ,
7166 force_view_types : bool ,
67+ scalar_args : bool ,
7268) -> Vec < ColumnarValue > {
73- let start_array =
74- Arc :: new ( Int64Array :: from ( ( 0 ..size) . map ( |_| 1 ) . collect :: < Vec < _ > > ( ) ) ) ;
7569 let count = count_max. min ( str_len) as i64 ;
76- let count_array = Arc :: new ( Int64Array :: from (
77- ( 0 ..size) . map ( |_| count) . collect :: < Vec < _ > > ( ) ,
78- ) ) ;
79-
80- if force_view_types {
81- let string_array =
82- Arc :: new ( create_string_view_array_with_len ( size, 0.1 , str_len, false ) ) ;
83- vec ! [
84- ColumnarValue :: Array ( string_array) ,
85- ColumnarValue :: Array ( start_array) ,
86- ColumnarValue :: Array ( count_array) ,
87- ]
70+ let start = make_i64_arg ( 1i64 , size, scalar_args) ;
71+ let count = make_i64_arg ( count, size, scalar_args) ;
72+
73+ let string_array: ArrayRef = if force_view_types {
74+ Arc :: new ( create_string_view_array_with_len ( size, 0.1 , str_len, false ) )
8875 } else {
89- let string_array =
90- Arc :: new ( create_string_array_with_len :: < O > ( size, 0.1 , str_len) ) ;
91-
92- vec ! [
93- ColumnarValue :: Array ( string_array) ,
94- ColumnarValue :: Array ( Arc :: clone( & start_array) as ArrayRef ) ,
95- ColumnarValue :: Array ( Arc :: clone( & count_array) as ArrayRef ) ,
96- ]
97- }
76+ Arc :: new ( create_string_array_with_len :: < O > ( size, 0.1 , str_len) )
77+ } ;
78+
79+ vec ! [ ColumnarValue :: Array ( string_array) , start, count]
9880}
9981
10082#[ expect( clippy:: needless_pass_by_value) ]
@@ -122,22 +104,22 @@ fn criterion_benchmark(c: &mut Criterion) {
122104 for size in [ 1024 , 4096 ] {
123105 // string_len = 12, substring_len=6 (see `create_args_without_count`)
124106 let len = 12 ;
125- let mut group = c. benchmark_group ( "SHORTER THAN 12 " ) ;
107+ let mut group = c. benchmark_group ( "substr, no count, short strings " ) ;
126108 group. sampling_mode ( SamplingMode :: Flat ) ;
127109 group. sample_size ( 10 ) ;
128110
129- let args = create_args_without_count :: < i32 > ( size, len, true , true ) ;
111+ let args = create_args_without_count :: < i32 > ( size, len, true , true , false ) ;
130112 group. bench_function (
131113 format ! ( "substr_string_view [size={size}, strlen={len}]" ) ,
132114 |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
133115 ) ;
134116
135- let args = create_args_without_count :: < i32 > ( size, len, false , false ) ;
117+ let args = create_args_without_count :: < i32 > ( size, len, false , false , false ) ;
136118 group. bench_function ( format ! ( "substr_string [size={size}, strlen={len}]" ) , |b| {
137119 b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) )
138120 } ) ;
139121
140- let args = create_args_without_count :: < i64 > ( size, len, true , false ) ;
122+ let args = create_args_without_count :: < i64 > ( size, len, true , false , false ) ;
141123 group. bench_function (
142124 format ! ( "substr_large_string [size={size}, strlen={len}]" ) ,
143125 |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
@@ -148,23 +130,23 @@ fn criterion_benchmark(c: &mut Criterion) {
148130 // string_len = 128, start=1, count=64, substring_len=64
149131 let len = 128 ;
150132 let count = 64 ;
151- let mut group = c. benchmark_group ( "LONGER THAN 12 " ) ;
133+ let mut group = c. benchmark_group ( "substr, with count, long strings " ) ;
152134 group. sampling_mode ( SamplingMode :: Flat ) ;
153135 group. sample_size ( 10 ) ;
154136
155- let args = create_args_with_count :: < i32 > ( size, len, count, true ) ;
137+ let args = create_args_with_count :: < i32 > ( size, len, count, true , false ) ;
156138 group. bench_function (
157139 format ! ( "substr_string_view [size={size}, count={count}, strlen={len}]" , ) ,
158140 |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
159141 ) ;
160142
161- let args = create_args_with_count :: < i32 > ( size, len, count, false ) ;
143+ let args = create_args_with_count :: < i32 > ( size, len, count, false , false ) ;
162144 group. bench_function (
163145 format ! ( "substr_string [size={size}, count={count}, strlen={len}]" , ) ,
164146 |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
165147 ) ;
166148
167- let args = create_args_with_count :: < i64 > ( size, len, count, false ) ;
149+ let args = create_args_with_count :: < i64 > ( size, len, count, false , false ) ;
168150 group. bench_function (
169151 format ! ( "substr_large_string [size={size}, count={count}, strlen={len}]" , ) ,
170152 |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
@@ -175,29 +157,116 @@ fn criterion_benchmark(c: &mut Criterion) {
175157 // string_len = 128, start=1, count=6, substring_len=6
176158 let len = 128 ;
177159 let count = 6 ;
178- let mut group = c. benchmark_group ( "SRC_LEN > 12, SUB_LEN < 12 " ) ;
160+ let mut group = c. benchmark_group ( "substr, short count, long strings " ) ;
179161 group. sampling_mode ( SamplingMode :: Flat ) ;
180162 group. sample_size ( 10 ) ;
181163
182- let args = create_args_with_count :: < i32 > ( size, len, count, true ) ;
164+ let args = create_args_with_count :: < i32 > ( size, len, count, true , false ) ;
183165 group. bench_function (
184166 format ! ( "substr_string_view [size={size}, count={count}, strlen={len}]" , ) ,
185167 |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
186168 ) ;
187169
188- let args = create_args_with_count :: < i32 > ( size, len, count, false ) ;
170+ let args = create_args_with_count :: < i32 > ( size, len, count, false , false ) ;
189171 group. bench_function (
190172 format ! ( "substr_string [size={size}, count={count}, strlen={len}]" , ) ,
191173 |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
192174 ) ;
193175
194- let args = create_args_with_count :: < i64 > ( size, len, count, false ) ;
176+ let args = create_args_with_count :: < i64 > ( size, len, count, false , false ) ;
195177 group. bench_function (
196178 format ! ( "substr_large_string [size={size}, count={count}, strlen={len}]" , ) ,
197179 |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
198180 ) ;
199181
200182 group. finish ( ) ;
183+
184+ // Scalar start, no count, short strings
185+ let len = 12 ;
186+ let mut group =
187+ c. benchmark_group ( "substr, scalar start, no count, short strings" ) ;
188+ group. sampling_mode ( SamplingMode :: Flat ) ;
189+ group. sample_size ( 10 ) ;
190+
191+ let args = create_args_without_count :: < i32 > ( size, len, true , true , true ) ;
192+ group. bench_function (
193+ format ! ( "substr_string_view [size={size}, strlen={len}]" ) ,
194+ |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
195+ ) ;
196+
197+ let args = create_args_without_count :: < i32 > ( size, len, false , false , true ) ;
198+ group. bench_function ( format ! ( "substr_string [size={size}, strlen={len}]" ) , |b| {
199+ b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) )
200+ } ) ;
201+
202+ group. finish ( ) ;
203+
204+ // Scalar start, no count, long strings
205+ let len = 128 ;
206+ let mut group = c. benchmark_group ( "substr, scalar start, no count, long strings" ) ;
207+ group. sampling_mode ( SamplingMode :: Flat ) ;
208+ group. sample_size ( 10 ) ;
209+
210+ let args = create_args_without_count :: < i32 > ( size, len, true , true , true ) ;
211+ group. bench_function (
212+ format ! ( "substr_string_view [size={size}, strlen={len}]" ) ,
213+ |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
214+ ) ;
215+
216+ let args = create_args_without_count :: < i32 > ( size, len, false , false , true ) ;
217+ group. bench_function ( format ! ( "substr_string [size={size}, strlen={len}]" ) , |b| {
218+ b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) )
219+ } ) ;
220+
221+ group. finish ( ) ;
222+
223+ // Scalar start and count, short strings
224+ let len = 12 ;
225+ let count = 6 ;
226+ let mut group = c. benchmark_group ( "substr, scalar args, short strings" ) ;
227+ group. sampling_mode ( SamplingMode :: Flat ) ;
228+ group. sample_size ( 10 ) ;
229+
230+ let args = create_args_with_count :: < i32 > ( size, len, count, true , true ) ;
231+ group. bench_function (
232+ format ! ( "substr_string_view [size={size}, count={count}, strlen={len}]" ) ,
233+ |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
234+ ) ;
235+
236+ let args = create_args_with_count :: < i32 > ( size, len, count, false , true ) ;
237+ group. bench_function (
238+ format ! ( "substr_string [size={size}, count={count}, strlen={len}]" ) ,
239+ |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
240+ ) ;
241+
242+ group. finish ( ) ;
243+
244+ // Scalar start and count, long strings
245+ let len = 128 ;
246+ let count = 64 ;
247+ let mut group = c. benchmark_group ( "substr, scalar args, long strings" ) ;
248+ group. sampling_mode ( SamplingMode :: Flat ) ;
249+ group. sample_size ( 10 ) ;
250+
251+ let args = create_args_with_count :: < i32 > ( size, len, count, true , true ) ;
252+ group. bench_function (
253+ format ! ( "substr_string_view [size={size}, count={count}, strlen={len}]" ) ,
254+ |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
255+ ) ;
256+
257+ let args = create_args_with_count :: < i32 > ( size, len, count, false , true ) ;
258+ group. bench_function (
259+ format ! ( "substr_string [size={size}, count={count}, strlen={len}]" ) ,
260+ |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
261+ ) ;
262+
263+ let args = create_args_with_count :: < i64 > ( size, len, count, false , true ) ;
264+ group. bench_function (
265+ format ! ( "substr_large_string [size={size}, count={count}, strlen={len}]" ) ,
266+ |b| b. iter ( || black_box ( invoke_substr_with_args ( args. clone ( ) , size) ) ) ,
267+ ) ;
268+
269+ group. finish ( ) ;
201270 }
202271}
203272
0 commit comments