@@ -24,6 +24,7 @@ use arrow::util::bench_util::{
2424 create_string_array_with_len, create_string_view_array_with_len,
2525} ;
2626use criterion:: { Criterion , SamplingMode , criterion_group, criterion_main} ;
27+ use datafusion_common:: ScalarValue ;
2728use datafusion_common:: config:: ConfigOptions ;
2829use datafusion_expr:: { ColumnarValue , ScalarFunctionArgs } ;
2930use datafusion_functions:: unicode;
@@ -172,6 +173,32 @@ fn create_pad_args<O: OffsetSizeTrait>(
172173 }
173174}
174175
176+ /// Create args for pad benchmark with scalar length and fill (common pattern:
177+ /// `lpad(column, 20, '0')`).
178+ fn create_scalar_pad_args < O : OffsetSizeTrait > (
179+ size : usize ,
180+ str_len : usize ,
181+ target_len : i64 ,
182+ fill : & str ,
183+ use_string_view : bool ,
184+ ) -> Vec < ColumnarValue > {
185+ if use_string_view {
186+ let string_array = create_string_view_array_with_len ( size, 0.1 , str_len, false ) ;
187+ vec ! [
188+ ColumnarValue :: Array ( Arc :: new( string_array) ) ,
189+ ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( target_len) ) ) ,
190+ ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( Some ( fill. to_string( ) ) ) ) ,
191+ ]
192+ } else {
193+ let string_array = create_string_array_with_len :: < O > ( size, 0.1 , str_len) ;
194+ vec ! [
195+ ColumnarValue :: Array ( Arc :: new( string_array) ) ,
196+ ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( target_len) ) ) ,
197+ ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( Some ( fill. to_string( ) ) ) ) ,
198+ ]
199+ }
200+ }
201+
175202fn criterion_benchmark ( c : & mut Criterion ) {
176203 for size in [ 1024 , 4096 ] {
177204 let mut group = c. benchmark_group ( format ! ( "lpad size={size}" ) ) ;
@@ -336,6 +363,118 @@ fn criterion_benchmark(c: &mut Criterion) {
336363 } ,
337364 ) ;
338365
366+ // --- Scalar length + fill benchmarks ---
367+
368+ // Utf8 with scalar length and fill (3-arg)
369+ let args = create_scalar_pad_args :: < i32 > ( size, 5 , 20 , "x" , false ) ;
370+ let arg_fields = args
371+ . iter ( )
372+ . enumerate ( )
373+ . map ( |( idx, arg) | {
374+ Field :: new ( format ! ( "arg_{idx}" ) , arg. data_type ( ) , true ) . into ( )
375+ } )
376+ . collect :: < Vec < _ > > ( ) ;
377+
378+ group. bench_function (
379+ format ! ( "lpad utf8 scalar [size={size}, str_len=5, target=20, fill='x']" ) ,
380+ |b| {
381+ b. iter ( || {
382+ let args_cloned = args. clone ( ) ;
383+ black_box ( unicode:: lpad ( ) . invoke_with_args ( ScalarFunctionArgs {
384+ args : args_cloned,
385+ arg_fields : arg_fields. clone ( ) ,
386+ number_rows : size,
387+ return_field : Field :: new ( "f" , DataType :: Utf8 , true ) . into ( ) ,
388+ config_options : Arc :: clone ( & config_options) ,
389+ } ) )
390+ } )
391+ } ,
392+ ) ;
393+
394+ // StringView with scalar length and fill (3-arg)
395+ let args = create_scalar_pad_args :: < i32 > ( size, 5 , 20 , "x" , true ) ;
396+ let arg_fields = args
397+ . iter ( )
398+ . enumerate ( )
399+ . map ( |( idx, arg) | {
400+ Field :: new ( format ! ( "arg_{idx}" ) , arg. data_type ( ) , true ) . into ( )
401+ } )
402+ . collect :: < Vec < _ > > ( ) ;
403+
404+ group. bench_function (
405+ format ! (
406+ "lpad stringview scalar [size={size}, str_len=5, target=20, fill='x']"
407+ ) ,
408+ |b| {
409+ b. iter ( || {
410+ let args_cloned = args. clone ( ) ;
411+ black_box ( unicode:: lpad ( ) . invoke_with_args ( ScalarFunctionArgs {
412+ args : args_cloned,
413+ arg_fields : arg_fields. clone ( ) ,
414+ number_rows : size,
415+ return_field : Field :: new ( "f" , DataType :: Utf8 , true ) . into ( ) ,
416+ config_options : Arc :: clone ( & config_options) ,
417+ } ) )
418+ } )
419+ } ,
420+ ) ;
421+
422+ // Utf8 with scalar length and unicode fill
423+ let args = create_scalar_pad_args :: < i32 > ( size, 5 , 20 , "é" , false ) ;
424+ let arg_fields = args
425+ . iter ( )
426+ . enumerate ( )
427+ . map ( |( idx, arg) | {
428+ Field :: new ( format ! ( "arg_{idx}" ) , arg. data_type ( ) , true ) . into ( )
429+ } )
430+ . collect :: < Vec < _ > > ( ) ;
431+
432+ group. bench_function (
433+ format ! (
434+ "lpad utf8 scalar unicode [size={size}, str_len=5, target=20, fill='é']"
435+ ) ,
436+ |b| {
437+ b. iter ( || {
438+ let args_cloned = args. clone ( ) ;
439+ black_box ( unicode:: lpad ( ) . invoke_with_args ( ScalarFunctionArgs {
440+ args : args_cloned,
441+ arg_fields : arg_fields. clone ( ) ,
442+ number_rows : size,
443+ return_field : Field :: new ( "f" , DataType :: Utf8 , true ) . into ( ) ,
444+ config_options : Arc :: clone ( & config_options) ,
445+ } ) )
446+ } )
447+ } ,
448+ ) ;
449+
450+ // Utf8 with scalar truncation (str_len > target) and unicode fill
451+ let args = create_scalar_pad_args :: < i32 > ( size, 20 , 5 , "é" , false ) ;
452+ let arg_fields = args
453+ . iter ( )
454+ . enumerate ( )
455+ . map ( |( idx, arg) | {
456+ Field :: new ( format ! ( "arg_{idx}" ) , arg. data_type ( ) , true ) . into ( )
457+ } )
458+ . collect :: < Vec < _ > > ( ) ;
459+
460+ group. bench_function (
461+ format ! (
462+ "lpad utf8 scalar truncate [size={size}, str_len=20, target=5, fill='é']"
463+ ) ,
464+ |b| {
465+ b. iter ( || {
466+ let args_cloned = args. clone ( ) ;
467+ black_box ( unicode:: lpad ( ) . invoke_with_args ( ScalarFunctionArgs {
468+ args : args_cloned,
469+ arg_fields : arg_fields. clone ( ) ,
470+ number_rows : size,
471+ return_field : Field :: new ( "f" , DataType :: Utf8 , true ) . into ( ) ,
472+ config_options : Arc :: clone ( & config_options) ,
473+ } ) )
474+ } )
475+ } ,
476+ ) ;
477+
339478 group. finish ( ) ;
340479 }
341480
@@ -502,6 +641,118 @@ fn criterion_benchmark(c: &mut Criterion) {
502641 } ,
503642 ) ;
504643
644+ // --- Scalar length + fill benchmarks ---
645+
646+ // Utf8 with scalar length and fill (3-arg)
647+ let args = create_scalar_pad_args :: < i32 > ( size, 5 , 20 , "x" , false ) ;
648+ let arg_fields = args
649+ . iter ( )
650+ . enumerate ( )
651+ . map ( |( idx, arg) | {
652+ Field :: new ( format ! ( "arg_{idx}" ) , arg. data_type ( ) , true ) . into ( )
653+ } )
654+ . collect :: < Vec < _ > > ( ) ;
655+
656+ group. bench_function (
657+ format ! ( "rpad utf8 scalar [size={size}, str_len=5, target=20, fill='x']" ) ,
658+ |b| {
659+ b. iter ( || {
660+ let args_cloned = args. clone ( ) ;
661+ black_box ( unicode:: rpad ( ) . invoke_with_args ( ScalarFunctionArgs {
662+ args : args_cloned,
663+ arg_fields : arg_fields. clone ( ) ,
664+ number_rows : size,
665+ return_field : Field :: new ( "f" , DataType :: Utf8 , true ) . into ( ) ,
666+ config_options : Arc :: clone ( & config_options) ,
667+ } ) )
668+ } )
669+ } ,
670+ ) ;
671+
672+ // StringView with scalar length and fill (3-arg)
673+ let args = create_scalar_pad_args :: < i32 > ( size, 5 , 20 , "x" , true ) ;
674+ let arg_fields = args
675+ . iter ( )
676+ . enumerate ( )
677+ . map ( |( idx, arg) | {
678+ Field :: new ( format ! ( "arg_{idx}" ) , arg. data_type ( ) , true ) . into ( )
679+ } )
680+ . collect :: < Vec < _ > > ( ) ;
681+
682+ group. bench_function (
683+ format ! (
684+ "rpad stringview scalar [size={size}, str_len=5, target=20, fill='x']"
685+ ) ,
686+ |b| {
687+ b. iter ( || {
688+ let args_cloned = args. clone ( ) ;
689+ black_box ( unicode:: rpad ( ) . invoke_with_args ( ScalarFunctionArgs {
690+ args : args_cloned,
691+ arg_fields : arg_fields. clone ( ) ,
692+ number_rows : size,
693+ return_field : Field :: new ( "f" , DataType :: Utf8 , true ) . into ( ) ,
694+ config_options : Arc :: clone ( & config_options) ,
695+ } ) )
696+ } )
697+ } ,
698+ ) ;
699+
700+ // Utf8 with scalar length and unicode fill
701+ let args = create_scalar_pad_args :: < i32 > ( size, 5 , 20 , "é" , false ) ;
702+ let arg_fields = args
703+ . iter ( )
704+ . enumerate ( )
705+ . map ( |( idx, arg) | {
706+ Field :: new ( format ! ( "arg_{idx}" ) , arg. data_type ( ) , true ) . into ( )
707+ } )
708+ . collect :: < Vec < _ > > ( ) ;
709+
710+ group. bench_function (
711+ format ! (
712+ "rpad utf8 scalar unicode [size={size}, str_len=5, target=20, fill='é']"
713+ ) ,
714+ |b| {
715+ b. iter ( || {
716+ let args_cloned = args. clone ( ) ;
717+ black_box ( unicode:: rpad ( ) . invoke_with_args ( ScalarFunctionArgs {
718+ args : args_cloned,
719+ arg_fields : arg_fields. clone ( ) ,
720+ number_rows : size,
721+ return_field : Field :: new ( "f" , DataType :: Utf8 , true ) . into ( ) ,
722+ config_options : Arc :: clone ( & config_options) ,
723+ } ) )
724+ } )
725+ } ,
726+ ) ;
727+
728+ // Utf8 with scalar truncation (str_len > target) and unicode fill
729+ let args = create_scalar_pad_args :: < i32 > ( size, 20 , 5 , "é" , false ) ;
730+ let arg_fields = args
731+ . iter ( )
732+ . enumerate ( )
733+ . map ( |( idx, arg) | {
734+ Field :: new ( format ! ( "arg_{idx}" ) , arg. data_type ( ) , true ) . into ( )
735+ } )
736+ . collect :: < Vec < _ > > ( ) ;
737+
738+ group. bench_function (
739+ format ! (
740+ "rpad utf8 scalar truncate [size={size}, str_len=20, target=5, fill='é']"
741+ ) ,
742+ |b| {
743+ b. iter ( || {
744+ let args_cloned = args. clone ( ) ;
745+ black_box ( unicode:: rpad ( ) . invoke_with_args ( ScalarFunctionArgs {
746+ args : args_cloned,
747+ arg_fields : arg_fields. clone ( ) ,
748+ number_rows : size,
749+ return_field : Field :: new ( "f" , DataType :: Utf8 , true ) . into ( ) ,
750+ config_options : Arc :: clone ( & config_options) ,
751+ } ) )
752+ } )
753+ } ,
754+ ) ;
755+
505756 group. finish ( ) ;
506757 }
507758}
0 commit comments