Skip to content

Commit 12314c5

Browse files
authored
perf: Optimize array_to_string to avoid a copy (#20639)
## Which issue does this PR close? N/A ## Rationale for this change `array_to_string` used a temporary buffer to convert array elements to a string, before then copying that buffer into a `StringBuilder` via `append_value`. We can skip this copy by arranging to write directly into the `StringBuilder's buffer because it implements `fmt::Write`. This is a 12-18% performance improvement on the `array_to_string` benchmarks. ## What changes are included in this PR? * Optimization * Change the signature of the `append` closure to return `Result`, which avoids a lot of `unwrap` clutter ## Are these changes tested? Yes -- covered by existing tests and benchmarks. ## Are there any user-facing changes? No.
1 parent a5f490e commit 12314c5

1 file changed

Lines changed: 39 additions & 41 deletions

File tree

datafusion/functions-nested/src/string.rs

Lines changed: 39 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ use datafusion_common::utils::ListCoercion;
2929
use datafusion_common::{DataFusionError, Result, not_impl_err};
3030

3131
use std::any::Any;
32-
use std::fmt::Write;
32+
use std::fmt::{self, Write};
3333

3434
use crate::utils::make_scalar_function;
3535
use arrow::array::{
@@ -324,7 +324,6 @@ fn generate_string_array<O: OffsetSizeTrait>(
324324
null_strings: &[Option<&str>],
325325
) -> Result<StringArray> {
326326
let mut builder = StringBuilder::with_capacity(list_arr.len(), 0);
327-
let mut buf = String::new();
328327

329328
for ((arr, &delimiter), &null_string) in list_arr
330329
.iter()
@@ -336,17 +335,16 @@ fn generate_string_array<O: OffsetSizeTrait>(
336335
continue;
337336
};
338337

339-
buf.clear();
340338
let mut first = true;
341-
compute_array_to_string(&mut buf, &arr, delimiter, null_string, &mut first)?;
342-
builder.append_value(&buf);
339+
compute_array_to_string(&mut builder, &arr, delimiter, null_string, &mut first)?;
340+
builder.append_value("");
343341
}
344342

345343
Ok(builder.finish())
346344
}
347345

348346
fn compute_array_to_string(
349-
buf: &mut String,
347+
w: &mut impl Write,
350348
arr: &ArrayRef,
351349
delimiter: &str,
352350
null_string: Option<&str>,
@@ -358,7 +356,7 @@ fn compute_array_to_string(
358356
for i in 0..$list_array.len() {
359357
if !$list_array.is_null(i) {
360358
compute_array_to_string(
361-
buf,
359+
w,
362360
&$list_array.value(i),
363361
delimiter,
364362
null_string,
@@ -368,9 +366,9 @@ fn compute_array_to_string(
368366
if *first {
369367
*first = false;
370368
} else {
371-
buf.push_str(delimiter);
369+
w.write_str(delimiter)?;
372370
}
373-
buf.push_str(ns);
371+
w.write_str(ns)?;
374372
}
375373
}
376374
};
@@ -399,71 +397,69 @@ fn compute_array_to_string(
399397
DataFusionError::from(e)
400398
.context("Casting dictionary to values in compute_array_to_string")
401399
})?;
402-
compute_array_to_string(buf, &values, delimiter, null_string, first)
400+
compute_array_to_string(w, &values, delimiter, null_string, first)
403401
}
404402
Null => Ok(()),
405403
data_type => {
406404
macro_rules! str_leaf {
407405
($ARRAY_TYPE:ident) => {
408406
write_leaf_to_string(
409-
buf,
407+
w,
410408
downcast_arg!(arr, $ARRAY_TYPE),
411409
delimiter,
412410
null_string,
413411
first,
414-
|buf, x: &str| buf.push_str(x),
415-
)
412+
|w, x: &str| w.write_str(x),
413+
)?
416414
};
417415
}
418416
macro_rules! bool_leaf {
419417
($ARRAY_TYPE:ident) => {
420418
write_leaf_to_string(
421-
buf,
419+
w,
422420
downcast_arg!(arr, $ARRAY_TYPE),
423421
delimiter,
424422
null_string,
425423
first,
426-
|buf, x: bool| {
424+
|w, x: bool| {
427425
if x {
428-
buf.push_str("true");
426+
w.write_str("true")
429427
} else {
430-
buf.push_str("false");
428+
w.write_str("false")
431429
}
432430
},
433-
)
431+
)?
434432
};
435433
}
436434
macro_rules! int_leaf {
437435
($ARRAY_TYPE:ident) => {
438436
write_leaf_to_string(
439-
buf,
437+
w,
440438
downcast_arg!(arr, $ARRAY_TYPE),
441439
delimiter,
442440
null_string,
443441
first,
444-
|buf, x| {
442+
|w, x| {
445443
let mut itoa_buf = itoa::Buffer::new();
446-
buf.push_str(itoa_buf.format(x));
444+
w.write_str(itoa_buf.format(x))
447445
},
448-
)
446+
)?
449447
};
450448
}
451449
macro_rules! float_leaf {
452450
($ARRAY_TYPE:ident) => {
453451
write_leaf_to_string(
454-
buf,
452+
w,
455453
downcast_arg!(arr, $ARRAY_TYPE),
456454
delimiter,
457455
null_string,
458456
first,
459-
|buf, x| {
460-
// TODO: Consider switching to a more efficient
461-
// floating point display library (e.g., ryu). This
462-
// might result in some differences in the output
463-
// format, however.
464-
write!(buf, "{}", x).unwrap();
465-
},
466-
)
457+
// TODO: Consider switching to a more efficient
458+
// floating point display library (e.g., ryu). This
459+
// might result in some differences in the output
460+
// format, however.
461+
|w, x| write!(w, "{}", x),
462+
)?
467463
};
468464
}
469465
match data_type {
@@ -487,7 +483,7 @@ fn compute_array_to_string(
487483
.context("Casting to string in array_to_string")
488484
})?;
489485
return compute_array_to_string(
490-
buf,
486+
w,
491487
&str_arr,
492488
delimiter,
493489
null_string,
@@ -506,17 +502,18 @@ fn compute_array_to_string(
506502
}
507503

508504
/// Appends the string representation of each element in a leaf (non-list)
509-
/// array to `buf`, separated by `delimiter`. Null elements are rendered
505+
/// array to `w`, separated by `delimiter`. Null elements are rendered
510506
/// using `null_string` if provided, or skipped otherwise. The `append`
511-
/// closure controls how each non-null element is written to the buffer.
512-
fn write_leaf_to_string<'a, A, T>(
513-
buf: &mut String,
507+
/// closure controls how each non-null element is written.
508+
fn write_leaf_to_string<'a, W: Write, A, T>(
509+
w: &mut W,
514510
arr: &'a A,
515511
delimiter: &str,
516512
null_string: Option<&str>,
517513
first: &mut bool,
518-
append: impl Fn(&mut String, T),
519-
) where
514+
append: impl Fn(&mut W, T) -> fmt::Result,
515+
) -> Result<()>
516+
where
520517
&'a A: IntoIterator<Item = Option<T>>,
521518
{
522519
for x in arr {
@@ -528,14 +525,15 @@ fn write_leaf_to_string<'a, A, T>(
528525
if *first {
529526
*first = false;
530527
} else {
531-
buf.push_str(delimiter);
528+
w.write_str(delimiter)?;
532529
}
533530

534531
match x {
535-
Some(x) => append(buf, x),
536-
None => buf.push_str(null_string.unwrap()),
532+
Some(x) => append(w, x)?,
533+
None => w.write_str(null_string.unwrap())?,
537534
}
538535
}
536+
Ok(())
539537
}
540538

541539
/// String_to_array SQL function

0 commit comments

Comments
 (0)