Skip to content

Commit 9c85ac6

Browse files
authored
perf: Fix quadratic behavior of to_array_of_size (#20459)
## Which issue does this PR close? - Closes #20458. - Closes #18159. ## Rationale for this change When `array_to_size(n)` was called on a `List`-like object containing a `StringViewArray` with `b` data buffers, the previous implementation returned a list containing a `StringViewArray` with `n*b` buffers, which results in catastrophically bad performance if `b` grows even somewhat large. This issue was previously noticed causing poor nested loop join performance. #18161 adjusted the NLJ code to avoid calling `to_array_of_size` for this reason, but didn't attempt to fix the underlying issue in `to_array_of_size`. This PR doesn't attempt to revert the change to the NLJ code: the special-case code added in #18161 is still slightly faster than `to_array_of_size` after this optimization. It might be possible to address that in a future PR. ## What changes are included in this PR? * Instead of using `repeat_n` + `concat` to merge together `n` copies of the `StringViewArray`, we instead use `take`, which preserves the same number of buffers as the input `StringViewArray`. * Add a new benchmark for this situation * Add more unit tests for `to_array_of_size` ## Are these changes tested? Yes and benchmarked. ## Are there any user-facing changes? No. ## AI usage Iterated on the problem with Claude Code; I understand the problem and the solution.
1 parent a9c0901 commit 9c85ac6

4 files changed

Lines changed: 204 additions & 10 deletions

File tree

datafusion/common/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ sql = ["sqlparser"]
5757
harness = false
5858
name = "with_hashes"
5959

60+
[[bench]]
61+
harness = false
62+
name = "scalar_to_array"
63+
6064
[dependencies]
6165
ahash = { workspace = true }
6266
apache-avro = { workspace = true, features = [
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Benchmarks for `ScalarValue::to_array_of_size`, focusing on List
19+
//! scalars.
20+
21+
use arrow::array::{Array, ArrayRef, AsArray, StringViewBuilder};
22+
use arrow::datatypes::{DataType, Field};
23+
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
24+
use datafusion_common::ScalarValue;
25+
use datafusion_common::utils::SingleRowListArrayBuilder;
26+
use std::sync::Arc;
27+
28+
/// Build a `ScalarValue::List` of `num_elements` Utf8View strings whose
29+
/// inner StringViewArray has `num_buffers` data buffers.
30+
fn make_list_scalar(num_elements: usize, num_buffers: usize) -> ScalarValue {
31+
let elements_per_buffer = num_elements.div_ceil(num_buffers);
32+
33+
let mut small_arrays: Vec<ArrayRef> = Vec::new();
34+
let mut remaining = num_elements;
35+
for buf_idx in 0..num_buffers {
36+
let count = remaining.min(elements_per_buffer);
37+
if count == 0 {
38+
break;
39+
}
40+
let start = buf_idx * elements_per_buffer;
41+
let mut builder = StringViewBuilder::with_capacity(count);
42+
for i in start..start + count {
43+
builder.append_value(format!("{i:024x}"));
44+
}
45+
small_arrays.push(Arc::new(builder.finish()) as ArrayRef);
46+
remaining -= count;
47+
}
48+
49+
let refs: Vec<&dyn Array> = small_arrays.iter().map(|a| a.as_ref()).collect();
50+
let concated = arrow::compute::concat(&refs).unwrap();
51+
52+
let list_array = SingleRowListArrayBuilder::new(concated)
53+
.with_field(&Field::new_list_field(DataType::Utf8View, true))
54+
.build_list_array();
55+
ScalarValue::List(Arc::new(list_array))
56+
}
57+
58+
/// We want to measure the cost of doing the conversion and then also accessing
59+
/// the results, to model what would happen during query evaluation.
60+
fn consume_list_array(arr: &ArrayRef) {
61+
let list_arr = arr.as_list::<i32>();
62+
let mut total_len: usize = 0;
63+
for i in 0..list_arr.len() {
64+
let inner = list_arr.value(i);
65+
let sv = inner.as_string_view();
66+
for j in 0..sv.len() {
67+
total_len += sv.value(j).len();
68+
}
69+
}
70+
std::hint::black_box(total_len);
71+
}
72+
73+
fn bench_list_to_array_of_size(c: &mut Criterion) {
74+
let mut group = c.benchmark_group("list_to_array_of_size");
75+
76+
let num_elements = 1245;
77+
let scalar_1buf = make_list_scalar(num_elements, 1);
78+
let scalar_50buf = make_list_scalar(num_elements, 50);
79+
80+
for batch_size in [256, 1024] {
81+
group.bench_with_input(
82+
BenchmarkId::new("1_buffer", batch_size),
83+
&batch_size,
84+
|b, &sz| {
85+
b.iter(|| {
86+
let arr = scalar_1buf.to_array_of_size(sz).unwrap();
87+
consume_list_array(&arr);
88+
});
89+
},
90+
);
91+
group.bench_with_input(
92+
BenchmarkId::new("50_buffers", batch_size),
93+
&batch_size,
94+
|b, &sz| {
95+
b.iter(|| {
96+
let arr = scalar_50buf.to_array_of_size(sz).unwrap();
97+
consume_list_array(&arr);
98+
});
99+
},
100+
);
101+
}
102+
103+
group.finish();
104+
}
105+
106+
criterion_group!(benches, bench_list_to_array_of_size);
107+
criterion_main!(benches);

datafusion/common/src/scalar/mod.rs

Lines changed: 89 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3008,7 +3008,7 @@ impl ScalarValue {
30083008
///
30093009
/// Errors if `self` is
30103010
/// - a decimal that fails be converted to a decimal array of size
3011-
/// - a `FixedsizeList` that fails to be concatenated into an array of size
3011+
/// - a `FixedSizeList` that fails to be concatenated into an array of size
30123012
/// - a `List` that fails to be concatenated into an array of size
30133013
/// - a `Dictionary` that fails be converted to a dictionary array of size
30143014
pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
@@ -3434,13 +3434,22 @@ impl ScalarValue {
34343434
}
34353435
}
34363436

3437+
/// Repeats the rows of `arr` `size` times, producing an array with
3438+
/// `arr.len() * size` total rows.
34373439
fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
3438-
let arrays = repeat_n(arr, size).collect::<Vec<_>>();
3439-
let ret = match !arrays.is_empty() {
3440-
true => arrow::compute::concat(arrays.as_slice())?,
3441-
false => arr.slice(0, 0),
3442-
};
3443-
Ok(ret)
3440+
if size == 0 {
3441+
return Ok(arr.slice(0, 0));
3442+
}
3443+
3444+
// Examples: given `arr = [[A, B, C]]` and `size = 3`, `indices = [0, 0, 0]` and
3445+
// the result is `[[A, B, C], [A, B, C], [A, B, C]]`.
3446+
//
3447+
// Given `arr = [[A, B], [C]]` and `size = 2`, `indices = [0, 1, 0, 1]` and the
3448+
// result is `[[A, B], [C], [A, B], [C]]`. (But in practice, we are always called
3449+
// with `arr.len() == 1`.)
3450+
let n = arr.len() as u32;
3451+
let indices = UInt32Array::from_iter_values((0..size).flat_map(|_| 0..n));
3452+
Ok(arrow::compute::take(arr, &indices, None)?)
34443453
}
34453454

34463455
/// Retrieve ScalarValue for each row in `array`
@@ -5532,6 +5541,79 @@ mod tests {
55325541
assert_eq!(empty_array.len(), 0);
55335542
}
55345543

5544+
#[test]
5545+
fn test_to_array_of_size_list_size_one() {
5546+
// size=1 takes the fast path (Arc::clone)
5547+
let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5548+
Some(10),
5549+
Some(20),
5550+
])]);
5551+
let sv = ScalarValue::List(Arc::new(arr.clone()));
5552+
let result = sv.to_array_of_size(1).unwrap();
5553+
assert_eq!(result.as_list::<i32>(), &arr);
5554+
}
5555+
5556+
#[test]
5557+
fn test_to_array_of_size_list_empty_inner() {
5558+
// A list scalar containing an empty list: [[]]
5559+
let arr = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![])]);
5560+
let sv = ScalarValue::List(Arc::new(arr));
5561+
let result = sv.to_array_of_size(3).unwrap();
5562+
let result_list = result.as_list::<i32>();
5563+
assert_eq!(result_list.len(), 3);
5564+
for i in 0..3 {
5565+
assert_eq!(result_list.value(i).len(), 0);
5566+
}
5567+
}
5568+
5569+
#[test]
5570+
fn test_to_array_of_size_large_list() {
5571+
let arr =
5572+
LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
5573+
Some(100),
5574+
Some(200),
5575+
])]);
5576+
let sv = ScalarValue::LargeList(Arc::new(arr));
5577+
let result = sv.to_array_of_size(3).unwrap();
5578+
let expected = LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![
5579+
Some(vec![Some(100), Some(200)]),
5580+
Some(vec![Some(100), Some(200)]),
5581+
Some(vec![Some(100), Some(200)]),
5582+
]);
5583+
assert_eq!(result.as_list::<i64>(), &expected);
5584+
}
5585+
5586+
#[test]
5587+
fn test_list_to_array_of_size_multi_row() {
5588+
// Call list_to_array_of_size directly with arr.len() > 1
5589+
let arr = Int32Array::from(vec![Some(10), None, Some(30)]);
5590+
let result = ScalarValue::list_to_array_of_size(&arr, 3).unwrap();
5591+
let result = result.as_primitive::<Int32Type>();
5592+
assert_eq!(
5593+
result.iter().collect::<Vec<_>>(),
5594+
vec![
5595+
Some(10),
5596+
None,
5597+
Some(30),
5598+
Some(10),
5599+
None,
5600+
Some(30),
5601+
Some(10),
5602+
None,
5603+
Some(30),
5604+
]
5605+
);
5606+
}
5607+
5608+
#[test]
5609+
fn test_to_array_of_size_null_list() {
5610+
let dt = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
5611+
let sv = ScalarValue::try_from(&dt).unwrap();
5612+
let result = sv.to_array_of_size(3).unwrap();
5613+
assert_eq!(result.len(), 3);
5614+
assert_eq!(result.null_count(), 3);
5615+
}
5616+
55355617
/// See https://github.com/apache/datafusion/issues/18870
55365618
#[test]
55375619
fn test_to_array_of_size_for_none_fsb() {

datafusion/physical-plan/src/joins/nested_loop_join.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2011,9 +2011,10 @@ fn build_row_join_batch(
20112011
// Broadcast the single build-side row to match the filtered
20122012
// probe-side batch length
20132013
let original_left_array = build_side_batch.column(column_index.index);
2014-
// Avoid using `ScalarValue::to_array_of_size()` for `List(Utf8View)` to avoid
2015-
// deep copies for buffers inside `Utf8View` array. See below for details.
2016-
// https://github.com/apache/datafusion/issues/18159
2014+
2015+
// Use `arrow::compute::take` directly for `List(Utf8View)` rather
2016+
// than going through `ScalarValue::to_array_of_size()`, which
2017+
// avoids some intermediate allocations.
20172018
//
20182019
// In other cases, `to_array_of_size()` is faster.
20192020
match original_left_array.data_type() {

0 commit comments

Comments
 (0)