Skip to content

Commit 720aaff

Browse files
authored
perf: Use bulk-NULL builder in replace (#21849)
## Which issue does this PR close?

- Closes #21848.

## Rationale for this change

We can use the new bulk-NULL string builder APIs to reduce NULL handling overhead in `replace`. There is a further opportunity for optimization once arrow/arrow-rs#9692 lands.

Benchmarks:

- `replace size=1024/replace_string_ascii_single [str_len=32]`: 17.42 µs → 14.35 µs, **−17.71%**
- `replace size=1024/replace_string_view [str_len=32]`: 21.23 µs → 18.89 µs, **−11.05%**
- `replace size=1024/replace_string [str_len=32]`: 20.42 µs → 18.51 µs, **−9.36%**
- `replace size=1024/replace_large_string [str_len=32]`: 20.83 µs → 18.42 µs, **−11.60%**
- `replace size=1024/replace_string_ascii_single [str_len=128]`: 10.61 µs → 10.37 µs, **−2.22%**
- `replace size=1024/replace_string_view [str_len=128]`: 28.37 µs → 28.03 µs, **−1.21%**
- `replace size=1024/replace_string [str_len=128]`: 27.96 µs → 28.03 µs, **+0.25%** (noise)
- `replace size=1024/replace_large_string [str_len=128]`: 28.15 µs → 27.95 µs, **−0.70%** (noise)
- `replace size=4096/replace_string_ascii_single [str_len=32]`: 68.40 µs → 56.15 µs, **−17.94%**
- `replace size=4096/replace_string_view [str_len=32]`: 85.95 µs → 76.84 µs, **−10.61%**
- `replace size=4096/replace_string [str_len=32]`: 82.79 µs → 74.71 µs, **−9.76%**
- `replace size=4096/replace_large_string [str_len=32]`: 85.32 µs → 75.14 µs, **−11.94%**
- `replace size=4096/replace_string_ascii_single [str_len=128]`: 38.35 µs → 39.10 µs, **+1.96%** (small regression)
- `replace size=4096/replace_string_view [str_len=128]`: 132.77 µs → 128.08 µs, **−3.53%**
- `replace size=4096/replace_string [str_len=128]`: 127.71 µs → 128.29 µs, **+0.46%** (noise)
- `replace size=4096/replace_large_string [str_len=128]`: 131.97 µs → 128.68 µs, **−2.49%**

## What changes are included in this PR?

* Use bulk-NULL string builders in `replace`

## Are these changes tested?

Yes, covered by existing tests.

## Are there any user-facing changes?

No.
1 parent 4876cdc commit 720aaff

2 files changed

Lines changed: 150 additions & 131 deletions

File tree

datafusion/functions/benches/replace.rs

Lines changed: 83 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::OffsetSizeTrait;
18+
use arrow::array::{GenericStringArray, OffsetSizeTrait, StringViewArray};
1919
use arrow::datatypes::{DataType, Field};
2020
use arrow::util::bench_util::{
2121
create_string_array_with_len, create_string_view_array_with_len,
@@ -29,31 +29,58 @@ use std::hint::black_box;
2929
use std::sync::Arc;
3030
use std::time::Duration;
3131

32+
/// Build a string array, dropping the null buffer when `null_density == 0.0`
33+
fn make_string_array<O: OffsetSizeTrait>(
34+
size: usize,
35+
null_density: f32,
36+
str_len: usize,
37+
) -> GenericStringArray<O> {
38+
let arr = create_string_array_with_len::<O>(size, null_density, str_len);
39+
if null_density == 0.0 {
40+
let (offsets, values, _) = arr.into_parts();
41+
GenericStringArray::<O>::new(offsets, values, None)
42+
} else {
43+
arr
44+
}
45+
}
46+
47+
fn make_string_view_array(
48+
size: usize,
49+
null_density: f32,
50+
str_len: usize,
51+
) -> StringViewArray {
52+
let arr = create_string_view_array_with_len(size, null_density, str_len, false);
53+
if null_density == 0.0 {
54+
let (views, buffers, _) = arr.into_parts();
55+
StringViewArray::new(views, buffers, None)
56+
} else {
57+
arr
58+
}
59+
}
60+
3261
fn create_args<O: OffsetSizeTrait>(
3362
size: usize,
3463
str_len: usize,
3564
force_view_types: bool,
3665
from_len: usize,
3766
to_len: usize,
67+
null_density: f32,
3868
) -> Vec<ColumnarValue> {
69+
// Apply `null_density` only to the string column; `from` and `to` are
70+
// typically not NULL in real-world workloads.
3971
if force_view_types {
40-
let string_array =
41-
Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false));
42-
let from_array = Arc::new(create_string_view_array_with_len(
43-
size, 0.1, from_len, false,
44-
));
45-
let to_array =
46-
Arc::new(create_string_view_array_with_len(size, 0.1, to_len, false));
72+
let string_array = Arc::new(make_string_view_array(size, null_density, str_len));
73+
let from_array = Arc::new(make_string_view_array(size, 0.0, from_len));
74+
let to_array = Arc::new(make_string_view_array(size, 0.0, to_len));
4775
vec![
4876
ColumnarValue::Array(string_array),
4977
ColumnarValue::Array(from_array),
5078
ColumnarValue::Array(to_array),
5179
]
5280
} else {
53-
let string_array =
54-
Arc::new(create_string_array_with_len::<O>(size, 0.1, str_len));
55-
let from_array = Arc::new(create_string_array_with_len::<O>(size, 0.1, from_len));
56-
let to_array = Arc::new(create_string_array_with_len::<O>(size, 0.1, to_len));
81+
let string_array = Arc::new(make_string_array::<O>(size, null_density, str_len));
82+
let from_array = Arc::new(make_string_array::<O>(size, 0.0, from_len));
83+
let to_array = Arc::new(make_string_array::<O>(size, 0.0, to_len));
5784

5885
vec![
5986
ColumnarValue::Array(string_array),
@@ -90,98 +117,50 @@ fn criterion_benchmark(c: &mut Criterion) {
90117
group.sample_size(10);
91118
group.measurement_time(Duration::from_secs(10));
92119

93-
// ASCII single character replacement (fast path)
94-
let str_len = 32;
95-
let args = create_args::<i32>(size, str_len, false, 1, 1);
96-
group.bench_function(
97-
format!("replace_string_ascii_single [size={size}, str_len={str_len}]"),
98-
|b| {
99-
b.iter(|| {
100-
let args_cloned = args.clone();
101-
black_box(invoke_replace_with_args(args_cloned, size))
102-
})
103-
},
104-
);
105-
106-
// Multi-character strings (general path)
107-
let args = create_args::<i32>(size, str_len, true, 3, 5);
108-
group.bench_function(
109-
format!("replace_string_view [size={size}, str_len={str_len}]"),
110-
|b| {
111-
b.iter(|| {
112-
let args_cloned = args.clone();
113-
black_box(invoke_replace_with_args(args_cloned, size))
114-
})
115-
},
116-
);
117-
118-
let args = create_args::<i32>(size, str_len, false, 3, 5);
119-
group.bench_function(
120-
format!("replace_string [size={size}, str_len={str_len}]"),
121-
|b| {
122-
b.iter(|| {
123-
let args_cloned = args.clone();
124-
black_box(invoke_replace_with_args(args_cloned, size))
125-
})
126-
},
127-
);
128-
129-
let args = create_args::<i64>(size, str_len, false, 3, 5);
130-
group.bench_function(
131-
format!("replace_large_string [size={size}, str_len={str_len}]"),
132-
|b| {
133-
b.iter(|| {
134-
let args_cloned = args.clone();
135-
black_box(invoke_replace_with_args(args_cloned, size))
136-
})
137-
},
138-
);
139-
140-
// Larger strings
141-
let str_len = 128;
142-
let args = create_args::<i32>(size, str_len, false, 1, 1);
143-
group.bench_function(
144-
format!("replace_string_ascii_single [size={size}, str_len={str_len}]"),
145-
|b| {
146-
b.iter(|| {
147-
let args_cloned = args.clone();
148-
black_box(invoke_replace_with_args(args_cloned, size))
149-
})
150-
},
151-
);
152-
153-
let args = create_args::<i32>(size, str_len, true, 3, 5);
154-
group.bench_function(
155-
format!("replace_string_view [size={size}, str_len={str_len}]"),
156-
|b| {
157-
b.iter(|| {
158-
let args_cloned = args.clone();
159-
black_box(invoke_replace_with_args(args_cloned, size))
160-
})
161-
},
162-
);
163-
164-
let args = create_args::<i32>(size, str_len, false, 3, 5);
165-
group.bench_function(
166-
format!("replace_string [size={size}, str_len={str_len}]"),
167-
|b| {
168-
b.iter(|| {
169-
let args_cloned = args.clone();
170-
black_box(invoke_replace_with_args(args_cloned, size))
171-
})
172-
},
173-
);
174-
175-
let args = create_args::<i64>(size, str_len, false, 3, 5);
176-
group.bench_function(
177-
format!("replace_large_string [size={size}, str_len={str_len}]"),
178-
|b| {
179-
b.iter(|| {
180-
let args_cloned = args.clone();
181-
black_box(invoke_replace_with_args(args_cloned, size))
182-
})
183-
},
184-
);
120+
for &nulls in &[0.0_f32, 0.2] {
121+
for &str_len in &[32_usize, 128] {
122+
// ASCII single character replacement (fast path)
123+
let args = create_args::<i32>(size, str_len, false, 1, 1, nulls);
124+
group.bench_function(
125+
format!(
126+
"replace_string_ascii_single [size={size}, str_len={str_len}, nulls={nulls}]"
127+
),
128+
|b| {
129+
b.iter(|| {
130+
let args_cloned = args.clone();
131+
black_box(invoke_replace_with_args(args_cloned, size))
132+
})
133+
},
134+
);
135+
136+
// Multi-character strings (general path)
137+
let args = create_args::<i32>(size, str_len, true, 3, 5, nulls);
138+
group.bench_function(
139+
format!(
140+
"replace_string_view [size={size}, str_len={str_len}, nulls={nulls}]"
141+
),
142+
|b| {
143+
b.iter(|| {
144+
let args_cloned = args.clone();
145+
black_box(invoke_replace_with_args(args_cloned, size))
146+
})
147+
},
148+
);
149+
150+
let args = create_args::<i32>(size, str_len, false, 3, 5, nulls);
151+
group.bench_function(
152+
format!(
153+
"replace_string [size={size}, str_len={str_len}, nulls={nulls}]"
154+
),
155+
|b| {
156+
b.iter(|| {
157+
let args_cloned = args.clone();
158+
black_box(invoke_replace_with_args(args_cloned, size))
159+
})
160+
},
161+
);
162+
}
163+
}
185164

186165
group.finish();
187166
}

datafusion/functions/src/string/replace.rs

Lines changed: 67 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717

1818
use std::sync::Arc;
1919

20-
use arrow::array::{ArrayRef, GenericStringBuilder, OffsetSizeTrait};
20+
use arrow::array::{Array, ArrayRef, OffsetSizeTrait};
21+
use arrow::buffer::NullBuffer;
2122
use arrow::datatypes::DataType;
2223

24+
use crate::strings::GenericStringArrayBuilder;
2325
use crate::utils::{make_scalar_function, utf8_to_str_type};
2426
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
2527
use datafusion_common::types::logical_string;
@@ -160,25 +162,44 @@ fn replace_view(args: &[ArrayRef]) -> Result<ArrayRef> {
160162
let from_array = as_string_view_array(&args[1])?;
161163
let to_array = as_string_view_array(&args[2])?;
162164

163-
let mut builder = GenericStringBuilder::<i32>::new();
165+
let len = string_array.len();
166+
let mut builder = GenericStringArrayBuilder::<i32>::with_capacity(len, 0);
164167
let mut buffer = String::new();
168+
let nulls = NullBuffer::union(
169+
NullBuffer::union(string_array.nulls(), from_array.nulls()).as_ref(),
170+
to_array.nulls(),
171+
);
165172

166-
for ((string, from), to) in string_array
167-
.iter()
168-
.zip(from_array.iter())
169-
.zip(to_array.iter())
170-
{
171-
match (string, from, to) {
172-
(Some(string), Some(from), Some(to)) => {
173-
buffer.clear();
174-
replace_into_string(&mut buffer, string, from, to);
175-
builder.append_value(&buffer);
173+
// Hoist the nulls.is_some() check out of the loop. LLVM does not always
174+
// unswitch this loop on its own (the Utf8View body is large enough to
175+
// exceed its cost-benefit threshold).
176+
if let Some(nulls_ref) = nulls.as_ref() {
177+
for i in 0..len {
178+
if nulls_ref.is_null(i) {
179+
builder.append_placeholder();
180+
continue;
176181
}
177-
_ => builder.append_null(),
182+
// SAFETY: union of input nulls is non-null at i, so each input is too.
183+
let string = unsafe { string_array.value_unchecked(i) };
184+
let from = unsafe { from_array.value_unchecked(i) };
185+
let to = unsafe { to_array.value_unchecked(i) };
186+
buffer.clear();
187+
replace_into_string(&mut buffer, string, from, to);
188+
builder.append_value(&buffer);
189+
}
190+
} else {
191+
for i in 0..len {
192+
// SAFETY: i < len, and no input has a null buffer.
193+
let string = unsafe { string_array.value_unchecked(i) };
194+
let from = unsafe { from_array.value_unchecked(i) };
195+
let to = unsafe { to_array.value_unchecked(i) };
196+
buffer.clear();
197+
replace_into_string(&mut buffer, string, from, to);
198+
builder.append_value(&buffer);
178199
}
179200
}
180201

181-
Ok(Arc::new(builder.finish()) as ArrayRef)
202+
Ok(Arc::new(builder.finish(nulls)?) as ArrayRef)
182203
}
183204

184205
/// Replaces all occurrences in string of substring from with substring to.
@@ -188,25 +209,44 @@ fn replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
188209
let from_array = as_generic_string_array::<T>(&args[1])?;
189210
let to_array = as_generic_string_array::<T>(&args[2])?;
190211

191-
let mut builder = GenericStringBuilder::<T>::new();
212+
let len = string_array.len();
213+
let mut builder = GenericStringArrayBuilder::<T>::with_capacity(len, 0);
192214
let mut buffer = String::new();
215+
let nulls = NullBuffer::union(
216+
NullBuffer::union(string_array.nulls(), from_array.nulls()).as_ref(),
217+
to_array.nulls(),
218+
);
193219

194-
for ((string, from), to) in string_array
195-
.iter()
196-
.zip(from_array.iter())
197-
.zip(to_array.iter())
198-
{
199-
match (string, from, to) {
200-
(Some(string), Some(from), Some(to)) => {
201-
buffer.clear();
202-
replace_into_string(&mut buffer, string, from, to);
203-
builder.append_value(&buffer);
220+
// Hoist the nulls.is_some() check out of the loop. LLVM unswitches this
221+
// automatically today, but kept explicit so the no-nulls fast path is not
222+
// contingent on the optimizer's cost heuristic.
223+
if let Some(nulls_ref) = nulls.as_ref() {
224+
for i in 0..len {
225+
if nulls_ref.is_null(i) {
226+
builder.append_placeholder();
227+
continue;
204228
}
205-
_ => builder.append_null(),
229+
// SAFETY: union of input nulls is non-null at i, so each input is too.
230+
let string = unsafe { string_array.value_unchecked(i) };
231+
let from = unsafe { from_array.value_unchecked(i) };
232+
let to = unsafe { to_array.value_unchecked(i) };
233+
buffer.clear();
234+
replace_into_string(&mut buffer, string, from, to);
235+
builder.append_value(&buffer);
236+
}
237+
} else {
238+
for i in 0..len {
239+
// SAFETY: i < len, and no input has a null buffer.
240+
let string = unsafe { string_array.value_unchecked(i) };
241+
let from = unsafe { from_array.value_unchecked(i) };
242+
let to = unsafe { to_array.value_unchecked(i) };
243+
buffer.clear();
244+
replace_into_string(&mut buffer, string, from, to);
245+
builder.append_value(&buffer);
206246
}
207247
}
208248

209-
Ok(Arc::new(builder.finish()) as ArrayRef)
249+
Ok(Arc::new(builder.finish(nulls)?) as ArrayRef)
210250
}
211251

212252
/// Helper function to perform string replacement into a reusable String buffer

0 commit comments

Comments
 (0)