Skip to content

Commit 0401a47

Browse files
authored
fix: Avoid integer overflow in substr() (#20199)
## Rationale for this change Evaluating `SELECT SUBSTR('', 2, 9223372036854775807);` yields (in a debug build): ``` thread 'main' (41414592) panicked at datafusion/functions/src/string/split_part.rs:236:47: attempt to negate with overflow stack backtrace: 0: __rustc::rust_begin_unwind at /rustc/ded5c06cf21d2b93bffd5d884aa6e96934ee4234/library/std/src/panicking.rs:698:5 1: core::panicking::panic_fmt at /rustc/ded5c06cf21d2b93bffd5d884aa6e96934ee4234/library/core/src/panicking.rs:80:14 2: core::panicking::panic_const::panic_const_neg_overflow at /rustc/ded5c06cf21d2b93bffd5d884aa6e96934ee4234/library/core/src/panicking.rs:180:17 3: datafusion_functions::string::split_part::split_part_impl::{{closure}} at ./datafusion/functions/src/string/split_part.rs:236:47 4: core::iter::traits::iterator::Iterator::try_for_each::call::{{closure}} at /Users/neilconway/.rustup/toolchains/1.92.0-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/traits/iterator.rs:2485:26 [...] ``` Found via fuzzing. ## Are these changes tested? Yes, added a unit test.
1 parent 85cdf53 commit 0401a47

1 file changed

Lines changed: 22 additions & 7 deletions

File tree

datafusion/functions/src/unicode/substr.rs

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,10 @@ pub fn get_true_start_end(
185185
let start = start.checked_sub(1).unwrap_or(start);
186186

187187
let end = match count {
188-
Some(count) => start + count as i64,
188+
Some(count) => {
189+
let count_i64 = i64::try_from(count).unwrap_or(i64::MAX);
190+
start.saturating_add(count_i64)
191+
}
189192
None => input.len() as i64,
190193
};
191194
let count_to_end = count.is_some();
@@ -247,19 +250,19 @@ pub fn enable_ascii_fast_path<'a, V: StringArrayType<'a>>(
247250

248251
// HACK: can be simplified if function has specialized
249252
// implementation for `ScalarValue` (implement without `make_scalar_function()`)
250-
let avg_prefix_len = start
253+
let total_prefix_len = start
251254
.iter()
252255
.zip(count.iter())
253256
.take(n_sample)
254257
.map(|(start, count)| {
255258
let start = start.unwrap_or(0);
256259
let count = count.unwrap_or(0);
257260
// To get substring, need to decode from 0 to start+count instead of start to start+count
258-
start + count
261+
start.saturating_add(count)
259262
})
260-
.sum::<i64>();
263+
.fold(0i64, |acc, val| acc.saturating_add(val));
261264

262-
avg_prefix_len as f64 / n_sample as f64 <= short_prefix_threshold
265+
(total_prefix_len as f64 / n_sample as f64) <= short_prefix_threshold
263266
}
264267
None => false,
265268
};
@@ -810,7 +813,7 @@ mod tests {
810813
SubstrFunc::new(),
811814
vec![
812815
ColumnarValue::Scalar(ScalarValue::from("abc")),
813-
ColumnarValue::Scalar(ScalarValue::from(-9223372036854775808i64)),
816+
ColumnarValue::Scalar(ScalarValue::from(i64::MIN)),
814817
],
815818
Ok(Some("abc")),
816819
&str,
@@ -821,14 +824,26 @@ mod tests {
821824
SubstrFunc::new(),
822825
vec![
823826
ColumnarValue::Scalar(ScalarValue::from("overflow")),
824-
ColumnarValue::Scalar(ScalarValue::from(-9223372036854775808i64)),
827+
ColumnarValue::Scalar(ScalarValue::from(i64::MIN)),
825828
ColumnarValue::Scalar(ScalarValue::from(1i64)),
826829
],
827830
exec_err!("negative overflow when calculating skip value"),
828831
&str,
829832
Utf8View,
830833
StringViewArray
831834
);
835+
test_function!(
836+
SubstrFunc::new(),
837+
vec![
838+
ColumnarValue::Scalar(ScalarValue::from("large count")),
839+
ColumnarValue::Scalar(ScalarValue::from(2i64)),
840+
ColumnarValue::Scalar(ScalarValue::from(i64::MAX)),
841+
],
842+
Ok(Some("arge count")),
843+
&str,
844+
Utf8View,
845+
StringViewArray
846+
);
832847

833848
Ok(())
834849
}

0 commit comments

Comments
 (0)