Skip to content

Commit aebae99

Browse files
authored
fix: Add custom nullability for Spark ILIKE function (#19206)
Fixes #19174

This PR adds custom nullability handling for the Spark ILIKE function. Previously, the function relied on the default `is_nullable`, which always returns `true`; that is not correct.

## Changes

- Implemented `return_field_from_args()` to handle custom nullability logic
- The result is nullable if **any** of the input arguments is nullable
- This matches Spark's behavior, where `ILIKE(NULL, pattern)` or `ILIKE(str, NULL)` returns `NULL`
- Updated `return_type()` to return an `internal_err!`, enforcing the use of `return_field_from_args`
- Added comprehensive nullability tests covering all combinations:
  - Non-nullable when both inputs are non-nullable
  - Nullable when the first input is nullable
  - Nullable when the second input is nullable
  - Nullable when both inputs are nullable

## Test Plan

All existing tests pass:

running 2 tests
test function::string::ilike::tests::test_ilike_nullability ... ok
test function::string::ilike::tests::test_ilike_invoke ... ok
test result: ok. 2 passed; 0 failed; 0 ignored

The implementation follows the same pattern used by other Spark functions in the codebase (like `shuffle` and `array`).
1 parent 06f27e9 commit aebae99

1 file changed

Lines changed: 84 additions & 6 deletions

File tree

  • datafusion/spark/src/function/string

datafusion/spark/src/function/string/ilike.rs

Lines changed: 84 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717

1818
use arrow::array::ArrayRef;
1919
use arrow::compute::ilike;
20-
use arrow::datatypes::DataType;
21-
use datafusion_common::{exec_err, Result};
20+
use arrow::datatypes::{DataType, Field};
21+
use datafusion_common::{exec_err, internal_err, Result};
2222
use datafusion_expr::ColumnarValue;
23-
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
23+
use datafusion_expr::{
24+
ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
25+
};
2426
use datafusion_functions::utils::make_scalar_function;
2527
use std::any::Any;
2628
use std::sync::Arc;
@@ -60,7 +62,14 @@ impl ScalarUDFImpl for SparkILike {
6062
}
6163

6264
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
63-
Ok(DataType::Boolean)
65+
internal_err!("return_field_from_args should be used instead")
66+
}
67+
68+
fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<Arc<Field>> {
69+
// The output field is always named "ilike" and typed Boolean.
70+
// It is nullable as soon as any argument field is nullable, and non-nullable only when none of them is.
71+
let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
72+
Ok(Arc::new(Field::new("ilike", DataType::Boolean, nullable)))
6473
}
6574

6675
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
@@ -83,9 +92,9 @@ mod tests {
8392
use super::*;
8493
use crate::function::utils::test::test_scalar_function;
8594
use arrow::array::{Array, BooleanArray};
86-
use arrow::datatypes::DataType::Boolean;
95+
use arrow::datatypes::{DataType::Boolean, Field};
8796
use datafusion_common::{Result, ScalarValue};
88-
use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
97+
use datafusion_expr::{ColumnarValue, ReturnFieldArgs, ScalarUDFImpl};
8998

9099
macro_rules! test_ilike_string_invoke {
91100
($INPUT1:expr, $INPUT2:expr, $EXPECTED:expr) => {
@@ -170,4 +179,73 @@ mod tests {
170179

171180
Ok(())
172181
}
182+
183+
#[test]
184+
fn test_ilike_nullability() {
185+
let ilike = SparkILike::new();
186+
187+
// Case 1: both argument fields (`str` and `pattern`) are non-nullable
188+
let non_nullable_field1 = Arc::new(Field::new("str", DataType::Utf8, false));
189+
let non_nullable_field2 = Arc::new(Field::new("pattern", DataType::Utf8, false));
190+
191+
let result = ilike
192+
.return_field_from_args(ReturnFieldArgs {
193+
arg_fields: &[
194+
Arc::clone(&non_nullable_field1),
195+
Arc::clone(&non_nullable_field2),
196+
],
197+
scalar_arguments: &[None, None],
198+
})
199+
.unwrap();
200+
201+
// With no nullable input, the returned field must be a non-nullable Boolean
202+
assert!(!result.is_nullable());
203+
assert_eq!(result.data_type(), &Boolean);
204+
205+
// Case 2: only the first argument (`str`) is nullable
206+
let nullable_field1 = Arc::new(Field::new("str", DataType::Utf8, true));
207+
208+
let result = ilike
209+
.return_field_from_args(ReturnFieldArgs {
210+
arg_fields: &[
211+
Arc::clone(&nullable_field1),
212+
Arc::clone(&non_nullable_field2),
213+
],
214+
scalar_arguments: &[None, None],
215+
})
216+
.unwrap();
217+
218+
// A nullable first input alone makes the returned Boolean field nullable
219+
assert!(result.is_nullable());
220+
assert_eq!(result.data_type(), &Boolean);
221+
222+
// Case 3: only the second argument (`pattern`) is nullable
223+
let nullable_field2 = Arc::new(Field::new("pattern", DataType::Utf8, true));
224+
225+
let result = ilike
226+
.return_field_from_args(ReturnFieldArgs {
227+
arg_fields: &[
228+
Arc::clone(&non_nullable_field1),
229+
Arc::clone(&nullable_field2),
230+
],
231+
scalar_arguments: &[None, None],
232+
})
233+
.unwrap();
234+
235+
// A nullable second input alone makes the returned Boolean field nullable
236+
assert!(result.is_nullable());
237+
assert_eq!(result.data_type(), &Boolean);
238+
239+
// Case 4: both arguments are nullable
240+
let result = ilike
241+
.return_field_from_args(ReturnFieldArgs {
242+
arg_fields: &[Arc::clone(&nullable_field1), Arc::clone(&nullable_field2)],
243+
scalar_arguments: &[None, None],
244+
})
245+
.unwrap();
246+
247+
// With both inputs nullable, the returned Boolean field is nullable as well
248+
assert!(result.is_nullable());
249+
assert_eq!(result.data_type(), &Boolean);
250+
}
173251
}

0 commit comments

Comments
 (0)