Skip to content

Commit 183cdf0

Browse files
authored
fix: Handle NULL inputs correctly in find_in_set() (apache#20209)
The previous code returned an array of the wrong type, leading to an assertion failure.

## Rationale for this change

```
create table tt (x string);
insert into tt values ('a'), ('b,a'), ('c'), (null);
select find_in_set(tt.x, null) from tt;
select find_in_set(null, tt.x) from tt;
```

Yields:

```
Internal error: Assertion failed: result_data_type == *expected_type: Function 'find_in_set' returned value of type 'Utf8View' while the following type was promised at planning time and expected: 'Int32'.
This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues
Internal error: Assertion failed: result_data_type == *expected_type: Function 'find_in_set' returned value of type 'Utf8View' while the following type was promised at planning time and expected: 'Int32'.
This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues
```

## Are these changes tested?

Yes, added an SLT test.
1 parent bc747a0 commit 183cdf0

2 files changed

Lines changed: 19 additions & 18 deletions

File tree

datafusion/functions/src/unicode/find_in_set.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,8 @@ impl ScalarUDFImpl for FindInSetFunc {
9898
}
9999

100100
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
101-
let ScalarFunctionArgs { args, .. } = args;
102-
103-
let [string, str_list] = take_function_args(self.name(), args)?;
101+
let return_field = args.return_field;
102+
let [string, str_list] = take_function_args(self.name(), args.args)?;
104103

105104
match (string, str_list) {
106105
// both inputs are scalars
@@ -141,7 +140,7 @@ impl ScalarUDFImpl for FindInSetFunc {
141140
) => {
142141
let result_array = match str_list_literal {
143142
// find_in_set(column_a, null) = null
144-
None => new_null_array(str_array.data_type(), str_array.len()),
143+
None => new_null_array(return_field.data_type(), str_array.len()),
145144
Some(str_list_literal) => {
146145
let str_list = str_list_literal.split(',').collect::<Vec<&str>>();
147146
let result = match str_array.data_type() {
@@ -190,7 +189,7 @@ impl ScalarUDFImpl for FindInSetFunc {
190189
let res = match string_literal {
191190
// find_in_set(null, column_b) = null
192191
None => {
193-
new_null_array(str_list_array.data_type(), str_list_array.len())
192+
new_null_array(return_field.data_type(), str_list_array.len())
194193
}
195194
Some(string) => {
196195
let result = match str_list_array.data_type() {

datafusion/sqllogictest/test_files/string/string_query.slt.part

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -993,25 +993,27 @@ NULL NULL NULL NULL
993993
# Test FIND_IN_SET
994994
# --------------------------------------
995995

996-
query IIII
996+
query IIIIII
997997
SELECT
998998
FIND_IN_SET(ascii_1, 'a,b,c,d'),
999999
FIND_IN_SET(ascii_1, 'Andrew,Xiangpeng,Raphael'),
10001000
FIND_IN_SET(unicode_1, 'a,b,c,d'),
1001-
FIND_IN_SET(unicode_1, 'datafusion📊🔥,datafusion数据融合,datafusionДатаФусион')
1001+
FIND_IN_SET(unicode_1, 'datafusion📊🔥,datafusion数据融合,datafusionДатаФусион'),
1002+
FIND_IN_SET(NULL, unicode_1),
1003+
FIND_IN_SET(unicode_1, NULL)
10021004
FROM test_basic_operator;
10031005
----
1004-
0 1 0 1
1005-
0 2 0 2
1006-
0 3 0 3
1007-
0 0 0 0
1008-
0 0 0 0
1009-
0 0 0 0
1010-
0 0 0 0
1011-
0 0 0 0
1012-
0 0 0 0
1013-
NULL NULL NULL NULL
1014-
NULL NULL NULL NULL
1006+
0 1 0 1 NULL NULL
1007+
0 2 0 2 NULL NULL
1008+
0 3 0 3 NULL NULL
1009+
0 0 0 0 NULL NULL
1010+
0 0 0 0 NULL NULL
1011+
0 0 0 0 NULL NULL
1012+
0 0 0 0 NULL NULL
1013+
0 0 0 0 NULL NULL
1014+
0 0 0 0 NULL NULL
1015+
NULL NULL NULL NULL NULL NULL
1016+
NULL NULL NULL NULL NULL NULL
10151017

10161018
# --------------------------------------
10171019
# Test || operator

0 commit comments

Comments
 (0)