Skip to content

Commit dabfa4d

Browse files
fix: string_to_array('', delim) returns empty array for PostgreSQL compatibility (apache#21104)
## Problem `string_to_array` was returning incorrect results for empty string input — both when the delimiter is non-empty and when the delimiter is itself an empty string. This diverges from PostgreSQL behavior. | Query | DataFusion (before) | PostgreSQL (expected) | |---|---|---| | `string_to_array('', ',')` | `['']` | `{}` | | `string_to_array('', '')` | `['']` | `{}` | | `string_to_array('', ',', 'x')` | `['']` | `{}` | | `string_to_array('', '', 'x')` | `['']` | `{}` | Results from datafusion-cli <img width="1435" height="104" alt="Screenshot 2026-03-23 at 9 14 08 AM" src="https://github.com/user-attachments/assets/2eaae366-7f8a-4220-87d2-f0b311c26712" /> **Root cause:** Rust's `str::split()` on an empty string always yields one empty-string element, so `"".split(",")` produces `[""]`. Additionally, the empty-delimiter branch unconditionally appended the (empty) string value. This is subtle because Arrow's text display format appears not to quote strings, so `[""]` renders as `[]` — indistinguishable from an actual empty array. Using `cardinality()` reveals the current length is 1, not 0. **PostgreSQL reference:** [db-fiddle](https://www.db-fiddle.com/f/oCF8EPaZFkDNKSg28rVVWy/3) ## Fix In `datafusion/functions-nested/src/string.rs`: - **Non-empty delimiter** `(Some(string), Some(delimiter))`: added `if !string.is_empty()` guard to skip splitting when input is empty. - **Empty delimiter** `(Some(string), Some(""))`: added `if !string.is_empty()` guard so the string value is only appended when non-empty. Both the plain variant and the `null_value` variant are fixed (4 checks total). ## Tests Added sqllogictest cases in `datafusion/sqllogictest/test_files/array.slt` using `cardinality()` to unambiguously verify the arrays are truly empty (not just displaying as empty): ```sql SELECT cardinality(string_to_array('', ',')) -- 0 SELECT cardinality(string_to_array('', '')) -- 0 SELECT cardinality(string_to_array('', ',', 'x')) -- 0 SELECT cardinality(string_to_array('', '', 'x')) -- 0 ``` Each test covers one of the four `is_empty` guard checks. All four fail without the fix (returning 1 instead of 0). (cherry picked from commit cdaecf0)
1 parent 9723464 commit dabfa4d

File tree

2 files changed

+59
-35
lines changed

2 files changed

+59
-35
lines changed

datafusion/functions-nested/src/string.rs

Lines changed: 39 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -721,53 +721,57 @@ where
721721
let mut list_builder = ListBuilder::new(string_builder);
722722

723723
match null_value_array {
724-
None => {
725-
string_array.iter().zip(delimiter_array.iter()).for_each(
726-
|(string, delimiter)| {
727-
match (string, delimiter) {
728-
(Some(string), Some("")) => {
729-
list_builder.values().append_value(string);
730-
list_builder.append(true);
731-
}
732-
(Some(string), Some(delimiter)) => {
733-
string.split(delimiter).for_each(|s| {
734-
list_builder.values().append_value(s);
735-
});
736-
list_builder.append(true);
737-
}
738-
(Some(string), None) => {
739-
string.chars().map(|c| c.to_string()).for_each(|c| {
740-
list_builder.values().append_value(c.as_str());
741-
});
742-
list_builder.append(true);
743-
}
744-
_ => list_builder.append(false), // null value
724+
None => string_array.iter().zip(delimiter_array.iter()).for_each(
725+
|(string, delimiter)| match (string, delimiter) {
726+
(Some(string), Some("")) => {
727+
if !string.is_empty() {
728+
list_builder.values().append_value(string);
745729
}
746-
},
747-
)
748-
}
730+
list_builder.append(true);
731+
}
732+
(Some(string), Some(delimiter)) => {
733+
if !string.is_empty() {
734+
string.split(delimiter).for_each(|s| {
735+
list_builder.values().append_value(s);
736+
});
737+
}
738+
list_builder.append(true);
739+
}
740+
(Some(string), None) => {
741+
string.chars().map(|c| c.to_string()).for_each(|c| {
742+
list_builder.values().append_value(c.as_str());
743+
});
744+
list_builder.append(true);
745+
}
746+
_ => list_builder.append(false),
747+
},
748+
),
749749
Some(null_value_array) => string_array
750750
.iter()
751751
.zip(delimiter_array.iter())
752752
.zip(null_value_array.iter())
753753
.for_each(|((string, delimiter), null_value)| {
754754
match (string, delimiter) {
755755
(Some(string), Some("")) => {
756-
if Some(string) == null_value {
757-
list_builder.values().append_null();
758-
} else {
759-
list_builder.values().append_value(string);
756+
if !string.is_empty() {
757+
if Some(string) == null_value {
758+
list_builder.values().append_null();
759+
} else {
760+
list_builder.values().append_value(string);
761+
}
760762
}
761763
list_builder.append(true);
762764
}
763765
(Some(string), Some(delimiter)) => {
764-
string.split(delimiter).for_each(|s| {
765-
if Some(s) == null_value {
766-
list_builder.values().append_null();
767-
} else {
768-
list_builder.values().append_value(s);
769-
}
770-
});
766+
if !string.is_empty() {
767+
string.split(delimiter).for_each(|s| {
768+
if Some(s) == null_value {
769+
list_builder.values().append_null();
770+
} else {
771+
list_builder.values().append_value(s);
772+
}
773+
});
774+
}
771775
list_builder.append(true);
772776
}
773777
(Some(string), None) => {

datafusion/sqllogictest/test_files/array.slt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8040,6 +8040,26 @@ SELECT string_to_array('abcxxxdef', 'xxx')
80408040
----
80418041
[abc, def]
80428042

8043+
query I
8044+
SELECT cardinality(string_to_array('', ','))
8045+
----
8046+
0
8047+
8048+
query I
8049+
SELECT cardinality(string_to_array('', ''))
8050+
----
8051+
0
8052+
8053+
query I
8054+
SELECT cardinality(string_to_array('', ',', 'x'))
8055+
----
8056+
0
8057+
8058+
query I
8059+
SELECT cardinality(string_to_array('', '', 'x'))
8060+
----
8061+
0
8062+
80438063
query ?
80448064
SELECT string_to_array('abc', '')
80458065
----

0 commit comments

Comments
 (0)