Skip to content

Commit cdaecf0

Browse files
fix: string_to_array('', delim) returns empty array for PostgreSQL compatibility (#21104)
## Problem `string_to_array` was returning incorrect results for empty string input — both when the delimiter is non-empty and when the delimiter is itself an empty string. This diverges from PostgreSQL behavior. | Query | DataFusion (before) | PostgreSQL (expected) | |---|---|---| | `string_to_array('', ',')` | `['']` | `{}` | | `string_to_array('', '')` | `['']` | `{}` | | `string_to_array('', ',', 'x')` | `['']` | `{}` | | `string_to_array('', '', 'x')` | `['']` | `{}` | Results from datafusion-cli <img width="1435" height="104" alt="Screenshot 2026-03-23 at 9 14 08 AM" src="https://github.com/user-attachments/assets/2eaae366-7f8a-4220-87d2-f0b311c26712" /> **Root cause:** Rust's `str::split()` on an empty string always yields one empty-string element, so `"".split(",")` produces `[""]`. Additionally, the empty-delimiter branch unconditionally appended the (empty) string value. This is subtle because Arrow's text display format appears not to quote strings, so `[""]` renders as `[]` — indistinguishable from an actual empty array. Using `cardinality()` reveals the current length is 1, not 0. **PostgreSQL reference:** [db-fiddle](https://www.db-fiddle.com/f/oCF8EPaZFkDNKSg28rVVWy/3) ## Fix In `datafusion/functions-nested/src/string.rs`: - **Non-empty delimiter** `(Some(string), Some(delimiter))`: added `if !string.is_empty()` guard to skip splitting when input is empty. - **Empty delimiter** `(Some(string), Some(""))`: added `if !string.is_empty()` guard so the string value is only appended when non-empty. Both the plain variant and the `null_value` variant are fixed (4 checks total). ## Tests Added sqllogictest cases in `datafusion/sqllogictest/test_files/array.slt` using `cardinality()` to unambiguously verify the arrays are truly empty (not just displaying as empty): ```sql SELECT cardinality(string_to_array('', ',')) -- 0 SELECT cardinality(string_to_array('', '')) -- 0 SELECT cardinality(string_to_array('', ',', 'x')) -- 0 SELECT cardinality(string_to_array('', '', 'x')) -- 0 ``` Each test covers one of the four `is_empty` guard checks. All four fail without the fix (returning 1 instead of 0).
1 parent 2b986c8 commit cdaecf0

File tree

2 files changed

+59
-35
lines changed

2 files changed

+59
-35
lines changed

datafusion/functions-nested/src/string.rs

Lines changed: 39 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -727,53 +727,57 @@ where
727727
let mut list_builder = ListBuilder::new(string_builder);
728728

729729
match null_value_array {
730-
None => {
731-
string_array.iter().zip(delimiter_array.iter()).for_each(
732-
|(string, delimiter)| {
733-
match (string, delimiter) {
734-
(Some(string), Some("")) => {
735-
list_builder.values().append_value(string);
736-
list_builder.append(true);
737-
}
738-
(Some(string), Some(delimiter)) => {
739-
string.split(delimiter).for_each(|s| {
740-
list_builder.values().append_value(s);
741-
});
742-
list_builder.append(true);
743-
}
744-
(Some(string), None) => {
745-
string.chars().map(|c| c.to_string()).for_each(|c| {
746-
list_builder.values().append_value(c.as_str());
747-
});
748-
list_builder.append(true);
749-
}
750-
_ => list_builder.append(false), // null value
730+
None => string_array.iter().zip(delimiter_array.iter()).for_each(
731+
|(string, delimiter)| match (string, delimiter) {
732+
(Some(string), Some("")) => {
733+
if !string.is_empty() {
734+
list_builder.values().append_value(string);
751735
}
752-
},
753-
)
754-
}
736+
list_builder.append(true);
737+
}
738+
(Some(string), Some(delimiter)) => {
739+
if !string.is_empty() {
740+
string.split(delimiter).for_each(|s| {
741+
list_builder.values().append_value(s);
742+
});
743+
}
744+
list_builder.append(true);
745+
}
746+
(Some(string), None) => {
747+
string.chars().map(|c| c.to_string()).for_each(|c| {
748+
list_builder.values().append_value(c.as_str());
749+
});
750+
list_builder.append(true);
751+
}
752+
_ => list_builder.append(false),
753+
},
754+
),
755755
Some(null_value_array) => string_array
756756
.iter()
757757
.zip(delimiter_array.iter())
758758
.zip(null_value_array.iter())
759759
.for_each(|((string, delimiter), null_value)| {
760760
match (string, delimiter) {
761761
(Some(string), Some("")) => {
762-
if Some(string) == null_value {
763-
list_builder.values().append_null();
764-
} else {
765-
list_builder.values().append_value(string);
762+
if !string.is_empty() {
763+
if Some(string) == null_value {
764+
list_builder.values().append_null();
765+
} else {
766+
list_builder.values().append_value(string);
767+
}
766768
}
767769
list_builder.append(true);
768770
}
769771
(Some(string), Some(delimiter)) => {
770-
string.split(delimiter).for_each(|s| {
771-
if Some(s) == null_value {
772-
list_builder.values().append_null();
773-
} else {
774-
list_builder.values().append_value(s);
775-
}
776-
});
772+
if !string.is_empty() {
773+
string.split(delimiter).for_each(|s| {
774+
if Some(s) == null_value {
775+
list_builder.values().append_null();
776+
} else {
777+
list_builder.values().append_value(s);
778+
}
779+
});
780+
}
777781
list_builder.append(true);
778782
}
779783
(Some(string), None) => {

datafusion/sqllogictest/test_files/array.slt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9109,6 +9109,26 @@ SELECT string_to_array('abcxxxdef', 'xxx')
91099109
----
91109110
[abc, def]
91119111

9112+
query I
9113+
SELECT cardinality(string_to_array('', ','))
9114+
----
9115+
0
9116+
9117+
query I
9118+
SELECT cardinality(string_to_array('', ''))
9119+
----
9120+
0
9121+
9122+
query I
9123+
SELECT cardinality(string_to_array('', ',', 'x'))
9124+
----
9125+
0
9126+
9127+
query I
9128+
SELECT cardinality(string_to_array('', '', 'x'))
9129+
----
9130+
0
9131+
91129132
query ?
91139133
SELECT string_to_array('abc', '')
91149134
----

0 commit comments

Comments
 (0)