Skip to content

Commit a4d555f

Browse files
fix: string_to_array('', delim) returns empty array for PostgreSQL compatibility
Rust's str::split() on an empty string with a non-empty delimiter yields a single empty-string element, so "".split(",") produces [""]. The empty-delimiter branch also unconditionally appended the (empty) string value. Both branches now guard on !string.is_empty() so that an empty input string produces a truly empty array, matching PostgreSQL behavior. The tests use cardinality() to verify the result unambiguously, since Arrow's text format renders [""] identically to [].
1 parent 15bc6bd commit a4d555f

File tree

2 files changed

+59
-35
lines changed

2 files changed

+59
-35
lines changed

datafusion/functions-nested/src/string.rs

Lines changed: 39 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -733,53 +733,57 @@ where
733733
let mut list_builder = ListBuilder::new(string_builder);
734734

735735
match null_value_array {
736-
None => {
737-
string_array.iter().zip(delimiter_array.iter()).for_each(
738-
|(string, delimiter)| {
739-
match (string, delimiter) {
740-
(Some(string), Some("")) => {
741-
list_builder.values().append_value(string);
742-
list_builder.append(true);
743-
}
744-
(Some(string), Some(delimiter)) => {
745-
string.split(delimiter).for_each(|s| {
746-
list_builder.values().append_value(s);
747-
});
748-
list_builder.append(true);
749-
}
750-
(Some(string), None) => {
751-
string.chars().map(|c| c.to_string()).for_each(|c| {
752-
list_builder.values().append_value(c.as_str());
753-
});
754-
list_builder.append(true);
755-
}
756-
_ => list_builder.append(false), // null value
736+
None => string_array.iter().zip(delimiter_array.iter()).for_each(
737+
|(string, delimiter)| match (string, delimiter) {
738+
(Some(string), Some("")) => {
739+
if !string.is_empty() {
740+
list_builder.values().append_value(string);
757741
}
758-
},
759-
)
760-
}
742+
list_builder.append(true);
743+
}
744+
(Some(string), Some(delimiter)) => {
745+
if !string.is_empty() {
746+
string.split(delimiter).for_each(|s| {
747+
list_builder.values().append_value(s);
748+
});
749+
}
750+
list_builder.append(true);
751+
}
752+
(Some(string), None) => {
753+
string.chars().map(|c| c.to_string()).for_each(|c| {
754+
list_builder.values().append_value(c.as_str());
755+
});
756+
list_builder.append(true);
757+
}
758+
_ => list_builder.append(false),
759+
},
760+
),
761761
Some(null_value_array) => string_array
762762
.iter()
763763
.zip(delimiter_array.iter())
764764
.zip(null_value_array.iter())
765765
.for_each(|((string, delimiter), null_value)| {
766766
match (string, delimiter) {
767767
(Some(string), Some("")) => {
768-
if Some(string) == null_value {
769-
list_builder.values().append_null();
770-
} else {
771-
list_builder.values().append_value(string);
768+
if !string.is_empty() {
769+
if Some(string) == null_value {
770+
list_builder.values().append_null();
771+
} else {
772+
list_builder.values().append_value(string);
773+
}
772774
}
773775
list_builder.append(true);
774776
}
775777
(Some(string), Some(delimiter)) => {
776-
string.split(delimiter).for_each(|s| {
777-
if Some(s) == null_value {
778-
list_builder.values().append_null();
779-
} else {
780-
list_builder.values().append_value(s);
781-
}
782-
});
778+
if !string.is_empty() {
779+
string.split(delimiter).for_each(|s| {
780+
if Some(s) == null_value {
781+
list_builder.values().append_null();
782+
} else {
783+
list_builder.values().append_value(s);
784+
}
785+
});
786+
}
783787
list_builder.append(true);
784788
}
785789
(Some(string), None) => {

datafusion/sqllogictest/test_files/array.slt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8623,6 +8623,26 @@ SELECT string_to_array('abcxxxdef', 'xxx')
86238623
----
86248624
[abc, def]
86258625

8626+
query I
8627+
SELECT cardinality(string_to_array('', ','))
8628+
----
8629+
0
8630+
8631+
query I
8632+
SELECT cardinality(string_to_array('', ''))
8633+
----
8634+
0
8635+
8636+
query I
8637+
SELECT cardinality(string_to_array('', ',', 'x'))
8638+
----
8639+
0
8640+
8641+
query I
8642+
SELECT cardinality(string_to_array('', '', 'x'))
8643+
----
8644+
0
8645+
86268646
query ?
86278647
SELECT string_to_array('abc', '')
86288648
----

0 commit comments

Comments
 (0)