Skip to content

Commit a83d162

Browse files
fix: string_to_array('', delim) returns empty array for PostgreSQL compatibility
Rust's `str::split()` on an empty string always yields one empty-string element, so `"".split(",")` produces `[""]`. The empty-delimiter branch also unconditionally appended the (empty) string value. Both branches, in the variants with and without a null-value argument, now guard with `!string.is_empty()` so that an empty input string returns a truly empty array, matching PostgreSQL behavior. The tests use `cardinality()` to verify the result unambiguously, since Arrow's text format renders `[""]` identically to `[]`.
1 parent 15bc6bd commit a83d162

2 files changed

Lines changed: 44 additions & 16 deletions

File tree

datafusion/functions-nested/src/string.rs

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -738,13 +738,17 @@ where
738738
|(string, delimiter)| {
739739
match (string, delimiter) {
740740
(Some(string), Some("")) => {
741-
list_builder.values().append_value(string);
741+
if !string.is_empty() {
742+
list_builder.values().append_value(string);
743+
}
742744
list_builder.append(true);
743745
}
744746
(Some(string), Some(delimiter)) => {
745-
string.split(delimiter).for_each(|s| {
746-
list_builder.values().append_value(s);
747-
});
747+
if !string.is_empty() {
748+
string.split(delimiter).for_each(|s| {
749+
list_builder.values().append_value(s);
750+
});
751+
}
748752
list_builder.append(true);
749753
}
750754
(Some(string), None) => {
@@ -753,7 +757,7 @@ where
753757
});
754758
list_builder.append(true);
755759
}
756-
_ => list_builder.append(false), // null value
760+
_ => list_builder.append(false),
757761
}
758762
},
759763
)
@@ -765,21 +769,25 @@ where
765769
.for_each(|((string, delimiter), null_value)| {
766770
match (string, delimiter) {
767771
(Some(string), Some("")) => {
768-
if Some(string) == null_value {
769-
list_builder.values().append_null();
770-
} else {
771-
list_builder.values().append_value(string);
772+
if !string.is_empty() {
773+
if Some(string) == null_value {
774+
list_builder.values().append_null();
775+
} else {
776+
list_builder.values().append_value(string);
777+
}
772778
}
773779
list_builder.append(true);
774780
}
775781
(Some(string), Some(delimiter)) => {
776-
string.split(delimiter).for_each(|s| {
777-
if Some(s) == null_value {
778-
list_builder.values().append_null();
779-
} else {
780-
list_builder.values().append_value(s);
781-
}
782-
});
782+
if !string.is_empty() {
783+
string.split(delimiter).for_each(|s| {
784+
if Some(s) == null_value {
785+
list_builder.values().append_null();
786+
} else {
787+
list_builder.values().append_value(s);
788+
}
789+
});
790+
}
783791
list_builder.append(true);
784792
}
785793
(Some(string), None) => {

datafusion/sqllogictest/test_files/array.slt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8623,6 +8623,26 @@ SELECT string_to_array('abcxxxdef', 'xxx')
86238623
----
86248624
[abc, def]
86258625

8626+
query I
8627+
SELECT cardinality(string_to_array('', ','))
8628+
----
8629+
0
8630+
8631+
query I
8632+
SELECT cardinality(string_to_array('', ''))
8633+
----
8634+
0
8635+
8636+
query I
8637+
SELECT cardinality(string_to_array('', ',', 'x'))
8638+
----
8639+
0
8640+
8641+
query I
8642+
SELECT cardinality(string_to_array('', '', 'x'))
8643+
----
8644+
0
8645+
86268646
query ?
86278647
SELECT string_to_array('abc', '')
86288648
----

0 commit comments

Comments
 (0)