Skip to content

Commit 57b275a

Browse files
authored
feat: correct struct column names for arrays_zip return type (#20886)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> For `arrays_zip`, the return type differs slightly from DuckDB's: the struct field names are different. DuckDB produces 1-based numeric field names: ``` struct { 1: ...., 2: ...., 3: ...., n: ...... } ``` whereas DataFusion currently produces `c`-prefixed field names: ``` struct { c0: ...., c1: ...., c2: ...., cn: ...... } ``` Future work: Spark uses 0-based numeric field names, which this PR does not yet support: ``` struct { 0: ...., 1: ...., 2: ...., n: ...... } ``` - Closes #. ## Rationale for this change <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> ## What changes are included in this PR? Rename the struct field names in the `arrays_zip` return type from `c0`, `c1`, ... to the 1-based names `1`, `2`, ... so that they match DuckDB. <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> ## Are these changes tested? <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. -->
1 parent 8b412de commit 57b275a

2 files changed

Lines changed: 33 additions & 33 deletions

File tree

datafusion/functions-nested/src/arrays_zip.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ impl ScalarUDFImpl for ArraysZip {
125125
return exec_err!("arrays_zip expects array arguments, got {dt}");
126126
}
127127
};
128-
fields.push(Field::new(format!("c{i}"), element_type, true));
128+
fields.push(Field::new(format!("{}", i + 1), element_type, true));
129129
}
130130

131131
Ok(List(Arc::new(Field::new_list_field(
@@ -227,7 +227,7 @@ fn arrays_zip_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
227227
let struct_fields: Fields = element_types
228228
.iter()
229229
.enumerate()
230-
.map(|(i, dt)| Field::new(format!("c{i}"), dt.clone(), true))
230+
.map(|(i, dt)| Field::new(format!("{}", i + 1), dt.clone(), true))
231231
.collect::<Vec<_>>()
232232
.into();
233233

datafusion/sqllogictest/test_files/array.slt

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -7112,61 +7112,61 @@ from array_distinct_table_2D_fixed;
71127112
query ?
71137113
select arrays_zip([1, 2, 3], [2, 3, 4]);
71147114
----
7115-
[{c0: 1, c1: 2}, {c0: 2, c1: 3}, {c0: 3, c1: 4}]
7115+
[{1: 1, 2: 2}, {1: 2, 2: 3}, {1: 3, 2: 4}]
71167116

71177117
# Spark example: arrays_zip(array(1, 2), array(2, 3), array(3, 4))
71187118
query ?
71197119
select arrays_zip([1, 2], [2, 3], [3, 4]);
71207120
----
7121-
[{c0: 1, c1: 2, c2: 3}, {c0: 2, c1: 3, c2: 4}]
7121+
[{1: 1, 2: 2, 3: 3}, {1: 2, 2: 3, 3: 4}]
71227122

71237123
# basic: two integer arrays of equal length
71247124
query ?
71257125
select arrays_zip([1, 2, 3], [10, 20, 30]);
71267126
----
7127-
[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}]
7127+
[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
71287128

71297129
# basic: two arrays with different element types (int + string)
71307130
query ?
71317131
select arrays_zip([1, 2, 3], ['a', 'b', 'c']);
71327132
----
7133-
[{c0: 1, c1: a}, {c0: 2, c1: b}, {c0: 3, c1: c}]
7133+
[{1: 1, 2: a}, {1: 2, 2: b}, {1: 3, 2: c}]
71347134

71357135
# three arrays of equal length
71367136
query ?
71377137
select arrays_zip([1, 2, 3], [10, 20, 30], [100, 200, 300]);
71387138
----
7139-
[{c0: 1, c1: 10, c2: 100}, {c0: 2, c1: 20, c2: 200}, {c0: 3, c1: 30, c2: 300}]
7139+
[{1: 1, 2: 10, 3: 100}, {1: 2, 2: 20, 3: 200}, {1: 3, 2: 30, 3: 300}]
71407140

71417141
# four arrays of equal length
71427142
query ?
71437143
select arrays_zip([1], [2], [3], [4]);
71447144
----
7145-
[{c0: 1, c1: 2, c2: 3, c3: 4}]
7145+
[{1: 1, 2: 2, 3: 3, 4: 4}]
71467146

71477147
# mixed element types: float + boolean
71487148
query ?
71497149
select arrays_zip([1.5, 2.5], [true, false]);
71507150
----
7151-
[{c0: 1.5, c1: true}, {c0: 2.5, c1: false}]
7151+
[{1: 1.5, 2: true}, {1: 2.5, 2: false}]
71527152

71537153
# different length arrays: shorter array padded with NULLs
71547154
query ?
71557155
select arrays_zip([1, 2], [3, 4, 5]);
71567156
----
7157-
[{c0: 1, c1: 3}, {c0: 2, c1: 4}, {c0: NULL, c1: 5}]
7157+
[{1: 1, 2: 3}, {1: 2, 2: 4}, {1: NULL, 2: 5}]
71587158

71597159
# different length arrays: first longer
71607160
query ?
71617161
select arrays_zip([1, 2, 3], [10]);
71627162
----
7163-
[{c0: 1, c1: 10}, {c0: 2, c1: NULL}, {c0: 3, c1: NULL}]
7163+
[{1: 1, 2: 10}, {1: 2, 2: NULL}, {1: 3, 2: NULL}]
71647164

71657165
# different length: one single element, other three elements
71667166
query ?
71677167
select arrays_zip([1], ['a', 'b', 'c']);
71687168
----
7169-
[{c0: 1, c1: a}, {c0: NULL, c1: b}, {c0: NULL, c1: c}]
7169+
[{1: 1, 2: a}, {1: NULL, 2: b}, {1: NULL, 2: c}]
71707170

71717171
# empty arrays
71727172
query ?
@@ -7178,19 +7178,19 @@ select arrays_zip([], []);
71787178
query ?
71797179
select arrays_zip([], [1, 2, 3]);
71807180
----
7181-
[{c0: NULL, c1: 1}, {c0: NULL, c1: 2}, {c0: NULL, c1: 3}]
7181+
[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}]
71827182

71837183
# NULL elements inside arrays
71847184
query ?
71857185
select arrays_zip([1, NULL, 3], ['a', 'b', 'c']);
71867186
----
7187-
[{c0: 1, c1: a}, {c0: NULL, c1: b}, {c0: 3, c1: c}]
7187+
[{1: 1, 2: a}, {1: NULL, 2: b}, {1: 3, 2: c}]
71887188

71897189
# all NULL elements
71907190
query ?
71917191
select arrays_zip([NULL::int, NULL, NULL], [NULL::text, NULL, NULL]);
71927192
----
7193-
[{c0: NULL, c1: NULL}, {c0: NULL, c1: NULL}, {c0: NULL, c1: NULL}]
7193+
[{1: NULL, 2: NULL}, {1: NULL, 2: NULL}, {1: NULL, 2: NULL}]
71947194

71957195
# both args are NULL (entire list null)
71967196
query ?
@@ -7202,35 +7202,35 @@ NULL
72027202
query ?
72037203
select arrays_zip(NULL::int[], [1, 2, 3]);
72047204
----
7205-
[{c0: NULL, c1: 1}, {c0: NULL, c1: 2}, {c0: NULL, c1: 3}]
7205+
[{1: NULL, 2: 1}, {1: NULL, 2: 2}, {1: NULL, 2: 3}]
72067206

72077207
# real array + NULL list
72087208
query ?
72097209
select arrays_zip([1, 2], NULL::text[]);
72107210
----
7211-
[{c0: 1, c1: NULL}, {c0: 2, c1: NULL}]
7211+
[{1: 1, 2: NULL}, {1: 2, 2: NULL}]
72127212

72137213
# column-level test with multiple rows
72147214
query ?
72157215
select arrays_zip(a, b) from (values ([1, 2], [10, 20]), ([3, 4, 5], [30]), ([6], [60, 70])) as t(a, b);
72167216
----
7217-
[{c0: 1, c1: 10}, {c0: 2, c1: 20}]
7218-
[{c0: 3, c1: 30}, {c0: 4, c1: NULL}, {c0: 5, c1: NULL}]
7219-
[{c0: 6, c1: 60}, {c0: NULL, c1: 70}]
7217+
[{1: 1, 2: 10}, {1: 2, 2: 20}]
7218+
[{1: 3, 2: 30}, {1: 4, 2: NULL}, {1: 5, 2: NULL}]
7219+
[{1: 6, 2: 60}, {1: NULL, 2: 70}]
72207220

72217221
# column-level test with NULL rows
72227222
query ?
72237223
select arrays_zip(a, b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b);
72247224
----
7225-
[{c0: 1, c1: 10}, {c0: 2, c1: 20}]
7226-
[{c0: NULL, c1: 30}, {c0: NULL, c1: 40}]
7227-
[{c0: 5, c1: NULL}, {c0: 6, c1: NULL}]
7225+
[{1: 1, 2: 10}, {1: 2, 2: 20}]
7226+
[{1: NULL, 2: 30}, {1: NULL, 2: 40}]
7227+
[{1: 5, 2: NULL}, {1: 6, 2: NULL}]
72287228

72297229
# alias: list_zip
72307230
query ?
72317231
select list_zip([1, 2], [3, 4]);
72327232
----
7233-
[{c0: 1, c1: 3}, {c0: 2, c1: 4}]
7233+
[{1: 1, 2: 3}, {1: 2, 2: 4}]
72347234

72357235
# column test: total values equal (3 each) but per-row lengths differ
72367236
# a: [1] b: [10, 20] → row 0: a has 1, b has 2
@@ -7239,14 +7239,14 @@ select list_zip([1, 2], [3, 4]);
72397239
query ?
72407240
select arrays_zip(a, b) from (values ([1], [10, 20]), ([2, 3], [30])) as t(a, b);
72417241
----
7242-
[{c0: 1, c1: 10}, {c0: NULL, c1: 20}]
7243-
[{c0: 2, c1: 30}, {c0: 3, c1: NULL}]
7242+
[{1: 1, 2: 10}, {1: NULL, 2: 20}]
7243+
[{1: 2, 2: 30}, {1: 3, 2: NULL}]
72447244

72457245
# single element arrays
72467246
query ?
72477247
select arrays_zip([42], ['hello']);
72487248
----
7249-
[{c0: 42, c1: hello}]
7249+
[{1: 42, 2: hello}]
72507250

72517251
# error: too few arguments
72527252
statement error
@@ -7259,7 +7259,7 @@ select arrays_zip(
72597259
arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)')
72607260
);
72617261
----
7262-
[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}]
7262+
[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
72637263

72647264
# arrays_zip with LargeList different lengths (padding)
72657265
query ?
@@ -7268,7 +7268,7 @@ select arrays_zip(
72687268
arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)')
72697269
);
72707270
----
7271-
[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: NULL, c1: 30}]
7271+
[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: NULL, 2: 30}]
72727272

72737273
# arrays_zip with FixedSizeList inputs
72747274
query ?
@@ -7277,7 +7277,7 @@ select arrays_zip(
72777277
arrow_cast(make_array(10, 20, 30), 'FixedSizeList(3, Int64)')
72787278
);
72797279
----
7280-
[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}]
7280+
[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
72817281

72827282
# arrays_zip mixing List and LargeList
72837283
query ?
@@ -7286,7 +7286,7 @@ select arrays_zip(
72867286
arrow_cast(make_array(10, 20, 30), 'LargeList(Int64)')
72877287
);
72887288
----
7289-
[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: 30}]
7289+
[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
72907290

72917291
# arrays_zip mixing List and FixedSizeList with different lengths (padding)
72927292
query ?
@@ -7295,7 +7295,7 @@ select arrays_zip(
72957295
arrow_cast(make_array(10, 20), 'FixedSizeList(2, Int64)')
72967296
);
72977297
----
7298-
[{c0: 1, c1: 10}, {c0: 2, c1: 20}, {c0: 3, c1: NULL}]
7298+
[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: NULL}]
72997299

73007300
# arrays_zip with LargeList and FixedSizeList mixed types
73017301
query ?
@@ -7304,7 +7304,7 @@ select arrays_zip(
73047304
arrow_cast(make_array('a', 'b'), 'FixedSizeList(2, Utf8)')
73057305
);
73067306
----
7307-
[{c0: 1, c1: a}, {c0: 2, c1: b}]
7307+
[{1: 1, 2: a}, {1: 2, 2: b}]
73087308

73097309
query ???
73107310
select array_intersect(column1, column2),

0 commit comments

Comments
 (0)