Skip to content

Commit ff4c630

Browse files
alambJefffrey
andauthored
[branch-52] fix: maintain inner list nullability for (#19948) (#20878)
- Part of #20855 - Closes #19947 on branch-52 This PR: - Backports #19948 from @Jefffrey to the branch-52 line Co-authored-by: Jeffrey Vo <jeffrey.vo.australia@gmail.com>
1 parent 28d012a commit ff4c630

2 files changed

Lines changed: 34 additions & 27 deletions

File tree

datafusion/functions-nested/src/sort.rs

Lines changed: 9 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,14 @@
1818
//! [`ScalarUDFImpl`] definitions for array_sort function.
1919
2020
use crate::utils::make_scalar_function;
21-
use arrow::array::{
22-
Array, ArrayRef, GenericListArray, NullBufferBuilder, OffsetSizeTrait, new_null_array,
23-
};
21+
use arrow::array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait, new_null_array};
2422
use arrow::buffer::OffsetBuffer;
2523
use arrow::compute::SortColumn;
2624
use arrow::datatypes::{DataType, FieldRef};
2725
use arrow::{compute, compute::SortOptions};
2826
use datafusion_common::cast::{as_large_list_array, as_list_array, as_string_array};
2927
use datafusion_common::utils::ListCoercion;
30-
use datafusion_common::{Result, exec_err, plan_err};
28+
use datafusion_common::{Result, exec_err};
3129
use datafusion_expr::{
3230
ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
3331
ScalarUDFImpl, Signature, TypeSignature, Volatility,
@@ -134,18 +132,7 @@ impl ScalarUDFImpl for ArraySort {
134132
}
135133

136134
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
137-
match &arg_types[0] {
138-
DataType::Null => Ok(DataType::Null),
139-
DataType::List(field) => {
140-
Ok(DataType::new_list(field.data_type().clone(), true))
141-
}
142-
DataType::LargeList(field) => {
143-
Ok(DataType::new_large_list(field.data_type().clone(), true))
144-
}
145-
arg_type => {
146-
plan_err!("{} does not support type {arg_type}", self.name())
147-
}
148-
}
135+
Ok(arg_types[0].clone())
149136
}
150137

151138
fn invoke_with_args(
@@ -206,11 +193,11 @@ fn array_sort_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
206193
}
207194
DataType::List(field) => {
208195
let array = as_list_array(&args[0])?;
209-
array_sort_generic(array, field, sort_options)
196+
array_sort_generic(array, Arc::clone(field), sort_options)
210197
}
211198
DataType::LargeList(field) => {
212199
let array = as_large_list_array(&args[0])?;
213-
array_sort_generic(array, field, sort_options)
200+
array_sort_generic(array, Arc::clone(field), sort_options)
214201
}
215202
// Signature should prevent this arm ever occurring
216203
_ => exec_err!("array_sort expects list for first argument"),
@@ -219,18 +206,16 @@ fn array_sort_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
219206

220207
fn array_sort_generic<OffsetSize: OffsetSizeTrait>(
221208
list_array: &GenericListArray<OffsetSize>,
222-
field: &FieldRef,
209+
field: FieldRef,
223210
sort_options: Option<SortOptions>,
224211
) -> Result<ArrayRef> {
225212
let row_count = list_array.len();
226213

227214
let mut array_lengths = vec![];
228215
let mut arrays = vec![];
229-
let mut valid = NullBufferBuilder::new(row_count);
230216
for i in 0..row_count {
231217
if list_array.is_null(i) {
232218
array_lengths.push(0);
233-
valid.append_null();
234219
} else {
235220
let arr_ref = list_array.value(i);
236221

@@ -253,25 +238,22 @@ fn array_sort_generic<OffsetSize: OffsetSizeTrait>(
253238
};
254239
array_lengths.push(sorted_array.len());
255240
arrays.push(sorted_array);
256-
valid.append_non_null();
257241
}
258242
}
259243

260-
let buffer = valid.finish();
261-
262244
let elements = arrays
263245
.iter()
264246
.map(|a| a.as_ref())
265247
.collect::<Vec<&dyn Array>>();
266248

267249
let list_arr = if elements.is_empty() {
268-
GenericListArray::<OffsetSize>::new_null(Arc::clone(field), row_count)
250+
GenericListArray::<OffsetSize>::new_null(field, row_count)
269251
} else {
270252
GenericListArray::<OffsetSize>::new(
271-
Arc::clone(field),
253+
field,
272254
OffsetBuffer::from_lengths(array_lengths),
273255
Arc::new(compute::concat(elements.as_slice())?),
274-
buffer,
256+
list_array.nulls().cloned(),
275257
)
276258
};
277259
Ok(Arc::new(list_arr))

datafusion/sqllogictest/test_files/array.slt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2577,6 +2577,31 @@ NULL NULL
25772577
NULL NULL
25782578
NULL NULL
25792579

2580+
# maintains inner nullability
2581+
query ?T
2582+
select array_sort(column1), arrow_typeof(array_sort(column1))
2583+
from values
2584+
(arrow_cast([], 'List(non-null Int32)')),
2585+
(arrow_cast(NULL, 'List(non-null Int32)')),
2586+
(arrow_cast([1, 3, 5, -5], 'List(non-null Int32)'))
2587+
;
2588+
----
2589+
[] List(non-null Int32)
2590+
NULL List(non-null Int32)
2591+
[-5, 1, 3, 5] List(non-null Int32)
2592+
2593+
query ?T
2594+
select column1, arrow_typeof(column1)
2595+
from values (array_sort(arrow_cast([1, 3, 5, -5], 'LargeList(non-null Int32)')));
2596+
----
2597+
[-5, 1, 3, 5] LargeList(non-null Int32)
2598+
2599+
query ?T
2600+
select column1, arrow_typeof(column1)
2601+
from values (array_sort(arrow_cast([1, 3, 5, -5], 'FixedSizeList(4 x non-null Int32)')));
2602+
----
2603+
[-5, 1, 3, 5] List(non-null Int32)
2604+
25802605
query ?
25812606
select array_sort([struct('foo', 3), struct('foo', 1), struct('bar', 1)])
25822607
----

0 commit comments

Comments
 (0)