Skip to content

Commit 201c016

Browse files
neilconwaydd-david-levin
authored andcommitted
fix: cardinality() of an empty array should be zero (apache#20533)
## Which issue does this PR close? - Closes apache#20526. ## Rationale for this change Per Postgres and the SQL spec, `cardinality()` of an empty array should be zero; we previously returned `NULL`. Along the way, fix another bug: we previously returned `0` for the cardinality of an untyped `NULL` and `NULL` for the cardinality of a typed null (e.g., `NULL::int[]`). We should return `NULL` in both cases. ## What changes are included in this PR? Bug fixes, update SLT. ## Are these changes tested? Yes. ## Are there any user-facing changes? Yes: the behavior of `cardinality` has changed, albeit the previous behavior was incorrect. (cherry picked from commit e684994)
1 parent 76c91c1 commit 201c016

2 files changed

Lines changed: 22 additions & 6 deletions

File tree

datafusion/functions-nested/src/cardinality.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ impl ScalarUDFImpl for Cardinality {
120120
fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
121121
let [array] = take_function_args("cardinality", args)?;
122122
match array.data_type() {
123-
Null => Ok(Arc::new(UInt64Array::from_value(0, array.len()))),
123+
Null => Ok(Arc::new(UInt64Array::new_null(array.len()))),
124124
List(_) => {
125125
let list_array = as_list_array(array)?;
126126
generic_list_cardinality::<i32>(list_array)
@@ -152,9 +152,14 @@ fn generic_list_cardinality<O: OffsetSizeTrait>(
152152
) -> Result<ArrayRef> {
153153
let result = array
154154
.iter()
155-
.map(|arr| match crate::utils::compute_array_dims(arr)? {
156-
Some(vector) => Ok(Some(vector.iter().map(|x| x.unwrap()).product::<u64>())),
157-
None => Ok(None),
155+
.map(|arr| match arr {
156+
Some(arr) if arr.is_empty() => Ok(Some(0u64)),
157+
arr => match crate::utils::compute_array_dims(arr)? {
158+
Some(vector) => {
159+
Ok(Some(vector.iter().map(|x| x.unwrap()).product::<u64>()))
160+
}
161+
None => Ok(None),
162+
},
158163
})
159164
.collect::<Result<UInt64Array>>()?;
160165
Ok(Arc::new(result) as ArrayRef)

datafusion/sqllogictest/test_files/array.slt

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5032,12 +5032,17 @@ select cardinality(arrow_cast([[1, 2], [3, 4], [5, 6]], 'FixedSizeList(3, List(I
50325032
query II
50335033
select cardinality(make_array()), cardinality(make_array(make_array()))
50345034
----
5035-
NULL 0
5035+
0 0
5036+
5037+
query II
5038+
select cardinality([]), cardinality([]::int[]) as with_cast
5039+
----
5040+
0 0
50365041

50375042
query II
50385043
select cardinality(arrow_cast(make_array(), 'LargeList(Int64)')), cardinality(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))'))
50395044
----
5040-
NULL 0
5045+
0 0
50415046

50425047
#TODO
50435048
#https://github.com/apache/datafusion/issues/9158
@@ -5046,6 +5051,12 @@ NULL 0
50465051
#----
50475052
#NULL 0
50485053

5054+
# cardinality of NULL arrays should return NULL
5055+
query II
5056+
select cardinality(NULL), cardinality(arrow_cast(NULL, 'LargeList(Int64)'))
5057+
----
5058+
NULL NULL
5059+
50495060
# cardinality with columns
50505061
query III
50515062
select cardinality(column1), cardinality(column2), cardinality(column3) from arrays;

0 commit comments

Comments
 (0)