Skip to content

Commit 1fbbba5

Browse files
buraksenn and Jefffrey authored
feat: support '>', '<', '>=', '<=', '<>' in all operator (#21416)
## Which issue does this PR close?

- Closes #2547

## Rationale for this change

Related to #20830. The `ALL` operator does not support the comparison operators listed in the title.

## What changes are included in this PR?

Adds support for the remaining comparison operators and adds tests.

One open question: I checked how PostgreSQL and DuckDB handle NULL semantics and followed the PostgreSQL behavior. However, I'm not sure this is what we want, so I have also included the DuckDB outputs. Feedback on this would be appreciated.

| Query | PostgreSQL | This PR | DuckDB |
|---|---|---|---|
| `5 = ALL(NULL::INT[])` | `NULL` | `NULL` | `true` |
| `5 <> ALL(NULL::INT[])` | `NULL` | `NULL` | `true` |
| `5 > ALL(NULL::INT[])` | `NULL` | `NULL` | `true` |
| `5 < ALL(NULL::INT[])` | `NULL` | `NULL` | `true` |
| `5 >= ALL(NULL::INT[])` | `NULL` | `NULL` | `true` |
| `5 <= ALL(NULL::INT[])` | `NULL` | `NULL` | `true` |

## Are these changes tested?

Added slt tests for this, and they all pass.

## Are there any user-facing changes?

Yes, users can now use the `ALL` operator with these additional comparison operators.

---------

Co-authored-by: Jeffrey Vo <jeffrey.vo.australia@gmail.com>
1 parent b75df6f commit 1fbbba5

2 files changed

Lines changed: 287 additions & 2 deletions

File tree

datafusion/sql/src/expr/mod.rs

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ use datafusion_expr::{
4040
};
4141

4242
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
43-
use datafusion_functions_nested::expr_fn::{array_has, array_max, array_min};
43+
use datafusion_functions_nested::expr_fn::{
44+
array_has, array_max, array_min, array_position, cardinality,
45+
};
4446

4547
mod binary_op;
4648
mod function;
@@ -635,7 +637,11 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
635637
schema,
636638
planner_context,
637639
),
638-
_ => not_impl_err!("ALL only supports subquery comparison currently"),
640+
_ => {
641+
let left_expr = self.sql_to_expr(*left, schema, planner_context)?;
642+
let right_expr = self.sql_to_expr(*right, schema, planner_context)?;
643+
plan_all_op(&left_expr, &right_expr, &compare_op)
644+
}
639645
},
640646
#[expect(deprecated)]
641647
SQLExpr::Wildcard(_token) => Ok(Expr::Wildcard {
@@ -1297,6 +1303,64 @@ fn plan_any_op(
12971303
}
12981304
}
12991305

1306+
/// Plans `needle <compare_op> ALL(haystack)` with proper SQL NULL semantics.
1307+
///
/// `needle` is the left-hand expression and `haystack` is the array expression
/// passed to `ALL`. Supported operators are `=`, `<>`, `>`, `<`, `>=` and `<=`;
/// any other `compare_op` returns a planning error.
///
1308+
/// The result is a CASE/WHEN expression whose branches are tried in order:
1309+
/// WHEN haystack IS NULL → NULL
1310+
/// WHEN haystack is empty (cardinality = 0) → TRUE
1311+
/// WHEN needle IS NULL → NULL
1312+
/// WHEN decisive_condition → FALSE
1313+
/// WHEN haystack contains a NULL element → NULL
1314+
/// ELSE → TRUE
1315+
fn plan_all_op(
1316+
needle: &Expr,
1317+
haystack: &Expr,
1318+
compare_op: &BinaryOperator,
1319+
) -> Result<Expr> {
1320+
let null_arr_check = haystack.clone().is_null();
1321+
let empty_check = cardinality(haystack.clone()).eq(lit(0u64));
1322+
let null_lhs_check = needle.clone().is_null();
1323+
// DataFusion's array_position uses is_null() checks internally (not equality),
1324+
// so it can locate NULL elements even though NULL = NULL is NULL in standard SQL.
1325+
let has_nulls =
1326+
array_position(haystack.clone(), lit(ScalarValue::Null), lit(1i64)).is_not_null();
1327+
1328+
// `decisive_condition` is the predicate that, when true, makes the whole
// ALL comparison FALSE (see the `.when(decisive_condition, lit(false))`
// branch below).
let decisive_condition = match compare_op {
1329+
// `<> ALL` is decided as FALSE when the haystack contains the needle.
BinaryOperator::NotEq => array_has(haystack.clone(), needle.clone()),
1330+
// `= ALL` is decided as FALSE unless both the minimum and the maximum
// equal the needle.
// NOTE(review): the slt expectations (e.g. `5 > ALL(make_array(6, NULL))`
// is false) rely on array_min/array_max skipping NULL elements — confirm
// against their documentation.
BinaryOperator::Eq => {
1331+
let all_equal = array_min(haystack.clone())
1332+
.eq(needle.clone())
1333+
.and(array_max(haystack.clone()).eq(needle.clone()));
1334+
Expr::Not(Box::new(all_equal))
1335+
}
1336+
// `> ALL`: decided as FALSE unless needle > array_max(haystack).
BinaryOperator::Gt => {
1337+
Expr::Not(Box::new(needle.clone().gt(array_max(haystack.clone()))))
1338+
}
1339+
// `< ALL`: decided as FALSE unless needle < array_min(haystack).
BinaryOperator::Lt => {
1340+
Expr::Not(Box::new(needle.clone().lt(array_min(haystack.clone()))))
1341+
}
1342+
// `>= ALL`: decided as FALSE unless needle >= array_max(haystack).
BinaryOperator::GtEq => {
1343+
Expr::Not(Box::new(needle.clone().gt_eq(array_max(haystack.clone()))))
1344+
}
1345+
// `<= ALL`: decided as FALSE unless needle <= array_min(haystack).
BinaryOperator::LtEq => {
1346+
Expr::Not(Box::new(needle.clone().lt_eq(array_min(haystack.clone()))))
1347+
}
1348+
// Every other operator is rejected with a planning error.
_ => {
1349+
return plan_err!(
1350+
"Unsupported AllOp: '{compare_op}', only '=', '<>', '>', '<', '>=', '<=' are supported"
1351+
);
1352+
}
1353+
};
1354+
1355+
// Typed boolean NULL used for every branch that yields NULL.
let null_bool = lit(ScalarValue::Boolean(None));
1356+
// Branch order matters: the empty-array check precedes the NULL-needle
// check, so `NULL = ALL(<empty array>)` yields TRUE rather than NULL.
when(null_arr_check, null_bool.clone())
1357+
.when(empty_check, lit(true))
1358+
.when(null_lhs_check, null_bool.clone())
1359+
.when(decisive_condition, lit(false))
1360+
.when(has_nulls, null_bool)
1361+
.otherwise(lit(true))
1362+
}
1363+
13001364
#[cfg(test)]
13011365
mod tests {
13021366
use std::collections::HashMap;
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
## all operator
19+
20+
# = ALL: true when all elements equal val
21+
query B
22+
select 5 = ALL(make_array(5, 5, 5));
23+
----
24+
true
25+
26+
query B
27+
select 5 = ALL(make_array(5, 5, 3));
28+
----
29+
false
30+
31+
# <> ALL: true when val differs from every element
32+
query B
33+
select 5 <> ALL(make_array(1, 2, 3));
34+
----
35+
true
36+
37+
query B
38+
select 5 <> ALL(make_array(1, 2, 5));
39+
----
40+
false
41+
42+
# > ALL: true when val greater than all elements
43+
query B
44+
select 10 > ALL(make_array(1, 2, 3));
45+
----
46+
true
47+
48+
query B
49+
select 3 > ALL(make_array(1, 2, 3));
50+
----
51+
false
52+
53+
# < ALL: true when val less than all elements
54+
query B
55+
select 0 < ALL(make_array(1, 2, 3));
56+
----
57+
true
58+
59+
query B
60+
select 2 < ALL(make_array(1, 2, 3));
61+
----
62+
false
63+
64+
# >= ALL: true when val >= all elements
65+
query B
66+
select 5 >= ALL(make_array(1, 2, 5));
67+
----
68+
true
69+
70+
query B
71+
select 4 >= ALL(make_array(1, 2, 5));
72+
----
73+
false
74+
75+
# <= ALL: true when val <= all elements
76+
query B
77+
select 1 <= ALL(make_array(1, 2, 5));
78+
----
79+
true
80+
81+
query B
82+
select 2 <= ALL(make_array(1, 2, 5));
83+
----
84+
false
85+
86+
# Empty arrays: all operators return TRUE (vacuous truth)
87+
query B
88+
select 5 = ALL(arrow_cast(make_array(), 'List(Int64)'));
89+
----
90+
true
91+
92+
query B
93+
select 5 <> ALL(arrow_cast(make_array(), 'List(Int64)'));
94+
----
95+
true
96+
97+
query B
98+
select 5 > ALL(arrow_cast(make_array(), 'List(Int64)'));
99+
----
100+
true
101+
102+
query B
103+
select 5 < ALL(arrow_cast(make_array(), 'List(Int64)'));
104+
----
105+
true
106+
107+
query B
108+
select 5 >= ALL(arrow_cast(make_array(), 'List(Int64)'));
109+
----
110+
true
111+
112+
query B
113+
select 5 <= ALL(arrow_cast(make_array(), 'List(Int64)'));
114+
----
115+
true
116+
117+
# NULL LHS with empty array returns TRUE (vacuous truth)
118+
query B
119+
select NULL = ALL(arrow_cast(make_array(), 'List(Int64)'));
120+
----
121+
true
122+
123+
# NULL LHS with non-empty array returns NULL
124+
query B
125+
select NULL = ALL(make_array(1, 2, 3));
126+
----
127+
NULL
128+
129+
query B
130+
select NULL > ALL(make_array(1, 2, 3));
131+
----
132+
NULL
133+
134+
query B
135+
select NULL <> ALL(make_array(1, 2, 3));
136+
----
137+
NULL
138+
139+
# All-NULL arrays: returns NULL
140+
query B
141+
select 5 = ALL(make_array(NULL::INT, NULL::INT));
142+
----
143+
NULL
144+
145+
query B
146+
select 5 <> ALL(make_array(NULL::INT, NULL::INT));
147+
----
148+
NULL
149+
150+
query B
151+
select 5 > ALL(make_array(NULL::INT, NULL::INT));
152+
----
153+
NULL
154+
155+
query B
156+
select 5 < ALL(make_array(NULL::INT, NULL::INT));
157+
----
158+
NULL
159+
160+
query B
161+
select 5 >= ALL(make_array(NULL::INT, NULL::INT));
162+
----
163+
NULL
164+
165+
query B
166+
select 5 <= ALL(make_array(NULL::INT, NULL::INT));
167+
----
168+
NULL
169+
170+
# Mixed NULL + non-NULL (non-NULL elements satisfy, but NULLs present → NULL)
171+
query B
172+
select 5 > ALL(make_array(3, NULL));
173+
----
174+
NULL
175+
176+
query B
177+
select 5 >= ALL(make_array(5, NULL));
178+
----
179+
NULL
180+
181+
query B
182+
select 1 < ALL(make_array(3, NULL));
183+
----
184+
NULL
185+
186+
query B
187+
select 1 <= ALL(make_array(1, NULL));
188+
----
189+
NULL
190+
191+
# Mixed NULL + non-NULL (not satisfying condition → FALSE wins over NULL)
192+
query B
193+
select 5 > ALL(make_array(6, NULL));
194+
----
195+
false
196+
197+
query B
198+
select 5 < ALL(make_array(3, NULL));
199+
----
200+
false
201+
202+
query B
203+
select 5 = ALL(make_array(5, 3, NULL));
204+
----
205+
false
206+
207+
# NULL array input returns NULL
208+
query B
209+
select 5 = ALL(NULL::INT[]);
210+
----
211+
NULL
212+
213+
query B
214+
select 5 > ALL(NULL::INT[]);
215+
----
216+
NULL
217+
218+
query B
219+
select 5 < ALL(NULL::INT[]);
220+
----
221+
NULL

0 commit comments

Comments
 (0)