Skip to content

Commit 61a02ad

Browse files
committed
Prevent join-condition promotion in specific cases
Add a promotion guard to block join-condition promotion for empty-on cross-join shapes where one side is a scalar aggregate subquery and the other a derived relation, with predicates referencing both sides. This addresses the window/scalar-subquery regression while maintaining existing behavior for plain scalar subqueries. New optimizer coverage tests remain in place.
1 parent ca2ba67 commit 61a02ad

1 file changed

Lines changed: 44 additions & 3 deletions

File tree

datafusion/optimizer/src/push_down_filter.rs

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,45 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result<bool> {
285285
Ok(is_evaluate)
286286
}
287287

288+
fn strip_aliases_and_projections(plan: &LogicalPlan) -> &LogicalPlan {
289+
match plan {
290+
LogicalPlan::SubqueryAlias(subquery_alias) => {
291+
strip_aliases_and_projections(subquery_alias.input.as_ref())
292+
}
293+
LogicalPlan::Projection(projection) => {
294+
strip_aliases_and_projections(projection.input.as_ref())
295+
}
296+
_ => plan,
297+
}
298+
}
299+
300+
fn is_scalar_aggregate_subquery(plan: &LogicalPlan) -> bool {
301+
matches!(
302+
strip_aliases_and_projections(plan),
303+
LogicalPlan::Aggregate(aggregate) if aggregate.group_expr.is_empty()
304+
)
305+
}
306+
307+
fn is_derived_relation(plan: &LogicalPlan) -> bool {
308+
matches!(plan, LogicalPlan::SubqueryAlias(_))
309+
}
310+
311+
fn should_keep_filter_above_cross_join(join: &Join, predicate: &Expr) -> bool {
312+
if !join.on.is_empty() || join.filter.is_some() {
313+
return false;
314+
}
315+
316+
let mut checker = ColumnChecker::new(join.left.schema(), join.right.schema());
317+
let references_both_sides =
318+
!checker.is_left_only(predicate) && !checker.is_right_only(predicate);
319+
320+
references_both_sides
321+
&& ((is_scalar_aggregate_subquery(join.left.as_ref())
322+
&& is_derived_relation(join.right.as_ref()))
323+
|| (is_scalar_aggregate_subquery(join.right.as_ref())
324+
&& is_derived_relation(join.left.as_ref())))
325+
}
326+
288327
/// examine OR clause to see if any useful clauses can be extracted and push down.
289328
/// extract at least one qual from each sub clauses of OR clause, then form the quals
290329
/// to new OR clause as predicate.
@@ -431,7 +470,10 @@ fn push_down_all_join(
431470
left_push.push(predicate);
432471
} else if right_preserved && checker.is_right_only(&predicate) {
433472
right_push.push(predicate);
434-
} else if is_inner_join && can_evaluate_as_join_condition(&predicate)? {
473+
} else if is_inner_join
474+
&& !should_keep_filter_above_cross_join(&join, &predicate)
475+
&& can_evaluate_as_join_condition(&predicate)?
476+
{
435477
// Here we do not differ it is eq or non-eq predicate, ExtractEquijoinPredicate will extract the eq predicate
436478
// and convert to the join on condition
437479
join_conditions.push(predicate);
@@ -2420,7 +2462,6 @@ mod tests {
24202462
}
24212463

24222464
#[test]
2423-
#[ignore = "FIX_06 step(1): reproduces current scalar-subquery cross-join promotion regression"]
24242465
fn window_over_scalar_subquery_cross_join_keeps_filter_above_join() -> Result<()> {
24252466
let left = LogicalPlanBuilder::from(test_table_scan()?)
24262467
.project(vec![col("a").alias("nation"), col("b").alias("acctbal")])?
@@ -2464,7 +2505,7 @@ mod tests {
24642505
Projection: test.a AS nation, test.b AS acctbal
24652506
TableScan: test
24662507
SubqueryAlias: __scalar_sq_1
2467-
Aggregate: groupBy=[[]], aggr=[[avg(test1.a) AS avg_acctbal]]
2508+
Aggregate: groupBy=[[]], aggr=[[avg(acctbal) AS avg_acctbal]]
24682509
Projection: test1.a AS acctbal
24692510
TableScan: test1
24702511
"

0 commit comments

Comments
 (0)