@@ -45,6 +45,7 @@ use crate::optimizer::ApplyOrder;
4545use crate :: simplify_expressions:: simplify_predicates;
4646use crate :: utils:: { has_all_column_refs, is_restrict_null_predicate} ;
4747use crate :: { OptimizerConfig , OptimizerRule } ;
48+ use datafusion_expr:: ExpressionPlacement ;
4849
4950/// Optimizer rule for pushing (moving) filter expressions down in a plan so
5051/// they are applied as early as possible.
@@ -1295,10 +1296,13 @@ fn rewrite_projection(
12951296 predicates : Vec < Expr > ,
12961297 mut projection : Projection ,
12971298) -> Result < ( Transformed < LogicalPlan > , Option < Expr > ) > {
1298- // A projection is filter-commutable if it do not contain volatile predicates or contain volatile
1299- // predicates that are not used in the filter. However, we should re-writes all predicate expressions.
1300- // collect projection.
1301- let ( volatile_map, non_volatile_map) : ( HashMap < _ , _ > , HashMap < _ , _ > ) = projection
1299+ // Partition projection expressions into non-pushable vs pushable.
1300+ // Non-pushable expressions are volatile (must not be duplicated) or
1301+ // MoveTowardsLeafNodes (cheap expressions like get_field where re-inlining
1302+ // into a filter causes optimizer instability — ExtractLeafExpressions will
1303+ // undo the push-down, creating an infinite loop that runs until the
1304+ // iteration limit is hit).
1305+ let ( non_pushable_map, pushable_map) : ( HashMap < _ , _ > , HashMap < _ , _ > ) = projection
13021306 . schema
13031307 . iter ( )
13041308 . zip ( projection. expr . iter ( ) )
@@ -1308,12 +1312,15 @@ fn rewrite_projection(
13081312
13091313 ( qualified_name ( qualifier, field. name ( ) ) , expr)
13101314 } )
1311- . partition ( |( _, value) | value. is_volatile ( ) ) ;
1315+ . partition ( |( _, value) | {
1316+ value. is_volatile ( )
1317+ || value. placement ( ) == ExpressionPlacement :: MoveTowardsLeafNodes
1318+ } ) ;
13121319
13131320 let mut push_predicates = vec ! [ ] ;
13141321 let mut keep_predicates = vec ! [ ] ;
13151322 for expr in predicates {
1316- if contain ( & expr, & volatile_map ) {
1323+ if contain ( & expr, & non_pushable_map ) {
13171324 keep_predicates. push ( expr) ;
13181325 } else {
13191326 push_predicates. push ( expr) ;
@@ -1325,7 +1332,7 @@ fn rewrite_projection(
13251332 // re-write all filters based on this projection
13261333 // E.g. in `Filter: b\n Projection: a > 1 as b`, we can swap them, but the filter must be "a > 1"
13271334 let new_filter = LogicalPlan :: Filter ( Filter :: try_new (
1328- replace_cols_by_name ( expr, & non_volatile_map ) ?,
1335+ replace_cols_by_name ( expr, & pushable_map ) ?,
13291336 std:: mem:: take ( & mut projection. input ) ,
13301337 ) ?) ;
13311338
@@ -1336,7 +1343,10 @@ fn rewrite_projection(
13361343 conjunction ( keep_predicates) ,
13371344 ) )
13381345 }
1339- None => Ok ( ( Transformed :: no ( LogicalPlan :: Projection ( projection) ) , None ) ) ,
1346+ None => Ok ( (
1347+ Transformed :: no ( LogicalPlan :: Projection ( projection) ) ,
1348+ conjunction ( keep_predicates) ,
1349+ ) ) ,
13401350 }
13411351}
13421352
@@ -1446,6 +1456,7 @@ mod tests {
14461456 use crate :: assert_optimized_plan_eq_snapshot;
14471457 use crate :: optimizer:: Optimizer ;
14481458 use crate :: simplify_expressions:: SimplifyExpressions ;
1459+ use crate :: test:: udfs:: leaf_udf_expr;
14491460 use crate :: test:: * ;
14501461 use datafusion_expr:: test:: function_stub:: sum;
14511462 use insta:: assert_snapshot;
@@ -4221,4 +4232,68 @@ mod tests {
42214232 "
42224233 )
42234234 }
4235+
4236+ /// Verifies that a filter is NOT pushed through a projection column whose expression is MoveTowardsLeafNodes.
4237+ /// These are cheap expressions (like get_field), so re-inlining them into a filter
4238+ /// has no benefit and makes the optimizer unstable: ExtractLeafExpressions will
4239+ /// undo the push-down, and the two rules then keep reverting each other until the
4240+ /// iteration limit is hit.
4241+ #[ test]
4242+ fn filter_not_pushed_through_move_towards_leaves_projection ( ) -> Result < ( ) > {
4243+ let table_scan = test_table_scan ( ) ?;
4244+
4245+ // Project `leaf_udf(a)` under the alias `val` (the MoveTowardsLeafNodes expression), plus plain columns `b` and `c`
4246+ let proj = LogicalPlanBuilder :: from ( table_scan)
4247+ . project ( vec ! [
4248+ leaf_udf_expr( col( "a" ) ) . alias( "val" ) ,
4249+ col( "b" ) ,
4250+ col( "c" ) ,
4251+ ] ) ?
4252+ . build ( ) ?;
4253+
4254+ // Filter on `val`, the alias of the MoveTowardsLeafNodes expression
4255+ let plan = LogicalPlanBuilder :: from ( proj)
4256+ . filter ( col ( "val" ) . gt ( lit ( 150i64 ) ) ) ?
4257+ . build ( ) ?;
4258+
4259+ // Expected: the Filter stays above the Projection, because `val` maps to a MoveTowardsLeafNodes expr
4260+ assert_optimized_plan_equal ! (
4261+ plan,
4262+ @r"
4263+ Filter: val > Int64(150)
4264+ Projection: leaf_udf(test.a) AS val, test.b, test.c
4265+ TableScan: test
4266+ "
4267+ )
4268+ }
4269+
4270+ /// Verifies partial push-down of a conjunction: the plain-column predicate is pushed through the projection, while the predicate on the MoveTowardsLeafNodes alias is kept above it.
4271+ #[ test]
4272+ fn filter_mixed_predicates_partial_push ( ) -> Result < ( ) > {
4273+ let table_scan = test_table_scan ( ) ?;
4274+
4275+ // Project `leaf_udf(a)` under the alias `val` (MoveTowardsLeafNodes) alongside plain columns `b` and `c`
4276+ let proj = LogicalPlanBuilder :: from ( table_scan)
4277+ . project ( vec ! [
4278+ leaf_udf_expr( col( "a" ) ) . alias( "val" ) ,
4279+ col( "b" ) ,
4280+ col( "c" ) ,
4281+ ] ) ?
4282+ . build ( ) ?;
4283+
4284+ // One conjunctive filter with two parts: val > 150 (on the MoveTowardsLeafNodes alias) AND b > 5 (on a plain column)
4285+ let plan = LogicalPlanBuilder :: from ( proj)
4286+ . filter ( col ( "val" ) . gt ( lit ( 150i64 ) ) . and ( col ( "b" ) . gt ( lit ( 5i64 ) ) ) ) ?
4287+ . build ( ) ?;
4288+
4289+ // Expected: val > 150 stays in a Filter above the Projection; b > 5 is pushed through and appears in the TableScan's full_filters
4290+ assert_optimized_plan_equal ! (
4291+ plan,
4292+ @r"
4293+ Filter: val > Int64(150)
4294+ Projection: leaf_udf(test.a) AS val, test.b, test.c
4295+ TableScan: test, full_filters=[test.b > Int64(5)]
4296+ "
4297+ )
4298+ }
42244299}
0 commit comments