@@ -51,7 +51,6 @@ use datafusion_execution::TaskContext;
5151use datafusion_expr:: ExpressionPlacement ;
5252use datafusion_physical_expr:: equivalence:: ProjectionMapping ;
5353use datafusion_physical_expr:: projection:: Projector ;
54- use datafusion_physical_expr:: utils:: collect_columns;
5554use datafusion_physical_expr_common:: physical_expr:: { PhysicalExprRef , fmt_sql} ;
5655use datafusion_physical_expr_common:: sort_expr:: {
5756 LexOrdering , LexRequirement , PhysicalSortExpr ,
@@ -1082,14 +1081,11 @@ fn try_unifying_projections(
10821081/// Collect all column indices from the given projection expressions.
10831082fn collect_column_indices ( exprs : & [ ProjectionExpr ] ) -> Vec < usize > {
10841083 // Collect column indices in a deterministic order that preserves the
1085- // projection's column ordering when possible. For simple Column
1086- // expressions, we use the column index directly (preserving the
1087- // projection's desired output order). For complex expressions with
1088- // multiple column references, we sort indices for determinism since
1089- // collect_columns returns a HashSet with non-deterministic iteration.
1084+ // projection's column ordering. For simple Column expressions, we use
1085+ // the column index directly. For complex expressions, we walk the
1086+ // expression tree to collect column references in traversal order.
10901087 // This allows the embedded projection to match the desired output
1091- // column order for simple column reorderings, avoiding a residual
1092- // ProjectionExec.
1088+ // column order, avoiding a residual ProjectionExec.
10931089 let mut seen = std:: collections:: HashSet :: new ( ) ;
10941090 let mut indices = Vec :: new ( ) ;
10951091 for proj_expr in exprs {
@@ -1099,18 +1095,20 @@ fn collect_column_indices(exprs: &[ProjectionExpr]) -> Vec<usize> {
10991095 indices. push ( col. index ( ) ) ;
11001096 }
11011097 } else {
1102- // Complex expression: collect all referenced columns in sorted
1103- // order for determinism.
1104- let mut expr_indices: Vec < usize > = collect_columns ( & proj_expr. expr )
1105- . into_iter ( )
1106- . map ( |c| c. index ( ) )
1107- . collect ( ) ;
1108- expr_indices. sort ( ) ;
1109- for idx in expr_indices {
1110- if seen. insert ( idx) {
1111- indices. push ( idx) ;
1112- }
1113- }
1098+ // Complex expression: collect all referenced columns in
1099+ // expression tree traversal order (deterministic) to preserve
1100+ // the natural ordering of column references.
1101+ proj_expr
1102+ . expr
1103+ . apply ( |expr| {
1104+ if let Some ( col) = expr. as_any ( ) . downcast_ref :: < Column > ( ) {
1105+ if seen. insert ( col. index ( ) ) {
1106+ indices. push ( col. index ( ) ) ;
1107+ }
1108+ }
1109+ Ok ( TreeNodeRecursion :: Continue )
1110+ } )
1111+ . expect ( "closure always returns OK" ) ;
11141112 }
11151113 }
11161114 indices
@@ -1226,7 +1224,8 @@ mod tests {
12261224 expr,
12271225 alias : "b-(1+a)" . to_string ( ) ,
12281226 } ] ) ;
1229- assert_eq ! ( column_indices, vec![ 1 , 7 ] ) ;
1227+ // Tree traversal order: b@7 is visited before a@1
1228+ assert_eq ! ( column_indices, vec![ 7 , 1 ] ) ;
12301229 Ok ( ( ) )
12311230 }
12321231
0 commit comments