Skip to content

Commit 95157ef

Browse files
Unparser drops ORDER BY alias when flattening Projection through SubqueryAlias (#21491)
## Which issue does this PR close? - Closes #21490. ## Rationale for this change When `plan_to_sql` encounters a `Projection → Sort → Projection → SubqueryAlias` plan where the outer Projection excludes a Sort column defined as an alias in the inner Projection (e.g. `Z AS c`), the function `rewrite_plan_for_sort_on_non_projected_fields` flattens the two Projections into one but only keeps the outer Projection's columns. This drops the alias definition, leaving `ORDER BY c` referencing a column that no longer exists in the generated SQL. ## What changes are included in this PR? In `rewrite_plan_for_sort_on_non_projected_fields` (`datafusion/sql/src/unparser/rewrite.rs`), when the inner Projection is trimmed to only the outer Projection's expressions, sort expressions that reference dropped aliases are now inlined to the underlying physical expression. For example, `ORDER BY c` becomes `ORDER BY t."Z"` when `c` was defined as `Z AS c` in the dropped inner Projection. ## Are these changes tested? Yes. A new regression test `test_sort_on_aliased_column_dropped_by_outer_projection` in `datafusion/sql/tests/cases/plan_to_sql.rs` constructs the exact plan shape that triggers the bug and asserts the correct SQL output: ```sql SELECT t."X" AS a, t."Y" AS b FROM phys_table AS t ORDER BY t."Z" DESC NULLS FIRST LIMIT 1 ``` All existing `plan_to_sql` tests (118 tests) continue to pass. ## Are there any user-facing changes? No API changes. Previously invalid SQL is now generated correctly. --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 643db7a commit 95157ef

2 files changed

Lines changed: 91 additions & 0 deletions

File tree

datafusion/sql/src/unparser/rewrite.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,48 @@ pub(super) fn rewrite_plan_for_sort_on_non_projected_fields(
254254
.map(|e| map.get(e).unwrap_or(e).clone())
255255
.collect::<Vec<_>>();
256256

257+
// The inner Projection may define aliases that the Sort references
258+
// but the outer Projection does not include. Since we are about to
259+
// replace the inner Projection's expressions with `new_exprs` (which
260+
// only contains the outer Projection's columns), those alias
261+
// definitions will be lost. To keep the Sort valid, rewrite any
262+
// sort expression that references a dropped alias so that it uses
263+
// the alias's underlying expression instead.
264+
let projected_aliases: HashSet<&str> = new_exprs
265+
.iter()
266+
.filter_map(|e| match e {
267+
Expr::Alias(alias) => Some(alias.name.as_str()),
268+
_ => None,
269+
})
270+
.collect();
271+
272+
let dropped_aliases: HashMap<String, Expr> = inner_p
273+
.expr
274+
.iter()
275+
.filter_map(|e| match e {
276+
Expr::Alias(alias)
277+
if !projected_aliases.contains(alias.name.as_str()) =>
278+
{
279+
Some((alias.name.clone(), (*alias.expr).clone()))
280+
}
281+
_ => None,
282+
})
283+
.collect();
284+
285+
if !dropped_aliases.is_empty() {
286+
for sort_expr in &mut sort.expr {
287+
let mut expr = sort_expr.expr.clone();
288+
while let Expr::Alias(alias) = expr {
289+
expr = *alias.expr;
290+
}
291+
if let Expr::Column(ref col) = expr
292+
&& let Some(underlying) = dropped_aliases.get(col.name())
293+
{
294+
sort_expr.expr = underlying.clone();
295+
}
296+
}
297+
}
298+
257299
inner_p.expr.clone_from(&new_exprs);
258300
sort.input = Arc::new(LogicalPlan::Projection(inner_p));
259301

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3008,6 +3008,55 @@ fn test_unparse_manual_join_with_subquery_aggregate() -> Result<()> {
30083008
Ok(())
30093009
}
30103010

3011+
/// Regression test for https://github.com/apache/datafusion/issues/21490
3012+
///
3013+
/// When the outer Projection excludes a Sort column whose definition only
3014+
/// exists as an alias in the inner Projection, the Unparser must inline the
3015+
/// underlying expression into ORDER BY rather than emitting the now-missing
3016+
/// alias name.
3017+
#[test]
3018+
fn test_sort_on_aliased_column_dropped_by_outer_projection() -> Result<()> {
3019+
let schema = Schema::new(vec![
3020+
Field::new("X", DataType::Utf8, true),
3021+
Field::new("Y", DataType::Utf8, true),
3022+
Field::new("Z", DataType::Utf8, true),
3023+
]);
3024+
3025+
// Build:
3026+
// Projection: [a, b] -- outer: excludes sort column "c"
3027+
// Sort: [c DESC, fetch=1] -- references alias "c"
3028+
// Projection: [X AS a, Y AS b, Z AS c] -- defines alias "c"
3029+
// SubqueryAlias: t
3030+
// TableScan: phys_table [X, Y, Z]
3031+
let plan = table_scan(Some("phys_table"), &schema, None)?
3032+
.alias("t")?
3033+
.project(vec![
3034+
Expr::Column(Column::new(Some(TableReference::bare("t")), "X")).alias("a"),
3035+
Expr::Column(Column::new(Some(TableReference::bare("t")), "Y")).alias("b"),
3036+
Expr::Column(Column::new(Some(TableReference::bare("t")), "Z")).alias("c"),
3037+
])?
3038+
.sort_with_limit(
3039+
vec![Expr::Column(Column::new_unqualified("c")).sort(false, true)],
3040+
Some(1),
3041+
)?
3042+
.project(vec![
3043+
Expr::Column(Column::new_unqualified("a")),
3044+
Expr::Column(Column::new_unqualified("b")),
3045+
])?
3046+
.build()?;
3047+
3048+
let unparser = Unparser::default();
3049+
let sql = unparser.plan_to_sql(&plan)?;
3050+
3051+
// ORDER BY must reference the physical column, not the dropped alias.
3052+
assert_snapshot!(
3053+
sql,
3054+
@r#"SELECT t."X" AS a, t."Y" AS b FROM phys_table AS t ORDER BY t."Z" DESC NULLS FIRST LIMIT 1"#
3055+
);
3056+
3057+
Ok(())
3058+
}
3059+
30113060
#[test]
30123061
fn snowflake_unnest_to_lateral_flatten_simple() -> Result<(), DataFusionError> {
30133062
let snowflake = SnowflakeDialect::new();

0 commit comments

Comments
 (0)