From 75cd4d79021e0a0bec6e245e1912b64202e8c209 Mon Sep 17 00:00:00 2001 From: Yonatan Striem-Amit Date: Sun, 22 Mar 2026 00:02:39 -0400 Subject: [PATCH 1/3] fix: preserve subquery structure when unparsing SubqueryAlias over Aggregate When the SQL unparser encountered a SubqueryAlias node whose direct child was an Aggregate (or other clause-building plan like Window, Sort, Limit, Union), it would flatten the subquery into a simple table alias, losing the aggregate entirely. For example, a plan representing: SELECT j1.col FROM j1 JOIN (SELECT max(id) AS m FROM j2) AS b ON j1.id = b.m would unparse to: SELECT j1.col FROM j1 INNER JOIN j2 AS b ON j1.id = b.m dropping the MAX aggregate and the subquery. Root cause: the SubqueryAlias handler in select_to_sql_recursively would call subquery_alias_inner_query_and_columns (which only unwraps Projection children) and unparse_table_scan_pushdown (which only handles TableScan/SubqueryAlias/Projection). When both returned nothing useful for an Aggregate child, the code recursed directly into the Aggregate, merging its GROUP BY into the outer SELECT instead of wrapping it in a derived subquery. The fix adds an early check: if the SubqueryAlias's direct child is a plan type that builds its own SELECT clauses (Aggregate, Window, Sort, Limit, Union), emit it as a derived subquery via self.derive() with the alias always attached, rather than falling through to the recursive path that would flatten it. --- datafusion/sql/src/unparser/plan.rs | 37 ++++++++++++++++ datafusion/sql/tests/cases/plan_to_sql.rs | 54 ++++++++++++++++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index ca8dfa431b4f5..73950b961e8ba 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -828,6 +828,27 @@ impl Unparser<'_> { Some(plan_alias.alias.clone()), select.already_projected(), )?; + + // If the SubqueryAlias directly wraps a plan that builds its + // own SELECT clauses (e.g. Aggregate adds GROUP BY, Window adds + // OVER, etc.) and unparse_table_scan_pushdown couldn't reduce it, + // we must emit a derived subquery: (SELECT ...) AS alias. + // Without this, the recursive handler would merge those clauses + // into the outer SELECT, losing the subquery structure entirely. + if unparsed_table_scan.is_none() + && Self::requires_derived_subquery(plan_alias.input.as_ref()) + { + return self.derive( + &plan_alias.input, + relation, + Some(self.new_table_alias( + plan_alias.alias.table().to_string(), + columns, + )), + false, + ); + } + // if the child plan is a TableScan with pushdown operations, we don't need to // create an additional subquery for it if !select.already_projected() && unparsed_table_scan.is_none() { @@ -1060,6 +1081,22 @@ impl Unparser<'_> { scan.projection.is_some() || !scan.filters.is_empty() || scan.fetch.is_some() } + /// Returns true if a plan, when used as the direct child of a SubqueryAlias, + /// must be emitted as a derived subquery `(SELECT ...) AS alias`. + /// + /// Plans like Aggregate or Window build their own SELECT clauses (GROUP BY, + /// window functions). + fn requires_derived_subquery(plan: &LogicalPlan) -> bool { + matches!( + plan, + LogicalPlan::Aggregate(_) + | LogicalPlan::Window(_) + | LogicalPlan::Sort(_) + | LogicalPlan::Limit(_) + | LogicalPlan::Union(_) + ) + } + /// Try to unparse a table scan with pushdown operations into a new subquery plan. /// If the table scan is without any pushdown operations, return None. fn unparse_table_scan_pushdown( diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index aefb404ba4106..24f9226636455 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -23,7 +23,7 @@ use datafusion_common::{ }; use datafusion_expr::expr::{WindowFunction, WindowFunctionParams}; use datafusion_expr::test::function_stub::{ - count_udaf, max_udaf, min_udaf, sum, sum_udaf, + count_udaf, max, max_udaf, min_udaf, sum, sum_udaf, }; use datafusion_expr::{ EmptyRelation, Expr, Extension, LogicalPlan, LogicalPlanBuilder, Union, @@ -2893,3 +2893,55 @@ fn test_json_access_3() { @r#"SELECT (j1.j1_string : 'field.inner1[''inner2'']') FROM j1"# ); } + +/// Test that unparsing a manually constructed join with a subquery aggregate +/// preserves the MAX aggregate function. +/// +/// Builds the equivalent of: +/// SELECT j1.j1_string FROM j1 +/// JOIN (SELECT max(j2_id) AS max_id FROM j2) AS b +/// ON j1.j1_id = b.max_id +#[test] +fn test_unparse_manual_join_with_subquery_aggregate() -> Result<()> { + let context = MockContextProvider { + state: MockSessionState::default(), + }; + let j1_schema = context + .get_table_source(TableReference::bare("j1"))? + .schema(); + let j2_schema = context + .get_table_source(TableReference::bare("j2"))? + .schema(); + + // Build the right side: SELECT max(j2_id) AS max_id FROM j2 + let right_scan = table_scan(Some("j2"), &j2_schema, None)?.build()?; + let right_agg = LogicalPlanBuilder::from(right_scan) + .aggregate(vec![] as Vec, vec![max(col("j2.j2_id")).alias("max_id")])? + .build()?; + let right_subquery = subquery_alias(right_agg, "b")?; + + // Build the full plan: SELECT j1.j1_string FROM j1 JOIN (...) AS b ON j1.j1_id = b.max_id + let left_scan = table_scan(Some("j1"), &j1_schema, None)?.build()?; + let plan = LogicalPlanBuilder::from(left_scan) + .join( + right_subquery, + datafusion_expr::JoinType::Inner, + ( + vec![Column::from_qualified_name("j1.j1_id")], + vec![Column::from_qualified_name("b.max_id")], + ), + None, + )? + .project(vec![col("j1.j1_string")])? + .build()?; + + let unparser = Unparser::default(); + let sql = unparser.plan_to_sql(&plan)?.to_string(); + let sql_upper = sql.to_uppercase(); + assert!( + sql_upper.contains("MAX("), + "Unparsed SQL should preserve the MAX aggregate function call, got: {sql}" + ); + + Ok(()) +} From 0d223f9fea321ca4667aca57c5bca483cc201aa9 Mon Sep 17 00:00:00 2001 From: Yonatan Striem-Amit Date: Fri, 27 Mar 2026 12:41:57 -0400 Subject: [PATCH 2/3] fix: preserve subquery structure when unparsing SubqueryAlias over Aggregate When the SQL unparser encountered a SubqueryAlias node whose direct child was an Aggregate (or other clause-building plan like Window, Sort, Limit, Union), it would flatten the subquery into a simple table alias, losing the aggregate entirely. For example, a plan representing: SELECT j1.col FROM j1 JOIN (SELECT max(id) AS m FROM j2) AS b ON j1.id = b.m would unparse to: SELECT j1.col FROM j1 INNER JOIN j2 AS b ON j1.id = b.m dropping the MAX aggregate and the subquery. Root cause: the SubqueryAlias handler in select_to_sql_recursively would call subquery_alias_inner_query_and_columns (which only unwraps Projection children) and unparse_table_scan_pushdown (which only handles TableScan/SubqueryAlias/Projection). When both returned nothing useful for an Aggregate child, the code recursed directly into the Aggregate, merging its GROUP BY into the outer SELECT instead of wrapping it in a derived subquery. The fix adds an early check: if the SubqueryAlias's direct child is a plan type that builds its own SELECT clauses (Aggregate, Window, Sort, Limit, Union), emit it as a derived subquery via self.derive() with the alias always attached, rather than falling through to the recursive path that would flatten it. Co-Authored-By: Claude Opus 4.6 (1M context) --- datafusion/sql/src/unparser/plan.rs | 2 +- datafusion/sql/tests/cases/plan_to_sql.rs | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 73950b961e8ba..0b9f24bc87327 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -1085,7 +1085,7 @@ impl Unparser<'_> { /// must be emitted as a derived subquery `(SELECT ...) AS alias`. /// /// Plans like Aggregate or Window build their own SELECT clauses (GROUP BY, - /// window functions). + /// window functions). fn requires_derived_subquery(plan: &LogicalPlan) -> bool { matches!( plan, diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 24f9226636455..db94e32c8d913 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -2916,7 +2916,10 @@ fn test_unparse_manual_join_with_subquery_aggregate() -> Result<()> { // Build the right side: SELECT max(j2_id) AS max_id FROM j2 let right_scan = table_scan(Some("j2"), &j2_schema, None)?.build()?; let right_agg = LogicalPlanBuilder::from(right_scan) - .aggregate(vec![] as Vec, vec![max(col("j2.j2_id")).alias("max_id")])? + .aggregate( + vec![] as Vec, + vec![max(col("j2.j2_id")).alias("max_id")], + )? .build()?; let right_subquery = subquery_alias(right_agg, "b")?; From 42f7f64e6a9cd400dc03e10a498bc32ffd67be57 Mon Sep 17 00:00:00 2001 From: Yonatan Striem-Amit Date: Fri, 3 Apr 2026 22:21:44 -0400 Subject: [PATCH 3/3] Fixes in PR --- datafusion/sql/src/unparser/plan.rs | 33 +++++++++++++++++++---- datafusion/sql/tests/cases/plan_to_sql.rs | 14 ++++++++++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 0b9f24bc87327..c0b77b9dba88a 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -829,17 +829,40 @@ impl Unparser<'_> { select.already_projected(), )?; - // If the SubqueryAlias directly wraps a plan that builds its - // own SELECT clauses (e.g. Aggregate adds GROUP BY, Window adds + // If the (possibly rewritten) inner plan builds its own + // SELECT clauses (e.g. Aggregate adds GROUP BY, Window adds // OVER, etc.) and unparse_table_scan_pushdown couldn't reduce it, // we must emit a derived subquery: (SELECT ...) AS alias. // Without this, the recursive handler would merge those clauses // into the outer SELECT, losing the subquery structure entirely. - if unparsed_table_scan.is_none() - && Self::requires_derived_subquery(plan_alias.input.as_ref()) + if unparsed_table_scan.is_none() && Self::requires_derived_subquery(plan) { + // When the dialect does not support column aliases in + // table aliases (e.g. SQLite), inject the aliases into + // the inner projection before wrapping as a derived + // subquery. + if !columns.is_empty() + && !self.dialect.supports_column_alias_in_table_alias() + { + let Ok(rewritten_plan) = + inject_column_aliases_into_subquery(plan.clone(), columns) + else { + return internal_err!( + "Failed to transform SubqueryAlias plan" + ); + }; + return self.derive( + &rewritten_plan, + relation, + Some(self.new_table_alias( + plan_alias.alias.table().to_string(), + vec![], + )), + false, + ); + } return self.derive( - &plan_alias.input, + plan, relation, Some(self.new_table_alias( plan_alias.alias.table().to_string(), diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index db94e32c8d913..9aff555825776 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -2894,6 +2894,20 @@ fn test_json_access_3() { ); } +/// Roundtrip test for a subquery aggregate with column aliases. +/// Ensures that `subquery_alias_inner_query_and_columns` unwrapping +/// a Projection -> Aggregate still triggers the derived-subquery path. +#[test] +fn roundtrip_subquery_aggregate_with_column_alias() -> Result<(), DataFusionError> { + roundtrip_statement_with_dialect_helper!( + sql: "SELECT id FROM (SELECT max(j1_id) FROM j1) AS c(id)", + parser_dialect: GenericDialect {}, + unparser_dialect: UnparserDefaultDialect {}, + expected: @"SELECT c.id FROM (SELECT max(j1.j1_id) FROM j1) AS c (id)", + ); + Ok(()) +} + /// Test that unparsing a manually constructed join with a subquery aggregate /// preserves the MAX aggregate function. ///