From bbe2324c667bcb80ffa4b29315c40f44c2ddeb87 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Mon, 30 Mar 2026 10:34:58 -0400 Subject: [PATCH] Revert "Fix/support duplicate column names #6543 (#21126)" This reverts commit aadae6bda34f8b5bb33ef7d70b44404e5730e2ea. --- datafusion/sql/src/select.rs | 9 +- datafusion/sql/src/utils.rs | 33 ---- datafusion/sql/tests/sql_integration.rs | 54 ++---- .../sqllogictest/test_files/aggregate.slt | 19 +- .../test_files/duplicate_column_alias.slt | 175 ------------------ datafusion/sqllogictest/test_files/unnest.slt | 10 +- 6 files changed, 35 insertions(+), 265 deletions(-) delete mode 100644 datafusion/sqllogictest/test_files/duplicate_column_alias.slt diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 52aabff925014..6aa7ff599d0c5 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -23,9 +23,8 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use crate::query::to_order_by_exprs_with_select; use crate::utils::{ CheckColumnsMustReferenceAggregatePurpose, CheckColumnsSatisfyExprsPurpose, - check_columns_satisfy_exprs, deduplicate_select_expr_names, extract_aliases, - rebase_expr, resolve_aliases_to_exprs, resolve_columns, resolve_positions_to_exprs, - rewrite_recursive_unnests_bottom_up, + check_columns_satisfy_exprs, extract_aliases, rebase_expr, resolve_aliases_to_exprs, + resolve_columns, resolve_positions_to_exprs, rewrite_recursive_unnests_bottom_up, }; use datafusion_common::error::DataFusionErrorBuilder; @@ -110,10 +109,6 @@ impl SqlToRel<'_, S> { planner_context, )?; - // Auto-suffix duplicate expression names (e.g. cov, cov → cov, cov:1) - // before projection so that the unique-name constraint is satisfied. - let select_exprs = deduplicate_select_expr_names(select_exprs); - // Having and group by clause may reference aliases defined in select projection let projected_plan = self.project(base_plan.clone(), select_exprs)?; let select_exprs = projected_plan.expressions(); diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index d31bfc65ae280..1a76dd69f46c5 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -33,7 +33,6 @@ use datafusion_expr::builder::get_struct_unnested_columns; use datafusion_expr::expr::{ Alias, GroupingSet, Unnest, WindowFunction, WindowFunctionParams, }; -use datafusion_expr::select_expr::SelectExpr; use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs}; use datafusion_expr::{ ColumnUnnestList, Expr, ExprSchemable, LogicalPlan, col, expr_vec_fmt, @@ -634,38 +633,6 @@ fn push_projection_dedupl(projection: &mut Vec, expr: Expr) { projection.push(expr); } } - -/// Auto-suffix duplicate SELECT expression names with `:{count}`. -/// -/// The first occurrence keeps its original name so that ORDER BY / HAVING -/// references resolve correctly. Wildcards are left untouched because they -/// are expanded later in `project_with_validation`. -/// -/// Duplicates are detected by the schema name of each expression, which -/// identifies logically identical expressions before column normalization. -pub(crate) fn deduplicate_select_expr_names(exprs: Vec) -> Vec { - let mut seen: HashMap = HashMap::new(); - exprs - .into_iter() - .map(|select_expr| match select_expr { - SelectExpr::Expression(expr) => { - let name = expr.schema_name().to_string(); - let count = seen.entry(name.clone()).or_insert(0); - let result = if *count > 0 { - let (_qualifier, field_name) = expr.qualified_name(); - SelectExpr::Expression(expr.alias(format!("{field_name}:{count}"))) - } else { - SelectExpr::Expression(expr) - }; - *count += 1; - result - } - // Leave wildcards alone — they are expanded later - other => other, - }) - .collect() -} - /// The context is we want to rewrite unnest() into InnerProjection->Unnest->OuterProjection /// Given an expression which contains unnest expr as one of its children, /// Try transform depends on unnest type diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 2581a0028af85..950a79ddb0b5e 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -789,13 +789,11 @@ fn select_column_does_not_exist() { #[test] fn select_repeated_column() { let sql = "SELECT age, age FROM person"; - let plan = logical_plan(sql).unwrap(); + let err = logical_plan(sql).expect_err("query should have failed"); + assert_snapshot!( - plan, - @r" - Projection: person.age, person.age AS age:1 - TableScan: person - " + err.strip_backtrace(), + @r#"Error during planning: Projections require unique expression names but the expression "person.age" at position 0 and "person.age" at position 1 have the same name. Consider aliasing ("AS") one of them."# ); } @@ -1533,14 +1531,11 @@ fn select_simple_aggregate_column_does_not_exist() { #[test] fn select_simple_aggregate_repeated_aggregate() { let sql = "SELECT MIN(age), MIN(age) FROM person"; - let plan = logical_plan(sql).unwrap(); + let err = logical_plan(sql).expect_err("query should have failed"); + assert_snapshot!( - plan, - @r" - Projection: min(person.age), min(person.age) AS min(person.age):1 - Aggregate: groupBy=[[]], aggr=[[min(person.age)]] - TableScan: person - " + err.strip_backtrace(), + @r#"Error during planning: Projections require unique expression names but the expression "min(person.age)" at position 0 and "min(person.age)" at position 1 have the same name. Consider aliasing ("AS") one of them."# ); } @@ -1589,14 +1584,11 @@ fn select_from_typed_string_values() { #[test] fn select_simple_aggregate_repeated_aggregate_with_repeated_aliases() { let sql = "SELECT MIN(age) AS a, MIN(age) AS a FROM person"; - let plan = logical_plan(sql).unwrap(); + let err = logical_plan(sql).expect_err("query should have failed"); + assert_snapshot!( - plan, - @r" - Projection: min(person.age) AS a, min(person.age) AS a AS a:1 - Aggregate: groupBy=[[]], aggr=[[min(person.age)]] - TableScan: person - " + err.strip_backtrace(), + @r#"Error during planning: Projections require unique expression names but the expression "min(person.age) AS a" at position 0 and "min(person.age) AS a" at position 1 have the same name. Consider aliasing ("AS") one of them."# ); } @@ -1633,14 +1625,11 @@ fn select_simple_aggregate_with_groupby_with_aliases() { #[test] fn select_simple_aggregate_with_groupby_with_aliases_repeated() { let sql = "SELECT state AS a, MIN(age) AS a FROM person GROUP BY state"; - let plan = logical_plan(sql).unwrap(); + let err = logical_plan(sql).expect_err("query should have failed"); + assert_snapshot!( - plan, - @r" - Projection: person.state AS a, min(person.age) AS a AS a:1 - Aggregate: groupBy=[[person.state]], aggr=[[min(person.age)]] - TableScan: person - " + err.strip_backtrace(), + @r#"Error during planning: Projections require unique expression names but the expression "person.state AS a" at position 0 and "min(person.age) AS a" at position 1 have the same name. Consider aliasing ("AS") one of them."# ); } @@ -1761,14 +1750,11 @@ fn select_simple_aggregate_with_groupby_can_use_alias() { #[test] fn select_simple_aggregate_with_groupby_aggregate_repeated() { let sql = "SELECT state, MIN(age), MIN(age) FROM person GROUP BY state"; - let plan = logical_plan(sql).unwrap(); + let err = logical_plan(sql).expect_err("query should have failed"); + assert_snapshot!( - plan, - @r" - Projection: person.state, min(person.age), min(person.age) AS min(person.age):1 - Aggregate: groupBy=[[person.state]], aggr=[[min(person.age)]] - TableScan: person - " + err.strip_backtrace(), + @r#"Error during planning: Projections require unique expression names but the expression "min(person.age)" at position 1 and "min(person.age)" at position 2 have the same name. Consider aliasing ("AS") one of them."# ); } diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index cfca0de74bf27..e42ebd4ce7b16 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -7941,16 +7941,21 @@ select count(), count() * count() from t; ---- 2 4 -# Duplicate aggregate expressions are now auto-suffixed -query II +# DataFusion error: Error during planning: Projections require unique expression names but the expression "count\(Int64\(1\)\)" at position 0 and "count\(Int64\(1\)\)" at position 1 have the same name\. Consider aliasing \("AS"\) one of them\. +query error select count(1), count(1) from t; ----- -2 2 -query II +# DataFusion error: Error during planning: Projections require unique expression names but the expression "count\(Int64\(1\)\)" at position 0 and "count\(Int64\(1\)\)" at position 1 have the same name\. Consider aliasing \("AS"\) one of them\. +query error +explain select count(1), count(1) from t; + +# DataFusion error: Error during planning: Projections require unique expression names but the expression "count\(Int64\(1\) AS \)" at position 0 and "count\(Int64\(1\) AS \)" at position 1 have the same name\. Consider aliasing \("AS"\) one of them\. +query error select count(), count() from t; ----- -2 2 + +# DataFusion error: Error during planning: Projections require unique expression names but the expression "count\(Int64\(1\) AS \)" at position 0 and "count\(Int64\(1\) AS \)" at position 1 have the same name\. Consider aliasing \("AS"\) one of them\. +query error +explain select count(), count() from t; query II select count(1), count(2) from t; diff --git a/datafusion/sqllogictest/test_files/duplicate_column_alias.slt b/datafusion/sqllogictest/test_files/duplicate_column_alias.slt deleted file mode 100644 index e7e2d14976fcc..0000000000000 --- a/datafusion/sqllogictest/test_files/duplicate_column_alias.slt +++ /dev/null @@ -1,175 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Tests for duplicate column names/aliases in projections. -# DataFusion auto-suffixes duplicates with :{count} (e.g. cov, cov:1). - -# Setup -statement ok -CREATE TABLE t(x INT, y INT) AS VALUES (1, 2), (3, 4); - -# -# Basic duplicate alias -# -query II -SELECT x AS c1, y AS c1 FROM t; ----- -1 2 -3 4 - -# -# Duplicate literal expressions -# -query II -SELECT 1, 1; ----- -1 1 - -# -# Same column selected twice -# -query II -SELECT x, x FROM t; ----- -1 1 -3 3 - -# -# Subquery with duplicate column names -# -query II -SELECT * FROM (SELECT x AS c1, y AS c1 FROM t); ----- -1 2 -3 4 - -# -# ORDER BY referencing a duplicated alias resolves to first occurrence -# -query II -SELECT x AS c1, y AS c1 FROM t ORDER BY c1; ----- -1 2 -3 4 - -# -# CTE join producing duplicate column names (TPC-DS Q39 pattern) -# -statement ok -CREATE TABLE inv(warehouse_sk INT, item_sk INT, moy INT, cov DOUBLE) AS VALUES - (1, 10, 1, 1.5), - (1, 10, 2, 2.0), - (2, 20, 1, 0.8), - (2, 20, 2, 1.2); - -query IIIRIIIR -WITH inv1 AS ( - SELECT warehouse_sk, item_sk, moy, cov FROM inv WHERE moy = 1 -), -inv2 AS ( - SELECT warehouse_sk, item_sk, moy, cov FROM inv WHERE moy = 2 -) -SELECT inv1.warehouse_sk, inv1.item_sk, inv1.moy, inv1.cov, - inv2.warehouse_sk, inv2.item_sk, inv2.moy, inv2.cov -FROM inv1 JOIN inv2 - ON inv1.item_sk = inv2.item_sk AND inv1.warehouse_sk = inv2.warehouse_sk -ORDER BY inv1.warehouse_sk, inv1.item_sk; ----- -1 10 1 1.5 1 10 2 2 -2 20 1 0.8 2 20 2 1.2 - -# -# Three-way duplicate -# -query III -SELECT x AS a, y AS a, x + y AS a FROM t; ----- -1 2 3 -3 4 7 - -# -# CAST produces same schema_name as original column (TPC-DS Q39 pattern). -# CAST is transparent to schema_name, so CAST(x AS DOUBLE) and x -# both have schema_name "x" — this must be deduped. -# -query RI -SELECT CAST(x AS DOUBLE), x FROM t; ----- -1 1 -3 3 - -# -# GROUP BY with duplicate expressions in SELECT -# -query II -SELECT x, x FROM t GROUP BY x; ----- -1 1 -3 3 - -# -# Aggregate with GROUP BY producing duplicate column names -# -query III -SELECT x, SUM(y) AS total, SUM(y) AS total FROM t GROUP BY x ORDER BY x; ----- -1 2 2 -3 4 4 - -# -# ORDER BY referencing the second (renamed) column by position -# -query II -SELECT y AS c1, x AS c1 FROM t ORDER BY 2; ----- -2 1 -4 3 - -# -# Function calls that produce the same schema_name after argument -# normalization (reported in issue #6543 for iszero). -# -query BB -SELECT iszero(0.0), iszero(-0.0); ----- -true true - -# -# Duplicate expressions inside a UNION subquery -# -query II -SELECT * FROM (SELECT x AS a, y AS a FROM t UNION ALL SELECT y AS a, x AS a FROM t) ORDER BY 1, 2; ----- -1 2 -2 1 -3 4 -4 3 - -# -# Known limitation: wildcard expansion happens after dedup, so -# SELECT *, col FROM t still errors when col overlaps with *. -# This will be addressed in a follow-up PR. -# -query error DataFusion error: Error during planning: Projections require unique expression names but the expression "t\.x" at position 0 and "t\.x" at position 2 have the same name\. Consider aliasing \("AS"\) one of them\. -SELECT *, x FROM t; - -# Cleanup -statement ok -DROP TABLE t; - -statement ok -DROP TABLE inv; diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 4b3f3e4f38e23..8cfc01380d4b2 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -547,16 +547,8 @@ select unnest(column1) from (select * from (values([1,2,3]), ([4,5,6])) limit 1 5 6 -query II +query error DataFusion error: Error during planning: Projections require unique expression names but the expression "UNNEST\(unnest_table.column1\)" at position 0 and "UNNEST\(unnest_table.column1\)" at position 1 have the same name. Consider aliasing \("AS"\) one of them. select unnest(column1), unnest(column1) from unnest_table; ----- -1 1 -2 2 -3 3 -4 4 -5 5 -6 6 -12 12 query II select unnest(column1), unnest(column1) u1 from unnest_table;