diff --git a/datafusion/sql/src/cte.rs b/datafusion/sql/src/cte.rs index 18766d7056355..63f0ba20cf7bd 100644 --- a/datafusion/sql/src/cte.rs +++ b/datafusion/sql/src/cte.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::{ - Result, not_impl_err, plan_err, + Diagnostic, Result, Span, not_impl_err, plan_err, tree_node::{TreeNode, TreeNodeRecursion}, }; use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, TableSource}; @@ -37,10 +37,16 @@ impl SqlToRel<'_, S> { for cte in with.cte_tables { // A `WITH` block can't use the same name more than once let cte_name = self.ident_normalizer.normalize(cte.alias.name.clone()); + let cte_name_span = Span::try_from_sqlparser_span(cte.alias.name.span); if planner_context.contains_cte(&cte_name) { - return plan_err!( - "WITH query name {cte_name:?} specified more than once" - ); + let msg = + format!("WITH query name {cte_name:?} specified more than once"); + let mut diagnostic = Diagnostic::new_error(&msg, cte_name_span); + if let Some(first_span) = planner_context.get_cte_span(&cte_name) { + diagnostic = + diagnostic.with_note("previously defined here", Some(first_span)); + } + return plan_err!("{msg}").map_err(|e| e.with_diagnostic(diagnostic)); } // Create a logical plan for the CTE @@ -53,8 +59,7 @@ impl SqlToRel<'_, S> { // Each `WITH` block can change the column names in the last // projection (e.g. "WITH table(t1, t2) AS SELECT 1, 2"). let final_plan = self.apply_table_alias(cte_plan, cte.alias)?; - // Export the CTE to the outer query - planner_context.insert_cte(cte_name, final_plan); + planner_context.insert_cte_with_span(cte_name, final_plan, cte_name_span); } Ok(()) } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 32daf65a71fa4..c54228626f48f 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -27,7 +27,9 @@ use datafusion_common::TableReference; use datafusion_common::config::SqlParserOptions; use datafusion_common::datatype::{DataTypeExt, FieldExt}; use datafusion_common::error::add_possible_columns_to_diag; -use datafusion_common::{DFSchema, DataFusionError, Result, not_impl_err, plan_err}; +use datafusion_common::{ + DFSchema, DataFusionError, Result, Span, not_impl_err, plan_err, +}; use datafusion_common::{ DFSchemaRef, Diagnostic, SchemaError, field_not_found, internal_err, plan_datafusion_err, @@ -258,9 +260,9 @@ pub struct PlannerContext { /// Data types for numbered parameters ($1, $2, etc), if supplied /// in `PREPARE` statement prepare_param_data_types: Arc>, - /// Map of CTE name to logical plan of the WITH clause. + /// Map of CTE name to logical plan of the WITH clause and optional span. /// Use `Arc` to allow cheap cloning - ctes: HashMap>, + ctes: HashMap, Option)>, /// The queries schemas of outer query relations, used to resolve the outer referenced /// columns in subquery (recursive aware) @@ -392,13 +394,24 @@ impl PlannerContext { /// Subquery for the specified name pub fn insert_cte(&mut self, cte_name: impl Into, plan: LogicalPlan) { let cte_name = cte_name.into(); - self.ctes.insert(cte_name, Arc::new(plan)); + self.ctes.insert(cte_name, (Arc::new(plan), None)); + } + + /// Inserts a LogicalPlan with an optional span for the CTE + pub(super) fn insert_cte_with_span( + &mut self, + cte_name: impl Into, + plan: LogicalPlan, + span: Option, + ) { + let cte_name = cte_name.into(); + self.ctes.insert(cte_name, (Arc::new(plan), span)); } /// Return a plan for the Common Table Expression (CTE) / Subquery for the /// specified name pub fn get_cte(&self, cte_name: &str) -> Option<&LogicalPlan> { - self.ctes.get(cte_name).map(|cte| cte.as_ref()) + self.ctes.get(cte_name).map(|(cte, _)| cte.as_ref()) } /// Remove the plan of CTE / Subquery for the specified name @@ -406,6 +419,11 @@ impl PlannerContext { self.ctes.remove(cte_name); } + /// Get the span of a previously defined CTE name + pub(super) fn get_cte_span(&self, name: &str) -> Option { + self.ctes.get(name).and_then(|(_, span)| *span) + } + /// Sets the left-most set expression schema, returning the previous value pub(super) fn set_set_expr_left_schema( &mut self, diff --git a/datafusion/sql/src/relation/join.rs b/datafusion/sql/src/relation/join.rs index 8e1a8817309f0..f5830ebc90118 100644 --- a/datafusion/sql/src/relation/join.rs +++ b/datafusion/sql/src/relation/join.rs @@ -16,12 +16,14 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{Column, Result, not_impl_err, plan_datafusion_err}; +use datafusion_common::{ + Column, Diagnostic, Result, Span, not_impl_err, plan_datafusion_err, plan_err, +}; use datafusion_expr::{JoinType, LogicalPlan, LogicalPlanBuilder}; use sqlparser::ast::{ Join, JoinConstraint, JoinOperator, ObjectName, TableFactor, TableWithJoins, }; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; impl SqlToRel<'_, S> { pub(crate) fn plan_table_with_joins( @@ -29,13 +31,39 @@ impl SqlToRel<'_, S> { t: TableWithJoins, planner_context: &mut PlannerContext, ) -> Result { - let mut left = if is_lateral(&t.relation) { - self.create_relation_subquery(t.relation, planner_context)? + let TableWithJoins { relation, joins } = t; + + let mut alias_spans: HashMap> = HashMap::new(); + + if let Some((name, span)) = self.extract_relation_name(&relation)? { + alias_spans.insert(name, span); + } + + let mut left = if is_lateral(&relation) { + self.create_relation_subquery(relation, planner_context)? } else { - self.create_relation(t.relation, planner_context)? + self.create_relation(relation, planner_context)? }; let old_outer_from_schema = planner_context.outer_from_schema(); - for join in t.joins { + for join in joins { + if let Some((ref name, current_span)) = + self.extract_relation_name(&join.relation)? + { + if let Some(prior_span) = alias_spans.get(name) { + let mut diagnostic = Diagnostic::new_error( + "duplicate table alias in FROM clause", + current_span, + ); + if let Some(span) = *prior_span { + diagnostic = + diagnostic.with_note("first defined here", Some(span)); + } + return plan_err!("duplicate table alias in FROM clause") + .map_err(|e| e.with_diagnostic(diagnostic)); + } + alias_spans.insert(name.clone(), current_span); + } + planner_context.extend_outer_from_schema(left.schema())?; left = self.parse_relation_join(left, join, planner_context)?; } diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 14ccab870317e..bb64c62a52343 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -80,6 +80,31 @@ impl<'a, 'b, S: ContextProvider> RelationPlannerContext } impl SqlToRel<'_, S> { + pub(crate) fn extract_relation_name( + &self, + relation: &TableFactor, + ) -> Result)>> { + match relation { + TableFactor::Table { alias: Some(a), .. } + | TableFactor::Derived { alias: Some(a), .. } + | TableFactor::Function { alias: Some(a), .. } + | TableFactor::UNNEST { alias: Some(a), .. } + | TableFactor::NestedJoin { alias: Some(a), .. } => { + let span = Span::try_from_sqlparser_span(a.name.span); + let name = self.ident_normalizer.normalize(a.name.clone()); + Ok(Some((name, span))) + } + TableFactor::Table { + name, alias: None, .. + } => { + let span = Span::try_from_sqlparser_span(relation.span()); + let table_ref = self.object_name_to_table_reference(name.clone())?; + Ok(Some((table_ref.to_string(), span))) + } + _ => Ok(None), + } + } + /// Create a `LogicalPlan` that scans the named relation. /// /// First tries any registered extension planners. If no extension handles diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 82cca91d4e8ae..3cbea88e8b642 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::ops::ControlFlow; use std::sync::Arc; @@ -29,7 +29,9 @@ use crate::utils::{ use datafusion_common::error::DataFusionErrorBuilder; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_common::{Column, DFSchema, DFSchemaRef, Result, not_impl_err, plan_err}; +use datafusion_common::{ + Column, DFSchema, DFSchemaRef, Diagnostic, Result, Span, not_impl_err, plan_err, +}; use datafusion_common::{RecursionUnnestOption, UnnestOptions}; use datafusion_expr::expr::{PlannedReplaceSelectItem, WildcardOptions}; use datafusion_expr::expr_rewriter::{ @@ -711,21 +713,63 @@ impl SqlToRel<'_, S> { self.plan_table_with_joins(input, planner_context) } _ => { + let mut alias_spans: HashMap> = HashMap::new(); + let mut from = from.into_iter(); + let first = from.next().unwrap(); + + if let Some((name, span)) = self.extract_relation_name(&first.relation)? { + alias_spans.entry(name).or_insert(span); + } + for join in &first.joins { + if let Some((name, span)) = + self.extract_relation_name(&join.relation)? + { + alias_spans.entry(name).or_insert(span); + } + } + + let mut left = LogicalPlanBuilder::from( + self.plan_table_with_joins(first, planner_context)?, + ); - let mut left = LogicalPlanBuilder::from({ - let input = from.next().unwrap(); - self.plan_table_with_joins(input, planner_context)? - }); let old_outer_from_schema = { let left_schema = Some(Arc::clone(left.schema())); planner_context.set_outer_from_schema(left_schema) }; for input in from { - // Join `input` with the current result (`left`). + let mut current_names = Vec::new(); + if let Some(pair) = self.extract_relation_name(&input.relation)? { + current_names.push(pair); + } + for join in &input.joins { + if let Some(pair) = self.extract_relation_name(&join.relation)? { + current_names.push(pair); + } + } + + for (name, current_span) in ¤t_names { + if let Some(prior_span) = alias_spans.get(name.as_str()) { + let mut diagnostic = Diagnostic::new_error( + "duplicate table alias in FROM clause", + *current_span, + ); + if let Some(span) = *prior_span { + diagnostic = diagnostic + .with_note("first defined here", Some(span)); + } + return plan_err!("duplicate table alias in FROM clause") + .map_err(|e| e.with_diagnostic(diagnostic)); + } + } + + for (name, span) in current_names { + alias_spans.insert(name, span); + } + let right = self.plan_table_with_joins(input, planner_context)?; + left = left.cross_join(right)?; - // Update the outer FROM schema. let left_schema = Some(Arc::clone(left.schema())); planner_context.set_outer_from_schema(left_schema); } diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index 7a729739469d3..b8c5bf87b189f 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -28,6 +28,25 @@ use regex::Regex; use crate::{MockContextProvider, MockSessionState}; +fn expect_plan_ok(sql: &str) { + let statement = DFParserBuilder::new(sql) + .build() + .expect("unable to create parser") + .parse_statement() + .expect("unable to parse query"); + let options = ParserOptions { + collect_spans: true, + ..ParserOptions::default() + }; + let state = MockSessionState::default() + .with_scalar_function(Arc::new(string::concat().as_ref().clone())); + let context = MockContextProvider { state }; + let sql_to_rel = SqlToRel::new_with_options(&context, options); + sql_to_rel + .statement_to_plan(statement) + .expect("expected query to plan successfully"); +} + fn do_query(sql: &'static str) -> Diagnostic { let statement = DFParserBuilder::new(sql) .build() @@ -79,7 +98,7 @@ fn do_query(sql: &'static str) -> Diagnostic { /// dbg!(&spans["left"]); /// dbg!(&spans["right"]); /// ``` -fn get_spans(query: &'static str) -> HashMap { +fn get_spans(query: &str) -> HashMap { let mut spans = HashMap::new(); let mut bytes_per_line = vec![]; @@ -390,3 +409,163 @@ fn test_syntax_error() -> Result<()> { }, } } + +#[test] +fn test_duplicate_name_diagnostics() { + let cases: &[(&str, &str, &str, &str)] = &[ + // (label, query, expected_error, expected_note) + ( + "cte_duplicate", + "WITH /*a*/cte/*a*/ AS (SELECT 1 AS col), /*b*/cte/*b*/ AS (SELECT 2 AS col) SELECT 1", + r#"WITH query name "cte" specified more than once"#, + "previously defined here", + ), + ( + "comma_basic_alias", + "SELECT * FROM person /*a*/a/*a*/, person /*b*/a/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_alias_not_first", + "SELECT * FROM person a, test_decimal /*a*/b/*a*/, person /*b*/b/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_bare_table", + "SELECT * FROM /*a*/person/*a*/, /*b*/person/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_non_overlapping_columns", + "SELECT * FROM j1 AS /*a*/t/*a*/, j2 AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_non_overlapping_three_tables", + "SELECT * FROM j1 AS x, j2 AS /*a*/t/*a*/, j3 AS y, j1 AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_derived_subquery", + "SELECT * FROM (SELECT 1) AS /*a*/t/*a*/, (SELECT 2) AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_table_and_derived", + "SELECT * FROM person AS /*a*/t/*a*/, (SELECT 1) AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_derived_and_table", + "SELECT * FROM (SELECT 1) AS /*a*/t/*a*/, person AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_nested_join", + "SELECT * FROM (person CROSS JOIN j1) AS /*a*/t/*a*/, (person CROSS JOIN j2) AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_join_alias", + "SELECT 1 FROM j1 AS /*a*/t/*a*/ JOIN j2 AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_join_derived", + "SELECT 1 FROM (SELECT 1 AS a) AS /*a*/t/*a*/ JOIN (SELECT 2 AS b) AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "comma_and_explicit_join", + "SELECT 1 FROM j1 AS /*a*/t/*a*/, j2 JOIN j3 AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_join_bare_table", + "SELECT 1 FROM /*a*/person/*a*/ JOIN /*b*/person/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_cross_join", + "SELECT 1 FROM j1 AS /*a*/t/*a*/ CROSS JOIN j2 AS /*b*/t/*b*/", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_left_join", + "SELECT 1 FROM j1 AS /*a*/t/*a*/ LEFT JOIN j2 AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_three_way_join", + "SELECT 1 FROM j1 AS /*a*/x/*a*/ JOIN j2 AS y ON true JOIN j3 AS /*b*/x/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_between_joins", + "SELECT 1 FROM j1 AS x JOIN j2 AS /*a*/y/*a*/ ON true JOIN j3 AS /*b*/y/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "explicit_nested_join", + "SELECT 1 FROM (j1 CROSS JOIN j2) AS /*a*/t/*a*/ JOIN j3 AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "cross_entry_joins", + "SELECT 1 FROM j1 JOIN j2 AS /*a*/t/*a*/ ON true, j3 JOIN person AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ( + "case_insensitive", + "SELECT 1 FROM j1 AS /*a*/T/*a*/ JOIN j2 AS /*b*/t/*b*/ ON true", + "duplicate table alias in FROM clause", + "first defined here", + ), + ]; + + for &(label, query, expected_error, expected_note) in cases { + let spans = get_spans(query); + let diag = do_query(query); + + assert_eq!(diag.message, expected_error, "message mismatch in {label}"); + assert_eq!( + diag.span, + Some(spans["b"]), + "error span mismatch in {label}" + ); + assert_eq!(diag.notes.len(), 1, "expected 1 note in {label}"); + assert_eq!( + diag.notes[0].message, expected_note, + "note mismatch in {label}" + ); + assert_eq!( + diag.notes[0].span, + Some(spans["a"]), + "note span mismatch in {label}" + ); + } +} + +#[test] +fn test_schema_qualified_tables_not_duplicate() { + expect_plan_ok("SELECT * FROM schema1.person, schema2.person"); +} diff --git a/datafusion/sqllogictest/test_files/join.slt.part b/datafusion/sqllogictest/test_files/join.slt.part index b9d163d877596..551e40f83a6a1 100644 --- a/datafusion/sqllogictest/test_files/join.slt.part +++ b/datafusion/sqllogictest/test_files/join.slt.part @@ -1230,14 +1230,14 @@ drop table t2; statement ok create table t1(v1 int) as values(100); -## Query with Ambiguous column reference -query error DataFusion error: Schema error: Schema contains duplicate qualified field name t1\.v1 +## Query with duplicate table name in self-join (no aliases) +query error DataFusion error: Error during planning: duplicate table alias in FROM clause select count(*) from t1 right outer join t1 on t1.v1 > 0; -query error DataFusion error: Schema error: Schema contains duplicate qualified field name t1\.v1 +query error DataFusion error: Error during planning: duplicate table alias in FROM clause select t1.v1 from t1 join t1 using(v1) cross join (select struct('foo' as v1) as t1); statement ok