Skip to content

Commit e6716d7

Browse files
committed
attach span to cte and diagnostic
1 parent 73fbd48 commit e6716d7

4 files changed

Lines changed: 165 additions & 20 deletions

File tree

datafusion/sql/src/cte.rs

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use std::sync::Arc;
2020
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
2121

2222
use datafusion_common::{
23-
Result, not_impl_err, plan_err,
23+
Diagnostic, Result, Span, not_impl_err, plan_err,
2424
tree_node::{TreeNode, TreeNodeRecursion},
2525
};
2626
use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, TableSource};
@@ -37,10 +37,24 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
3737
for cte in with.cte_tables {
3838
// A `WITH` block can't use the same name more than once
3939
let cte_name = self.ident_normalizer.normalize(cte.alias.name.clone());
40+
let cte_name_span =
41+
Span::try_from_sqlparser_span(cte.alias.name.span);
4042
if planner_context.contains_cte(&cte_name) {
43+
let first_span = planner_context.get_cte_span(&cte_name);
44+
let mut diagnostic = Diagnostic::new_error(
45+
format!(
46+
"WITH query name {cte_name:?} specified more than once"
47+
),
48+
cte_name_span,
49+
);
50+
if let Some(first_span) = first_span {
51+
diagnostic =
52+
diagnostic.with_note("previously defined here", Some(first_span));
53+
}
4154
return plan_err!(
4255
"WITH query name {cte_name:?} specified more than once"
43-
);
56+
)
57+
.map_err(|e| e.with_diagnostic(diagnostic));
4458
}
4559

4660
// Create a logical plan for the CTE
@@ -53,8 +67,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
5367
// Each `WITH` block can change the column names in the last
5468
// projection (e.g. "WITH table(t1, t2) AS SELECT 1, 2").
5569
let final_plan = self.apply_table_alias(cte_plan, cte.alias)?;
56-
// Export the CTE to the outer query
57-
planner_context.insert_cte(cte_name, final_plan);
70+
planner_context.insert_cte_with_span(cte_name, final_plan, cte_name_span);
5871
}
5972
Ok(())
6073
}

datafusion/sql/src/planner.rs

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ use datafusion_common::TableReference;
2727
use datafusion_common::config::SqlParserOptions;
2828
use datafusion_common::datatype::{DataTypeExt, FieldExt};
2929
use datafusion_common::error::add_possible_columns_to_diag;
30-
use datafusion_common::{DFSchema, DataFusionError, Result, not_impl_err, plan_err};
30+
use datafusion_common::{DFSchema, DataFusionError, Result, Span, not_impl_err, plan_err};
3131
use datafusion_common::{
3232
DFSchemaRef, Diagnostic, SchemaError, field_not_found, internal_err,
3333
plan_datafusion_err,
@@ -258,9 +258,9 @@ pub struct PlannerContext {
258258
/// Data types for numbered parameters ($1, $2, etc), if supplied
259259
/// in `PREPARE` statement
260260
prepare_param_data_types: Arc<Vec<FieldRef>>,
261-
/// Map of CTE name to logical plan of the WITH clause.
261+
/// Map of CTE name to logical plan of the WITH clause and optional span.
262262
/// Use `Arc<LogicalPlan>` to allow cheap cloning
263-
ctes: HashMap<String, Arc<LogicalPlan>>,
263+
ctes: HashMap<String, (Arc<LogicalPlan>, Option<Span>)>,
264264

265265
/// The queries schemas of outer query relations, used to resolve the outer referenced
266266
/// columns in subquery (recursive aware)
@@ -387,19 +387,35 @@ impl PlannerContext {
387387
/// Subquery for the specified name
388388
pub fn insert_cte(&mut self, cte_name: impl Into<String>, plan: LogicalPlan) {
389389
let cte_name = cte_name.into();
390-
self.ctes.insert(cte_name, Arc::new(plan));
390+
self.ctes.insert(cte_name, (Arc::new(plan), None));
391+
}
392+
393+
/// Inserts a LogicalPlan with an optional span for the CTE
394+
pub(super) fn insert_cte_with_span(
395+
&mut self,
396+
cte_name: impl Into<String>,
397+
plan: LogicalPlan,
398+
span: Option<Span>,
399+
) {
400+
let cte_name = cte_name.into();
401+
self.ctes.insert(cte_name, (Arc::new(plan), span));
391402
}
392403

393404
/// Return a plan for the Common Table Expression (CTE) / Subquery for the
394405
/// specified name
395406
pub fn get_cte(&self, cte_name: &str) -> Option<&LogicalPlan> {
396-
self.ctes.get(cte_name).map(|cte| cte.as_ref())
407+
self.ctes.get(cte_name).map(|(cte, _)| cte.as_ref())
397408
}
398409

399410
/// Remove the plan of CTE / Subquery for the specified name
400411
pub(super) fn remove_cte(&mut self, cte_name: &str) {
401412
self.ctes.remove(cte_name);
402413
}
414+
415+
/// Get the span of a previously defined CTE name
416+
pub(super) fn get_cte_span(&self, name: &str) -> Option<Span> {
417+
self.ctes.get(name).and_then(|(_, span)| *span)
418+
}
403419
}
404420

405421
/// SQL query planner and binder

datafusion/sql/src/select.rs

Lines changed: 73 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use std::collections::HashSet;
18+
use std::collections::{HashMap, HashSet};
1919
use std::ops::ControlFlow;
2020
use std::sync::Arc;
2121

@@ -29,7 +29,7 @@ use crate::utils::{
2929

3030
use datafusion_common::error::DataFusionErrorBuilder;
3131
use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
32-
use datafusion_common::{Column, DFSchema, Result, not_impl_err, plan_err};
32+
use datafusion_common::{Column, DFSchema, Diagnostic, Result, Span, not_impl_err, plan_err};
3333
use datafusion_common::{RecursionUnnestOption, UnnestOptions};
3434
use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions};
3535
use datafusion_expr::expr_rewriter::{
@@ -50,7 +50,9 @@ use sqlparser::ast::{
5050
SelectItemQualifiedWildcardKind, WildcardAdditionalOptions, WindowType,
5151
visit_expressions_mut,
5252
};
53-
use sqlparser::ast::{NamedWindowDefinition, Select, SelectItem, TableWithJoins};
53+
use sqlparser::ast::{
54+
NamedWindowDefinition, Select, SelectItem, Spanned, TableFactor, TableWithJoins,
55+
};
5456

5557
/// Result of the `aggregate` function, containing the aggregate plan and
5658
/// rewritten expressions that reference the aggregate output columns.
@@ -690,21 +692,81 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
690692
self.plan_table_with_joins(input, planner_context)
691693
}
692694
_ => {
695+
let extract_table_name =
696+
|t: &TableWithJoins| -> Option<(String, Option<Span>)> {
697+
let span =
698+
Span::try_from_sqlparser_span(t.relation.span());
699+
match &t.relation {
700+
TableFactor::Table { alias: Some(a), .. } => {
701+
let name = self
702+
.ident_normalizer
703+
.normalize(a.name.clone());
704+
Some((name, span))
705+
}
706+
TableFactor::Table { name, alias: None, .. } => {
707+
let table_name = name
708+
.0
709+
.iter()
710+
.filter_map(|p| p.as_ident())
711+
.map(|id| {
712+
self.ident_normalizer
713+
.normalize(id.clone())
714+
})
715+
.last()?;
716+
Some((table_name, span))
717+
}
718+
_ => None,
719+
}
720+
};
721+
722+
let mut alias_spans: HashMap<String, Option<Span>> =
723+
HashMap::new();
724+
693725
let mut from = from.into_iter();
726+
let first = from.next().unwrap();
727+
728+
if let Some((name, span)) = extract_table_name(&first) {
729+
alias_spans.entry(name).or_insert(span);
730+
}
731+
732+
let mut left = LogicalPlanBuilder::from(
733+
self.plan_table_with_joins(first, planner_context)?,
734+
);
694735

695-
let mut left = LogicalPlanBuilder::from({
696-
let input = from.next().unwrap();
697-
self.plan_table_with_joins(input, planner_context)?
698-
});
699736
let old_outer_from_schema = {
700737
let left_schema = Some(Arc::clone(left.schema()));
701738
planner_context.set_outer_from_schema(left_schema)
702739
};
703740
for input in from {
704-
// Join `input` with the current result (`left`).
705-
let right = self.plan_table_with_joins(input, planner_context)?;
706-
left = left.cross_join(right)?;
707-
// Update the outer FROM schema.
741+
let current_span =
742+
Span::try_from_sqlparser_span(input.relation.span());
743+
let current_name = extract_table_name(&input);
744+
745+
if let Some((ref name, _)) = current_name {
746+
alias_spans.entry(name.clone()).or_insert(current_span);
747+
}
748+
749+
let right =
750+
self.plan_table_with_joins(input, planner_context)?;
751+
752+
left = left.cross_join(right).map_err(|e| {
753+
if let Some((ref name, _)) = current_name {
754+
if let Some(prior_span) =
755+
alias_spans.get(name).copied().flatten()
756+
{
757+
let mut diagnostic = Diagnostic::new_error(
758+
"duplicate table alias in FROM clause",
759+
current_span,
760+
);
761+
diagnostic = diagnostic.with_note(
762+
"first defined here",
763+
Some(prior_span),
764+
);
765+
return e.with_diagnostic(diagnostic);
766+
}
767+
}
768+
e
769+
})?;
708770
let left_schema = Some(Arc::clone(left.schema()));
709771
planner_context.set_outer_from_schema(left_schema);
710772
}

datafusion/sql/tests/cases/diagnostic.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,3 +390,57 @@ fn test_syntax_error() -> Result<()> {
390390
},
391391
}
392392
}
393+
394+
#[test]
395+
fn test_duplicate_cte_name() -> Result<()> {
396+
let query =
397+
"WITH /*a*/cte/*a*/ AS (SELECT 1 AS col), /*b*/cte/*b*/ AS (SELECT 2 AS col) SELECT 1";
398+
let spans = get_spans(query);
399+
let diag = do_query(query);
400+
assert_snapshot!(diag.message, @r#"WITH query name "cte" specified more than once"#);
401+
assert_eq!(diag.span, Some(spans["b"]));
402+
assert_eq!(diag.notes.len(), 1);
403+
assert_snapshot!(diag.notes[0].message, @"previously defined here");
404+
assert_eq!(diag.notes[0].span, Some(spans["a"]));
405+
Ok(())
406+
}
407+
408+
#[test]
409+
fn test_duplicate_table_alias() -> Result<()> {
410+
let query = "SELECT * FROM /*a*/person a/*a*/, /*b*/person a/*b*/";
411+
let spans = get_spans(query);
412+
let diag = do_query(query);
413+
assert_snapshot!(diag.message, @"duplicate table alias in FROM clause");
414+
assert_eq!(diag.span, Some(spans["b"]));
415+
assert_eq!(diag.notes.len(), 1);
416+
assert_snapshot!(diag.notes[0].message, @"first defined here");
417+
assert_eq!(diag.notes[0].span, Some(spans["a"]));
418+
Ok(())
419+
}
420+
421+
#[test]
422+
fn test_duplicate_table_alias_not_first() -> Result<()> {
423+
let query =
424+
"SELECT * FROM person a, /*b*/test_decimal b/*b*/, /*c*/person b/*c*/";
425+
let spans = get_spans(query);
426+
let diag = do_query(query);
427+
assert_snapshot!(diag.message, @"duplicate table alias in FROM clause");
428+
assert_eq!(diag.span, Some(spans["c"]));
429+
assert_eq!(diag.notes.len(), 1);
430+
assert_snapshot!(diag.notes[0].message, @"first defined here");
431+
assert_eq!(diag.notes[0].span, Some(spans["b"]));
432+
Ok(())
433+
}
434+
435+
#[test]
436+
fn test_duplicate_bare_table_in_from() -> Result<()> {
437+
let query = "SELECT * FROM /*a*/person/*a*/, /*b*/person/*b*/";
438+
let spans = get_spans(query);
439+
let diag = do_query(query);
440+
assert_snapshot!(diag.message, @"duplicate table alias in FROM clause");
441+
assert_eq!(diag.span, Some(spans["b"]));
442+
assert_eq!(diag.notes.len(), 1);
443+
assert_snapshot!(diag.notes[0].message, @"first defined here");
444+
assert_eq!(diag.notes[0].span, Some(spans["a"]));
445+
Ok(())
446+
}

0 commit comments

Comments
 (0)