Skip to content

Commit 3bc8dc3

Browse files
yoavcloud and ayman-sigma
authored and committed
Add support for Snowflake identifier function (apache#1929)
1 parent 7f1304a commit 3bc8dc3

File tree

7 files changed

+268
-51
lines changed

7 files changed

+268
-51
lines changed

src/ast/mod.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,12 +344,14 @@ impl fmt::Display for ObjectName {
344344
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
345345
pub enum ObjectNamePart {
346346
Identifier(Ident),
347+
Function(ObjectNamePartFunction),
347348
}
348349

349350
impl ObjectNamePart {
350351
pub fn as_ident(&self) -> Option<&Ident> {
351352
match self {
352353
ObjectNamePart::Identifier(ident) => Some(ident),
354+
ObjectNamePart::Function(_) => None,
353355
}
354356
}
355357
}
@@ -358,10 +360,30 @@ impl fmt::Display for ObjectNamePart {
358360
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
359361
match self {
360362
ObjectNamePart::Identifier(ident) => write!(f, "{ident}"),
363+
ObjectNamePart::Function(func) => write!(f, "{func}"),
361364
}
362365
}
363366
}
364367

368+
/// An object name part that consists of a function that dynamically
369+
/// constructs identifiers.
370+
///
371+
/// - [Snowflake](https://docs.snowflake.com/en/sql-reference/identifier-literal)
372+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
373+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
374+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
375+
pub struct ObjectNamePartFunction {
376+
pub name: Ident,
377+
pub args: Vec<FunctionArg>,
378+
}
379+
380+
impl fmt::Display for ObjectNamePartFunction {
381+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
382+
write!(f, "{}(", self.name)?;
383+
write!(f, "{})", display_comma_separated(&self.args))
384+
}
385+
}
386+
365387
/// Represents an Array Expression, either
366388
/// `ARRAY[..]`, or `[..]`
367389
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]

src/ast/spans.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1676,6 +1676,10 @@ impl Spanned for ObjectNamePart {
16761676
fn span(&self) -> Span {
16771677
match self {
16781678
ObjectNamePart::Identifier(ident) => ident.span,
1679+
ObjectNamePart::Function(func) => func
1680+
.name
1681+
.span
1682+
.union(&union_spans(func.args.iter().map(|i| i.span()))),
16791683
}
16801684
}
16811685
}

src/dialect/mod.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ pub use self::postgresql::PostgreSqlDialect;
4949
pub use self::redshift::RedshiftSqlDialect;
5050
pub use self::snowflake::SnowflakeDialect;
5151
pub use self::sqlite::SQLiteDialect;
52-
use crate::ast::{ColumnOption, Expr, GranteesType, Statement};
52+
use crate::ast::{ColumnOption, Expr, GranteesType, Ident, ObjectNamePart, Statement};
5353
pub use crate::keywords;
5454
use crate::keywords::Keyword;
5555
use crate::parser::{Parser, ParserError};
@@ -1081,6 +1081,19 @@ pub trait Dialect: Debug + Any {
10811081
fn supports_comma_separated_drop_column_list(&self) -> bool {
10821082
false
10831083
}
1084+
1085+
/// Returns true if the dialect considers the specified ident as a function
1086+
/// that returns an identifier. Typically used to generate identifiers
1087+
/// programmatically.
1088+
///
1089+
/// - [Snowflake](https://docs.snowflake.com/en/sql-reference/identifier-literal)
1090+
fn is_identifier_generating_function_name(
1091+
&self,
1092+
_ident: &Ident,
1093+
_name_parts: &[ObjectNamePart],
1094+
) -> bool {
1095+
false
1096+
}
10841097
}
10851098

10861099
/// This represents the operators for which precedence must be defined

src/dialect/snowflake.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ use crate::ast::helpers::stmt_data_loading::{
2525
use crate::ast::{
2626
ColumnOption, ColumnPolicy, ColumnPolicyProperty, CopyIntoSnowflakeKind, Ident,
2727
IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind,
28-
IdentityPropertyOrder, ObjectName, RowAccessPolicy, ShowObjects, SqlOption, Statement,
29-
TagsColumnOption, WrappedCollection,
28+
IdentityPropertyOrder, ObjectName, ObjectNamePart, RowAccessPolicy, ShowObjects, SqlOption,
29+
Statement, TagsColumnOption, WrappedCollection,
3030
};
3131
use crate::dialect::{Dialect, Precedence};
3232
use crate::keywords::Keyword;
@@ -367,6 +367,23 @@ impl Dialect for SnowflakeDialect {
367367
fn supports_comma_separated_drop_column_list(&self) -> bool {
368368
true
369369
}
370+
371+
fn is_identifier_generating_function_name(
372+
&self,
373+
ident: &Ident,
374+
name_parts: &[ObjectNamePart],
375+
) -> bool {
376+
ident.quote_style.is_none()
377+
&& ident.value.to_lowercase() == "identifier"
378+
&& !name_parts
379+
.iter()
380+
.any(|p| matches!(p, ObjectNamePart::Function(_)))
381+
}
382+
383+
// For example: `SELECT IDENTIFIER('alias1').* FROM tbl AS alias1`
384+
fn supports_select_expr_star(&self) -> bool {
385+
true
386+
}
370387
}
371388

372389
fn parse_file_staging_command(kw: Keyword, parser: &mut Parser) -> Result<Statement, ParserError> {

src/parser/mod.rs

Lines changed: 69 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -10384,70 +10384,92 @@ impl<'a> Parser<'a> {
1038410384
}
1038510385
}
1038610386

10387-
/// Parse a possibly qualified, possibly quoted identifier, optionally allowing for wildcards,
10387+
/// Parse a possibly qualified, possibly quoted identifier, e.g.
10388+
/// `foo` or `myschema."table"`
10389+
///
10390+
/// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN,
10391+
/// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers
10392+
/// in this context on BigQuery.
10393+
pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result<ObjectName, ParserError> {
10394+
self.parse_object_name_inner(in_table_clause, false)
10395+
}
10396+
10397+
/// Parse a possibly qualified, possibly quoted identifier, e.g.
10398+
/// `foo` or `myschema."table"`
10399+
///
10400+
/// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN,
10401+
/// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers
10402+
/// in this context on BigQuery.
10403+
///
10404+
/// The `allow_wildcards` parameter indicates whether to allow for wildcards in the object name
1038810405
/// e.g. *, *.*, `foo`.*, or "foo"."bar"
10389-
fn parse_object_name_with_wildcards(
10406+
fn parse_object_name_inner(
1039010407
&mut self,
1039110408
in_table_clause: bool,
1039210409
allow_wildcards: bool,
1039310410
) -> Result<ObjectName, ParserError> {
10394-
let mut idents = vec![];
10395-
10411+
let mut parts = vec![];
1039610412
if dialect_of!(self is BigQueryDialect) && in_table_clause {
1039710413
loop {
1039810414
let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?;
10399-
idents.push(ident);
10415+
parts.push(ObjectNamePart::Identifier(ident));
1040010416
if !self.consume_token(&Token::Period) && !end_with_period {
1040110417
break;
1040210418
}
1040310419
}
1040410420
} else {
1040510421
loop {
10406-
let ident = if allow_wildcards && self.peek_token().token == Token::Mul {
10422+
if allow_wildcards && self.peek_token().token == Token::Mul {
1040710423
let span = self.next_token().span;
10408-
Ident {
10424+
parts.push(ObjectNamePart::Identifier(Ident {
1040910425
value: Token::Mul.to_string(),
1041010426
quote_style: None,
1041110427
span,
10428+
}));
10429+
} else if dialect_of!(self is BigQueryDialect) && in_table_clause {
10430+
let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?;
10431+
parts.push(ObjectNamePart::Identifier(ident));
10432+
if !self.consume_token(&Token::Period) && !end_with_period {
10433+
break;
1041210434
}
10435+
} else if self.dialect.supports_object_name_double_dot_notation()
10436+
&& parts.len() == 1
10437+
&& matches!(self.peek_token().token, Token::Period)
10438+
{
10439+
// Empty string here means default schema
10440+
parts.push(ObjectNamePart::Identifier(Ident::new("")));
1041310441
} else {
10414-
if self.dialect.supports_object_name_double_dot_notation()
10415-
&& idents.len() == 1
10416-
&& self.consume_token(&Token::Period)
10442+
let ident = self.parse_identifier()?;
10443+
let part = if self
10444+
.dialect
10445+
.is_identifier_generating_function_name(&ident, &parts)
1041710446
{
10418-
// Empty string here means default schema
10419-
idents.push(Ident::new(""));
10420-
}
10421-
self.parse_identifier()?
10422-
};
10423-
idents.push(ident);
10447+
self.expect_token(&Token::LParen)?;
10448+
let args: Vec<FunctionArg> =
10449+
self.parse_comma_separated0(Self::parse_function_args, Token::RParen)?;
10450+
self.expect_token(&Token::RParen)?;
10451+
ObjectNamePart::Function(ObjectNamePartFunction { name: ident, args })
10452+
} else {
10453+
ObjectNamePart::Identifier(ident)
10454+
};
10455+
parts.push(part);
10456+
}
10457+
1042410458
if !self.consume_token(&Token::Period) {
1042510459
break;
1042610460
}
1042710461
}
1042810462
}
10429-
Ok(ObjectName::from(idents))
10430-
}
10431-
10432-
/// Parse a possibly qualified, possibly quoted identifier, e.g.
10433-
/// `foo` or `myschema."table"
10434-
///
10435-
/// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN,
10436-
/// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers
10437-
/// in this context on BigQuery.
10438-
pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result<ObjectName, ParserError> {
10439-
let ObjectName(mut idents) =
10440-
self.parse_object_name_with_wildcards(in_table_clause, false)?;
1044110463

1044210464
// BigQuery accepts any number of quoted identifiers of a table name.
1044310465
// https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_identifiers
1044410466
if dialect_of!(self is BigQueryDialect)
10445-
&& idents.iter().any(|part| {
10467+
&& parts.iter().any(|part| {
1044610468
part.as_ident()
1044710469
.is_some_and(|ident| ident.value.contains('.'))
1044810470
})
1044910471
{
10450-
idents = idents
10472+
parts = parts
1045110473
.into_iter()
1045210474
.flat_map(|part| match part.as_ident() {
1045310475
Some(ident) => ident
@@ -10466,7 +10488,7 @@ impl<'a> Parser<'a> {
1046610488
.collect()
1046710489
}
1046810490

10469-
Ok(ObjectName(idents))
10491+
Ok(ObjectName(parts))
1047010492
}
1047110493

1047210494
/// Parse identifiers
@@ -14038,25 +14060,25 @@ impl<'a> Parser<'a> {
1403814060
schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?,
1403914061
})
1404014062
} else if self.parse_keywords(&[Keyword::RESOURCE, Keyword::MONITOR]) {
14041-
Some(GrantObjects::ResourceMonitors(self.parse_comma_separated(
14042-
|p| p.parse_object_name_with_wildcards(false, true),
14043-
)?))
14063+
Some(GrantObjects::ResourceMonitors(
14064+
self.parse_comma_separated(|p| p.parse_object_name(false))?,
14065+
))
1404414066
} else if self.parse_keywords(&[Keyword::COMPUTE, Keyword::POOL]) {
14045-
Some(GrantObjects::ComputePools(self.parse_comma_separated(
14046-
|p| p.parse_object_name_with_wildcards(false, true),
14047-
)?))
14067+
Some(GrantObjects::ComputePools(
14068+
self.parse_comma_separated(|p| p.parse_object_name(false))?,
14069+
))
1404814070
} else if self.parse_keywords(&[Keyword::FAILOVER, Keyword::GROUP]) {
14049-
Some(GrantObjects::FailoverGroup(self.parse_comma_separated(
14050-
|p| p.parse_object_name_with_wildcards(false, true),
14051-
)?))
14071+
Some(GrantObjects::FailoverGroup(
14072+
self.parse_comma_separated(|p| p.parse_object_name(false))?,
14073+
))
1405214074
} else if self.parse_keywords(&[Keyword::REPLICATION, Keyword::GROUP]) {
14053-
Some(GrantObjects::ReplicationGroup(self.parse_comma_separated(
14054-
|p| p.parse_object_name_with_wildcards(false, true),
14055-
)?))
14075+
Some(GrantObjects::ReplicationGroup(
14076+
self.parse_comma_separated(|p| p.parse_object_name(false))?,
14077+
))
1405614078
} else if self.parse_keywords(&[Keyword::EXTERNAL, Keyword::VOLUME]) {
14057-
Some(GrantObjects::ExternalVolumes(self.parse_comma_separated(
14058-
|p| p.parse_object_name_with_wildcards(false, true),
14059-
)?))
14079+
Some(GrantObjects::ExternalVolumes(
14080+
self.parse_comma_separated(|p| p.parse_object_name(false))?,
14081+
))
1406014082
} else {
1406114083
let object_type = self.parse_one_of_keywords(&[
1406214084
Keyword::SEQUENCE,
@@ -14073,7 +14095,7 @@ impl<'a> Parser<'a> {
1407314095
Keyword::CONNECTION,
1407414096
]);
1407514097
let objects =
14076-
self.parse_comma_separated(|p| p.parse_object_name_with_wildcards(false, true));
14098+
self.parse_comma_separated(|p| p.parse_object_name_inner(false, true));
1407714099
match object_type {
1407814100
Some(Keyword::DATABASE) => Some(GrantObjects::Databases(objects?)),
1407914101
Some(Keyword::SCHEMA) => Some(GrantObjects::Schemas(objects?)),

tests/sqlparser_common.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1232,14 +1232,18 @@ fn parse_select_expr_star() {
12321232
"SELECT 2. * 3 FROM T",
12331233
);
12341234
dialects.verified_only_select("SELECT myfunc().* FROM T");
1235-
dialects.verified_only_select("SELECT myfunc().* EXCEPT (foo) FROM T");
12361235

12371236
// Invalid
12381237
let res = dialects.parse_sql_statements("SELECT foo.*.* FROM T");
12391238
assert_eq!(
12401239
ParserError::ParserError("Expected: end of statement, found: .".to_string()),
12411240
res.unwrap_err()
12421241
);
1242+
1243+
let dialects = all_dialects_where(|d| {
1244+
d.supports_select_expr_star() && d.supports_select_wildcard_except()
1245+
});
1246+
dialects.verified_only_select("SELECT myfunc().* EXCEPT (foo) FROM T");
12431247
}
12441248

12451249
#[test]

0 commit comments

Comments
 (0)