Skip to content

Commit 45334af

Browse files
yoavcloud authored and ayman-sigma committed
Snowflake Reserved SQL Keywords as Implicit Table Alias (apache#1934)
1 parent 6838c54 commit 45334af

File tree

4 files changed

+163
-12
lines changed

4 files changed

+163
-12
lines changed

src/dialect/mod.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -997,11 +997,17 @@ pub trait Dialect: Debug + Any {
997997
explicit || self.is_column_alias(kw, parser)
998998
}
999999

1000+
/// Returns true if the specified keyword should be parsed as a table identifier.
1001+
/// See [keywords::RESERVED_FOR_TABLE_ALIAS]
1002+
fn is_table_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
1003+
!keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw)
1004+
}
1005+
10001006
/// Returns true if the specified keyword should be parsed as a table factor alias.
10011007
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
10021008
/// to enable looking ahead if needed.
1003-
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
1004-
explicit || !keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw)
1009+
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
1010+
explicit || self.is_table_alias(kw, parser)
10051011
}
10061012

10071013
/// Returns true if this dialect supports querying historical table data

src/dialect/snowflake.rs

Lines changed: 85 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -318,9 +318,11 @@ impl Dialect for SnowflakeDialect {
318318
}
319319

320320
// `FETCH` can be considered an alias as long as it's not followed by `FIRST` or `NEXT`
321-
// which would give it a different meanings, for example: `SELECT 1 FETCH FIRST 10 ROWS` - not an alias
322-
Keyword::FETCH
323-
if parser.peek_keyword(Keyword::FIRST) || parser.peek_keyword(Keyword::NEXT) =>
321+
// which would give it a different meaning, for example:
322+
// `SELECT 1 FETCH FIRST 10 ROWS` - not an alias
323+
// `SELECT 1 FETCH 10` - not an alias
324+
Keyword::FETCH if parser.peek_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT]).is_some()
325+
|| matches!(parser.peek_token().token, Token::Number(_, _)) =>
324326
{
325327
false
326328
}
@@ -345,6 +347,86 @@ impl Dialect for SnowflakeDialect {
345347
}
346348
}
347349

350+
fn is_table_alias(&self, kw: &Keyword, parser: &mut Parser) -> bool {
351+
match kw {
352+
// The following keywords can be considered an alias as long as
353+
// they are not followed by other tokens that may change their meaning
354+
Keyword::LIMIT
355+
| Keyword::RETURNING
356+
| Keyword::INNER
357+
| Keyword::USING
358+
| Keyword::PIVOT
359+
| Keyword::UNPIVOT
360+
| Keyword::EXCEPT
361+
| Keyword::MATCH_RECOGNIZE
362+
| Keyword::OFFSET
363+
if !matches!(parser.peek_token_ref().token, Token::SemiColon | Token::EOF) =>
364+
{
365+
false
366+
}
367+
368+
// `FETCH` can be considered an alias as long as it's not followed by `FIRST`` or `NEXT`
369+
// which would give it a different meanings, for example:
370+
// `SELECT * FROM tbl FETCH FIRST 10 ROWS` - not an alias
371+
// `SELECT * FROM tbl FETCH 10` - not an alias
372+
Keyword::FETCH
373+
if parser
374+
.peek_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT])
375+
.is_some()
376+
|| matches!(parser.peek_token().token, Token::Number(_, _)) =>
377+
{
378+
false
379+
}
380+
381+
// All sorts of join-related keywords can be considered aliases unless additional
382+
// keywords change their meaning.
383+
Keyword::RIGHT | Keyword::LEFT | Keyword::SEMI | Keyword::ANTI
384+
if parser
385+
.peek_one_of_keywords(&[Keyword::JOIN, Keyword::OUTER])
386+
.is_some() =>
387+
{
388+
false
389+
}
390+
Keyword::GLOBAL if parser.peek_keyword(Keyword::FULL) => false,
391+
392+
// Reserved keywords by the Snowflake dialect, which seem to be less strictive
393+
// than what is listed in `keywords::RESERVED_FOR_TABLE_ALIAS`. The following
394+
// keywords were tested with the this statement: `SELECT <KW>.* FROM tbl <KW>`.
395+
Keyword::WITH
396+
| Keyword::ORDER
397+
| Keyword::SELECT
398+
| Keyword::WHERE
399+
| Keyword::GROUP
400+
| Keyword::HAVING
401+
| Keyword::LATERAL
402+
| Keyword::UNION
403+
| Keyword::INTERSECT
404+
| Keyword::MINUS
405+
| Keyword::ON
406+
| Keyword::JOIN
407+
| Keyword::INNER
408+
| Keyword::CROSS
409+
| Keyword::FULL
410+
| Keyword::LEFT
411+
| Keyword::RIGHT
412+
| Keyword::NATURAL
413+
| Keyword::USING
414+
| Keyword::ASOF
415+
| Keyword::MATCH_CONDITION
416+
| Keyword::SET
417+
| Keyword::QUALIFY
418+
| Keyword::FOR
419+
| Keyword::START
420+
| Keyword::CONNECT
421+
| Keyword::SAMPLE
422+
| Keyword::TABLESAMPLE
423+
| Keyword::FROM => false,
424+
425+
// Any other word is considered an alias
426+
_ => true,
427+
}
428+
}
429+
348430
/// See: <https://docs.snowflake.com/en/sql-reference/constructs/at-before>
349431
fn supports_timestamp_versioning(&self) -> bool {
350432
true

tests/sqlparser_common.rs

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5549,7 +5549,8 @@ fn parse_named_window_functions() {
55495549
WINDOW w AS (PARTITION BY x), win AS (ORDER BY y)";
55505550
supported_dialects.verified_stmt(sql);
55515551

5552-
let select = verified_only_select(sql);
5552+
let select = all_dialects_except(|d| d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d)))
5553+
.verified_only_select(sql);
55535554

55545555
const EXPECTED_PROJ_QTY: usize = 2;
55555556
assert_eq!(EXPECTED_PROJ_QTY, select.projection.len());
@@ -5579,6 +5580,7 @@ fn parse_named_window_functions() {
55795580

55805581
#[test]
55815582
fn parse_window_clause() {
5583+
let dialects = all_dialects_except(|d| d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d)));
55825584
let sql = "SELECT * \
55835585
FROM mytable \
55845586
WINDOW \
@@ -5591,10 +5593,14 @@ fn parse_window_clause() {
55915593
window7 AS (window1 ROWS UNBOUNDED PRECEDING), \
55925594
window8 AS (window1 PARTITION BY a ORDER BY b ROWS UNBOUNDED PRECEDING) \
55935595
ORDER BY C3";
5594-
verified_only_select(sql);
5596+
dialects.verified_only_select(sql);
55955597

55965598
let sql = "SELECT * from mytable WINDOW window1 AS window2";
5597-
let dialects = all_dialects_except(|d| d.is::<BigQueryDialect>() || d.is::<GenericDialect>());
5599+
let dialects = all_dialects_except(|d| {
5600+
d.is::<BigQueryDialect>()
5601+
|| d.is::<GenericDialect>()
5602+
|| d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d))
5603+
});
55985604
let res = dialects.parse_sql_statements(sql);
55995605
assert_eq!(
56005606
ParserError::ParserError("Expected: (, found: window2".to_string()),
@@ -5604,14 +5610,15 @@ fn parse_window_clause() {
56045610

56055611
#[test]
56065612
fn test_parse_named_window() {
5613+
let dialects = all_dialects_except(|d| d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d)));
56075614
let sql = "SELECT \
56085615
MIN(c12) OVER window1 AS min1, \
56095616
MAX(c12) OVER window2 AS max1 \
56105617
FROM aggregate_test_100 \
56115618
WINDOW window1 AS (ORDER BY C12), \
56125619
window2 AS (PARTITION BY C11) \
56135620
ORDER BY C3";
5614-
let actual_select_only = verified_only_select(sql);
5621+
let actual_select_only = dialects.verified_only_select(sql);
56155622
let expected = Select {
56165623
select_token: AttachedToken::empty(),
56175624
distinct: None,
@@ -5760,14 +5767,18 @@ fn test_parse_named_window() {
57605767

57615768
#[test]
57625769
fn parse_window_and_qualify_clause() {
5770+
let dialects = all_dialects_except(|d| {
5771+
d.is_table_alias(&Keyword::WINDOW, &mut Parser::new(d))
5772+
|| d.is_table_alias(&Keyword::QUALIFY, &mut Parser::new(d))
5773+
});
57635774
let sql = "SELECT \
57645775
MIN(c12) OVER window1 AS min1 \
57655776
FROM aggregate_test_100 \
57665777
QUALIFY ROW_NUMBER() OVER my_window \
57675778
WINDOW window1 AS (ORDER BY C12), \
57685779
window2 AS (PARTITION BY C11) \
57695780
ORDER BY C3";
5770-
verified_only_select(sql);
5781+
dialects.verified_only_select(sql);
57715782

57725783
let sql = "SELECT \
57735784
MIN(c12) OVER window1 AS min1 \
@@ -5776,7 +5787,7 @@ fn parse_window_and_qualify_clause() {
57765787
window2 AS (PARTITION BY C11) \
57775788
QUALIFY ROW_NUMBER() OVER my_window \
57785789
ORDER BY C3";
5779-
verified_only_select(sql);
5790+
dialects.verified_only_select(sql);
57805791
}
57815792

57825793
#[test]
@@ -7444,7 +7455,8 @@ fn parse_join_syntax_variants() {
74447455
"SELECT c1 FROM t1 FULL JOIN t2 USING(c1)",
74457456
);
74467457

7447-
let res = parse_sql_statements("SELECT * FROM a OUTER JOIN b ON 1");
7458+
let dialects = all_dialects_except(|d| d.is_table_alias(&Keyword::OUTER, &mut Parser::new(d)));
7459+
let res = dialects.parse_sql_statements("SELECT * FROM a OUTER JOIN b ON 1");
74487460
assert_eq!(
74497461
ParserError::ParserError("Expected: APPLY, found: JOIN".to_string()),
74507462
res.unwrap_err()

tests/sqlparser_snowflake.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3511,6 +3511,57 @@ fn test_sql_keywords_as_select_item_aliases() {
35113511
}
35123512
}
35133513

3514+
#[test]
fn test_sql_keywords_as_table_aliases() {
    // Keywords expected to be accepted as a table alias without `AS`.
    let implicit_alias_kws = [
        "VIEW",
        "EXPLAIN",
        "ANALYZE",
        "SORT",
        "PIVOT",
        "UNPIVOT",
        "TOP",
        "LIMIT",
        "OFFSET",
        "FETCH",
        "EXCEPT",
        "CLUSTER",
        "DISTRIBUTE",
        "GLOBAL",
        "ANTI",
        "SEMI",
        "RETURNING",
        "OUTER",
        "WINDOW",
        "END",
        "PARTITION",
        "PREWHERE",
        "SETTINGS",
        "FORMAT",
        "MATCH_RECOGNIZE",
        "OPEN",
    ];

    for kw in &implicit_alias_kws {
        // The explicit form must round-trip unchanged, and the implicit form
        // must serialize to the explicit one.
        let explicit_sql = format!("SELECT * FROM tbl AS {kw}");
        let implicit_sql = format!("SELECT * FROM tbl {kw}");
        snowflake().verified_stmt(&explicit_sql);
        snowflake().one_statement_parses_to(&implicit_sql, &explicit_sql);
    }

    // Keywords expected to be rejected when they appear in the implicit alias
    // position.
    let reserved_kws = [
        "FROM", "GROUP", "HAVING", "ORDER", "SELECT", "UNION", "WHERE", "WITH",
    ];

    for kw in &reserved_kws {
        assert!(snowflake()
            .parse_sql_statements(&format!("SELECT * FROM tbl {kw}"))
            .is_err());
    }
}
3564+
35143565
#[test]
35153566
fn test_timetravel_at_before() {
35163567
snowflake().verified_only_select("SELECT * FROM tbl AT(TIMESTAMP => '2024-12-15 00:00:00')");

0 commit comments

Comments
 (0)