From 12cba60e52d00d1e21357e3ad35362c963524ca1 Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Wed, 11 Jun 2025 14:48:09 +0200 Subject: [PATCH 1/5] Support `DISTINCT AS { STRUCT | VALUE }` for BigQuery According to the query syntax at https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_list you can combine `[ { ALL | DISTINCT } ]` with `[ AS { STRUCT | VALUE } ]`: ```sh SELECT [ WITH differential_privacy_clause ] [ { ALL | DISTINCT } ] [ AS { STRUCT | VALUE } ] select_list ``` This adds support to parse `DISTINCT` or `ALL` as the first keyword after `SELECT` and adds two new variants to the `ValueTableMode` if defined. --- src/ast/query.rs | 9 +++++++++ src/parser/mod.rs | 26 +++++++++++++++++++++++--- tests/sqlparser_bigquery.rs | 11 +++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 4398531cb7..003ae47b3c 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -3340,13 +3340,20 @@ impl fmt::Display for OpenJsonTableColumn { /// BigQuery supports ValueTables which have 2 modes: /// `SELECT AS STRUCT` /// `SELECT AS VALUE` +/// +/// They can be combined with `[ { ALL | DISTINCT } ]`, e.g. 
+/// `SELECT DISTINCT AS STRUCT` +/// /// +/// #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ValueTableMode { AsStruct, AsValue, + DistinctAsStruct, + DistinctAsValue, } impl fmt::Display for ValueTableMode { @@ -3354,6 +3361,8 @@ impl fmt::Display for ValueTableMode { match self { ValueTableMode::AsStruct => write!(f, "AS STRUCT"), ValueTableMode::AsValue => write!(f, "AS VALUE"), + ValueTableMode::DistinctAsStruct => write!(f, "DISTINCT AS STRUCT"), + ValueTableMode::DistinctAsValue => write!(f, "DISTINCT AS VALUE"), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6831d52e06..6682f287cd 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11505,12 +11505,22 @@ impl<'a> Parser<'a> { } let select_token = self.expect_keyword(Keyword::SELECT)?; + let distinct_pre_as = self.parse_all_or_distinct()?; + let value_table_mode = if dialect_of!(self is BigQueryDialect) && self.parse_keyword(Keyword::AS) { if self.parse_keyword(Keyword::VALUE) { - Some(ValueTableMode::AsValue) + if distinct_pre_as.is_some() { + Some(ValueTableMode::DistinctAsValue) + } else { + Some(ValueTableMode::AsValue) + } } else if self.parse_keyword(Keyword::STRUCT) { - Some(ValueTableMode::AsStruct) + if distinct_pre_as.is_some() { + Some(ValueTableMode::DistinctAsStruct) + } else { + Some(ValueTableMode::AsStruct) + } } else { self.expected("VALUE or STRUCT", self.peek_token())? } @@ -11524,7 +11534,17 @@ impl<'a> Parser<'a> { top = Some(self.parse_top()?); top_before_distinct = true; } - let distinct = self.parse_all_or_distinct()?; + + // If we parsed a `DISTINCT` value before checking `ValueTableMode` and it is set to some, + // but we didn't have an `AS`, this is the initial `DISTINCT` value in the `SELECT` and + // should be re-used. 
+ // If we don't have a `DISTINCT` parsed or if it was consumed for the `ValueTableMode` we + // look for `DISTINCT` again. + let distinct = if value_table_mode.is_none() && distinct_pre_as.is_some() { + distinct_pre_as + } else { + self.parse_all_or_distinct()? + }; if !self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { top = Some(self.parse_top()?); } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 8f54f3c974..3a5bdaf760 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2377,3 +2377,14 @@ fn test_any_type() { fn test_any_type_dont_break_custom_type() { bigquery_and_generic().verified_stmt("CREATE TABLE foo (x ANY)"); } + +#[test] +fn test_select_distinct_as_struct_or_value() { + for sql in [ + "SELECT DISTINCT AS STRUCT a, ABS(b) FROM UNNEST(c) AS T", + "SELECT DISTINCT AS VALUE a, ABS(b) FROM UNNEST(c) AS T", + "SELECT ARRAY(SELECT DISTINCT AS STRUCT a, b, ABS(c) AS c, ABS(d) AS d FROM UNNEST(e) AS T)", + ] { + bigquery().verified_stmt(sql); + } +} From 6a774dabfc3573679a3ba392244f20461c0cb0f5 Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Wed, 11 Jun 2025 15:37:27 +0200 Subject: [PATCH 2/5] Only check for `DISTINCT AS` together and for BigQuery --- src/parser/mod.rs | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6682f287cd..e45332d4d0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11505,28 +11505,29 @@ impl<'a> Parser<'a> { } let select_token = self.expect_keyword(Keyword::SELECT)?; - let distinct_pre_as = self.parse_all_or_distinct()?; - - let value_table_mode = - if dialect_of!(self is BigQueryDialect) && self.parse_keyword(Keyword::AS) { + let value_table_mode = if dialect_of!(self is BigQueryDialect) { + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::AS]) { if self.parse_keyword(Keyword::VALUE) { - if distinct_pre_as.is_some() { - 
Some(ValueTableMode::DistinctAsValue) - } else { - Some(ValueTableMode::AsValue) - } + Some(ValueTableMode::DistinctAsValue) } else if self.parse_keyword(Keyword::STRUCT) { - if distinct_pre_as.is_some() { - Some(ValueTableMode::DistinctAsStruct) - } else { - Some(ValueTableMode::AsStruct) - } + Some(ValueTableMode::DistinctAsStruct) + } else { + self.expected("VALUE or STRUCT", self.peek_token())? + } + } else if self.parse_keyword(Keyword::AS) { + if self.parse_keyword(Keyword::VALUE) { + Some(ValueTableMode::AsValue) + } else if self.parse_keyword(Keyword::STRUCT) { + Some(ValueTableMode::AsStruct) } else { self.expected("VALUE or STRUCT", self.peek_token())? } } else { None - }; + } + } else { + None + }; let mut top_before_distinct = false; let mut top = None; @@ -11534,17 +11535,7 @@ impl<'a> Parser<'a> { top = Some(self.parse_top()?); top_before_distinct = true; } - - // If we parsed a `DISTINCT` value before checking `ValueTableMode` and it is set to some, - // but we didn't have an `AS`, this is the initial `DISTINCT` value in the `SELECT` and - // should be re-used. - // If we don't have a `DISTINCT` parsed or if it was consumed for the `ValueTableMode` we - // look for `DISTINCT` again. - let distinct = if value_table_mode.is_none() && distinct_pre_as.is_some() { - distinct_pre_as - } else { - self.parse_all_or_distinct()? - }; + let distinct = self.parse_all_or_distinct()?; if !self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { top = Some(self.parse_top()?); } From 68f4519272cd93853b294cddb5b63960d05608c1 Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Wed, 11 Jun 2025 17:11:36 +0200 Subject: [PATCH 3/5] Also support `ALL AS` for BigQuery This does, similar to regular `ALL`, not store that state on the `Select` struct so it's not tested as a `verified_stmt`. 
--- src/parser/mod.rs | 4 +++- tests/sqlparser_bigquery.rs | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e45332d4d0..664a21024d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11514,7 +11514,9 @@ impl<'a> Parser<'a> { } else { self.expected("VALUE or STRUCT", self.peek_token())? } - } else if self.parse_keyword(Keyword::AS) { + } else if self.parse_keyword(Keyword::AS) + || self.parse_keywords(&[Keyword::ALL, Keyword::AS]) + { if self.parse_keyword(Keyword::VALUE) { Some(ValueTableMode::AsValue) } else if self.parse_keyword(Keyword::STRUCT) { diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 3a5bdaf760..7e97396dc5 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2379,7 +2379,7 @@ fn test_any_type_dont_break_custom_type() { } #[test] -fn test_select_distinct_as_struct_or_value() { +fn test_select_distinct_or_all_as_struct_or_value() { for sql in [ "SELECT DISTINCT AS STRUCT a, ABS(b) FROM UNNEST(c) AS T", "SELECT DISTINCT AS VALUE a, ABS(b) FROM UNNEST(c) AS T", @@ -2387,4 +2387,11 @@ fn test_select_distinct_as_struct_or_value() { ] { bigquery().verified_stmt(sql); } + + for sql in [ + "SELECT ALL AS STRUCT a, ABS(b) FROM UNNEST(c) AS T", + "SELECT ALL AS VALUE a, ABS(b) FROM UNNEST(c) AS T", + ] { + assert!(bigquery().parse_sql_statements(sql).is_ok()); + } } From 2e00c845d955e7e25077befb869efba3bab51f26 Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Wed, 11 Jun 2025 17:13:47 +0200 Subject: [PATCH 4/5] Use `one_statement_parses_to` --- tests/sqlparser_bigquery.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 7e97396dc5..ba6c74d6af 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2388,10 +2388,16 @@ fn test_select_distinct_or_all_as_struct_or_value() { bigquery().verified_stmt(sql); } - for sql 
in [ - "SELECT ALL AS STRUCT a, ABS(b) FROM UNNEST(c) AS T", - "SELECT ALL AS VALUE a, ABS(b) FROM UNNEST(c) AS T", + for (sql, parse_to) in [ + ( + "SELECT ALL AS STRUCT a, ABS(b) FROM UNNEST(c) AS T", + "SELECT AS STRUCT a, ABS(b) FROM UNNEST(c) AS T", + ), + ( + "SELECT ALL AS VALUE a, ABS(b) FROM UNNEST(c) AS T", + "SELECT AS VALUE a, ABS(b) FROM UNNEST(c) AS T", + ), ] { - assert!(bigquery().parse_sql_statements(sql).is_ok()); + bigquery().one_statement_parses_to(sql, parse_to); } } From ff403cbf29dec9d9172249a3e764dd61284f904a Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Wed, 11 Jun 2025 17:33:40 +0200 Subject: [PATCH 5/5] Add `parse_value_table_mode`, move tests, update docs --- src/ast/query.rs | 7 ++--- src/parser/mod.rs | 52 ++++++++++++++++--------------- tests/sqlparser_bigquery.rs | 62 ++++++++++++++++++++----------------- 3 files changed, 63 insertions(+), 58 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 003ae47b3c..1fb93b6c68 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -3338,11 +3338,8 @@ impl fmt::Display for OpenJsonTableColumn { } /// BigQuery supports ValueTables which have 2 modes: -/// `SELECT AS STRUCT` -/// `SELECT AS VALUE` -/// -/// They can be combined with `[ { ALL | DISTINCT } ]`, e.g. 
-/// `SELECT DISTINCT AS STRUCT` +/// `SELECT [ALL | DISTINCT] AS STRUCT` +/// `SELECT [ALL | DISTINCT] AS VALUE` /// /// /// diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 664a21024d..2c208e2e54 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11505,31 +11505,7 @@ impl<'a> Parser<'a> { } let select_token = self.expect_keyword(Keyword::SELECT)?; - let value_table_mode = if dialect_of!(self is BigQueryDialect) { - if self.parse_keywords(&[Keyword::DISTINCT, Keyword::AS]) { - if self.parse_keyword(Keyword::VALUE) { - Some(ValueTableMode::DistinctAsValue) - } else if self.parse_keyword(Keyword::STRUCT) { - Some(ValueTableMode::DistinctAsStruct) - } else { - self.expected("VALUE or STRUCT", self.peek_token())? - } - } else if self.parse_keyword(Keyword::AS) - || self.parse_keywords(&[Keyword::ALL, Keyword::AS]) - { - if self.parse_keyword(Keyword::VALUE) { - Some(ValueTableMode::AsValue) - } else if self.parse_keyword(Keyword::STRUCT) { - Some(ValueTableMode::AsStruct) - } else { - self.expected("VALUE or STRUCT", self.peek_token())? 
- } - } else { - None - } - } else { - None - }; + let value_table_mode = self.parse_value_table_mode()?; let mut top_before_distinct = false; let mut top = None; @@ -11705,6 +11681,32 @@ impl<'a> Parser<'a> { }) } + fn parse_value_table_mode(&mut self) -> Result<Option<ValueTableMode>, ParserError> { + if !dialect_of!(self is BigQueryDialect) { + return Ok(None); + } + + let mode = if self.parse_keywords(&[Keyword::DISTINCT, Keyword::AS, Keyword::VALUE]) { + Some(ValueTableMode::DistinctAsValue) + } else if self.parse_keywords(&[Keyword::DISTINCT, Keyword::AS, Keyword::STRUCT]) { + Some(ValueTableMode::DistinctAsStruct) + } else if self.parse_keywords(&[Keyword::AS, Keyword::VALUE]) + || self.parse_keywords(&[Keyword::ALL, Keyword::AS, Keyword::VALUE]) + { + Some(ValueTableMode::AsValue) + } else if self.parse_keywords(&[Keyword::AS, Keyword::STRUCT]) + || self.parse_keywords(&[Keyword::ALL, Keyword::AS, Keyword::STRUCT]) + { + Some(ValueTableMode::AsStruct) + } else if self.parse_keyword(Keyword::AS) { + self.expected("VALUE or STRUCT", self.peek_token())? + } else { + None + }; + + Ok(mode) + } + /// Invoke `f` after first setting the parser's `ParserState` to `state`. /// /// Upon return, restores the parser's state to what it started at. 
diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index ba6c74d6af..b64f190f63 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2313,16 +2313,46 @@ fn bigquery_select_expr_star() { #[test] fn test_select_as_struct() { - bigquery().verified_only_select("SELECT * FROM (SELECT AS VALUE STRUCT(123 AS a, false AS b))"); + for (sql, parse_to) in [ + ( + "SELECT * FROM (SELECT AS STRUCT STRUCT(123 AS a, false AS b))", + "SELECT * FROM (SELECT AS STRUCT STRUCT(123 AS a, false AS b))", + ), + ( + "SELECT * FROM (SELECT DISTINCT AS STRUCT STRUCT(123 AS a, false AS b))", + "SELECT * FROM (SELECT DISTINCT AS STRUCT STRUCT(123 AS a, false AS b))", + ), + ( + "SELECT * FROM (SELECT ALL AS STRUCT STRUCT(123 AS a, false AS b))", + "SELECT * FROM (SELECT AS STRUCT STRUCT(123 AS a, false AS b))", + ), + ] { + bigquery().one_statement_parses_to(sql, parse_to); + } + let select = bigquery().verified_only_select("SELECT AS STRUCT 1 AS a, 2 AS b"); assert_eq!(Some(ValueTableMode::AsStruct), select.value_table_mode); } #[test] fn test_select_as_value() { - bigquery().verified_only_select( - "SELECT * FROM (SELECT AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", - ); + for (sql, parse_to) in [ + ( + "SELECT * FROM (SELECT AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", + "SELECT * FROM (SELECT AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", + ), + ( + "SELECT * FROM (SELECT DISTINCT AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", + "SELECT * FROM (SELECT DISTINCT AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", + ), + ( + "SELECT * FROM (SELECT ALL AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", + "SELECT * FROM (SELECT AS VALUE STRUCT(5 AS star_rating, false AS up_down_rating))", + ), + ] { + bigquery().one_statement_parses_to(sql, parse_to); + } + let select = bigquery().verified_only_select("SELECT AS VALUE STRUCT(1 AS a, 2 AS b) AS xyz"); 
assert_eq!(Some(ValueTableMode::AsValue), select.value_table_mode); } @@ -2377,27 +2407,3 @@ fn test_any_type() { fn test_any_type_dont_break_custom_type() { bigquery_and_generic().verified_stmt("CREATE TABLE foo (x ANY)"); } - -#[test] -fn test_select_distinct_or_all_as_struct_or_value() { - for sql in [ - "SELECT DISTINCT AS STRUCT a, ABS(b) FROM UNNEST(c) AS T", - "SELECT DISTINCT AS VALUE a, ABS(b) FROM UNNEST(c) AS T", - "SELECT ARRAY(SELECT DISTINCT AS STRUCT a, b, ABS(c) AS c, ABS(d) AS d FROM UNNEST(e) AS T)", - ] { - bigquery().verified_stmt(sql); - } - - for (sql, parse_to) in [ - ( - "SELECT ALL AS STRUCT a, ABS(b) FROM UNNEST(c) AS T", - "SELECT AS STRUCT a, ABS(b) FROM UNNEST(c) AS T", - ), - ( - "SELECT ALL AS VALUE a, ABS(b) FROM UNNEST(c) AS T", - "SELECT AS VALUE a, ABS(b) FROM UNNEST(c) AS T", - ), - ] { - bigquery().one_statement_parses_to(sql, parse_to); - } -}