add BigQueryDialect

donhcd · donhcd · commit 4b840d2dd22a · 2020-12-11T22:38:03.000-08:00
diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs
@@ -0,0 +1,30 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::dialect::Dialect;
+
+/// A [`Dialect`] for Google BigQuery standard SQL.
+#[derive(Debug, Default)]
+pub struct BigQueryDialect;
+
+impl Dialect for BigQueryDialect {
+    // see https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers
+    /// Returns `true` if `ch` may begin an identifier: an ASCII letter or `_`.
+    fn is_identifier_start(&self, ch: char) -> bool {
+        ch.is_ascii_alphabetic() || ch == '_'
+    }
+
+    /// Returns `true` if `ch` may continue an identifier: an ASCII letter, an ASCII digit, or `_`.
+    fn is_identifier_part(&self, ch: char) -> bool {
+        ch.is_ascii_alphanumeric() || ch == '_'
+    }
+}
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
@@ -11,6 +11,7 @@
 // limitations under the License.
 
 mod ansi;
+mod bigquery;
 mod generic;
 pub mod keywords;
 mod mssql;
@@ -23,6 +24,7 @@ use std::any::{Any, TypeId};
 use std::fmt::Debug;
 
 pub use self::ansi::AnsiDialect;
+pub use self::bigquery::BigQueryDialect;
 pub use self::generic::GenericDialect;
 pub use self::mssql::MsSqlDialect;
 pub use self::mysql::MySqlDialect;
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
@@ -20,6 +20,7 @@ use std::iter::Peekable;
 use std::str::Chars;
 
 use super::dialect::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
+use super::dialect::BigQueryDialect;
 use super::dialect::Dialect;
 use super::dialect::PostgreSqlDialect;
 use super::dialect::SnowflakeDialect;
@@ -465,7 +466,7 @@ impl<'a> Tokenizer<'a> {
                             chars.next(); // consume the '*', starting a multi-line comment
                             self.tokenize_multiline_comment(chars)
                         }
-                        Some('/') if dialect_of!(self is SnowflakeDialect) => {
+                        Some('/') if dialect_of!(self is SnowflakeDialect | BigQueryDialect) => {
                             chars.next(); // consume the second '/', starting a snowflake single-line comment
                             let comment = self.tokenize_single_line_comment(chars);
                             Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
@@ -542,7 +543,7 @@ impl<'a> Tokenizer<'a> {
                 '^' => self.consume_and_return(chars, Token::Caret),
                 '{' => self.consume_and_return(chars, Token::LBrace),
                 '}' => self.consume_and_return(chars, Token::RBrace),
-                '#' if dialect_of!(self is SnowflakeDialect) => {
+                '#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect) => {
                     chars.next(); // consume the '#', starting a snowflake single-line comment
                     let comment = self.tokenize_single_line_comment(chars);
                     Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {