@@ -46,6 +46,10 @@ pub enum Token {
4646 SingleQuotedString ( String ) ,
4747 /// Backtick quoted string: i.e: `string`
4848 BacktickQuotedString ( String ) ,
/// BigQuery `r`-prefixed quoted string: i.e: r'string' or r"string"
49+ BqRegexQuotedString {
// the text found between the opening and closing quote
50+ value : String ,
// the delimiter the literal was written with: `'` or `"`
51+ quote : char ,
52+ } ,
4953 /// "National" string literal: i.e: N'string'
5054 NationalStringLiteral ( String ) ,
5155 /// Hexadecimal string literal: i.e.: X'deadbeef'
@@ -163,6 +167,9 @@ impl fmt::Display for Token {
163167 Token :: SingleQuotedString ( ref s) => write ! ( f, "'{}'" , s) ,
164168 Token :: BacktickQuotedString ( ref s) => write ! ( f, "`{}`" , s) ,
165169 Token :: NationalStringLiteral ( ref s) => write ! ( f, "N'{}'" , s) ,
// Re-emit as r<quote><value><quote>, reusing the delimiter stored in the token.
// The prefix is always written as a lowercase `r`: the token does not record
// whether the source used `r` or `R`.
170+ Token :: BqRegexQuotedString { ref value, quote } => {
171+ write ! ( f, "r{}{}{}" , quote, value, quote)
172+ }
166173 Token :: HexStringLiteral ( ref s) => write ! ( f, "X'{}'" , s) ,
167174 Token :: Comma => f. write_str ( "," ) ,
168175 Token :: Whitespace ( ws) => write ! ( f, "{}" , ws) ,
@@ -372,6 +379,26 @@ impl<'a> Tokenizer<'a> {
372379 }
373380 Ok ( Some ( Token :: Whitespace ( Whitespace :: Newline ) ) )
374381 }
// BigQuery only: an `r` or `R` immediately followed by a quote character
// starts an r-prefixed string literal; anything else is an ordinary word
// that happens to begin with that letter.
382+ r @ 'r' | r @ 'R' if dialect_of ! ( self is BigQueryDialect ) => {
383+ chars. next ( ) ; // consume, to check the next char
// Only peek at the quote: the string readers below consume the opening
// quote themselves.
384+ match chars. peek ( ) {
385+ Some ( '\'' ) => {
386+ // r'...' - a regex literal
// NOTE(review): BigQuery documents the r prefix as a *raw* string, where
// backslashes are kept verbatim. The shared quoted-string reader has a
// backslash-escape branch - confirm it preserves the backslash here.
387+ let value = self . tokenize_single_quoted_string ( chars) ?;
388+ Ok ( Some ( Token :: BqRegexQuotedString { value, quote : '\'' } ) )
389+ }
390+ Some ( '"' ) => {
391+ // r"..." - a regex literal
392+ let value = self . tokenize_double_quoted_string ( chars) ?;
393+ Ok ( Some ( Token :: BqRegexQuotedString { value, quote : '"' } ) )
394+ }
395+ _ => {
396+ // regular identifier starting with an "r" or "R"
// `r` was already consumed above, so it is handed to the word reader
// as the first character.
397+ let s = self . tokenize_word ( r, chars) ;
398+ Ok ( Some ( Token :: make_word ( & s, None ) ) )
399+ }
400+ }
401+ }
375402 'N' => {
376403 chars. next ( ) ; // consume, to check the next char
377404 match chars. peek ( ) {
@@ -664,11 +691,29 @@ impl<'a> Tokenizer<'a> {
664691 fn tokenize_single_quoted_string (
665692 & self ,
666693 chars : & mut Peekable < Chars < ' _ > > ,
694+ ) -> Result < String , TokenizerError > {
// Thin wrapper: delegate to the generic reader with `'` as the delimiter.
695+ self . tokenize_quoted_string ( chars, '\'' )
696+ }
697+
698+ /// Read a double quoted string, starting with the opening quote.
///
/// `chars` must be positioned on the opening `"`; the result is whatever
/// `tokenize_quoted_string` produces for that delimiter.
699+ fn tokenize_double_quoted_string (
700+ & self ,
701+ chars : & mut Peekable < Chars < ' _ > > ,
702+ ) -> Result < String , TokenizerError > {
// Thin wrapper: delegate to the generic reader with `"` as the delimiter.
703+ self . tokenize_quoted_string ( chars, '"' )
704+ }
705+
706+ /// Read a quoted string (quoted by any character, typically ' or "),
707+ /// starting with the opening quote.
708+ fn tokenize_quoted_string (
709+ & self ,
710+ chars : & mut Peekable < Chars < ' _ > > ,
711+ quote_ch : char ,
667712 ) -> Result < String , TokenizerError > {
668713 let mut s = String :: new ( ) ;
669714 chars. next ( ) ; // consume the opening quote
670715 while let Some ( ch) = chars. next ( ) {
671- let next_char_is_quote = chars. peek ( ) . map ( |c| * c == '\'' ) . unwrap_or ( false ) ;
716+ let next_char_is_quote = chars. peek ( ) . map ( |c| * c == quote_ch ) . unwrap_or ( false ) ;
672717 match ch {
673718 // allow backslash to escape the next character, whatever it is
674719 '\\' => {
@@ -680,14 +725,14 @@ impl<'a> Tokenizer<'a> {
680725 // bq allows escaping only with backslash; other warehouses
681726 // allow escaping the quote character by repeating it
682727 _ if !dialect_of ! ( self is BigQueryDialect )
683- && ch == '\''
728+ && ch == quote_ch
684729 && next_char_is_quote =>
685730 {
686- s. push ( '\'' ) ;
687- s. push ( '\'' ) ;
688- chars. next ( ) ; // consume '
731+ s. push ( quote_ch ) ;
732+ s. push ( quote_ch ) ;
733+ chars. next ( ) ; // consume quote_ch
689734 }
690- '\'' => return Ok ( s) ,
735+ ch if ch == quote_ch => return Ok ( s) ,
691736 _ => s. push ( ch) ,
692737 }
693738 }
0 commit comments