@@ -901,13 +901,69 @@ impl<'a> Tokenizer<'a> {
901901 while let Some ( token) = self . next_token ( & mut state, buf. last ( ) . map ( |t| & t. token ) ) ? {
902902 let span = location. span_to ( state. location ( ) ) ;
903903
904- buf. push ( TokenWithSpan { token, span } ) ;
904+ // Check if this is a multiline comment hint that should be expanded
905+ match & token {
906+ Token :: Whitespace ( Whitespace :: MultiLineComment ( comment) )
907+ if self . dialect . supports_multiline_comment_hints ( )
908+ && comment. starts_with ( '!' ) =>
909+ {
910+ // Re-tokenize the hints and add them to the buffer
911+ self . tokenize_comment_hints ( comment, span, buf) ?;
912+ }
913+ _ => {
914+ buf. push ( TokenWithSpan { token, span } ) ;
915+ }
916+ }
905917
906918 location = state. location ( ) ;
907919 }
908920 Ok ( ( ) )
909921 }
910922
923+ /// Re-tokenize optimizer hints from a multiline comment and add them to the buffer.
924+ /// For example, `/*!50110 KEY_BLOCK_SIZE = 1024*/` becomes tokens for `KEY_BLOCK_SIZE = 1024`
925+ fn tokenize_comment_hints (
926+ & self ,
927+ comment : & str ,
928+ span : Span ,
929+ buf : & mut Vec < TokenWithSpan > ,
930+ ) -> Result < ( ) , TokenizerError > {
931+ // Strip the leading '!' and any version digits (e.g., "50110")
932+ let hint_content = comment
933+ . strip_prefix ( '!' )
934+ . unwrap_or ( comment)
935+ . trim_start_matches ( |c : char | c. is_ascii_digit ( ) )
936+ . trim ( ) ;
937+
938+ // If there's no content after stripping, nothing to tokenize
939+ if hint_content. is_empty ( ) {
940+ return Ok ( ( ) ) ;
941+ }
942+
943+ // Create a new tokenizer for the hint content
944+ let mut inner = Tokenizer :: new ( self . dialect , hint_content) . with_unescape ( self . unescape ) ;
945+
946+ // Create a state for tracking position within the hint
947+ let mut state = State {
948+ peekable : hint_content. chars ( ) . peekable ( ) ,
949+ line : span. start . line ,
950+ col : span. start . column ,
951+ } ;
952+
953+ // Tokenize the hint content and add tokens to the buffer
954+ let mut location = state. location ( ) ;
955+ while let Some ( token) = inner. next_token ( & mut state, buf. last ( ) . map ( |t| & t. token ) ) ? {
956+ let token_span = location. span_to ( state. location ( ) ) ;
957+ buf. push ( TokenWithSpan {
958+ token,
959+ span : token_span,
960+ } ) ;
961+ location = state. location ( ) ;
962+ }
963+
964+ Ok ( ( ) )
965+ }
966+
911967 // Tokenize the identifier or keywords in `ch`
912968 fn tokenize_identifier_or_keyword (
913969 & self ,
@@ -936,7 +992,7 @@ impl<'a> Tokenizer<'a> {
936992
937993 /// Get the next token or return None
938994 fn next_token (
939- & self ,
995+ & mut self ,
940996 chars : & mut State ,
941997 prev_token : Option < & Token > ,
942998 ) -> Result < Option < Token > , TokenizerError > {
@@ -2102,13 +2158,12 @@ impl<'a> Tokenizer<'a> {
21022158 }
21032159
21042160 fn tokenize_multiline_comment (
2105- & self ,
2161+ & mut self ,
21062162 chars : & mut State ,
21072163 ) -> Result < Option < Token > , TokenizerError > {
21082164 let mut s = String :: new ( ) ;
21092165 let mut nested = 1 ;
21102166 let supports_nested_comments = self . dialect . supports_nested_comments ( ) ;
2111-
21122167 loop {
21132168 match chars. next ( ) {
21142169 Some ( '/' ) if matches ! ( chars. peek( ) , Some ( '*' ) ) && supports_nested_comments => {
@@ -4070,4 +4125,44 @@ mod tests {
40704125 panic ! ( "Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}" ) ;
40714126 }
40724127 }
#[test]
fn tokenize_multiline_comment_with_c_style_comment() {
    // A `/*! ... */` comment without a version number: the comment is
    // expected to be replaced by the tokens of its trimmed contents.
    let dialect = MySqlDialect {};
    let sql = "0/*! word */1";
    let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();

    let word = Word {
        value: String::from("word"),
        quote_style: None,
        keyword: Keyword::NoKeyword,
    };
    let expected = vec![
        Token::Number(String::from("0"), false),
        Token::Word(word),
        Token::Number(String::from("1"), false),
    ];

    compare(expected, tokens);
}
4145+
#[test]
fn tokenize_multiline_comment_with_c_style_comment_and_version() {
    // A `/*!NNNNN ... */` comment: the version digits are expected to be
    // dropped and the remaining text tokenized in place of the comment, while
    // the whitespace surrounding the comment is kept.
    let dialect = MySqlDialect {};
    let sql = "0 /*!50110 KEY_BLOCK_SIZE = 1024*/ 1";
    let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();

    let space = || Token::Whitespace(Whitespace::Space);
    let key_block_size = Word {
        value: String::from("KEY_BLOCK_SIZE"),
        quote_style: None,
        keyword: Keyword::KEY_BLOCK_SIZE,
    };
    let expected = vec![
        Token::Number(String::from("0"), false),
        space(),
        Token::Word(key_block_size),
        space(),
        Token::Eq,
        space(),
        Token::Number(String::from("1024"), false),
        space(),
        Token::Number(String::from("1"), false),
    ];

    compare(expected, tokens);
}
40734168}
0 commit comments