@@ -945,10 +945,66 @@ impl<'a> Tokenizer<'a> {
945945 while let Some ( token) = self . next_token ( & mut state, buf. last ( ) . map ( |t| & t. token ) ) ? {
946946 let span = location. span_to ( state. location ( ) ) ;
947947
948- buf. push ( TokenWithSpan { token, span } ) ;
948+ // Check if this is a multiline comment hint that should be expanded
949+ match & token {
950+ Token :: Whitespace ( Whitespace :: MultiLineComment ( comment) )
951+ if self . dialect . supports_multiline_comment_hints ( )
952+ && comment. starts_with ( '!' ) =>
953+ {
954+ // Re-tokenize the hints and add them to the buffer
955+ self . tokenize_comment_hints ( comment, span, buf) ?;
956+ }
957+ _ => {
958+ buf. push ( TokenWithSpan { token, span } ) ;
959+ }
960+ }
961+
962+ location = state. location ( ) ;
963+ }
964+ Ok ( ( ) )
965+ }
966+
967+ /// Re-tokenize optimizer hints from a multiline comment and add them to the buffer.
968+ /// For example, `/*!50110 KEY_BLOCK_SIZE = 1024*/` becomes tokens for `KEY_BLOCK_SIZE = 1024`
969+ fn tokenize_comment_hints (
970+ & self ,
971+ comment : & str ,
972+ span : Span ,
973+ buf : & mut Vec < TokenWithSpan > ,
974+ ) -> Result < ( ) , TokenizerError > {
975+ // Strip the leading '!' and any version digits (e.g., "50110")
976+ let hint_content = comment
977+ . strip_prefix ( '!' )
978+ . unwrap_or ( comment)
979+ . trim_start_matches ( |c : char | c. is_ascii_digit ( ) )
980+ . trim ( ) ;
981+
982+ // If there's no content after stripping, nothing to tokenize
983+ if hint_content. is_empty ( ) {
984+ return Ok ( ( ) ) ;
985+ }
949986
987+ // Create a new tokenizer for the hint content
988+ let mut inner = Tokenizer :: new ( self . dialect , hint_content) . with_unescape ( self . unescape ) ;
989+
990+ // Create a state for tracking position within the hint
991+ let mut state = State {
992+ peekable : hint_content. chars ( ) . peekable ( ) ,
993+ line : span. start . line ,
994+ col : span. start . column ,
995+ } ;
996+
997+ // Tokenize the hint content and add tokens to the buffer
998+ let mut location = state. location ( ) ;
999+ while let Some ( token) = inner. next_token ( & mut state, buf. last ( ) . map ( |t| & t. token ) ) ? {
1000+ let token_span = location. span_to ( state. location ( ) ) ;
1001+ buf. push ( TokenWithSpan {
1002+ token,
1003+ span : token_span,
1004+ } ) ;
9501005 location = state. location ( ) ;
9511006 }
1007+
9521008 Ok ( ( ) )
9531009 }
9541010
@@ -980,7 +1036,7 @@ impl<'a> Tokenizer<'a> {
9801036
9811037 /// Get the next token or return None
9821038 fn next_token (
983- & self ,
1039+ & mut self ,
9841040 chars : & mut State ,
9851041 prev_token : Option < & Token > ,
9861042 ) -> Result < Option < Token > , TokenizerError > {
@@ -2227,13 +2283,12 @@ impl<'a> Tokenizer<'a> {
22272283 }
22282284
22292285 fn tokenize_multiline_comment (
2230- & self ,
2286+ & mut self ,
22312287 chars : & mut State ,
22322288 ) -> Result < Option < Token > , TokenizerError > {
22332289 let mut s = String :: new ( ) ;
22342290 let mut nested = 1 ;
22352291 let supports_nested_comments = self . dialect . supports_nested_comments ( ) ;
2236-
22372292 loop {
22382293 match chars. next ( ) {
22392294 Some ( '/' ) if matches ! ( chars. peek( ) , Some ( '*' ) ) && supports_nested_comments => {
@@ -4218,6 +4273,47 @@ mod tests {
42184273 Token :: Whitespace ( Whitespace :: Space ) ,
42194274 Token :: make_word( "y" , None ) ,
42204275 ] ,
4221- )
4276+ ) ;
4277+ }
4278+
4279+ #[ test]
4280+ fn tokenize_multiline_comment_with_comment_hint ( ) {
4281+ let sql = String :: from ( "0/*! word */1" ) ;
4282+
4283+ let dialect = MySqlDialect { } ;
4284+ let tokens = Tokenizer :: new ( & dialect, & sql) . tokenize ( ) . unwrap ( ) ;
4285+ let expected = vec ! [
4286+ Token :: Number ( "0" . to_string( ) , false ) ,
4287+ Token :: Word ( Word {
4288+ value: "word" . to_string( ) ,
4289+ quote_style: None ,
4290+ keyword: Keyword :: NoKeyword ,
4291+ } ) ,
4292+ Token :: Number ( "1" . to_string( ) , false ) ,
4293+ ] ;
4294+ compare ( expected, tokens) ;
4295+ }
4296+
4297+ #[ test]
4298+ fn tokenize_multiline_comment_with_comment_hint_and_version ( ) {
4299+ let sql_multi = String :: from ( "0 /*!50110 KEY_BLOCK_SIZE = 1024*/ 1" ) ;
4300+ let dialect = MySqlDialect { } ;
4301+ let tokens = Tokenizer :: new ( & dialect, & sql_multi) . tokenize ( ) . unwrap ( ) ;
4302+ let expected = vec ! [
4303+ Token :: Number ( "0" . to_string( ) , false ) ,
4304+ Token :: Whitespace ( Whitespace :: Space ) ,
4305+ Token :: Word ( Word {
4306+ value: "KEY_BLOCK_SIZE" . to_string( ) ,
4307+ quote_style: None ,
4308+ keyword: Keyword :: KEY_BLOCK_SIZE ,
4309+ } ) ,
4310+ Token :: Whitespace ( Whitespace :: Space ) ,
4311+ Token :: Eq ,
4312+ Token :: Whitespace ( Whitespace :: Space ) ,
4313+ Token :: Number ( "1024" . to_string( ) , false ) ,
4314+ Token :: Whitespace ( Whitespace :: Space ) ,
4315+ Token :: Number ( "1" . to_string( ) , false ) ,
4316+ ] ;
4317+ compare ( expected, tokens) ;
42224318 }
42234319}
0 commit comments