Skip to content

Commit f8df333

Browse files
Add support for comment hints
This commit adds support for the comment hints supported by MySQL. It parses and consumes the optional version number after the `!` character and returns all tokens inside a comment hint.
1 parent f642dd5 commit f8df333

4 files changed

Lines changed: 114 additions & 4 deletions

File tree

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,10 @@ impl Dialect for GenericDialect {
156156
true
157157
}
158158

159+
fn supports_multiline_comment_hints(&self) -> bool {
160+
true
161+
}
162+
159163
fn supports_user_host_grantee(&self) -> bool {
160164
true
161165
}

src/dialect/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -898,6 +898,12 @@ pub trait Dialect: Debug + Any {
898898
false
899899
}
900900

901+
/// Returns true if the dialect supports optimizer hints in multiline comments
902+
/// e.g. `/*!50110 KEY_BLOCK_SIZE = 1024*/`
903+
fn supports_multiline_comment_hints(&self) -> bool {
904+
false
905+
}
906+
901907
/// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
902908
/// as an alias assignment operator, rather than a boolean expression.
903909
/// For example: the following statements are equivalent for such a dialect:

src/dialect/mysql.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,11 @@ impl Dialect for MySqlDialect {
8484
true
8585
}
8686

87+
/// see <https://dev.mysql.com/doc/refman/8.4/en/comments.html>
88+
fn supports_multiline_comment_hints(&self) -> bool {
89+
true
90+
}
91+
8792
fn parse_infix(
8893
&self,
8994
parser: &mut crate::parser::Parser,

src/tokenizer.rs

Lines changed: 99 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -901,13 +901,69 @@ impl<'a> Tokenizer<'a> {
901901
while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? {
902902
let span = location.span_to(state.location());
903903

904-
buf.push(TokenWithSpan { token, span });
904+
// Check if this is a multiline comment hint that should be expanded
905+
match &token {
906+
Token::Whitespace(Whitespace::MultiLineComment(comment))
907+
if self.dialect.supports_multiline_comment_hints()
908+
&& comment.starts_with('!') =>
909+
{
910+
// Re-tokenize the hints and add them to the buffer
911+
self.tokenize_comment_hints(comment, span, buf)?;
912+
}
913+
_ => {
914+
buf.push(TokenWithSpan { token, span });
915+
}
916+
}
905917

906918
location = state.location();
907919
}
908920
Ok(())
909921
}
910922

923+
/// Re-tokenize optimizer hints from a multiline comment and add them to the buffer.
924+
/// For example, `/*!50110 KEY_BLOCK_SIZE = 1024*/` becomes tokens for `KEY_BLOCK_SIZE = 1024`
925+
fn tokenize_comment_hints(
926+
&self,
927+
comment: &str,
928+
span: Span,
929+
buf: &mut Vec<TokenWithSpan>,
930+
) -> Result<(), TokenizerError> {
931+
// Strip the leading '!' and any version digits (e.g., "50110")
932+
let hint_content = comment
933+
.strip_prefix('!')
934+
.unwrap_or(comment)
935+
.trim_start_matches(|c: char| c.is_ascii_digit())
936+
.trim();
937+
938+
// If there's no content after stripping, nothing to tokenize
939+
if hint_content.is_empty() {
940+
return Ok(());
941+
}
942+
943+
// Create a new tokenizer for the hint content
944+
let mut inner = Tokenizer::new(self.dialect, hint_content).with_unescape(self.unescape);
945+
946+
// Create a state for tracking position within the hint
947+
let mut state = State {
948+
peekable: hint_content.chars().peekable(),
949+
line: span.start.line,
950+
col: span.start.column,
951+
};
952+
953+
// Tokenize the hint content and add tokens to the buffer
954+
let mut location = state.location();
955+
while let Some(token) = inner.next_token(&mut state, buf.last().map(|t| &t.token))? {
956+
let token_span = location.span_to(state.location());
957+
buf.push(TokenWithSpan {
958+
token,
959+
span: token_span,
960+
});
961+
location = state.location();
962+
}
963+
964+
Ok(())
965+
}
966+
911967
// Tokenize the identifier or keywords in `ch`
912968
fn tokenize_identifier_or_keyword(
913969
&self,
@@ -936,7 +992,7 @@ impl<'a> Tokenizer<'a> {
936992

937993
/// Get the next token or return None
938994
fn next_token(
939-
&self,
995+
&mut self,
940996
chars: &mut State,
941997
prev_token: Option<&Token>,
942998
) -> Result<Option<Token>, TokenizerError> {
@@ -2102,13 +2158,12 @@ impl<'a> Tokenizer<'a> {
21022158
}
21032159

21042160
fn tokenize_multiline_comment(
2105-
&self,
2161+
&mut self,
21062162
chars: &mut State,
21072163
) -> Result<Option<Token>, TokenizerError> {
21082164
let mut s = String::new();
21092165
let mut nested = 1;
21102166
let supports_nested_comments = self.dialect.supports_nested_comments();
2111-
21122167
loop {
21132168
match chars.next() {
21142169
Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
@@ -4070,4 +4125,44 @@ mod tests {
40704125
panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}");
40714126
}
40724127
}
4128+
#[test]
4129+
fn tokenize_multiline_comment_with_c_style_comment() {
4130+
let sql = String::from("0/*! word */1");
4131+
4132+
let dialect = MySqlDialect {};
4133+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
4134+
let expected = vec![
4135+
Token::Number("0".to_string(), false),
4136+
Token::Word(Word {
4137+
value: "word".to_string(),
4138+
quote_style: None,
4139+
keyword: Keyword::NoKeyword,
4140+
}),
4141+
Token::Number("1".to_string(), false),
4142+
];
4143+
compare(expected, tokens);
4144+
}
4145+
4146+
#[test]
4147+
fn tokenize_multiline_comment_with_c_style_comment_and_version() {
4148+
let sql_multi = String::from("0 /*!50110 KEY_BLOCK_SIZE = 1024*/ 1");
4149+
let dialect = MySqlDialect {};
4150+
let tokens = Tokenizer::new(&dialect, &sql_multi).tokenize().unwrap();
4151+
let expected = vec![
4152+
Token::Number("0".to_string(), false),
4153+
Token::Whitespace(Whitespace::Space),
4154+
Token::Word(Word {
4155+
value: "KEY_BLOCK_SIZE".to_string(),
4156+
quote_style: None,
4157+
keyword: Keyword::KEY_BLOCK_SIZE,
4158+
}),
4159+
Token::Whitespace(Whitespace::Space),
4160+
Token::Eq,
4161+
Token::Whitespace(Whitespace::Space),
4162+
Token::Number("1024".to_string(), false),
4163+
Token::Whitespace(Whitespace::Space),
4164+
Token::Number("1".to_string(), false),
4165+
];
4166+
compare(expected, tokens);
4167+
}
40734168
}

0 commit comments

Comments
 (0)