Skip to content

Commit c9858ff

Browse files
Add support for comment hints
This commit adds support for the comment hints supported by MySQL. It parses and consumes the optional version number after the `!` character and returns all tokens inside a comment hint.
1 parent 2ac82e9 commit c9858ff

4 files changed

Lines changed: 116 additions & 5 deletions

File tree

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ impl Dialect for GenericDialect {
176176
true
177177
}
178178

179+
fn supports_multiline_comment_hints(&self) -> bool {
180+
true
181+
}
182+
179183
fn supports_user_host_grantee(&self) -> bool {
180184
true
181185
}

src/dialect/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,6 +1086,12 @@ pub trait Dialect: Debug + Any {
10861086
false
10871087
}
10881088

1089+
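/// Returns true if the dialect supports comment hints: multiline comments that start with `!` (optionally followed by a version number) and whose contents are tokenized as regular SQL,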
1090+
/// e.g. `/*!50110 KEY_BLOCK_SIZE = 1024*/`
1091+
fn supports_multiline_comment_hints(&self) -> bool {
1092+
false
1093+
}
1094+
10891095
/// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
10901096
/// as an alias assignment operator, rather than a boolean expression.
10911097
/// For example: the following statements are equivalent for such a dialect:

src/dialect/mysql.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ impl Dialect for MySqlDialect {
8888
true
8989
}
9090

91+
/// see <https://dev.mysql.com/doc/refman/8.4/en/comments.html>
92+
fn supports_multiline_comment_hints(&self) -> bool {
93+
true
94+
}
95+
9196
fn parse_infix(
9297
&self,
9398
parser: &mut crate::parser::Parser,

src/tokenizer.rs

Lines changed: 101 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -945,10 +945,66 @@ impl<'a> Tokenizer<'a> {
945945
while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? {
946946
let span = location.span_to(state.location());
947947

948-
buf.push(TokenWithSpan { token, span });
948+
// Check if this is a multiline comment hint that should be expanded
949+
match &token {
950+
Token::Whitespace(Whitespace::MultiLineComment(comment))
951+
if self.dialect.supports_multiline_comment_hints()
952+
&& comment.starts_with('!') =>
953+
{
954+
// Re-tokenize the hints and add them to the buffer
955+
self.tokenize_comment_hints(comment, span, buf)?;
956+
}
957+
_ => {
958+
buf.push(TokenWithSpan { token, span });
959+
}
960+
}
961+
962+
location = state.location();
963+
}
964+
Ok(())
965+
}
966+
967+
/// Re-tokenize the contents of a comment hint (a multiline comment starting with `!`, optionally followed by version digits) and add the resulting tokens to the buffer.
968+
/// For example, `/*!50110 KEY_BLOCK_SIZE = 1024*/` becomes tokens for `KEY_BLOCK_SIZE = 1024`
969+
fn tokenize_comment_hints(
970+
&self,
971+
comment: &str,
972+
span: Span,
973+
buf: &mut Vec<TokenWithSpan>,
974+
) -> Result<(), TokenizerError> {
975+
// Strip the leading '!' and any version digits (e.g., "50110")
976+
let hint_content = comment
977+
.strip_prefix('!')
978+
.unwrap_or(comment)
979+
.trim_start_matches(|c: char| c.is_ascii_digit())
980+
.trim();
981+
982+
// If there's no content after stripping, nothing to tokenize
983+
if hint_content.is_empty() {
984+
return Ok(());
985+
}
949986

987+
// Create a new tokenizer for the hint content
988+
let mut inner = Tokenizer::new(self.dialect, hint_content).with_unescape(self.unescape);
989+
990+
// Create a state for tracking position within the hint
991+
let mut state = State {
992+
peekable: hint_content.chars().peekable(),
993+
line: span.start.line,
994+
col: span.start.column,
995+
};
996+
997+
// Tokenize the hint content and add tokens to the buffer
998+
let mut location = state.location();
999+
while let Some(token) = inner.next_token(&mut state, buf.last().map(|t| &t.token))? {
1000+
let token_span = location.span_to(state.location());
1001+
buf.push(TokenWithSpan {
1002+
token,
1003+
span: token_span,
1004+
});
9501005
location = state.location();
9511006
}
1007+
9521008
Ok(())
9531009
}
9541010

@@ -980,7 +1036,7 @@ impl<'a> Tokenizer<'a> {
9801036

9811037
/// Get the next token or return None
9821038
fn next_token(
983-
&self,
1039+
&mut self,
9841040
chars: &mut State,
9851041
prev_token: Option<&Token>,
9861042
) -> Result<Option<Token>, TokenizerError> {
@@ -2227,13 +2283,12 @@ impl<'a> Tokenizer<'a> {
22272283
}
22282284

22292285
fn tokenize_multiline_comment(
2230-
&self,
2286+
&mut self,
22312287
chars: &mut State,
22322288
) -> Result<Option<Token>, TokenizerError> {
22332289
let mut s = String::new();
22342290
let mut nested = 1;
22352291
let supports_nested_comments = self.dialect.supports_nested_comments();
2236-
22372292
loop {
22382293
match chars.next() {
22392294
Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
@@ -4218,6 +4273,47 @@ mod tests {
42184273
Token::Whitespace(Whitespace::Space),
42194274
Token::make_word("y", None),
42204275
],
4221-
)
4276+
);
4277+
}
4278+
4279+
#[test]
4280+
fn tokenize_multiline_comment_with_comment_hint() {
4281+
let sql = String::from("0/*! word */1");
4282+
4283+
let dialect = MySqlDialect {};
4284+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
4285+
let expected = vec![
4286+
Token::Number("0".to_string(), false),
4287+
Token::Word(Word {
4288+
value: "word".to_string(),
4289+
quote_style: None,
4290+
keyword: Keyword::NoKeyword,
4291+
}),
4292+
Token::Number("1".to_string(), false),
4293+
];
4294+
compare(expected, tokens);
4295+
}
4296+
4297+
#[test]
4298+
fn tokenize_multiline_comment_with_comment_hint_and_version() {
4299+
let sql_multi = String::from("0 /*!50110 KEY_BLOCK_SIZE = 1024*/ 1");
4300+
let dialect = MySqlDialect {};
4301+
let tokens = Tokenizer::new(&dialect, &sql_multi).tokenize().unwrap();
4302+
let expected = vec![
4303+
Token::Number("0".to_string(), false),
4304+
Token::Whitespace(Whitespace::Space),
4305+
Token::Word(Word {
4306+
value: "KEY_BLOCK_SIZE".to_string(),
4307+
quote_style: None,
4308+
keyword: Keyword::KEY_BLOCK_SIZE,
4309+
}),
4310+
Token::Whitespace(Whitespace::Space),
4311+
Token::Eq,
4312+
Token::Whitespace(Whitespace::Space),
4313+
Token::Number("1024".to_string(), false),
4314+
Token::Whitespace(Whitespace::Space),
4315+
Token::Number("1".to_string(), false),
4316+
];
4317+
compare(expected, tokens);
42224318
}
42234319
}

0 commit comments

Comments
 (0)