Skip to content

Commit 57a64c7

Browse files
Add support for comment hints
This commit adds support for MySQL comment hints (`/*! ... */`). The tokenizer parses and consumes the optional version number after the `!` character and returns all tokens inside the comment hint.
1 parent 2ac82e9 commit 57a64c7

4 files changed

Lines changed: 154 additions & 3 deletions

File tree

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ impl Dialect for GenericDialect {
176176
true
177177
}
178178

179+
/// The generic dialect opts in to comment hints (`/*! ... */`): returning `true` here
/// lets the tokenizer expand such comments into the tokens they contain.
fn supports_multiline_comment_hints(&self) -> bool {
180+
true
181+
}
182+
179183
/// Always `true` for the generic dialect.
/// NOTE(review): presumably this enables `user@host`-style grantee names; the trait-level
/// documentation is not visible in this view — confirm.
fn supports_user_host_grantee(&self) -> bool {
180184
true
181185
}

src/dialect/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,6 +1086,12 @@ pub trait Dialect: Debug + Any {
10861086
false
10871087
}
10881088

1089+
/// Returns true if the dialect supports comment hints: multiline comments that start
/// with `!`, optionally followed by a version number, whose remaining content is
/// tokenized as regular SQL instead of being kept as a single comment token,
1090+
/// e.g. `/*!50110 KEY_BLOCK_SIZE = 1024*/` yields the tokens for ` KEY_BLOCK_SIZE = 1024`.
///
/// Defaults to `false`; dialects opt in by overriding this method.
///
/// NOTE(review): MySQL's manual appears to reserve the term "optimizer hints" for
/// `/*+ ... */`; the `/*! ... */` form handled here is the version-conditional comment
/// syntax — confirm the intended terminology.
1091+
fn supports_multiline_comment_hints(&self) -> bool {
1092+
false
1093+
}
1094+
10891095
/// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
10901096
/// as an alias assignment operator, rather than a boolean expression.
10911097
/// For example: the following statements are equivalent for such a dialect:

src/dialect/mysql.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ impl Dialect for MySqlDialect {
8888
true
8989
}
9090

91+
/// MySQL opts in to comment hints: `/*! ... */` comments (optionally carrying a version
/// number, e.g. `/*!50110 ... */`) are expanded by the tokenizer into regular tokens.
///
/// see <https://dev.mysql.com/doc/refman/8.4/en/comments.html>
92+
fn supports_multiline_comment_hints(&self) -> bool {
93+
true
94+
}
95+
9196
fn parse_infix(
9297
&self,
9398
parser: &mut crate::parser::Parser,

src/tokenizer.rs

Lines changed: 139 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -945,10 +945,65 @@ impl<'a> Tokenizer<'a> {
945945
while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? {
946946
let span = location.span_to(state.location());
947947

948-
buf.push(TokenWithSpan { token, span });
948+
// Check if this is a multiline comment hint that should be expanded
949+
match &token {
950+
Token::Whitespace(Whitespace::MultiLineComment(comment))
951+
if self.dialect.supports_multiline_comment_hints()
952+
&& comment.starts_with('!') =>
953+
{
954+
// Re-tokenize the hints and add them to the buffer
955+
self.tokenize_comment_hints(comment, span, buf)?;
956+
}
957+
_ => {
958+
buf.push(TokenWithSpan { token, span });
959+
}
960+
}
961+
962+
location = state.location();
963+
}
964+
Ok(())
965+
}
966+
967+
/// Re-tokenize optimizer hints from a multiline comment and add them to the buffer.
968+
/// For example, `/*!50110 KEY_BLOCK_SIZE = 1024*/` becomes tokens for `KEY_BLOCK_SIZE = 1024`
969+
fn tokenize_comment_hints(
970+
&self,
971+
comment: &str,
972+
span: Span,
973+
buf: &mut Vec<TokenWithSpan>,
974+
) -> Result<(), TokenizerError> {
975+
// Strip the leading '!' and any version digits (e.g., "50110")
976+
let hint_content = comment
977+
.strip_prefix('!')
978+
.unwrap_or(comment)
979+
.trim_start_matches(|c: char| c.is_ascii_digit());
980+
981+
// If there's no content after stripping, nothing to tokenize
982+
if hint_content.is_empty() {
983+
return Ok(());
984+
}
985+
986+
// Create a new tokenizer for the hint content
987+
let inner = Tokenizer::new(self.dialect, hint_content).with_unescape(self.unescape);
988+
989+
// Create a state for tracking position within the hint
990+
let mut state = State {
991+
peekable: hint_content.chars().peekable(),
992+
line: span.start.line,
993+
col: span.start.column,
994+
};
949995

996+
// Tokenize the hint content and add tokens to the buffer
997+
let mut location = state.location();
998+
while let Some(token) = inner.next_token(&mut state, buf.last().map(|t| &t.token))? {
999+
let token_span = location.span_to(state.location());
1000+
buf.push(TokenWithSpan {
1001+
token,
1002+
span: token_span,
1003+
});
9501004
location = state.location();
9511005
}
1006+
9521007
Ok(())
9531008
}
9541009

@@ -2233,7 +2288,6 @@ impl<'a> Tokenizer<'a> {
22332288
let mut s = String::new();
22342289
let mut nested = 1;
22352290
let supports_nested_comments = self.dialect.supports_nested_comments();
2236-
22372291
loop {
22382292
match chars.next() {
22392293
Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
@@ -4218,6 +4272,88 @@ mod tests {
42184272
Token::Whitespace(Whitespace::Space),
42194273
Token::make_word("y", None),
42204274
],
4221-
)
4275+
);
4276+
}
4277+
4278+
// A comment hint without a version number: the `/*!` and `*/` delimiters are dropped and
// the hint body ` word ` is tokenized in place, between the surrounding `0` and `1`.
#[test]
4279+
fn tokenize_multiline_comment_with_comment_hint() {
4280+
let sql = String::from("0/*! word */1");
4281+
4282+
let dialect = MySqlDialect {};
4283+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
4284+
let expected = vec![
4285+
Token::Number("0".to_string(), false),
4286+
Token::Whitespace(Whitespace::Space),
4287+
Token::Word(Word {
4288+
value: "word".to_string(),
4289+
quote_style: None,
4290+
keyword: Keyword::NoKeyword,
4291+
}),
4292+
Token::Whitespace(Whitespace::Space),
4293+
Token::Number("1".to_string(), false),
4294+
];
4295+
compare(expected, tokens);
4296+
}
4297+
4298+
// Comment hints that carry a version number, plus degenerate hints whose body is empty or
// whitespace-only, tokenized with the MySQL dialect.
#[test]
fn tokenize_multiline_comment_with_comment_hint_and_version() {
    // The version digits are discarded; the rest of the hint is tokenized in place.
    let sql_multi = String::from("0 /*!50110 KEY_BLOCK_SIZE = 1024*/ 1");
    let dialect = MySqlDialect {};
    let tokens = Tokenizer::new(&dialect, &sql_multi).tokenize().unwrap();
    let expected = vec![
        Token::Number("0".to_string(), false),
        Token::Whitespace(Whitespace::Space),
        Token::Whitespace(Whitespace::Space),
        Token::Word(Word {
            value: "KEY_BLOCK_SIZE".to_string(),
            quote_style: None,
            keyword: Keyword::KEY_BLOCK_SIZE,
        }),
        Token::Whitespace(Whitespace::Space),
        Token::Eq,
        Token::Whitespace(Whitespace::Space),
        Token::Number("1024".to_string(), false),
        Token::Whitespace(Whitespace::Space),
        Token::Number("1".to_string(), false),
    ];
    compare(expected, tokens);

    // Only a single space follows the version number: the hint contributes one `Space`.
    let tokens = Tokenizer::new(&dialect, "0 /*!50110 */ 1")
        .tokenize()
        .unwrap();
    compare(
        vec![
            Token::Number("0".to_string(), false),
            Token::Whitespace(Whitespace::Space),
            Token::Whitespace(Whitespace::Space),
            Token::Whitespace(Whitespace::Space),
            Token::Number("1".to_string(), false),
        ],
        tokens,
    );

    // An empty hint contributes no tokens at all.
    let tokens = Tokenizer::new(&dialect, "0 /*!*/ 1").tokenize().unwrap();
    compare(
        vec![
            Token::Number("0".to_string(), false),
            Token::Whitespace(Whitespace::Space),
            Token::Whitespace(Whitespace::Space),
            Token::Number("1".to_string(), false),
        ],
        tokens,
    );

    // Three spaces inside the hint yield three `Space` tokens, in addition to the one
    // before and the one after the comment (five in total).
    let tokens = Tokenizer::new(&dialect, "0 /*!   */ 1").tokenize().unwrap();
    compare(
        vec![
            Token::Number("0".to_string(), false),
            Token::Whitespace(Whitespace::Space),
            Token::Whitespace(Whitespace::Space),
            Token::Whitespace(Whitespace::Space),
            Token::Whitespace(Whitespace::Space),
            Token::Whitespace(Whitespace::Space),
            Token::Number("1".to_string(), false),
        ],
        tokens,
    );
}
42234359
}

0 commit comments

Comments
 (0)