Skip to content

Commit 5f4114d

Browse files
Add support for comment hints
This commit adds support for the comment hints supported by MySQL. It parses and consumes the optional version number after the `!` character and returns all tokens inside a comment hint.
1 parent 2ac82e9 commit 5f4114d

4 files changed

Lines changed: 152 additions & 3 deletions

File tree

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ impl Dialect for GenericDialect {
176176
true
177177
}
178178

179+
fn supports_multiline_comment_hints(&self) -> bool {
180+
true
181+
}
182+
179183
fn supports_user_host_grantee(&self) -> bool {
180184
true
181185
}

src/dialect/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,6 +1086,12 @@ pub trait Dialect: Debug + Any {
10861086
false
10871087
}
10881088

1089+
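/// Returns true if the dialect supports MySQL-style comment hints: multiline comments that start with `!` (optionally followed by a version number) and whose contents are tokenized as regular SQL,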
1090+
/// e.g. `/*!50110 KEY_BLOCK_SIZE = 1024*/`
1091+
fn supports_multiline_comment_hints(&self) -> bool {
1092+
false
1093+
}
1094+
10891095
/// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
10901096
/// as an alias assignment operator, rather than a boolean expression.
10911097
/// For example: the following statements are equivalent for such a dialect:

src/dialect/mysql.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ impl Dialect for MySqlDialect {
8888
true
8989
}
9090

91+
/// see <https://dev.mysql.com/doc/refman/8.4/en/comments.html>
92+
fn supports_multiline_comment_hints(&self) -> bool {
93+
true
94+
}
95+
9196
fn parse_infix(
9297
&self,
9398
parser: &mut crate::parser::Parser,

src/tokenizer.rs

Lines changed: 137 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -945,13 +945,68 @@ impl<'a> Tokenizer<'a> {
945945
while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? {
946946
let span = location.span_to(state.location());
947947

948-
buf.push(TokenWithSpan { token, span });
948+
// Check if this is a multiline comment hint that should be expanded
949+
match &token {
950+
Token::Whitespace(Whitespace::MultiLineComment(comment))
951+
if self.dialect.supports_multiline_comment_hints()
952+
&& comment.starts_with('!') =>
953+
{
954+
// Re-tokenize the hints and add them to the buffer
955+
self.tokenize_comment_hints(comment, span, buf)?;
956+
}
957+
_ => {
958+
buf.push(TokenWithSpan { token, span });
959+
}
960+
}
949961

950962
location = state.location();
951963
}
952964
Ok(())
953965
}
954966

967+
/// Re-tokenize the contents of a multiline comment hint (`/*! ... */`) and add the resulting tokens to the buffer.
968+
/// For example, `/*!50110 KEY_BLOCK_SIZE = 1024*/` becomes tokens for `KEY_BLOCK_SIZE = 1024`
969+
fn tokenize_comment_hints(
970+
&self,
971+
comment: &str,
972+
span: Span,
973+
buf: &mut Vec<TokenWithSpan>,
974+
) -> Result<(), TokenizerError> {
975+
// Strip the leading '!' and any version digits (e.g., "50110")
976+
let hint_content = comment
977+
.strip_prefix('!')
978+
.unwrap_or(comment)
979+
.trim_start_matches(|c: char| c.is_ascii_digit());
980+
981+
// If there's no content after stripping, nothing to tokenize
982+
if hint_content.is_empty() {
983+
return Ok(());
984+
}
985+
986+
// Create a new tokenizer for the hint content
987+
let inner = Tokenizer::new(self.dialect, hint_content).with_unescape(self.unescape);
988+
989+
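// Create a state for tracking position within the hint. NOTE(review): the state starts at the location of the comment's opening `/*`, so the spans of the re-tokenized tokens do not account for the skipped `/*!` prefix and version digits.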
990+
let mut state = State {
991+
peekable: hint_content.chars().peekable(),
992+
line: span.start.line,
993+
col: span.start.column,
994+
};
995+
996+
// Tokenize the hint content and add tokens to the buffer
997+
let mut location = state.location();
998+
while let Some(token) = inner.next_token(&mut state, buf.last().map(|t| &t.token))? {
999+
let token_span = location.span_to(state.location());
1000+
buf.push(TokenWithSpan {
1001+
token,
1002+
span: token_span,
1003+
});
1004+
location = state.location();
1005+
}
1006+
1007+
Ok(())
1008+
}
1009+
9551010
// Tokenize the identifier or keywords in `ch`
9561011
fn tokenize_identifier_or_keyword(
9571012
&self,
@@ -2233,7 +2288,6 @@ impl<'a> Tokenizer<'a> {
22332288
let mut s = String::new();
22342289
let mut nested = 1;
22352290
let supports_nested_comments = self.dialect.supports_nested_comments();
2236-
22372291
loop {
22382292
match chars.next() {
22392293
Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
@@ -4218,6 +4272,86 @@ mod tests {
42184272
Token::Whitespace(Whitespace::Space),
42194273
Token::make_word("y", None),
42204274
],
4221-
)
4275+
);
4276+
}
4277+
4278+
#[test]
4279+
fn tokenize_multiline_comment_with_comment_hint() {
4280+
let sql = String::from("0/*! word */1");
4281+
4282+
let dialect = MySqlDialect {};
4283+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
4284+
let expected = vec![
4285+
Token::Number("0".to_string(), false),
4286+
Token::Whitespace(Whitespace::Space),
4287+
Token::Word(Word {
4288+
value: "word".to_string(),
4289+
quote_style: None,
4290+
keyword: Keyword::NoKeyword,
4291+
}),
4292+
Token::Whitespace(Whitespace::Space),
4293+
Token::Number("1".to_string(), false),
4294+
];
4295+
compare(expected, tokens);
4296+
}
4297+
4298+
#[test]
4299+
fn tokenize_multiline_comment_with_comment_hint_and_version() {
4300+
let sql_multi = String::from("0 /*!50110 KEY_BLOCK_SIZE = 1024*/ 1");
4301+
let dialect = MySqlDialect {};
4302+
let tokens = Tokenizer::new(&dialect, &sql_multi).tokenize().unwrap();
4303+
let expected = vec![
4304+
Token::Number("0".to_string(), false),
4305+
Token::Whitespace(Whitespace::Space),
4306+
Token::Whitespace(Whitespace::Space),
4307+
Token::Word(Word {
4308+
value: "KEY_BLOCK_SIZE".to_string(),
4309+
quote_style: None,
4310+
keyword: Keyword::KEY_BLOCK_SIZE,
4311+
}),
4312+
Token::Whitespace(Whitespace::Space),
4313+
Token::Eq,
4314+
Token::Whitespace(Whitespace::Space),
4315+
Token::Number("1024".to_string(), false),
4316+
Token::Whitespace(Whitespace::Space),
4317+
Token::Number("1".to_string(), false),
4318+
];
4319+
compare(expected, tokens);
4320+
4321+
let tokens = Tokenizer::new(&dialect, "0 /*!50110 */ 1").tokenize().unwrap();
4322+
compare(
4323+
vec![
4324+
Token::Number("0".to_string(), false),
4325+
Token::Whitespace(Whitespace::Space),
4326+
Token::Whitespace(Whitespace::Space),
4327+
Token::Whitespace(Whitespace::Space),
4328+
Token::Number("1".to_string(), false),
4329+
],
4330+
tokens,
4331+
);
4332+
4333+
let tokens = Tokenizer::new(&dialect, "0 /*!*/ 1").tokenize().unwrap();
4334+
compare(
4335+
vec![
4336+
Token::Number("0".to_string(), false),
4337+
Token::Whitespace(Whitespace::Space),
4338+
Token::Whitespace(Whitespace::Space),
4339+
Token::Number("1".to_string(), false),
4340+
],
4341+
tokens,
4342+
);
4343+
let tokens = Tokenizer::new(&dialect, "0 /*! */ 1").tokenize().unwrap();
4344+
compare(
4345+
vec![
4346+
Token::Number("0".to_string(), false),
4347+
Token::Whitespace(Whitespace::Space),
4348+
Token::Whitespace(Whitespace::Space),
4349+
Token::Whitespace(Whitespace::Space),
4350+
Token::Whitespace(Whitespace::Space),
4351+
Token::Whitespace(Whitespace::Space),
4352+
Token::Number("1".to_string(), false),
4353+
],
4354+
tokens,
4355+
);
42224356
}
42234357
}

0 commit comments

Comments
 (0)