Skip to content

Commit 77c58ad

Browse files
Add support for C-style comments
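This commit adds support for the C-style comments of the form `/*! ... */` supported by MySQL. The tokenizer parses and consumes the optional version number after the `!` character, then emits the remaining comment body as a word token instead of discarding it as a multi-line comment.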
1 parent f642dd5 commit 77c58ad

4 files changed

Lines changed: 76 additions & 1 deletion

File tree

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@ impl Dialect for GenericDialect {
156156
true
157157
}
158158

159+
fn supports_c_style_comments(&self) -> bool {
160+
true
161+
}
162+
159163
fn supports_user_host_grantee(&self) -> bool {
160164
true
161165
}

src/dialect/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -898,6 +898,11 @@ pub trait Dialect: Debug + Any {
898898
false
899899
}
900900

901+
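/// Returns true if the dialect supports C-style comments of the form `/*! ... */` (with an optional version number after the `!`), whose body is tokenized instead of being skipped as a comment.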
902+
fn supports_c_style_comments(&self) -> bool {
903+
false
904+
}
905+
901906
/// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
902907
/// as an alias assignment operator, rather than a boolean expression.
903908
/// For example: the following statements are equivalent for such a dialect:

src/dialect/mysql.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,11 @@ impl Dialect for MySqlDialect {
8484
true
8585
}
8686

87+
/// MySQL supports C-style `/*! ... */` comments; see <https://dev.mysql.com/doc/refman/8.4/en/comments.html>
88+
fn supports_c_style_comments(&self) -> bool {
89+
true
90+
}
91+
8792
fn parse_infix(
8893
&self,
8994
parser: &mut crate::parser::Parser,

src/tokenizer.rs

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2107,8 +2107,9 @@ impl<'a> Tokenizer<'a> {
21072107
) -> Result<Option<Token>, TokenizerError> {
21082108
let mut s = String::new();
21092109
let mut nested = 1;
2110+
let mut c_style_comments = false;
21102111
let supports_nested_comments = self.dialect.supports_nested_comments();
2111-
2112+
let supports_c_style_comments = self.dialect.supports_c_style_comments();
21122113
loop {
21132114
match chars.next() {
21142115
Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
@@ -2117,10 +2118,35 @@ impl<'a> Tokenizer<'a> {
21172118
s.push('*');
21182119
nested += 1;
21192120
}
2121+
Some('!') if supports_c_style_comments => {
2122+
c_style_comments = true;
2123+
loop {
2124+
match chars.peek() {
2125+
Some('0')
2126+
| Some('1')
2127+
| Some('2')
2128+
| Some('3')
2129+
| Some('4')
2130+
| Some('5')
2131+
| Some('6')
2132+
| Some('7')
2133+
| Some('8')
2134+
| Some('9') => {
2135+
chars.next(); // consume the digit
2136+
}
2137+
_ => {
2138+
break;
2139+
}
2140+
}
2141+
}
2142+
}
21202143
Some('*') if matches!(chars.peek(), Some('/')) => {
21212144
chars.next(); // consume the '/'
21222145
nested -= 1;
21232146
if nested == 0 {
2147+
if c_style_comments {
2148+
break Ok(Some(Token::make_word(&s, None)));
2149+
}
21242150
break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
21252151
}
21262152
s.push('*');
@@ -4070,4 +4096,39 @@ mod tests {
40704096
panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}");
40714097
}
40724098
}
4099+
#[test]
4100+
fn tokenize_multiline_comment_with_c_style_comment() {
4101+
let sql = String::from("0/*!word*/1");
4102+
4103+
let dialect = MySqlDialect {};
4104+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
4105+
let expected = vec![
4106+
Token::Number("0".to_string(), false),
4107+
Token::Word(Word {
4108+
value: "word".to_string(),
4109+
quote_style: None,
4110+
keyword: Keyword::NoKeyword,
4111+
}),
4112+
Token::Number("1".to_string(), false),
4113+
];
4114+
compare(expected, tokens);
4115+
}
4116+
4117+
#[test]
4118+
fn tokenize_multiline_comment_with_c_style_comment_and_version() {
4119+
let sql = String::from("0/*!8000000word*/1");
4120+
4121+
let dialect = MySqlDialect {};
4122+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
4123+
let expected = vec![
4124+
Token::Number("0".to_string(), false),
4125+
Token::Word(Word {
4126+
value: "word".to_string(),
4127+
quote_style: None,
4128+
keyword: Keyword::NoKeyword,
4129+
}),
4130+
Token::Number("1".to_string(), false),
4131+
];
4132+
compare(expected, tokens);
4133+
}
40734134
}

0 commit comments

Comments
 (0)