Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/dialect/mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,13 @@ impl Dialect for MySqlDialect {
// See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
// Identifiers which begin with a digit are recognized while tokenizing numbers,
// so they can be distinguished from exponent numeric literals.
// MySQL also supports non-ASCII UTF-8 characters in identifiers.
ch.is_alphabetic()
|| ch == '_'
|| ch == '$'
|| ch == '@'
|| ('\u{0080}'..='\u{ffff}').contains(&ch)
|| !ch.is_ascii()
}

fn is_identifier_part(&self, ch: char) -> bool {
Expand Down
7 changes: 3 additions & 4 deletions src/dialect/postgresql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,9 @@ impl Dialect for PostgreSqlDialect {
}

fn is_identifier_start(&self, ch: char) -> bool {
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
// We don't yet support identifiers beginning with "letters with
// diacritical marks"
ch.is_alphabetic() || ch == '_'
ch.is_alphabetic() || ch == '_' ||
// PostgreSQL supports non-ASCII Unicode characters in identifiers.
!ch.is_ascii()
}

fn is_identifier_part(&self, ch: char) -> bool {
Expand Down
4 changes: 2 additions & 2 deletions src/dialect/redshift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ impl Dialect for RedshiftSqlDialect {
}

fn is_identifier_start(&self, ch: char) -> bool {
// Extends Postgres dialect with sharp and UTF-8 multibyte chars
// UTF-8 multibyte characters are supported in identifiers via the PostgreSqlDialect.
// https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' || !ch.is_ascii()
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#'
}

fn is_identifier_part(&self, ch: char) -> bool {
Expand Down
13 changes: 11 additions & 2 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11151,9 +11151,7 @@ fn parse_non_latin_identifiers() {
let supported_dialects = TestedDialects::new(vec![
Box::new(GenericDialect {}),
Box::new(DuckDbDialect {}),
Box::new(PostgreSqlDialect {}),
Box::new(MsSqlDialect {}),
Box::new(MySqlDialect {}),
]);
assert!(supported_dialects
.parse_sql_statements("SELECT 💝 FROM table1")
Expand Down Expand Up @@ -16147,3 +16145,14 @@ fn test_identifier_unicode_support() {
]);
let _ = dialects.verified_stmt(sql);
}

// Checks that an identifier whose first character is non-ASCII (here an
// emoji) is accepted both as a column name and as an alias by the MySQL,
// Redshift and PostgreSQL dialects.
#[test]
fn test_identifier_unicode_start() {
    let unicode_dialects = TestedDialects::new(vec![
        Box::new(MySqlDialect {}),
        Box::new(RedshiftSqlDialect {}),
        Box::new(PostgreSqlDialect {}),
    ]);
    let query = r#"SELECT 💝phone AS 💝 FROM customers"#;
    // The returned statement is intentionally discarded; only the
    // verification performed by `verified_stmt` matters here.
    let _ = unicode_dialects.verified_stmt(query);
}