Skip to content

Commit c002910

Browse files
committed
MySQL: Add support for DEFAULT CHARACTER SET in CREATE DATABASE
Parse MySQL-style [DEFAULT] CHARACTER SET and [DEFAULT] COLLATE options in CREATE DATABASE statements. This adds two new fields to CreateDatabase: default_charset and default_collation. Supports the following syntax variants: - DEFAULT CHARACTER SET [=] charset_name - CHARACTER SET [=] charset_name - DEFAULT CHARSET [=] charset_name - CHARSET [=] charset_name - DEFAULT COLLATE [=] collation_name - COLLATE [=] collation_name
1 parent 3ac5670 commit c002910

4 files changed

Lines changed: 165 additions & 0 deletions

File tree

src/ast/helpers/stmt_create_database.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,14 @@ pub struct CreateDatabaseBuilder {
8585
pub storage_serialization_policy: Option<StorageSerializationPolicy>,
8686
/// Optional comment attached to the database.
8787
pub comment: Option<String>,
88+
/// Optional default character set (MySQL).
89+
///
90+
/// <https://dev.mysql.com/doc/refman/8.4/en/create-database.html>
91+
pub default_charset: Option<String>,
92+
/// Optional default collation (MySQL).
93+
///
94+
/// <https://dev.mysql.com/doc/refman/8.4/en/create-database.html>
95+
pub default_collation: Option<String>,
8896
/// Optional catalog sync configuration.
8997
pub catalog_sync: Option<String>,
9098
/// Optional catalog sync namespace mode.
@@ -120,6 +128,8 @@ impl CreateDatabaseBuilder {
120128
default_ddl_collation: None,
121129
storage_serialization_policy: None,
122130
comment: None,
131+
default_charset: None,
132+
default_collation: None,
123133
catalog_sync: None,
124134
catalog_sync_namespace_mode: None,
125135
catalog_sync_namespace_flatten_delimiter: None,
@@ -218,6 +228,18 @@ impl CreateDatabaseBuilder {
218228
self
219229
}
220230

231+
/// Set the default character set for the database.
232+
pub fn default_charset(mut self, default_charset: Option<String>) -> Self {
233+
self.default_charset = default_charset;
234+
self
235+
}
236+
237+
/// Set the default collation for the database.
238+
pub fn default_collation(mut self, default_collation: Option<String>) -> Self {
239+
self.default_collation = default_collation;
240+
self
241+
}
242+
221243
/// Set the catalog sync for the database.
222244
pub fn catalog_sync(mut self, catalog_sync: Option<String>) -> Self {
223245
self.catalog_sync = catalog_sync;
@@ -272,6 +294,8 @@ impl CreateDatabaseBuilder {
272294
default_ddl_collation: self.default_ddl_collation,
273295
storage_serialization_policy: self.storage_serialization_policy,
274296
comment: self.comment,
297+
default_charset: self.default_charset,
298+
default_collation: self.default_collation,
275299
catalog_sync: self.catalog_sync,
276300
catalog_sync_namespace_mode: self.catalog_sync_namespace_mode,
277301
catalog_sync_namespace_flatten_delimiter: self.catalog_sync_namespace_flatten_delimiter,
@@ -302,6 +326,8 @@ impl TryFrom<Statement> for CreateDatabaseBuilder {
302326
default_ddl_collation,
303327
storage_serialization_policy,
304328
comment,
329+
default_charset,
330+
default_collation,
305331
catalog_sync,
306332
catalog_sync_namespace_mode,
307333
catalog_sync_namespace_flatten_delimiter,
@@ -323,6 +349,8 @@ impl TryFrom<Statement> for CreateDatabaseBuilder {
323349
default_ddl_collation,
324350
storage_serialization_policy,
325351
comment,
352+
default_charset,
353+
default_collation,
326354
catalog_sync,
327355
catalog_sync_namespace_mode,
328356
catalog_sync_namespace_flatten_delimiter,

src/ast/mod.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4226,6 +4226,10 @@ pub enum Statement {
42264226
storage_serialization_policy: Option<StorageSerializationPolicy>,
42274227
/// Optional comment.
42284228
comment: Option<String>,
4229+
/// Optional default character set (MySQL).
4230+
default_charset: Option<String>,
4231+
/// Optional default collation (MySQL).
4232+
default_collation: Option<String>,
42294233
/// Optional catalog sync identifier.
42304234
catalog_sync: Option<String>,
42314235
/// Catalog sync namespace mode.
@@ -5080,6 +5084,8 @@ impl fmt::Display for Statement {
50805084
default_ddl_collation,
50815085
storage_serialization_policy,
50825086
comment,
5087+
default_charset,
5088+
default_collation,
50835089
catalog_sync,
50845090
catalog_sync_namespace_mode,
50855091
catalog_sync_namespace_flatten_delimiter,
@@ -5139,6 +5145,14 @@ impl fmt::Display for Statement {
51395145
write!(f, " COMMENT = '{comment}'")?;
51405146
}
51415147

5148+
if let Some(charset) = default_charset {
5149+
write!(f, " DEFAULT CHARACTER SET {charset}")?;
5150+
}
5151+
5152+
if let Some(collation) = default_collation {
5153+
write!(f, " DEFAULT COLLATE {collation}")?;
5154+
}
5155+
51425156
if let Some(sync) = catalog_sync {
51435157
write!(f, " CATALOG_SYNC = '{sync}'")?;
51445158
}

src/parser/mod.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5330,6 +5330,34 @@ impl<'a> Parser<'a> {
53305330
None
53315331
};
53325332

5333+
// Parse MySQL-style [DEFAULT] CHARACTER SET and [DEFAULT] COLLATE options
5334+
//
5335+
// Note: The docs only mention `CHARACTER SET`, but `CHARSET` is also supported.
5336+
// Furthermore, MySQL will only accept one character set, raising an error if there is more
5337+
// than one, but will accept multiple collations and use the last one.
5338+
//
5339+
// <https://dev.mysql.com/doc/refman/8.4/en/create-database.html>
5340+
let mut default_charset = None;
5341+
let mut default_collation = None;
5342+
loop {
5343+
let has_default = self.parse_keyword(Keyword::DEFAULT);
5344+
if default_charset.is_none() && self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET])
5345+
|| self.parse_keyword(Keyword::CHARSET)
5346+
{
5347+
let _ = self.consume_token(&Token::Eq);
5348+
default_charset = Some(self.parse_identifier()?.value);
5349+
} else if self.parse_keyword(Keyword::COLLATE) {
5350+
let _ = self.consume_token(&Token::Eq);
5351+
default_collation = Some(self.parse_identifier()?.value);
5352+
} else if has_default {
5353+
// DEFAULT keyword not followed by CHARACTER SET, CHARSET, or COLLATE
5354+
self.prev_token();
5355+
break;
5356+
} else {
5357+
break;
5358+
}
5359+
}
5360+
53335361
Ok(Statement::CreateDatabase {
53345362
db_name,
53355363
if_not_exists: ine,
@@ -5346,6 +5374,8 @@ impl<'a> Parser<'a> {
53465374
default_ddl_collation: None,
53475375
storage_serialization_policy: None,
53485376
comment: None,
5377+
default_charset,
5378+
default_collation,
53495379
catalog_sync: None,
53505380
catalog_sync_namespace_mode: None,
53515381
catalog_sync_namespace_flatten_delimiter: None,

tests/sqlparser_mysql.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4432,3 +4432,96 @@ fn test_optimizer_hints() {
44324432
DELETE /*+ foobar */ FROM table_name",
44334433
);
44344434
}
4435+
4436+
#[test]
4437+
fn parse_create_database_with_charset() {
4438+
// Test DEFAULT CHARACTER SET without = sign (canonical form, round-trips unchanged)
4439+
mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4");
4440+
4441+
// Test DEFAULT CHARACTER SET with = sign (normalized to the form without it)
4442+
mysql_and_generic().one_statement_parses_to(
4443+
"CREATE DATABASE mydb DEFAULT CHARACTER SET = utf8mb4",
4444+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
4445+
);
4446+
4447+
// Test CHARACTER SET without DEFAULT
4448+
mysql_and_generic().one_statement_parses_to(
4449+
"CREATE DATABASE mydb CHARACTER SET utf8mb4",
4450+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
4451+
);
4452+
4453+
// Test CHARSET shorthand
4454+
mysql_and_generic().one_statement_parses_to(
4455+
"CREATE DATABASE mydb CHARSET utf8mb4",
4456+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
4457+
);
4458+
4459+
// Test DEFAULT CHARSET shorthand
4460+
mysql_and_generic().one_statement_parses_to(
4461+
"CREATE DATABASE mydb DEFAULT CHARSET utf8mb4",
4462+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
4463+
);
4464+
4465+
// Test DEFAULT COLLATE
4466+
mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci");
4467+
4468+
// Test COLLATE without DEFAULT
4469+
mysql_and_generic().one_statement_parses_to(
4470+
"CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci",
4471+
"CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci",
4472+
);
4473+
4474+
// Test both CHARACTER SET and COLLATE together
4475+
mysql_and_generic().verified_stmt(
4476+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci",
4477+
);
4478+
4479+
// Test IF NOT EXISTS with CHARACTER SET
4480+
mysql_and_generic()
4481+
.verified_stmt("CREATE DATABASE IF NOT EXISTS mydb DEFAULT CHARACTER SET utf16");
4482+
4483+
// Test the exact syntax from the issue
4484+
mysql_and_generic().one_statement_parses_to(
4485+
"CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET = utf16",
4486+
"CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET utf16",
4487+
);
4488+
}
4489+
4490+
#[test]
4491+
fn parse_create_database_with_charset_errors() {
4492+
// Missing charset name after CHARACTER SET
4493+
assert!(mysql_and_generic()
4494+
.parse_sql_statements("CREATE DATABASE mydb DEFAULT CHARACTER SET")
4495+
.is_err());
4496+
4497+
// Missing charset name after CHARSET
4498+
assert!(mysql_and_generic()
4499+
.parse_sql_statements("CREATE DATABASE mydb CHARSET")
4500+
.is_err());
4501+
4502+
// Missing collation name after COLLATE
4503+
assert!(mysql_and_generic()
4504+
.parse_sql_statements("CREATE DATABASE mydb DEFAULT COLLATE")
4505+
.is_err());
4506+
4507+
// Equals sign but no value
4508+
assert!(mysql_and_generic()
4509+
.parse_sql_statements("CREATE DATABASE mydb CHARACTER SET =")
4510+
.is_err());
4511+
}
4512+
4513+
#[test]
4514+
fn parse_create_database_with_charset_option_ordering() {
4515+
// MySQL allows COLLATE before CHARACTER SET - output is normalized to CHARACTER SET first
4516+
// (matches MySQL's own SHOW CREATE DATABASE output order)
4517+
mysql_and_generic().one_statement_parses_to(
4518+
"CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci DEFAULT CHARACTER SET utf8mb4",
4519+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci",
4520+
);
4521+
4522+
// COLLATE first without DEFAULT keywords
4523+
mysql_and_generic().one_statement_parses_to(
4524+
"CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci CHARACTER SET utf8mb4",
4525+
"CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci",
4526+
);
4527+
}

0 commit comments

Comments
 (0)