Skip to content

Commit de2b2fa

Browse files
committed
MySQL: Support CAST(... AS ... ARRAY) syntax
MySQL has a special case in `CAST` parsing where the type can be followed by `ARRAY`. This is only used for creating multi-valued indexes in InnoDB, so it is only allowed in `CREATE TABLE` and other DDL statements when specifying keys. See the [docs]. Given those restrictions, we could be significantly more restrictive in parsing this, e.g. not parsing `ARRAY` unless we are in a key specification. Or, if there were such a thing as a suffix array type definition, we could parse it as a type. But as far as I know, that doesn't exist in any supported SQL dialect, and encountering `ARRAY` here is unambiguous. So it seemed simplest to be permissive and always parse it. The only downside I can see is that we are now adding a field to `Expr::Cast`.

[docs]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html#function_cast
1 parent 4de1ac9 commit de2b2fa

9 files changed

Lines changed: 78 additions & 9 deletions

File tree

src/ast/mod.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -951,6 +951,12 @@ pub enum Expr {
951951
kind: CastKind,
952952
expr: Box<Expr>,
953953
data_type: DataType,
954+
/// [MySQL] allows CAST(... AS type ARRAY) in functional index definitions for InnoDB
955+
/// multi-valued indices. It's not really a datatype, and is only allowed in `CAST` in key
956+
/// specifications, so it's a flag here.
957+
///
958+
/// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html#function_cast
959+
array: bool,
954960
/// Optional CAST(string_expression AS type FORMAT format_string_expression) as used by [BigQuery]
955961
///
956962
/// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax
@@ -1724,14 +1730,18 @@ impl fmt::Display for Expr {
17241730
kind,
17251731
expr,
17261732
data_type,
1733+
array,
17271734
format,
17281735
} => match kind {
17291736
CastKind::Cast => {
1737+
write!(f, "CAST({expr} AS {data_type}")?;
1738+
if *array {
1739+
write!(f, " ARRAY")?;
1740+
}
17301741
if let Some(format) = format {
1731-
write!(f, "CAST({expr} AS {data_type} FORMAT {format})")
1732-
} else {
1733-
write!(f, "CAST({expr} AS {data_type})")
1742+
write!(f, " FORMAT {format}")?;
17341743
}
1744+
write!(f, ")")
17351745
}
17361746
CastKind::TryCast => {
17371747
if let Some(format) = format {

src/ast/spans.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1539,6 +1539,7 @@ impl Spanned for Expr {
15391539
kind: _,
15401540
expr,
15411541
data_type: _,
1542+
array: _,
15421543
format: _,
15431544
} => expr.span(),
15441545
Expr::AtTimeZone {
@@ -2800,7 +2801,7 @@ WHERE id = 1
28002801
UPDATE SET target_table.description = source_table.description
28012802
28022803
WHEN MATCHED AND target_table.x != 'X' THEN DELETE
2803-
WHEN NOT MATCHED AND 1 THEN INSERT (product, quantity) ROW
2804+
WHEN NOT MATCHED AND 1 THEN INSERT (product, quantity) ROW
28042805
"#;
28052806

28062807
let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap();

src/parser/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2539,12 +2539,14 @@ impl<'a> Parser<'a> {
25392539
let expr = self.parse_expr()?;
25402540
self.expect_keyword_is(Keyword::AS)?;
25412541
let data_type = self.parse_data_type()?;
2542+
let array = self.parse_keyword(Keyword::ARRAY);
25422543
let format = self.parse_optional_cast_format()?;
25432544
self.expect_token(&Token::RParen)?;
25442545
Ok(Expr::Cast {
25452546
kind,
25462547
expr: Box::new(expr),
25472548
data_type,
2549+
array,
25482550
format,
25492551
})
25502552
}
@@ -3803,6 +3805,7 @@ impl<'a> Parser<'a> {
38033805
kind: CastKind::DoubleColon,
38043806
expr: Box::new(expr),
38053807
data_type: self.parse_data_type()?,
3808+
array: false,
38063809
format: None,
38073810
})
38083811
} else if Token::ExclamationMark == *tok && self.dialect.supports_factorial_operator() {
@@ -4041,6 +4044,7 @@ impl<'a> Parser<'a> {
40414044
kind: CastKind::DoubleColon,
40424045
expr: Box::new(expr),
40434046
data_type: self.parse_data_type()?,
4047+
array: false,
40444048
format: None,
40454049
})
40464050
}

tests/sqlparser_common.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3004,6 +3004,7 @@ fn parse_cast() {
30043004
kind: CastKind::Cast,
30053005
expr: Box::new(Expr::Identifier(Ident::new("id"))),
30063006
data_type: DataType::BigInt(None),
3007+
array: false,
30073008
format: None,
30083009
},
30093010
expr_from_projection(only(&select.projection))
@@ -3016,6 +3017,7 @@ fn parse_cast() {
30163017
kind: CastKind::Cast,
30173018
expr: Box::new(Expr::Identifier(Ident::new("id"))),
30183019
data_type: DataType::TinyInt(None),
3020+
array: false,
30193021
format: None,
30203022
},
30213023
expr_from_projection(only(&select.projection))
@@ -3047,6 +3049,7 @@ fn parse_cast() {
30473049
length: 50,
30483050
unit: None,
30493051
})),
3052+
array: false,
30503053
format: None,
30513054
},
30523055
expr_from_projection(only(&select.projection))
@@ -3059,6 +3062,7 @@ fn parse_cast() {
30593062
kind: CastKind::Cast,
30603063
expr: Box::new(Expr::Identifier(Ident::new("id"))),
30613064
data_type: DataType::Clob(None),
3065+
array: false,
30623066
format: None,
30633067
},
30643068
expr_from_projection(only(&select.projection))
@@ -3071,6 +3075,7 @@ fn parse_cast() {
30713075
kind: CastKind::Cast,
30723076
expr: Box::new(Expr::Identifier(Ident::new("id"))),
30733077
data_type: DataType::Clob(Some(50)),
3078+
array: false,
30743079
format: None,
30753080
},
30763081
expr_from_projection(only(&select.projection))
@@ -3083,6 +3088,7 @@ fn parse_cast() {
30833088
kind: CastKind::Cast,
30843089
expr: Box::new(Expr::Identifier(Ident::new("id"))),
30853090
data_type: DataType::Binary(Some(50)),
3091+
array: false,
30863092
format: None,
30873093
},
30883094
expr_from_projection(only(&select.projection))
@@ -3095,6 +3101,7 @@ fn parse_cast() {
30953101
kind: CastKind::Cast,
30963102
expr: Box::new(Expr::Identifier(Ident::new("id"))),
30973103
data_type: DataType::Varbinary(Some(BinaryLength::IntegerLength { length: 50 })),
3104+
array: false,
30983105
format: None,
30993106
},
31003107
expr_from_projection(only(&select.projection))
@@ -3107,6 +3114,7 @@ fn parse_cast() {
31073114
kind: CastKind::Cast,
31083115
expr: Box::new(Expr::Identifier(Ident::new("id"))),
31093116
data_type: DataType::Blob(None),
3117+
array: false,
31103118
format: None,
31113119
},
31123120
expr_from_projection(only(&select.projection))
@@ -3119,6 +3127,7 @@ fn parse_cast() {
31193127
kind: CastKind::Cast,
31203128
expr: Box::new(Expr::Identifier(Ident::new("id"))),
31213129
data_type: DataType::Blob(Some(50)),
3130+
array: false,
31223131
format: None,
31233132
},
31243133
expr_from_projection(only(&select.projection))
@@ -3131,6 +3140,7 @@ fn parse_cast() {
31313140
kind: CastKind::Cast,
31323141
expr: Box::new(Expr::Identifier(Ident::new("details"))),
31333142
data_type: DataType::JSONB,
3143+
array: false,
31343144
format: None,
31353145
},
31363146
expr_from_projection(only(&select.projection))
@@ -3146,6 +3156,7 @@ fn parse_try_cast() {
31463156
kind: CastKind::TryCast,
31473157
expr: Box::new(Expr::Identifier(Ident::new("id"))),
31483158
data_type: DataType::BigInt(None),
3159+
array: false,
31493160
format: None,
31503161
},
31513162
expr_from_projection(only(&select.projection))
@@ -6446,6 +6457,7 @@ fn interval_disallow_interval_expr_double_colon() {
64466457
fractional_seconds_precision: None,
64476458
})),
64486459
data_type: DataType::Text,
6460+
array: false,
64496461
format: None,
64506462
}
64516463
)
@@ -9161,6 +9173,7 @@ fn parse_double_colon_cast_at_timezone() {
91619173
.with_empty_span()
91629174
)),
91639175
data_type: DataType::Timestamp(None, TimezoneInfo::None),
9176+
array: false,
91649177
format: None
91659178
}),
91669179
time_zone: Box::new(Expr::Value(
@@ -13293,6 +13306,7 @@ fn test_dictionary_syntax() {
1329313306
(Value::SingleQuotedString("2023-04-01".to_owned())).with_empty_span(),
1329413307
)),
1329513308
data_type: DataType::Timestamp(None, TimezoneInfo::None),
13309+
array: false,
1329613310
format: None,
1329713311
}),
1329813312
},
@@ -13304,6 +13318,7 @@ fn test_dictionary_syntax() {
1330413318
(Value::SingleQuotedString("2023-04-05".to_owned())).with_empty_span(),
1330513319
)),
1330613320
data_type: DataType::Timestamp(None, TimezoneInfo::None),
13321+
array: false,
1330713322
format: None,
1330813323
}),
1330913324
},
@@ -13547,6 +13562,7 @@ fn test_extract_seconds_ok() {
1354713562
fields: None,
1354813563
precision: None
1354913564
},
13565+
array: false,
1355013566
format: None,
1355113567
}),
1355213568
}
@@ -13575,6 +13591,7 @@ fn test_extract_seconds_ok() {
1357513591
fields: None,
1357613592
precision: None,
1357713593
},
13594+
array: false,
1357813595
format: None,
1357913596
}),
1358013597
})],
@@ -13632,6 +13649,7 @@ fn test_extract_seconds_single_quote_ok() {
1363213649
fields: None,
1363313650
precision: None
1363413651
},
13652+
array: false,
1363513653
format: None,
1363613654
}),
1363713655
}

tests/sqlparser_databricks.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ fn data_type_timestamp_ntz() {
347347
"created_at".into()
348348
)))),
349349
data_type: DataType::TimestampNtz(None),
350+
array: false,
350351
format: None
351352
}
352353
);

tests/sqlparser_duckdb.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ fn test_duckdb_specific_int_types() {
380380
Value::Number("123".parse().unwrap(), false).with_empty_span()
381381
)),
382382
data_type: data_type.clone(),
383+
array: false,
383384
format: None,
384385
},
385386
expr_from_projection(&select.projection[0])

tests/sqlparser_mysql.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,25 @@ fn test_functional_key_part() {
874874
)),
875875
}),
876876
data_type: DataType::Unsigned,
877+
array: false,
878+
format: None,
879+
})),
880+
);
881+
assert_eq!(
882+
index_column(mysql_and_generic().verified_stmt(
883+
r#"CREATE TABLE t (jsoncol JSON, PRIMARY KEY ((CAST(col ->> '$.fields' AS UNSIGNED ARRAY)) ASC))"#
884+
)),
885+
Expr::Nested(Box::new(Expr::Cast {
886+
kind: CastKind::Cast,
887+
expr: Box::new(Expr::BinaryOp {
888+
left: Box::new(Expr::Identifier(Ident::new("col"))),
889+
op: BinaryOperator::LongArrow,
890+
right: Box::new(Expr::Value(
891+
Value::SingleQuotedString("$.fields".to_string()).with_empty_span()
892+
)),
893+
}),
894+
data_type: DataType::Unsigned,
895+
array: true,
877896
format: None,
878897
})),
879898
);
@@ -4096,6 +4115,14 @@ fn parse_cast_integers() {
40964115
.expect_err("CAST doesn't allow display width");
40974116
}
40984117

4118+
#[test]
4119+
fn parse_cast_array() {
4120+
mysql().verified_expr("CAST(foo AS SIGNED ARRAY)");
4121+
mysql()
4122+
.run_parser_method("CAST(foo AS ARRAY)", |p| p.parse_expr())
4123+
.expect_err("ARRAY alone is not a type");
4124+
}
4125+
40994126
#[test]
41004127
fn parse_match_against_with_alias() {
41014128
let sql = "SELECT tbl.ProjectID FROM surveys.tbl1 AS tbl WHERE MATCH (tbl.ReferenceID) AGAINST ('AAA' IN BOOLEAN MODE)";

tests/sqlparser_postgres.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1706,6 +1706,7 @@ fn parse_execute() {
17061706
(Value::Number("1337".parse().unwrap(), false)).with_empty_span()
17071707
)),
17081708
data_type: DataType::SmallInt(None),
1709+
array: false,
17091710
format: None
17101711
},
17111712
alias: None
@@ -1717,6 +1718,7 @@ fn parse_execute() {
17171718
(Value::Number("7331".parse().unwrap(), false)).with_empty_span()
17181719
)),
17191720
data_type: DataType::SmallInt(None),
1721+
array: false,
17201722
format: None
17211723
},
17221724
alias: None
@@ -2343,6 +2345,7 @@ fn parse_array_index_expr() {
23432345
))),
23442346
None
23452347
)),
2348+
array: false,
23462349
format: None,
23472350
}))),
23482351
access_chain: vec![
@@ -5570,6 +5573,7 @@ fn parse_at_time_zone() {
55705573
Value::SingleQuotedString("America/Los_Angeles".to_owned()).with_empty_span(),
55715574
)),
55725575
data_type: DataType::Text,
5576+
array: false,
55735577
format: None,
55745578
}),
55755579
}),
@@ -6386,6 +6390,7 @@ fn arrow_cast_precedence() {
63866390
(Value::SingleQuotedString("bar".to_string())).with_empty_span()
63876391
)),
63886392
data_type: DataType::Text,
6393+
array: false,
63896394
format: None,
63906395
}),
63916396
}

tests/sqlparser_snowflake.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,8 +1101,8 @@ fn parse_create_dynamic_table() {
11011101
" EXTERNAL_VOLUME='my_external_volume'",
11021102
" CATALOG='SNOWFLAKE'",
11031103
" BASE_LOCATION='my_iceberg_table'",
1104-
" TARGET_LAG='20 minutes'",
1105-
" WAREHOUSE=mywh",
1104+
" TARGET_LAG='20 minutes'",
1105+
" WAREHOUSE=mywh",
11061106
" AS SELECT product_id, product_name FROM staging_table"
11071107
));
11081108

@@ -1250,6 +1250,7 @@ fn parse_array() {
12501250
kind: CastKind::Cast,
12511251
expr: Box::new(Expr::Identifier(Ident::new("a"))),
12521252
data_type: DataType::Array(ArrayElemTypeDef::None),
1253+
array: false,
12531254
format: None,
12541255
},
12551256
expr_from_projection(only(&select.projection))
@@ -1460,8 +1461,6 @@ fn parse_semi_structured_data_traversal() {
14601461
Expr::JsonAccess {
14611462
value: Box::new(Expr::Cast {
14621463
kind: CastKind::DoubleColon,
1463-
data_type: DataType::Array(ArrayElemTypeDef::None),
1464-
format: None,
14651464
expr: Box::new(Expr::JsonAccess {
14661465
value: Box::new(Expr::Identifier(Ident::new("a"))),
14671466
path: JsonPath {
@@ -1470,7 +1469,10 @@ fn parse_semi_structured_data_traversal() {
14701469
quoted: false
14711470
}]
14721471
}
1473-
})
1472+
}),
1473+
data_type: DataType::Array(ArrayElemTypeDef::None),
1474+
array: false,
1475+
format: None,
14741476
}),
14751477
path: JsonPath {
14761478
path: vec![JsonPathElem::Bracket {

0 commit comments

Comments
 (0)