Skip to content

Commit eda86d8

Browse files
authored
Add support for arbitrary map access expr (apache#1179)
1 parent 127be97 commit eda86d8

5 files changed

Lines changed: 194 additions & 116 deletions

File tree

src/ast/mod.rs

Lines changed: 36 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -374,6 +374,40 @@ pub enum CastFormat {
374374
ValueAtTimeZone(Value, Value),
375375
}
376376

377+
/// Represents the syntax/style used in a map access.
378+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
379+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
380+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
381+
pub enum MapAccessSyntax {
382+
/// Access using bracket notation. `mymap[mykey]`
383+
Bracket,
384+
/// Access using period notation. `mymap.mykey`
385+
Period,
386+
}
387+
388+
/// Expression used to access a value in a nested structure.
389+
///
390+
/// Example: `SAFE_OFFSET(0)` in
391+
/// ```sql
392+
/// SELECT mymap[SAFE_OFFSET(0)];
393+
/// ```
394+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
395+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
396+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
397+
pub struct MapAccessKey {
398+
pub key: Expr,
399+
pub syntax: MapAccessSyntax,
400+
}
401+
402+
impl fmt::Display for MapAccessKey {
403+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
404+
match self.syntax {
405+
MapAccessSyntax::Bracket => write!(f, "[{}]", self.key),
406+
MapAccessSyntax::Period => write!(f, ".{}", self.key),
407+
}
408+
}
409+
}
410+
377411
/// An SQL expression of any type.
378412
///
379413
/// The parser does not distinguish between expressions of different types
@@ -638,7 +672,7 @@ pub enum Expr {
638672
/// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
639673
MapAccess {
640674
column: Box<Expr>,
641-
keys: Vec<Expr>,
675+
keys: Vec<MapAccessKey>,
642676
},
643677
/// Scalar function call e.g. `LEFT(foo, 5)`
644678
Function(Function),
@@ -774,15 +808,7 @@ impl fmt::Display for Expr {
774808
match self {
775809
Expr::Identifier(s) => write!(f, "{s}"),
776810
Expr::MapAccess { column, keys } => {
777-
write!(f, "{column}")?;
778-
for k in keys {
779-
match k {
780-
k @ Expr::Value(Value::Number(_, _)) => write!(f, "[{k}]")?,
781-
Expr::Value(Value::SingleQuotedString(s)) => write!(f, "[\"{s}\"]")?,
782-
_ => write!(f, "[{k}]")?,
783-
}
784-
}
785-
Ok(())
811+
write!(f, "{column}{}", display_separated(keys, ""))
786812
}
787813
Expr::Wildcard => f.write_str("*"),
788814
Expr::QualifiedWildcard(prefix) => write!(f, "{}.*", prefix),

src/parser/mod.rs

Lines changed: 36 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -2608,23 +2608,43 @@ impl<'a> Parser<'a> {
26082608
}
26092609

26102610
pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr, ParserError> {
2611-
let key = self.parse_map_key()?;
2612-
let tok = self.consume_token(&Token::RBracket);
2613-
debug!("Tok: {}", tok);
2614-
let mut key_parts: Vec<Expr> = vec![key];
2615-
while self.consume_token(&Token::LBracket) {
2616-
let key = self.parse_map_key()?;
2617-
let tok = self.consume_token(&Token::RBracket);
2618-
debug!("Tok: {}", tok);
2619-
key_parts.push(key);
2620-
}
2621-
match expr {
2622-
e @ Expr::Identifier(_) | e @ Expr::CompoundIdentifier(_) => Ok(Expr::MapAccess {
2623-
column: Box::new(e),
2624-
keys: key_parts,
2625-
}),
2626-
_ => Ok(expr),
2611+
let key = self.parse_expr()?;
2612+
self.expect_token(&Token::RBracket)?;
2613+
2614+
let mut keys = vec![MapAccessKey {
2615+
key,
2616+
syntax: MapAccessSyntax::Bracket,
2617+
}];
2618+
loop {
2619+
let key = match self.peek_token().token {
2620+
Token::LBracket => {
2621+
self.next_token(); // consume `[`
2622+
let key = self.parse_expr()?;
2623+
self.expect_token(&Token::RBracket)?;
2624+
MapAccessKey {
2625+
key,
2626+
syntax: MapAccessSyntax::Bracket,
2627+
}
2628+
}
2629+
// Access on BigQuery nested and repeated expressions can
2630+
// mix notations in the same expression.
2631+
// https://cloud.google.com/bigquery/docs/nested-repeated#query_nested_and_repeated_columns
2632+
Token::Period if dialect_of!(self is BigQueryDialect) => {
2633+
self.next_token(); // consume `.`
2634+
MapAccessKey {
2635+
key: self.parse_expr()?,
2636+
syntax: MapAccessSyntax::Period,
2637+
}
2638+
}
2639+
_ => break,
2640+
};
2641+
keys.push(key);
26272642
}
2643+
2644+
Ok(Expr::MapAccess {
2645+
column: Box::new(expr),
2646+
keys,
2647+
})
26282648
}
26292649

26302650
/// Parses the parens following the `[ NOT ] IN` operator
@@ -6329,31 +6349,6 @@ impl<'a> Parser<'a> {
63296349
}
63306350
}
63316351

6332-
/// Parse a map key string
6333-
pub fn parse_map_key(&mut self) -> Result<Expr, ParserError> {
6334-
let next_token = self.next_token();
6335-
match next_token.token {
6336-
// handle bigquery offset subscript operator which overlaps with OFFSET operator
6337-
Token::Word(Word { value, keyword, .. })
6338-
if (dialect_of!(self is BigQueryDialect) && keyword == Keyword::OFFSET) =>
6339-
{
6340-
self.parse_function(ObjectName(vec![Ident::new(value)]))
6341-
}
6342-
Token::Word(Word { value, keyword, .. }) if (keyword == Keyword::NoKeyword) => {
6343-
if self.peek_token() == Token::LParen {
6344-
return self.parse_function(ObjectName(vec![Ident::new(value)]));
6345-
}
6346-
Ok(Expr::Value(Value::SingleQuotedString(value)))
6347-
}
6348-
Token::SingleQuotedString(s) => Ok(Expr::Value(Value::SingleQuotedString(s))),
6349-
#[cfg(not(feature = "bigdecimal"))]
6350-
Token::Number(s, _) => Ok(Expr::Value(Value::Number(s, false))),
6351-
#[cfg(feature = "bigdecimal")]
6352-
Token::Number(s, _) => Ok(Expr::Value(Value::Number(s.parse().unwrap(), false))),
6353-
_ => self.expected("literal string, number or function", next_token),
6354-
}
6355-
}
6356-
63576352
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
63586353
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
63596354
let (ty, trailing_bracket) = self.parse_data_type_helper()?;

tests/sqlparser_bigquery.rs

Lines changed: 40 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1402,39 +1402,48 @@ fn bigquery_and_generic() -> TestedDialects {
14021402
}
14031403

14041404
#[test]
1405-
fn parse_map_access_offset() {
1406-
let sql = "SELECT d[offset(0)]";
1407-
let _select = bigquery().verified_only_select(sql);
1408-
assert_eq!(
1409-
_select.projection[0],
1410-
SelectItem::UnnamedExpr(Expr::MapAccess {
1411-
column: Box::new(Expr::Identifier(Ident {
1412-
value: "d".to_string(),
1413-
quote_style: None,
1414-
})),
1415-
keys: vec![Expr::Function(Function {
1416-
name: ObjectName(vec!["offset".into()]),
1417-
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
1418-
number("0")
1419-
))),],
1420-
null_treatment: None,
1421-
filter: None,
1422-
over: None,
1423-
distinct: false,
1424-
special: false,
1425-
order_by: vec![],
1426-
})],
1427-
})
1428-
);
1405+
fn parse_map_access_expr() {
1406+
let sql = "users[-1][safe_offset(2)].a.b";
1407+
let expr = bigquery().verified_expr(sql);
14291408

1430-
// test other operators
1431-
for sql in [
1432-
"SELECT d[SAFE_OFFSET(0)]",
1433-
"SELECT d[ORDINAL(0)]",
1434-
"SELECT d[SAFE_ORDINAL(0)]",
1435-
] {
1436-
bigquery().verified_only_select(sql);
1409+
fn map_access_key(key: Expr, syntax: MapAccessSyntax) -> MapAccessKey {
1410+
MapAccessKey { key, syntax }
14371411
}
1412+
let expected = Expr::MapAccess {
1413+
column: Expr::Identifier(Ident::new("users")).into(),
1414+
keys: vec![
1415+
map_access_key(
1416+
Expr::UnaryOp {
1417+
op: UnaryOperator::Minus,
1418+
expr: Expr::Value(number("1")).into(),
1419+
},
1420+
MapAccessSyntax::Bracket,
1421+
),
1422+
map_access_key(
1423+
Expr::Function(Function {
1424+
name: ObjectName(vec![Ident::new("safe_offset")]),
1425+
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
1426+
number("2"),
1427+
)))],
1428+
filter: None,
1429+
null_treatment: None,
1430+
over: None,
1431+
distinct: false,
1432+
special: false,
1433+
order_by: vec![],
1434+
}),
1435+
MapAccessSyntax::Bracket,
1436+
),
1437+
map_access_key(
1438+
Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("b")]),
1439+
MapAccessSyntax::Period,
1440+
),
1441+
],
1442+
};
1443+
assert_eq!(expr, expected);
1444+
1445+
let sql = "SELECT myfunc()[-1].a[SAFE_OFFSET(2)].b";
1446+
bigquery().verified_only_select(sql);
14381447
}
14391448

14401449
#[test]

tests/sqlparser_clickhouse.rs

Lines changed: 40 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -39,23 +39,26 @@ fn parse_map_access_expr() {
3939
value: "string_values".to_string(),
4040
quote_style: None,
4141
})),
42-
keys: vec![Expr::Function(Function {
43-
name: ObjectName(vec!["indexOf".into()]),
44-
args: vec![
45-
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new(
46-
"string_names"
47-
)))),
48-
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
49-
Value::SingleQuotedString("endpoint".to_string())
50-
))),
51-
],
52-
null_treatment: None,
53-
filter: None,
54-
over: None,
55-
distinct: false,
56-
special: false,
57-
order_by: vec![],
58-
})],
42+
keys: vec![MapAccessKey {
43+
key: Expr::Function(Function {
44+
name: ObjectName(vec!["indexOf".into()]),
45+
args: vec![
46+
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
47+
Ident::new("string_names")
48+
))),
49+
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
50+
Value::SingleQuotedString("endpoint".to_string())
51+
))),
52+
],
53+
null_treatment: None,
54+
filter: None,
55+
over: None,
56+
distinct: false,
57+
special: false,
58+
order_by: vec![],
59+
}),
60+
syntax: MapAccessSyntax::Bracket
61+
}],
5962
})],
6063
into: None,
6164
from: vec![TableWithJoins {
@@ -80,23 +83,26 @@ fn parse_map_access_expr() {
8083
right: Box::new(BinaryOp {
8184
left: Box::new(MapAccess {
8285
column: Box::new(Identifier(Ident::new("string_value"))),
83-
keys: vec![Expr::Function(Function {
84-
name: ObjectName(vec![Ident::new("indexOf")]),
85-
args: vec![
86-
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
87-
Ident::new("string_name")
88-
))),
89-
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
90-
Value::SingleQuotedString("app".to_string())
91-
))),
92-
],
93-
null_treatment: None,
94-
filter: None,
95-
over: None,
96-
distinct: false,
97-
special: false,
98-
order_by: vec![],
99-
})],
86+
keys: vec![MapAccessKey {
87+
key: Expr::Function(Function {
88+
name: ObjectName(vec![Ident::new("indexOf")]),
89+
args: vec![
90+
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(
91+
Ident::new("string_name")
92+
))),
93+
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
94+
Value::SingleQuotedString("app".to_string())
95+
))),
96+
],
97+
null_treatment: None,
98+
filter: None,
99+
over: None,
100+
distinct: false,
101+
special: false,
102+
order_by: vec![],
103+
}),
104+
syntax: MapAccessSyntax::Bracket
105+
}],
100106
}),
101107
op: BinaryOperator::NotEq,
102108
right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))),

tests/sqlparser_common.rs

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8643,3 +8643,45 @@ fn test_buffer_reuse() {
86438643
p.parse_statements().unwrap();
86448644
let _ = p.into_tokens();
86458645
}
8646+
8647+
#[test]
8648+
fn parse_map_access_expr() {
8649+
let sql = "users[-1][safe_offset(2)]";
8650+
let dialects = TestedDialects {
8651+
dialects: vec![Box::new(BigQueryDialect {}), Box::new(ClickHouseDialect {})],
8652+
options: None,
8653+
};
8654+
let expr = dialects.verified_expr(sql);
8655+
let expected = Expr::MapAccess {
8656+
column: Expr::Identifier(Ident::new("users")).into(),
8657+
keys: vec![
8658+
MapAccessKey {
8659+
key: Expr::UnaryOp {
8660+
op: UnaryOperator::Minus,
8661+
expr: Expr::Value(number("1")).into(),
8662+
},
8663+
syntax: MapAccessSyntax::Bracket,
8664+
},
8665+
MapAccessKey {
8666+
key: Expr::Function(Function {
8667+
name: ObjectName(vec![Ident::new("safe_offset")]),
8668+
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
8669+
number("2"),
8670+
)))],
8671+
filter: None,
8672+
null_treatment: None,
8673+
over: None,
8674+
distinct: false,
8675+
special: false,
8676+
order_by: vec![],
8677+
}),
8678+
syntax: MapAccessSyntax::Bracket,
8679+
},
8680+
],
8681+
};
8682+
assert_eq!(expr, expected);
8683+
8684+
for sql in ["users[1]", "a[array_length(b) - 1 + 2][c + 3][d * 4]"] {
8685+
let _ = dialects.verified_expr(sql);
8686+
}
8687+
}

0 commit comments

Comments
 (0)