Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4191,8 +4191,9 @@ impl<'a> Parser<'a> {
match token.token {
Token::Word(Word {
value,
// path segments in SF dot notation can be unquoted or double-quoted
quote_style: quote_style @ (Some('"') | None),
// path segments in SF dot notation can be unquoted or double-quoted;
// Databricks also supports backtick-quoted identifiers
quote_style: quote_style @ (Some('"') | Some('`') | None),
// some experimentation suggests that snowflake permits
// any keyword here unquoted.
keyword: _,
Expand All @@ -4210,6 +4211,15 @@ impl<'a> Parser<'a> {
}
}

/// Parses the key of a bracketed JSON path element, i.e. the part between
/// `[` and `]`.
///
/// Returns [`Expr::Wildcard`] when the key is a lone `*` (the Databricks
/// `[*]` wildcard accessor); any other key is delegated to
/// [`Parser::parse_expr`]. Neither bracket is handled here: the callers
/// visible in this file expect the closing `]` themselves after this returns.
///
/// # Errors
///
/// Propagates whatever `ParserError` `parse_expr` returns for a non-wildcard key.
fn parse_json_path_bracket_key(&mut self) -> Result<Expr, ParserError> {
    // Anything other than `*` is an ordinary expression key.
    if !self.consume_token(&Token::Mul) {
        return self.parse_expr();
    }

    // Databricks supports [*] wildcard accessor
    Ok(Expr::Wildcard(AttachedToken::empty()))
}

fn parse_json_access(&mut self, expr: Expr) -> Result<Expr, ParserError> {
let path = self.parse_json_path()?;
Ok(Expr::JsonAccess {
Expand All @@ -4223,13 +4233,26 @@ impl<'a> Parser<'a> {
loop {
match self.next_token().token {
Token::Colon if path.is_empty() => {
path.push(self.parse_json_path_object_key()?);
if self.peek_token_ref().token == Token::LBracket {
// A bracket element directly after the colon, e.g. `raw:['field']`.
// Push an empty Dot so the display re-emits the leading `:` for syntax roundtrip.
path.push(JsonPathElem::Dot {
key: String::new(),
quoted: false,
});
self.next_token();
let key = self.parse_json_path_bracket_key()?;
self.expect_token(&Token::RBracket)?;
path.push(JsonPathElem::Bracket { key });
Comment thread
whirlun marked this conversation as resolved.
Outdated
} else {
path.push(self.parse_json_path_object_key()?);
}
}
Token::Period if !path.is_empty() => {
path.push(self.parse_json_path_object_key()?);
}
Token::LBracket => {
let key = self.parse_expr()?;
let key = self.parse_json_path_bracket_key()?;
Comment thread
whirlun marked this conversation as resolved.
Outdated
self.expect_token(&Token::RBracket)?;

path.push(JsonPathElem::Bracket { key });
Expand Down
147 changes: 147 additions & 0 deletions tests/sqlparser_databricks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -600,3 +600,150 @@ fn parse_databricks_struct_type() {
_ => unreachable!(),
}
}

// https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html
Comment thread
whirlun marked this conversation as resolved.
Outdated
/// Exercises the `:` JSON path accessor through the `databricks()` test helper.
///
/// The first half feeds whole `SELECT` statements to the statement-level
/// helpers (`verified_only_select` / `one_statement_parses_to`); the second
/// half pins the exact `Expr::JsonAccess` tree returned by `verified_expr`
/// for a handful of representative paths.
#[test]
fn parse_databricks_json_accessor() {
    // Basic colon accessor — unquoted field names are case-insensitive
    databricks().verified_only_select("SELECT raw:owner, RAW:owner FROM store_data");

    // Unquoted field access is case-insensitive; bracket notation is case-sensitive.
    databricks().verified_only_select(
        "SELECT raw:OWNER AS case_insensitive, raw:['OWNER'] AS case_sensitive FROM store_data",
    );

    // Backtick-quoted keys (Databricks delimited identifiers) normalise to double-quoted output.
    databricks().one_statement_parses_to(
        "SELECT raw:`zip code`, raw:`Zip Code`, raw:['fb:testid'] FROM store_data",
        r#"SELECT raw:"zip code", raw:"Zip Code", raw:['fb:testid'] FROM store_data"#,
    );

    // Dot notation
    databricks().verified_only_select("SELECT raw:store.bicycle FROM store_data");

    // String-key bracket notation after a dot segment
    databricks()
        .verified_only_select("SELECT raw:store['bicycle'], raw:store['BICYCLE'] FROM store_data");

    // Integer-index bracket notation
    databricks()
        .verified_only_select("SELECT raw:store.fruit[0], raw:store.fruit[1] FROM store_data");

    // Wildcard [*] — including chained and mixed positions
    databricks().verified_only_select(
        "SELECT raw:store.basket[*], raw:store.basket[*][0] AS first_of_baskets, \
         raw:store.basket[0][*] AS first_basket, raw:store.basket[*][*] AS all_elements_flattened, \
         raw:store.basket[0][2].b AS subfield FROM store_data",
    );

    // Dot access following a wildcard bracket
    databricks().verified_only_select("SELECT raw:store.book[*].isbn FROM store_data");

    // Double-colon cast — type keyword normalises to upper case
    databricks().one_statement_parses_to(
        "SELECT raw:store.bicycle.price::double FROM store_data",
        "SELECT raw:store.bicycle.price::DOUBLE FROM store_data",
    );

    // --- AST structure assertions ---

    // Single-segment colon access: one unquoted Dot element
    assert_eq!(
        databricks().verified_expr("raw:owner"),
        Expr::JsonAccess {
            value: Box::new(Expr::Identifier(Ident::new("raw"))),
            path: JsonPath {
                path: vec![JsonPathElem::Dot {
                    key: "owner".to_owned(),
                    quoted: false,
                }],
            },
        }
    );

    // Multi-level dot access
    assert_eq!(
        databricks().verified_expr("raw:store.bicycle"),
        Expr::JsonAccess {
            value: Box::new(Expr::Identifier(Ident::new("raw"))),
            path: JsonPath {
                path: vec![
                    JsonPathElem::Dot {
                        key: "store".to_owned(),
                        quoted: false,
                    },
                    JsonPathElem::Dot {
                        key: "bicycle".to_owned(),
                        quoted: false,
                    },
                ],
            },
        }
    );

    // Dot path followed by an integer-index bracket
    assert_eq!(
        databricks().verified_expr("raw:store.fruit[0]"),
        Expr::JsonAccess {
            value: Box::new(Expr::Identifier(Ident::new("raw"))),
            path: JsonPath {
                path: vec![
                    JsonPathElem::Dot {
                        key: "store".to_owned(),
                        quoted: false,
                    },
                    JsonPathElem::Dot {
                        key: "fruit".to_owned(),
                        quoted: false,
                    },
                    JsonPathElem::Bracket {
                        key: Expr::value(number("0")),
                    },
                ],
            },
        }
    );

    // [*] is stored as Expr::Wildcard inside a Bracket element
    assert_eq!(
        databricks().verified_expr("raw:store.basket[*]"),
        Expr::JsonAccess {
            value: Box::new(Expr::Identifier(Ident::new("raw"))),
            path: JsonPath {
                path: vec![
                    JsonPathElem::Dot {
                        key: "store".to_owned(),
                        quoted: false,
                    },
                    JsonPathElem::Dot {
                        key: "basket".to_owned(),
                        quoted: false,
                    },
                    JsonPathElem::Bracket {
                        key: Expr::Wildcard(AttachedToken::empty()),
                    },
                ],
            },
        }
    );

    // raw:['OWNER'] — bracket directly after the colon. An empty-key sentinel Dot is prepended
    // so that the display re-emits the leading `:`, enabling a correct round-trip.
    assert_eq!(
        databricks().verified_expr("raw:['OWNER']"),
        Expr::JsonAccess {
            value: Box::new(Expr::Identifier(Ident::new("raw"))),
            path: JsonPath {
                path: vec![
                    JsonPathElem::Dot {
                        key: String::new(),
                        quoted: false,
                    },
                    JsonPathElem::Bracket {
                        key: Expr::value(Value::SingleQuotedString("OWNER".to_owned())),
                    },
                ],
            },
        }
    );
}