Skip to content

Commit ce320d0

Browse files
committed
Merge sigma origin/main into upstream/main for sync
# Conflicts: # src/ast/mod.rs # src/ast/spans.rs # src/lib.rs # src/parser/mod.rs # tests/sqlparser_snowflake.rs
2 parents 913cf0e + fdd7a07 commit ce320d0

File tree

14 files changed

+449
-32
lines changed

14 files changed

+449
-32
lines changed

.github/workflows/semgrep.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
name: Semgrep
2+
on:
3+
pull_request:
4+
workflow_dispatch:
5+
6+
jobs:
7+
semgrep:
8+
name: Run Semgrep
9+
runs-on: ubuntu-latest
10+
timeout-minutes: 30
11+
container:
12+
# A Docker image with Semgrep installed. Do not change this.
13+
image: returntocorp/semgrep
14+
if: (github.actor != 'dependabot[bot]')
15+
steps:
16+
- uses: actions/checkout@v4
17+
- run: semgrep ci
18+
env:
19+
SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN_PUBLIC }}

examples/cli.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ $ cargo run --example cli - [--dialectname]
4848

4949
let dialect: Box<dyn Dialect> = match std::env::args().nth(2).unwrap_or_default().as_ref() {
5050
"--ansi" => Box::new(AnsiDialect {}),
51+
"--databricks" => Box::new(DatabricksDialect {}),
5152
"--bigquery" => Box::new(BigQueryDialect {}),
5253
"--postgres" => Box::new(PostgreSqlDialect {}),
5354
"--ms" => Box::new(MsSqlDialect {}),

src/ast/mod.rs

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,9 @@ pub enum JsonPathElem {
646646
/// Accesses an object field or array element using bracket notation,
647647
/// e.g. `obj['foo']`.
648648
///
649+
/// Note that on Databricks this is *not* equivalent to dot notation: dot
650+
/// notation is case-insensitive, whereas bracket notation is case-sensitive.
651+
///
649652
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#bracket-notation>.
650653
Bracket {
651654
/// The expression used as the bracket key (string or numeric expression).
@@ -659,6 +662,11 @@ pub enum JsonPathElem {
659662
/// The expression used as the bracket key (string or numeric expression).
660663
key: Expr,
661664
},
665+
/// Accesses all elements in the given (generally array) element. Used for
666+
/// constructs like `foo:bar[*].baz`.
667+
///
668+
/// See <https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-json-path-expression#extract-values-from-arrays>
669+
AllElements,
662670
}
663671

664672
/// A JSON path.
@@ -669,18 +677,23 @@ pub enum JsonPathElem {
669677
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
670678
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
671679
pub struct JsonPath {
680+
/// True if the path should start with a colon. Some dialects (e.g. Snowflake) allow
681+
/// `a['b']`, whereas others (e.g. Databricks) require the colon even in this case
682+
/// (so `a:['b']`).
683+
pub has_colon: bool,
672684
/// Sequence of path elements that form the JSON path.
673685
pub path: Vec<JsonPathElem>,
674686
}
675687

676688
impl fmt::Display for JsonPath {
677689
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
690+
if self.has_colon {
691+
write!(f, ":")?;
692+
}
678693
for (i, elem) in self.path.iter().enumerate() {
679694
match elem {
680695
JsonPathElem::Dot { key, quoted } => {
681-
if i == 0 {
682-
write!(f, ":")?;
683-
} else {
696+
if i != 0 {
684697
write!(f, ".")?;
685698
}
686699

@@ -693,6 +706,9 @@ impl fmt::Display for JsonPath {
693706
JsonPathElem::Bracket { key } => {
694707
write!(f, "[{key}]")?;
695708
}
709+
JsonPathElem::AllElements => {
710+
write!(f, "[*]")?;
711+
}
696712
JsonPathElem::ColonBracket { key } => {
697713
write!(f, ":[{key}]")?;
698714
}
@@ -915,6 +931,13 @@ pub enum Expr {
915931
/// `true` when the `NOT` modifier is present.
916932
negated: bool,
917933
},
934+
/// XXX: This is not valid SQL syntax; it is a hack needed to support parameter substitution.
935+
/// `[ NOT ] IN <in_expr>`
936+
InExpr {
937+
expr: Box<Expr>,
938+
in_expr: Box<Expr>,
939+
negated: bool,
940+
},
918941
/// `[ NOT ] IN UNNEST(array_expression)`
919942
InUnnest {
920943
/// Left-hand expression to test for membership.
@@ -1719,6 +1742,17 @@ impl fmt::Display for Expr {
17191742
if *negated { "NOT " } else { "" },
17201743
subquery
17211744
),
1745+
Expr::InExpr {
1746+
expr,
1747+
in_expr,
1748+
negated,
1749+
} => write!(
1750+
f,
1751+
"{} {}IN {}",
1752+
expr,
1753+
if *negated { "NOT " } else { "" },
1754+
in_expr,
1755+
),
17221756
Expr::InUnnest {
17231757
expr,
17241758
array_expr,

src/ast/query.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1492,6 +1492,12 @@ pub enum TableFactor {
14921492
/// Optional table sample modifier
14931493
sample: Option<TableSampleKind>,
14941494
},
1495+
/// A pass-through query string that is not parsed.
1496+
/// This is useful when building or rewriting queries from a SQL string that is already known to be valid, so that it does not have to be parsed.
1497+
PassThroughQuery {
1498+
query: String,
1499+
alias: Option<TableAlias>,
1500+
},
14951501
/// `TABLE(<expr>)[ AS <alias> ]`
14961502
TableFunction {
14971503
/// Expression representing the table function call.
@@ -2253,6 +2259,13 @@ impl fmt::Display for TableFactor {
22532259
if let Some(alias) = alias {
22542260
write!(f, " {alias}")?;
22552261
}
2262+
Ok(())
2263+
}
2264+
TableFactor::PassThroughQuery { query, alias } => {
2265+
write!(f, "({query})")?;
2266+
if let Some(alias) = alias {
2267+
write!(f, " {alias}")?;
2268+
}
22562269
if let Some(TableSampleKind::AfterTableAlias(sample)) = sample {
22572270
write!(f, " {sample}")?;
22582271
}

src/ast/spans.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,11 @@ impl Spanned for Expr {
14791479
array_expr,
14801480
negated: _,
14811481
} => expr.span().union(&array_expr.span()),
1482+
Expr::InExpr {
1483+
expr,
1484+
in_expr,
1485+
negated: _,
1486+
} => expr.span().union(&in_expr.span()),
14821487
Expr::Between {
14831488
expr,
14841489
negated: _,
@@ -1787,7 +1792,7 @@ impl Spanned for FunctionArgumentClause {
17871792
/// see Spanned impl for JsonPathElem for more information
17881793
impl Spanned for JsonPath {
17891794
fn span(&self) -> Span {
1790-
let JsonPath { path } = self;
1795+
let JsonPath { path, has_colon: _ } = self;
17911796

17921797
union_spans(path.iter().map(|i| i.span()))
17931798
}
@@ -1797,11 +1802,13 @@ impl Spanned for JsonPath {
17971802
///
17981803
/// Missing spans:
17991804
/// - [JsonPathElem::Dot]
1805+
/// - [JsonPathElem::AllElements]
18001806
impl Spanned for JsonPathElem {
18011807
fn span(&self) -> Span {
18021808
match self {
18031809
JsonPathElem::Dot { .. } => Span::empty(),
18041810
JsonPathElem::Bracket { key } => key.span(),
1811+
JsonPathElem::AllElements => Span::empty(),
18051812
JsonPathElem::ColonBracket { key } => key.span(),
18061813
}
18071814
}
@@ -1952,6 +1959,8 @@ impl Spanned for TableFactor {
19521959
} => subquery
19531960
.span()
19541961
.union_opt(&alias.as_ref().map(|alias| alias.span())),
1962+
// This is usually created at runtime, so we don't have a span for it
1963+
TableFactor::PassThroughQuery { query: _, alias: _ } => Span::empty(),
19551964
TableFactor::TableFunction { expr, alias } => expr
19561965
.span()
19571966
.union_opt(&alias.as_ref().map(|alias| alias.span())),

src/dialect/databricks.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::dialect::Dialect;
18+
use crate::dialect::{Dialect, Precedence};
19+
use crate::parser::{Parser, ParserError};
20+
use crate::tokenizer::Token;
1921

2022
/// A [`Dialect`] for [Databricks SQL](https://www.databricks.com/)
2123
///
@@ -39,6 +41,19 @@ impl Dialect for DatabricksDialect {
3941
matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
4042
}
4143

44+
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
45+
let token = parser.peek_token();
46+
// : is used for JSON path access
47+
match token.token {
48+
Token::Colon => Some(Ok(self.prec_value(Precedence::Period))),
49+
_ => None,
50+
}
51+
}
52+
53+
fn supports_semi_structured_array_all_elements(&self) -> bool {
54+
true
55+
}
56+
4257
fn supports_filter_during_aggregation(&self) -> bool {
4358
true
4459
}
@@ -76,6 +91,11 @@ impl Dialect for DatabricksDialect {
7691
true
7792
}
7893

94+
// https://docs.databricks.com/aws/en/sql/language-manual/data-types/string-type#literals
95+
fn supports_string_literal_backslash_escape(&self) -> bool {
96+
true
97+
}
98+
7999
/// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-groupby.html>
80100
fn supports_group_by_with_modifier(&self) -> bool {
81101
true

src/dialect/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,6 +1207,11 @@ pub trait Dialect: Debug + Any {
12071207
false
12081208
}
12091209

1210+
/// Returns true if the dialect supports writing `[*]` to select all elements in a JSON array.
1211+
fn supports_semi_structured_array_all_elements(&self) -> bool {
1212+
false
1213+
}
1214+
12101215
/// Returns true if the specified keyword is reserved and cannot be
12111216
/// used as an identifier without special handling like quoting.
12121217
fn is_reserved_for_identifier(&self, kw: Keyword) -> bool {

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@
153153
// Splitting complex nodes (expressions, statements, types) into separate types
154154
// would bloat the API and hide intent. Extra memory is a worthwhile tradeoff.
155155
#![allow(clippy::large_enum_variant)]
156+
// TODO: Fix and remove this.
157+
#![expect(clippy::unnecessary_unwrap)]
156158
#![forbid(clippy::unreachable)]
157159
#![forbid(missing_docs)]
158160

src/parser/mod.rs

Lines changed: 55 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4224,8 +4224,10 @@ impl<'a> Parser<'a> {
42244224
})
42254225
}
42264226

4227+
// Parser is either looking at a : or a bracket expression.
42274228
fn parse_json_path(&mut self) -> Result<JsonPath, ParserError> {
42284229
let mut path = Vec::new();
4230+
let mut has_colon = false;
42294231
loop {
42304232
match self.next_token().token {
42314233
Token::Colon if path.is_empty() && self.peek_token_ref() == &Token::LBracket => {
@@ -4235,16 +4237,19 @@ impl<'a> Parser<'a> {
42354237
path.push(JsonPathElem::ColonBracket { key });
42364238
}
42374239
Token::Colon if path.is_empty() => {
4238-
path.push(self.parse_json_path_object_key()?);
4240+
has_colon = true;
4241+
if *self.peek_token_ref() == Token::LBracket {
4242+
path.push(self.parse_json_path_bracket_element()?);
4243+
} else {
4244+
path.push(self.parse_json_path_object_key()?);
4245+
}
42394246
}
42404247
Token::Period if !path.is_empty() => {
42414248
path.push(self.parse_json_path_object_key()?);
42424249
}
42434250
Token::LBracket => {
4244-
let key = self.parse_wildcard_expr()?;
4245-
self.expect_token(&Token::RBracket)?;
4246-
4247-
path.push(JsonPathElem::Bracket { key });
4251+
self.prev_token();
4252+
path.push(self.parse_json_path_bracket_element()?);
42484253
}
42494254
_ => {
42504255
self.prev_token();
@@ -4254,7 +4259,23 @@ impl<'a> Parser<'a> {
42544259
}
42554260

42564261
debug_assert!(!path.is_empty());
4257-
Ok(JsonPath { path })
4262+
Ok(JsonPath { has_colon, path })
4263+
}
4264+
4265+
/// Parses a single bracketed element in a JSON path expression, including both brackets.
4266+
fn parse_json_path_bracket_element(&mut self) -> Result<JsonPathElem, ParserError> {
4267+
self.expect_token(&Token::LBracket)?;
4268+
let elem = if *self.peek_token_ref() == Token::Mul
4269+
&& self.dialect.supports_semi_structured_array_all_elements()
4270+
{
4271+
self.expect_token(&Token::Mul)?;
4272+
JsonPathElem::AllElements
4273+
} else {
4274+
let key = self.parse_expr()?;
4275+
JsonPathElem::Bracket { key }
4276+
};
4277+
self.expect_token(&Token::RBracket)?;
4278+
Ok(elem)
42584279
}
42594280

42604281
/// Parses the parens following the `[ NOT ] IN` operator.
@@ -4271,25 +4292,34 @@ impl<'a> Parser<'a> {
42714292
negated,
42724293
});
42734294
}
4274-
self.expect_token(&Token::LParen)?;
4275-
let in_op = match self.maybe_parse(|p| p.parse_query())? {
4276-
Some(subquery) => Expr::InSubquery {
4277-
expr: Box::new(expr),
4278-
subquery,
4279-
negated,
4280-
},
4281-
None => Expr::InList {
4282-
expr: Box::new(expr),
4283-
list: if self.dialect.supports_in_empty_list() {
4284-
self.parse_comma_separated0(Parser::parse_expr, Token::RParen)?
4285-
} else {
4286-
self.parse_comma_separated(Parser::parse_expr)?
4295+
if self.consume_token(&Token::LParen) {
4296+
let in_op = match self.maybe_parse(|p| p.parse_query())? {
4297+
Some(subquery) => Expr::InSubquery {
4298+
expr: Box::new(expr),
4299+
subquery,
4300+
negated,
42874301
},
4302+
None => Expr::InList {
4303+
expr: Box::new(expr),
4304+
list: if self.dialect.supports_in_empty_list() {
4305+
self.parse_comma_separated0(Parser::parse_expr, Token::RParen)?
4306+
} else {
4307+
self.parse_comma_separated(Parser::parse_expr)?
4308+
},
4309+
negated,
4310+
},
4311+
};
4312+
self.expect_token(&Token::RParen)?;
4313+
Ok(in_op)
4314+
} else {
4315+
// parse an expr
4316+
let in_expr = self.parse_expr()?;
4317+
Ok(Expr::InExpr {
4318+
expr: Box::new(expr),
4319+
in_expr: Box::new(in_expr),
42884320
negated,
4289-
},
4290-
};
4291-
self.expect_token(&Token::RParen)?;
4292-
Ok(in_op)
4321+
})
4322+
}
42934323
}
42944324

42954325
/// Parses `BETWEEN <low> AND <high>`, assuming the `BETWEEN` keyword was already consumed.
@@ -15586,7 +15616,8 @@ impl<'a> Parser<'a> {
1558615616
| TableFactor::Unpivot { alias, .. }
1558715617
| TableFactor::MatchRecognize { alias, .. }
1558815618
| TableFactor::SemanticView { alias, .. }
15589-
| TableFactor::NestedJoin { alias, .. } => {
15619+
| TableFactor::NestedJoin { alias, .. }
15620+
| TableFactor::PassThroughQuery { alias, .. } => {
1559015621
// but not `FROM (mytable AS alias1) AS alias2`.
1559115622
if let Some(inner_alias) = alias {
1559215623
return Err(ParserError::ParserError(format!(

0 commit comments

Comments
 (0)