Skip to content

Commit 6a9b6f5

Browse files
authored
Support for (+) outer join syntax (apache#1145)
1 parent b284fdf commit 6a9b6f5

3 files changed

Lines changed: 111 additions & 2 deletions

File tree

src/ast/mod.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,21 @@ pub enum Expr {
713713
/// Qualified wildcard, e.g. `alias.*` or `schema.table.*`.
714714
/// (Same caveats apply to `QualifiedWildcard` as to `Wildcard`.)
715715
QualifiedWildcard(ObjectName),
716+
/// Some dialects support an older syntax for outer joins where columns are
717+
/// marked with the `(+)` operator in the WHERE clause, for example:
718+
///
719+
/// ```sql
720+
/// SELECT t1.c1, t2.c2 FROM t1, t2 WHERE t1.c1 = t2.c2 (+)
721+
/// ```
722+
///
723+
/// which is equivalent to
724+
///
725+
/// ```sql
726+
/// SELECT t1.c1, t2.c2 FROM t1 LEFT OUTER JOIN t2 ON t1.c1 = t2.c2
727+
/// ```
728+
///
729+
/// See <https://docs.snowflake.com/en/sql-reference/constructs/where#joins-in-the-where-clause>.
730+
OuterJoin(Box<Expr>),
716731
}
717732

718733
impl fmt::Display for CastFormat {
@@ -1174,6 +1189,9 @@ impl fmt::Display for Expr {
11741189

11751190
Ok(())
11761191
}
1192+
Expr::OuterJoin(expr) => {
1193+
write!(f, "{expr} (+)")
1194+
}
11771195
}
11781196
}
11791197
}

src/parser/mod.rs

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -998,8 +998,19 @@ impl<'a> Parser<'a> {
998998
if ends_with_wildcard {
999999
Ok(Expr::QualifiedWildcard(ObjectName(id_parts)))
10001000
} else if self.consume_token(&Token::LParen) {
1001-
self.prev_token();
1002-
self.parse_function(ObjectName(id_parts))
1001+
if dialect_of!(self is SnowflakeDialect | MsSqlDialect)
1002+
&& self.consume_tokens(&[Token::Plus, Token::RParen])
1003+
{
1004+
Ok(Expr::OuterJoin(Box::new(
1005+
match <[Ident; 1]>::try_from(id_parts) {
1006+
Ok([ident]) => Expr::Identifier(ident),
1007+
Err(parts) => Expr::CompoundIdentifier(parts),
1008+
},
1009+
)))
1010+
} else {
1011+
self.prev_token();
1012+
self.parse_function(ObjectName(id_parts))
1013+
}
10031014
} else {
10041015
Ok(Expr::CompoundIdentifier(id_parts))
10051016
}
@@ -2860,6 +2871,21 @@ impl<'a> Parser<'a> {
28602871
}
28612872
}
28622873

2874+
/// If the current and subsequent tokens exactly match the `tokens`
2875+
/// sequence, consume them and returns true. Otherwise, no tokens are
2876+
/// consumed and returns false
2877+
#[must_use]
2878+
pub fn consume_tokens(&mut self, tokens: &[Token]) -> bool {
2879+
let index = self.index;
2880+
for token in tokens {
2881+
if !self.consume_token(token) {
2882+
self.index = index;
2883+
return false;
2884+
}
2885+
}
2886+
true
2887+
}
2888+
28632889
/// Bail out if the current token is not the expected token, or consume it if it is
28642890
pub fn expect_token(&mut self, expected: &Token) -> Result<(), ParserError> {
28652891
if self.consume_token(expected) {

tests/sqlparser_snowflake.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,3 +1173,68 @@ fn parse_top() {
11731173
"SELECT TOP 4 c1 FROM testtable",
11741174
);
11751175
}
1176+
1177+
#[test]
fn parse_comma_outer_join() {
    // Shorthand for the `<left> = <right>` predicate each case expects as
    // the WHERE-clause selection.
    let eq = |left: Expr, right: Expr| Expr::BinaryOp {
        left: Box::new(left),
        op: BinaryOperator::Eq,
        right: Box::new(right),
    };
    // Wraps an expression in the `(+)` outer-join marker node.
    let outer = |inner: Expr| Expr::OuterJoin(Box::new(inner));

    // `(+)` applied to a compound (qualified) identifier
    let select =
        snowflake().verified_only_select("SELECT t1.c1, t2.c2 FROM t1, t2 WHERE t1.c1 = t2.c2 (+)");
    let t1_c1 = Expr::CompoundIdentifier(vec![Ident::new("t1"), Ident::new("c1")]);
    let t2_c2 = Expr::CompoundIdentifier(vec![Ident::new("t2"), Ident::new("c2")]);
    assert_eq!(select.selection, Some(eq(t1_c1, outer(t2_c2))));

    // `(+)` applied to a plain, unqualified identifier
    let select =
        snowflake().verified_only_select("SELECT t1.c1, t2.c2 FROM t1, t2 WHERE c1 = c2 (+)");
    let c1 = Expr::Identifier(Ident::new("c1"));
    let c2 = Expr::Identifier(Ident::new("c2"));
    assert_eq!(select.selection, Some(eq(c1, outer(c2))));

    // a function call whose argument starts with a unary plus must still be
    // parsed as a function call, not as an outer-join marker
    let select =
        snowflake().verified_only_select("SELECT t1.c1, t2.c2 FROM t1, t2 WHERE c1 = myudf(+42)");
    let plus_42 = Expr::UnaryOp {
        op: UnaryOperator::Plus,
        expr: Box::new(Expr::Value(number("42"))),
    };
    let udf_call = Expr::Function(Function {
        name: ObjectName(vec![Ident::new("myudf")]),
        args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(plus_42))],
        filter: None,
        null_treatment: None,
        over: None,
        distinct: false,
        special: false,
        order_by: vec![],
    });
    assert_eq!(
        select.selection,
        Some(eq(Expr::Identifier(Ident::new("c1")), udf_call))
    );

    // whitespace inside and around the marker is accepted; the second
    // argument is the canonical ` (+)` spelling
    snowflake().verified_only_select_with_canonical(
        "SELECT t1.c1, t2.c2 FROM t1, t2 WHERE t1.c1 = t2.c2( + )",
        "SELECT t1.c1, t2.c2 FROM t1, t2 WHERE t1.c1 = t2.c2 (+)",
    );
}

0 commit comments

Comments
 (0)