Skip to content

Commit 39980e8

Browse files
authored
Support Snowflake MATCH_RECOGNIZE syntax (apache#1222)
1 parent bf89b7d commit 39980e8

9 files changed

Lines changed: 847 additions & 9 deletions

File tree

src/ast/mod.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,15 @@ pub use self::ddl::{
4040
pub use self::dml::{Delete, Insert};
4141
pub use self::operator::{BinaryOperator, UnaryOperator};
4242
pub use self::query::{
43-
Cte, CteAsMaterialized, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, ForClause,
44-
ForJson, ForXml, GroupByExpr, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint,
45-
JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType,
46-
NamedWindowDefinition, NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem,
47-
ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SetOperator,
48-
SetQuantifier, Table, TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity,
49-
ValueTableMode, Values, WildcardAdditionalOptions, With,
43+
AfterMatchSkip, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem,
44+
ExcludeSelectItem, Fetch, ForClause, ForJson, ForXml, GroupByExpr, IdentWithAlias,
45+
IlikeSelectItem, Join, JoinConstraint, JoinOperator, JsonTableColumn,
46+
JsonTableColumnErrorHandling, LateralView, LockClause, LockType, MatchRecognizePattern,
47+
MatchRecognizeSymbol, Measure, NamedWindowDefinition, NonBlock, Offset, OffsetRows,
48+
OrderByExpr, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement,
49+
ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator,
50+
SetQuantifier, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion, TableWithJoins,
51+
Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With,
5052
};
5153
pub use self::value::{
5254
escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value,

src/ast/query.rs

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -852,6 +852,238 @@ pub enum TableFactor {
852852
columns: Vec<Ident>,
853853
alias: Option<TableAlias>,
854854
},
855+
/// A `MATCH_RECOGNIZE` operation on a table.
856+
///
857+
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize>.
858+
MatchRecognize {
859+
table: Box<TableFactor>,
860+
/// `PARTITION BY <expr> [, ... ]`
861+
partition_by: Vec<Expr>,
862+
/// `ORDER BY <expr> [, ... ]`
863+
order_by: Vec<OrderByExpr>,
864+
/// `MEASURES <expr> [AS] <alias> [, ... ]`
865+
measures: Vec<Measure>,
866+
/// `ONE ROW PER MATCH | ALL ROWS PER MATCH [ <option> ]`
867+
rows_per_match: Option<RowsPerMatch>,
868+
/// `AFTER MATCH SKIP <option>`
869+
after_match_skip: Option<AfterMatchSkip>,
870+
/// `PATTERN ( <pattern> )`
871+
pattern: MatchRecognizePattern,
872+
/// `DEFINE <symbol> AS <expr> [, ... ]`
873+
symbols: Vec<SymbolDefinition>,
874+
alias: Option<TableAlias>,
875+
},
876+
}
877+
878+
/// An item in the `MEASURES` subclause of a `MATCH_RECOGNIZE` operation.
879+
///
880+
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#measures-specifying-additional-output-columns>.
881+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
882+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
883+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
884+
pub struct Measure {
885+
pub expr: Expr,
886+
pub alias: Ident,
887+
}
888+
889+
impl fmt::Display for Measure {
890+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
891+
write!(f, "{} AS {}", self.expr, self.alias)
892+
}
893+
}
894+
895+
/// The rows per match option in a `MATCH_RECOGNIZE` operation.
896+
///
897+
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#row-s-per-match-specifying-the-rows-to-return>.
898+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
899+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
900+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
901+
pub enum RowsPerMatch {
902+
/// `ONE ROW PER MATCH`
903+
OneRow,
904+
/// `ALL ROWS PER MATCH <mode>`
905+
AllRows(Option<EmptyMatchesMode>),
906+
}
907+
908+
impl fmt::Display for RowsPerMatch {
909+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
910+
match self {
911+
RowsPerMatch::OneRow => write!(f, "ONE ROW PER MATCH"),
912+
RowsPerMatch::AllRows(mode) => {
913+
write!(f, "ALL ROWS PER MATCH")?;
914+
if let Some(mode) = mode {
915+
write!(f, " {}", mode)?;
916+
}
917+
Ok(())
918+
}
919+
}
920+
}
921+
}
922+
923+
/// The after match skip option in a `MATCH_RECOGNIZE` operation.
924+
///
925+
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#after-match-skip-specifying-where-to-continue-after-a-match>.
926+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
927+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
928+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
929+
pub enum AfterMatchSkip {
930+
/// `PAST LAST ROW`
931+
PastLastRow,
932+
/// `TO NEXT ROW`
933+
ToNextRow,
934+
/// `TO FIRST <symbol>`
935+
ToFirst(Ident),
936+
/// `TO LAST <symbol>`
937+
ToLast(Ident),
938+
}
939+
940+
impl fmt::Display for AfterMatchSkip {
941+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
942+
write!(f, "AFTER MATCH SKIP ")?;
943+
match self {
944+
AfterMatchSkip::PastLastRow => write!(f, "PAST LAST ROW"),
945+
AfterMatchSkip::ToNextRow => write!(f, " TO NEXT ROW"),
946+
AfterMatchSkip::ToFirst(symbol) => write!(f, "TO FIRST {symbol}"),
947+
AfterMatchSkip::ToLast(symbol) => write!(f, "TO LAST {symbol}"),
948+
}
949+
}
950+
}
951+
952+
/// The optional mode that may follow `ALL ROWS PER MATCH` in a
/// `MATCH_RECOGNIZE` operation.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum EmptyMatchesMode {
    /// `SHOW EMPTY MATCHES`
    Show,
    /// `OMIT EMPTY MATCHES`
    Omit,
    /// `WITH UNMATCHED ROWS`
    WithUnmatched,
}

impl fmt::Display for EmptyMatchesMode {
    /// Writes the SQL keywords for the mode.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let keywords = match self {
            Self::Show => "SHOW EMPTY MATCHES",
            Self::Omit => "OMIT EMPTY MATCHES",
            Self::WithUnmatched => "WITH UNMATCHED ROWS",
        };
        f.write_str(keywords)
    }
}
973+
974+
/// A symbol defined in a `MATCH_RECOGNIZE` operation.
975+
///
976+
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#define-defining-symbols>.
977+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
978+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
979+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
980+
pub struct SymbolDefinition {
981+
pub symbol: Ident,
982+
pub definition: Expr,
983+
}
984+
985+
impl fmt::Display for SymbolDefinition {
986+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
987+
write!(f, "{} AS {}", self.symbol, self.definition)
988+
}
989+
}
990+
991+
/// A symbol in a `MATCH_RECOGNIZE` pattern.
992+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
993+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
994+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
995+
pub enum MatchRecognizeSymbol {
996+
/// A named symbol, e.g. `S1`.
997+
Named(Ident),
998+
/// A virtual symbol representing the start of the of partition (`^`).
999+
Start,
1000+
/// A virtual symbol representing the end of the partition (`$`).
1001+
End,
1002+
}
1003+
1004+
impl fmt::Display for MatchRecognizeSymbol {
1005+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1006+
match self {
1007+
MatchRecognizeSymbol::Named(symbol) => write!(f, "{symbol}"),
1008+
MatchRecognizeSymbol::Start => write!(f, "^"),
1009+
MatchRecognizeSymbol::End => write!(f, "$"),
1010+
}
1011+
}
1012+
}
1013+
1014+
/// The pattern in a `MATCH_RECOGNIZE` operation.
1015+
///
1016+
/// See <https://docs.snowflake.com/en/sql-reference/constructs/match_recognize#pattern-specifying-the-pattern-to-match>.
1017+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1018+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1019+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1020+
pub enum MatchRecognizePattern {
1021+
/// A named symbol such as `S1` or a virtual symbol such as `^`.
1022+
Symbol(MatchRecognizeSymbol),
1023+
/// {- symbol -}
1024+
Exclude(MatchRecognizeSymbol),
1025+
/// PERMUTE(symbol_1, ..., symbol_n)
1026+
Permute(Vec<MatchRecognizeSymbol>),
1027+
/// pattern_1 pattern_2 ... pattern_n
1028+
Concat(Vec<MatchRecognizePattern>),
1029+
/// ( pattern )
1030+
Group(Box<MatchRecognizePattern>),
1031+
/// pattern_1 | pattern_2 | ... | pattern_n
1032+
Alternation(Vec<MatchRecognizePattern>),
1033+
/// e.g. pattern*
1034+
Repetition(Box<MatchRecognizePattern>, RepetitionQuantifier),
1035+
}
1036+
1037+
impl fmt::Display for MatchRecognizePattern {
1038+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1039+
use MatchRecognizePattern::*;
1040+
match self {
1041+
Symbol(symbol) => write!(f, "{}", symbol),
1042+
Exclude(symbol) => write!(f, "{{- {symbol} -}}"),
1043+
Permute(symbols) => write!(f, "PERMUTE({})", display_comma_separated(symbols)),
1044+
Concat(patterns) => write!(f, "{}", display_separated(patterns, " ")),
1045+
Group(pattern) => write!(f, "( {pattern} )"),
1046+
Alternation(patterns) => write!(f, "{}", display_separated(patterns, " | ")),
1047+
Repetition(pattern, op) => write!(f, "{pattern}{op}"),
1048+
}
1049+
}
1050+
}
1051+
1052+
/// How many times a pattern of a `MATCH_RECOGNIZE` operation may occur:
/// the minimum and maximum number of allowed occurrences.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum RepetitionQuantifier {
    /// `*`
    ZeroOrMore,
    /// `+`
    OneOrMore,
    /// `?`
    AtMostOne,
    /// `{n}`
    Exactly(u32),
    /// `{n,}`
    AtLeast(u32),
    /// `{,n}`
    AtMost(u32),
    /// `{n,m}`
    Range(u32, u32),
}

impl fmt::Display for RepetitionQuantifier {
    /// Writes the quantifier in the textual form shown on each variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::ZeroOrMore => f.write_str("*"),
            Self::OneOrMore => f.write_str("+"),
            Self::AtMostOne => f.write_str("?"),
            // `{{` / `}}` are escapes for the literal braces around the counts.
            Self::Exactly(count) => write!(f, "{{{count}}}"),
            Self::AtLeast(min) => write!(f, "{{{min},}}"),
            Self::AtMost(max) => write!(f, "{{,{max}}}"),
            Self::Range(min, max) => write!(f, "{{{min},{max}}}"),
        }
    }
}
8561088

8571089
impl fmt::Display for TableFactor {
@@ -1005,6 +1237,40 @@ impl fmt::Display for TableFactor {
10051237
}
10061238
Ok(())
10071239
}
1240+
TableFactor::MatchRecognize {
    table,
    partition_by,
    order_by,
    measures,
    rows_per_match,
    after_match_skip,
    pattern,
    symbols,
    alias,
} => {
    // Renders `<table> MATCH_RECOGNIZE(<subclauses>) [AS <alias>]`.
    // Optional subclauses are written only when present, each followed
    // by a single space; `PATTERN` and `DEFINE` are always written.
    write!(f, "{table} MATCH_RECOGNIZE(")?;
    if !partition_by.is_empty() {
        write!(f, "PARTITION BY {} ", display_comma_separated(partition_by))?;
    }
    if !order_by.is_empty() {
        write!(f, "ORDER BY {} ", display_comma_separated(order_by))?;
    }
    if !measures.is_empty() {
        write!(f, "MEASURES {} ", display_comma_separated(measures))?;
    }
    if let Some(rows_per_match) = rows_per_match {
        write!(f, "{rows_per_match} ")?;
    }
    if let Some(after_match_skip) = after_match_skip {
        write!(f, "{after_match_skip} ")?;
    }
    write!(f, "PATTERN ({pattern}) ")?;
    write!(f, "DEFINE {})", display_comma_separated(symbols))?;
    // Bind the alias directly instead of `is_some()` followed by `unwrap()`.
    if let Some(alias) = alias {
        write!(f, " AS {alias}")?;
    }
    Ok(())
}
10081274
}
10091275
}
10101276
}

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ impl Dialect for GenericDialect {
3939
true
4040
}
4141

42+
/// Reports the `MATCH_RECOGNIZE` operation as supported by this dialect.
fn supports_match_recognize(&self) -> bool {
43+
true
44+
}
45+
4246
fn supports_start_transaction_modifier(&self) -> bool {
4347
true
4448
}

src/dialect/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@ pub trait Dialect: Debug + Any {
152152
fn supports_group_by_expr(&self) -> bool {
153153
false
154154
}
155+
/// Returns true if the dialect supports the `MATCH_RECOGNIZE` operation.
///
/// The default implementation returns `false`; a dialect enables the
/// operation by overriding this method to return `true`.
156+
fn supports_match_recognize(&self) -> bool {
157+
false
158+
}
155159
/// Returns true if the dialect supports `(NOT) IN ()` expressions
156160
fn supports_in_empty_list(&self) -> bool {
157161
false

src/dialect/snowflake.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ impl Dialect for SnowflakeDialect {
5555
true
5656
}
5757

58+
/// Reports the `MATCH_RECOGNIZE` operation as supported by this dialect.
fn supports_match_recognize(&self) -> bool {
59+
true
60+
}
61+
5862
fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
5963
if parser.parse_keyword(Keyword::CREATE) {
6064
// possibly CREATE STAGE

src/keywords.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ define_keywords!(
214214
DEFAULT,
215215
DEFERRABLE,
216216
DEFERRED,
217+
DEFINE,
217218
DEFINED,
218219
DELAYED,
219220
DELETE,
@@ -418,9 +419,12 @@ define_keywords!(
418419
MAP,
419420
MATCH,
420421
MATCHED,
422+
MATCHES,
423+
MATCH_RECOGNIZE,
421424
MATERIALIZED,
422425
MAX,
423426
MAXVALUE,
427+
MEASURES,
424428
MEDIUMINT,
425429
MEMBER,
426430
MERGE,
@@ -482,7 +486,9 @@ define_keywords!(
482486
OF,
483487
OFFSET,
484488
OLD,
489+
OMIT,
485490
ON,
491+
ONE,
486492
ONLY,
487493
OPEN,
488494
OPERATOR,
@@ -509,8 +515,10 @@ define_keywords!(
509515
PARTITIONED,
510516
PARTITIONS,
511517
PASSWORD,
518+
PAST,
512519
PATH,
513520
PATTERN,
521+
PER,
514522
PERCENT,
515523
PERCENTILE_CONT,
516524
PERCENTILE_DISC,
@@ -712,6 +720,7 @@ define_keywords!(
712720
UNLOAD,
713721
UNLOCK,
714722
UNLOGGED,
723+
UNMATCHED,
715724
UNNEST,
716725
UNPIVOT,
717726
UNSAFE,
@@ -808,6 +817,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
808817
Keyword::FOR,
809818
// for MYSQL PARTITION SELECTION
810819
Keyword::PARTITION,
820+
// Reserved for Snowflake MATCH_RECOGNIZE
821+
Keyword::MATCH_RECOGNIZE,
811822
];
812823

813824
/// Can't be used as a column alias, so that `SELECT <expr> alias`

0 commit comments

Comments
 (0)