Skip to content

Commit 7cb1654

Browse files
Add support for PostgreSQL ^@ starts-with operator (apache#1091)
1 parent a71b3f5 commit 7cb1654

4 files changed

Lines changed: 26 additions & 8 deletions

File tree

src/ast/operator.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ pub enum BinaryOperator {
131131
PGRegexNotMatch,
132132
/// String does not match regular expression (case insensitively), e.g. `a !~* b` (PostgreSQL-specific)
133133
PGRegexNotIMatch,
134+
/// String "starts with", e.g. `a ^@ b` (PostgreSQL-specific)
135+
PGStartsWith,
134136
/// PostgreSQL-specific custom operator.
135137
///
136138
/// See [CREATE OPERATOR](https://www.postgresql.org/docs/current/sql-createoperator.html)
@@ -172,6 +174,7 @@ impl fmt::Display for BinaryOperator {
172174
BinaryOperator::PGRegexIMatch => f.write_str("~*"),
173175
BinaryOperator::PGRegexNotMatch => f.write_str("!~"),
174176
BinaryOperator::PGRegexNotIMatch => f.write_str("!~*"),
177+
BinaryOperator::PGStartsWith => f.write_str("^@"),
175178
BinaryOperator::PGCustomBinaryOperator(idents) => {
176179
write!(f, "OPERATOR({})", display_separated(idents, "."))
177180
}

src/parser/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2196,6 +2196,9 @@ impl<'a> Parser<'a> {
21962196
Token::Overlap if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
21972197
Some(BinaryOperator::PGOverlap)
21982198
}
2199+
Token::CaretAt if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
2200+
Some(BinaryOperator::PGStartsWith)
2201+
}
21992202
Token::Tilde => Some(BinaryOperator::PGRegexMatch),
22002203
Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch),
22012204
Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch),
@@ -2630,6 +2633,7 @@ impl<'a> Parser<'a> {
26302633
| Token::LongArrow
26312634
| Token::Arrow
26322635
| Token::Overlap
2636+
| Token::CaretAt
26332637
| Token::HashArrow
26342638
| Token::HashLongArrow
26352639
| Token::AtArrow

src/tokenizer.rs

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ pub enum Token {
6060
DoubleQuotedString(String),
6161
/// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$
6262
DollarQuotedString(DollarQuotedString),
63-
/// Byte string literal: i.e: b'string' or B'string'
63+
/// Byte string literal: i.e: b'string' or B'string' (note that some backends, such as
64+
/// PostgreSQL, may treat this syntax as a bit string literal instead, i.e: b'10010101')
6465
SingleQuotedByteStringLiteral(String),
6566
/// Byte string literal: i.e: b"string" or B"string"
6667
DoubleQuotedByteStringLiteral(String),
@@ -114,7 +115,7 @@ pub enum Token {
114115
Period,
115116
/// Colon `:`
116117
Colon,
117-
/// DoubleColon `::` (used for casting in postgresql)
118+
/// DoubleColon `::` (used for casting in PostgreSQL)
118119
DoubleColon,
119120
/// Assignment `:=` (used for keyword argument in DuckDB macros)
120121
DuckAssignment,
@@ -152,27 +153,29 @@ pub enum Token {
152153
ShiftLeft,
153154
/// `>>`, a bitwise shift right operator in PostgreSQL
154155
ShiftRight,
155-
/// '&&', an overlap operator in PostgreSQL
156+
/// `&&`, an overlap operator in PostgreSQL
156157
Overlap,
157158
/// Exclamation Mark `!` used for PostgreSQL factorial operator
158159
ExclamationMark,
159160
/// Double Exclamation Mark `!!` used for PostgreSQL prefix factorial operator
160161
DoubleExclamationMark,
161162
/// AtSign `@` used for PostgreSQL abs operator
162163
AtSign,
164+
/// `^@`, a "starts with" string operator in PostgreSQL
165+
CaretAt,
163166
/// `|/`, a square root math operator in PostgreSQL
164167
PGSquareRoot,
165168
/// `||/`, a cube root math operator in PostgreSQL
166169
PGCubeRoot,
167170
/// `?` or `$` , a prepared statement arg placeholder
168171
Placeholder(String),
169-
/// ->, used as a operator to extract json field in PostgreSQL
172+
/// `->`, used as an operator to extract json field in PostgreSQL
170173
Arrow,
171-
/// ->>, used as a operator to extract json field as text in PostgreSQL
174+
/// `->>`, used as an operator to extract json field as text in PostgreSQL
172175
LongArrow,
173-
/// #> Extracts JSON sub-object at the specified path
176+
/// `#>`, extracts JSON sub-object at the specified path
174177
HashArrow,
175-
/// #>> Extracts JSON sub-object at the specified path as text
178+
/// `#>>`, extracts JSON sub-object at the specified path as text
176179
HashLongArrow,
177180
/// jsonb @> jsonb -> boolean: Test whether left json contains the right json
178181
AtArrow,
@@ -247,6 +250,7 @@ impl fmt::Display for Token {
247250
Token::ExclamationMarkTilde => f.write_str("!~"),
248251
Token::ExclamationMarkTildeAsterisk => f.write_str("!~*"),
249252
Token::AtSign => f.write_str("@"),
253+
Token::CaretAt => f.write_str("^@"),
250254
Token::ShiftLeft => f.write_str("<<"),
251255
Token::ShiftRight => f.write_str(">>"),
252256
Token::Overlap => f.write_str("&&"),
@@ -940,7 +944,13 @@ impl<'a> Tokenizer<'a> {
940944
_ => Ok(Some(Token::Ampersand)),
941945
}
942946
}
943-
'^' => self.consume_and_return(chars, Token::Caret),
947+
'^' => {
948+
chars.next(); // consume the '^'
949+
match chars.peek() {
950+
Some('@') => self.consume_and_return(chars, Token::CaretAt),
951+
_ => Ok(Some(Token::Caret)),
952+
}
953+
}
944954
'{' => self.consume_and_return(chars, Token::LBrace),
945955
'}' => self.consume_and_return(chars, Token::RBrace),
946956
'#' if dialect_of!(self is SnowflakeDialect) => {

tests/sqlparser_postgres.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,6 +1728,7 @@ fn parse_pg_binary_ops() {
17281728
(">>", BinaryOperator::PGBitwiseShiftRight, pg_and_generic()),
17291729
("<<", BinaryOperator::PGBitwiseShiftLeft, pg_and_generic()),
17301730
("&&", BinaryOperator::PGOverlap, pg()),
1731+
("^@", BinaryOperator::PGStartsWith, pg()),
17311732
];
17321733

17331734
for (str_op, op, dialects) in binary_ops {

0 commit comments

Comments
 (0)