Skip to content

Commit 5d9eb33

Browse files
author
Alexander Beedie
committed
Optimise prefix/identifier parsing
1 parent e81eb14 commit 5d9eb33

1 file changed

Lines changed: 62 additions & 33 deletions

File tree

src/parser/mod.rs

Lines changed: 62 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1615,12 +1615,12 @@ impl<'a> Parser<'a> {
16151615
/// Tries to parse an expression by a word that is not known to have a special meaning in the dialect.
16161616
fn parse_expr_prefix_by_unreserved_word(
16171617
&mut self,
1618-
w: &Word,
1618+
w: Word,
16191619
w_span: Span,
16201620
) -> Result<Expr, ParserError> {
16211621
match self.peek_token().token {
16221622
Token::LParen if !self.peek_outer_join_operator() => {
1623-
let id_parts = vec![w.to_ident(w_span)];
1623+
let id_parts = vec![w.into_ident(w_span)];
16241624
self.parse_function(ObjectName::from(id_parts))
16251625
}
16261626
// string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html
@@ -1630,7 +1630,7 @@ impl<'a> Parser<'a> {
16301630
if w.value.starts_with('_') =>
16311631
{
16321632
Ok(Expr::Prefixed {
1633-
prefix: w.to_ident(w_span),
1633+
prefix: w.into_ident(w_span),
16341634
value: self.parse_introduced_string_expr()?.into(),
16351635
})
16361636
}
@@ -1641,7 +1641,7 @@ impl<'a> Parser<'a> {
16411641
if w.value.starts_with('_') =>
16421642
{
16431643
Ok(Expr::Prefixed {
1644-
prefix: w.to_ident(w_span),
1644+
prefix: w.into_ident(w_span),
16451645
value: self.parse_introduced_string_expr()?.into(),
16461646
})
16471647
}
@@ -1652,7 +1652,7 @@ impl<'a> Parser<'a> {
16521652
self.expect_token(&Token::Arrow)?;
16531653
Ok(Expr::Lambda(LambdaFunction {
16541654
params: OneOrManyWithParens::One(LambdaFunctionParameter {
1655-
name: w.to_ident(w_span),
1655+
name: w.into_ident(w_span),
16561656
data_type: None,
16571657
}),
16581658
body: Box::new(self.parse_expr()?),
@@ -1670,14 +1670,14 @@ impl<'a> Parser<'a> {
16701670
self.expect_token(&Token::Arrow)?;
16711671
Ok(Expr::Lambda(LambdaFunction {
16721672
params: OneOrManyWithParens::One(LambdaFunctionParameter {
1673-
name: w.to_ident(w_span),
1673+
name: w.into_ident(w_span),
16741674
data_type: Some(data_type),
16751675
}),
16761676
body: Box::new(self.parse_expr()?),
16771677
syntax: LambdaSyntax::Arrow,
16781678
}))
16791679
}
1680-
_ => Ok(Expr::Identifier(w.to_ident(w_span))),
1680+
_ => Ok(Expr::Identifier(w.into_ident(w_span))),
16811681
}
16821682
}
16831683

@@ -1755,31 +1755,60 @@ impl<'a> Parser<'a> {
17551755
// ^^^^^^^^^^^^^^^^ ^^^^^^^^
17561756
// interval expression identifier
17571757
//
1758-
// We first try to parse the word and following tokens as a special expression, and if that fails,
1759-
// we rollback and try to parse it as an identifier.
1760-
let w = w.clone();
1761-
match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) {
1762-
// This word indicated an expression prefix and parsing was successful
1763-
Ok(Some(expr)) => Ok(expr),
1764-
1765-
// No expression prefix associated with this word
1766-
Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, span)?),
1767-
1768-
// If parsing of the word as a special expression failed, we are facing two options:
1769-
// 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`)
1770-
// 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl`
1771-
// We first try to parse the word as an identifier and if that fails
1772-
// we rollback and return the parsing error we got from trying to parse a
1773-
// special expression (to maintain backwards compatibility of parsing errors).
1774-
Err(e) => {
1775-
if !self.dialect.is_reserved_for_identifier(w.keyword) {
1776-
if let Ok(Some(expr)) = self.maybe_parse(|parser| {
1777-
parser.parse_expr_prefix_by_unreserved_word(&w, span)
1778-
}) {
1779-
return Ok(expr);
1758+
if w.keyword == Keyword::NoKeyword {
1759+
// Fast path: for non-keyword words not followed by
1760+
// special tokens, produce an identifier directly.
1761+
let peek = &self.peek_token_ref().token;
1762+
let is_special = matches!(
1763+
peek,
1764+
Token::LParen
1765+
| Token::Arrow
1766+
| Token::SingleQuotedString(_)
1767+
| Token::DoubleQuotedString(_)
1768+
| Token::HexStringLiteral(_)
1769+
);
1770+
// Typed lambda: `a INT -> a * 2`
1771+
let is_typed_lambda = matches!(peek, Token::Word(_))
1772+
&& self.dialect.supports_lambda_functions()
1773+
&& self.peek_nth_token_ref(1).token == Token::Arrow;
1774+
if !is_special && !is_typed_lambda {
1775+
Ok(Expr::Identifier(w.to_ident(span)))
1776+
} else {
1777+
// Non-keyword followed by special token (e.g. function call)
1778+
let w = w.clone();
1779+
Ok(self.parse_expr_prefix_by_unreserved_word(w, span)?)
1780+
}
1781+
} else {
1782+
// We first try to parse the word and following tokens as a special
1783+
// expression, and if that fails, we rollback and try to parse it
1784+
// as an identifier.
1785+
let w = w.clone();
1786+
match self
1787+
.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span))
1788+
{
1789+
// This word indicated an expression prefix and parsing was successful
1790+
Ok(Some(expr)) => Ok(expr),
1791+
1792+
// No expression prefix associated with this word
1793+
Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(w, span)?),
1794+
1795+
// If parsing of the word as a special expression failed, we are facing
1796+
// two options:
1797+
// 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`)
1798+
// 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl`
1799+
// We first try to parse the word as an identifier and if that fails
1800+
// we rollback and return the parsing error we got from trying to parse a
1801+
// special expression (to maintain backwards compatibility of parsing errors).
1802+
Err(e) => {
1803+
if !self.dialect.is_reserved_for_identifier(w.keyword) {
1804+
if let Ok(Some(expr)) = self.maybe_parse(|parser| {
1805+
parser.parse_expr_prefix_by_unreserved_word(w, span)
1806+
}) {
1807+
return Ok(expr);
1808+
}
17801809
}
1810+
return Err(e);
17811811
}
1782-
return Err(e);
17831812
}
17841813
}
17851814
} // End of Token::Word
@@ -5015,7 +5044,7 @@ impl<'a> Parser<'a> {
50155044
/// Returns `Ok(None)` if `f` returns any other error.
50165045
pub fn maybe_parse<T, F>(&mut self, f: F) -> Result<Option<T>, ParserError>
50175046
where
5018-
F: FnMut(&mut Parser) -> Result<T, ParserError>,
5047+
F: FnOnce(&mut Parser) -> Result<T, ParserError>,
50195048
{
50205049
match self.try_parse(f) {
50215050
Ok(t) => Ok(Some(t)),
@@ -5025,9 +5054,9 @@ impl<'a> Parser<'a> {
50255054
}
50265055

50275056
/// Run a parser method `f`, reverting back to the current position if unsuccessful.
5028-
pub fn try_parse<T, F>(&mut self, mut f: F) -> Result<T, ParserError>
5057+
pub fn try_parse<T, F>(&mut self, f: F) -> Result<T, ParserError>
50295058
where
5030-
F: FnMut(&mut Parser) -> Result<T, ParserError>,
5059+
F: FnOnce(&mut Parser) -> Result<T, ParserError>,
50315060
{
50325061
let index = self.index;
50335062
match f(self) {

0 commit comments

Comments
 (0)