Skip to content

Commit 777387e

Browse files
Alexander Beedie (alexander-beedie)
authored and committed
Optimise prefix/identifier parsing
1 parent 0b589b2 commit 777387e

1 file changed

Lines changed: 62 additions & 33 deletions

File tree

src/parser/mod.rs

Lines changed: 62 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1615,13 +1615,13 @@ impl<'a> Parser<'a> {
16151615
/// Tries to parse an expression by a word that is not known to have a special meaning in the dialect.
16161616
fn parse_expr_prefix_by_unreserved_word(
16171617
&mut self,
1618-
w: &Word,
1618+
w: Word,
16191619
w_span: Span,
16201620
) -> Result<Expr, ParserError> {
16211621
let is_outer_join = self.peek_outer_join_operator();
16221622
match &self.peek_token_ref().token {
16231623
Token::LParen if !is_outer_join => {
1624-
let id_parts = vec![w.to_ident(w_span)];
1624+
let id_parts = vec![w.into_ident(w_span)];
16251625
self.parse_function(ObjectName::from(id_parts))
16261626
}
16271627
// string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html
@@ -1631,7 +1631,7 @@ impl<'a> Parser<'a> {
16311631
if w.value.starts_with('_') =>
16321632
{
16331633
Ok(Expr::Prefixed {
1634-
prefix: w.to_ident(w_span),
1634+
prefix: w.into_ident(w_span),
16351635
value: self.parse_introduced_string_expr()?.into(),
16361636
})
16371637
}
@@ -1642,7 +1642,7 @@ impl<'a> Parser<'a> {
16421642
if w.value.starts_with('_') =>
16431643
{
16441644
Ok(Expr::Prefixed {
1645-
prefix: w.to_ident(w_span),
1645+
prefix: w.into_ident(w_span),
16461646
value: self.parse_introduced_string_expr()?.into(),
16471647
})
16481648
}
@@ -1653,7 +1653,7 @@ impl<'a> Parser<'a> {
16531653
self.expect_token(&Token::Arrow)?;
16541654
Ok(Expr::Lambda(LambdaFunction {
16551655
params: OneOrManyWithParens::One(LambdaFunctionParameter {
1656-
name: w.to_ident(w_span),
1656+
name: w.into_ident(w_span),
16571657
data_type: None,
16581658
}),
16591659
body: Box::new(self.parse_expr()?),
@@ -1671,14 +1671,14 @@ impl<'a> Parser<'a> {
16711671
self.expect_token(&Token::Arrow)?;
16721672
Ok(Expr::Lambda(LambdaFunction {
16731673
params: OneOrManyWithParens::One(LambdaFunctionParameter {
1674-
name: w.to_ident(w_span),
1674+
name: w.into_ident(w_span),
16751675
data_type: Some(data_type),
16761676
}),
16771677
body: Box::new(self.parse_expr()?),
16781678
syntax: LambdaSyntax::Arrow,
16791679
}))
16801680
}
1681-
_ => Ok(Expr::Identifier(w.to_ident(w_span))),
1681+
_ => Ok(Expr::Identifier(w.into_ident(w_span))),
16821682
}
16831683
}
16841684

@@ -1756,31 +1756,60 @@ impl<'a> Parser<'a> {
17561756
// ^^^^^^^^^^^^^^^^ ^^^^^^^^
17571757
// interval expression identifier
17581758
//
1759-
// We first try to parse the word and following tokens as a special expression, and if that fails,
1760-
// we rollback and try to parse it as an identifier.
1761-
let w = w.clone();
1762-
match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) {
1763-
// This word indicated an expression prefix and parsing was successful
1764-
Ok(Some(expr)) => Ok(expr),
1765-
1766-
// No expression prefix associated with this word
1767-
Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, span)?),
1768-
1769-
// If parsing of the word as a special expression failed, we are facing two options:
1770-
// 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`)
1771-
// 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl`
1772-
// We first try to parse the word as an identifier and if that fails
1773-
// we rollback and return the parsing error we got from trying to parse a
1774-
// special expression (to maintain backwards compatibility of parsing errors).
1775-
Err(e) => {
1776-
if !self.dialect.is_reserved_for_identifier(w.keyword) {
1777-
if let Ok(Some(expr)) = self.maybe_parse(|parser| {
1778-
parser.parse_expr_prefix_by_unreserved_word(&w, span)
1779-
}) {
1780-
return Ok(expr);
1759+
if w.keyword == Keyword::NoKeyword {
1760+
// Fast path: for non-keyword words not followed by
1761+
// special tokens, produce an identifier directly.
1762+
let peek = &self.peek_token_ref().token;
1763+
let is_special = matches!(
1764+
peek,
1765+
Token::LParen
1766+
| Token::Arrow
1767+
| Token::SingleQuotedString(_)
1768+
| Token::DoubleQuotedString(_)
1769+
| Token::HexStringLiteral(_)
1770+
);
1771+
// Typed lambda: `a INT -> a * 2`
1772+
let is_typed_lambda = matches!(peek, Token::Word(_))
1773+
&& self.dialect.supports_lambda_functions()
1774+
&& self.peek_nth_token_ref(1).token == Token::Arrow;
1775+
if !is_special && !is_typed_lambda {
1776+
Ok(Expr::Identifier(w.to_ident(span)))
1777+
} else {
1778+
// Non-keyword followed by special token (e.g. function call)
1779+
let w = w.clone();
1780+
Ok(self.parse_expr_prefix_by_unreserved_word(w, span)?)
1781+
}
1782+
} else {
1783+
// We first try to parse the word and following tokens as a special
1784+
// expression, and if that fails, we rollback and try to parse it
1785+
// as an identifier.
1786+
let w = w.clone();
1787+
match self
1788+
.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span))
1789+
{
1790+
// This word indicated an expression prefix and parsing was successful
1791+
Ok(Some(expr)) => Ok(expr),
1792+
1793+
// No expression prefix associated with this word
1794+
Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(w, span)?),
1795+
1796+
// If parsing of the word as a special expression failed, we are facing
1797+
// two options:
1798+
// 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI`
1799+
// 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl`
1800+
// We first try to parse the word as an identifier and if that fails
1801+
// we rollback and return the parsing error we got from trying to parse a
1802+
// special expression (to maintain backwards compatibility of parsing errors).
1803+
Err(e) => {
1804+
if !self.dialect.is_reserved_for_identifier(w.keyword) {
1805+
if let Ok(Some(expr)) = self.maybe_parse(|parser| {
1806+
parser.parse_expr_prefix_by_unreserved_word(w, span)
1807+
}) {
1808+
return Ok(expr);
1809+
}
17811810
}
1811+
return Err(e);
17821812
}
1783-
return Err(e);
17841813
}
17851814
}
17861815
} // End of Token::Word
@@ -5016,7 +5045,7 @@ impl<'a> Parser<'a> {
50165045
/// Returns `Ok(None)` if `f` returns any other error.
50175046
pub fn maybe_parse<T, F>(&mut self, f: F) -> Result<Option<T>, ParserError>
50185047
where
5019-
F: FnMut(&mut Parser) -> Result<T, ParserError>,
5048+
F: FnOnce(&mut Parser) -> Result<T, ParserError>,
50205049
{
50215050
match self.try_parse(f) {
50225051
Ok(t) => Ok(Some(t)),
@@ -5026,9 +5055,9 @@ impl<'a> Parser<'a> {
50265055
}
50275056

50285057
/// Run a parser method `f`, reverting back to the current position if unsuccessful.
5029-
pub fn try_parse<T, F>(&mut self, mut f: F) -> Result<T, ParserError>
5058+
pub fn try_parse<T, F>(&mut self, f: F) -> Result<T, ParserError>
50305059
where
5031-
F: FnMut(&mut Parser) -> Result<T, ParserError>,
5060+
F: FnOnce(&mut Parser) -> Result<T, ParserError>,
50325061
{
50335062
let index = self.index;
50345063
match f(self) {

0 commit comments

Comments
 (0)