Skip to content

Commit d72f0a9

Browse files
authored
Add APIs to reuse token buffers in Tokenizer (apache#1094)
1 parent b0b6288 commit d72f0a9

3 files changed

Lines changed: 32 additions & 4 deletions

File tree

src/parser/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8711,6 +8711,11 @@ impl<'a> Parser<'a> {
87118711
self.expect_token(&Token::RParen)?;
87128712
Ok(partitions)
87138713
}
8714+
8715+
/// Consume the parser and return its underlying token buffer
8716+
pub fn into_tokens(self) -> Vec<TokenWithLocation> {
8717+
self.tokens
8718+
}
87148719
}
87158720

87168721
impl Word {

src/tokenizer.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -543,21 +543,30 @@ impl<'a> Tokenizer<'a> {
543543

544544
/// Tokenize the statement and produce a vector of tokens with location information
545545
pub fn tokenize_with_location(&mut self) -> Result<Vec<TokenWithLocation>, TokenizerError> {
546+
let mut tokens: Vec<TokenWithLocation> = vec![];
547+
self.tokenize_with_location_into_buf(&mut tokens)
548+
.map(|_| tokens)
549+
}
550+
551+
/// Tokenize the statement and append tokens with location information into the provided buffer.
552+
/// If an error is returned, the buffer will contain all tokens that were successfully parsed before the error.
553+
pub fn tokenize_with_location_into_buf(
554+
&mut self,
555+
buf: &mut Vec<TokenWithLocation>,
556+
) -> Result<(), TokenizerError> {
546557
let mut state = State {
547558
peekable: self.query.chars().peekable(),
548559
line: 1,
549560
col: 1,
550561
};
551562

552-
let mut tokens: Vec<TokenWithLocation> = vec![];
553-
554563
let mut location = state.location();
555564
while let Some(token) = self.next_token(&mut state)? {
556-
tokens.push(TokenWithLocation { token, location });
565+
buf.push(TokenWithLocation { token, location });
557566

558567
location = state.location();
559568
}
560-
Ok(tokens)
569+
Ok(())
561570
}
562571

563572
// Tokenize the identifier or keywords in `ch`

tests/sqlparser_common.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use sqlparser::dialect::{
3131
};
3232
use sqlparser::keywords::ALL_KEYWORDS;
3333
use sqlparser::parser::{Parser, ParserError, ParserOptions};
34+
use sqlparser::tokenizer::Tokenizer;
3435
use test_utils::{
3536
all_dialects, alter_table_op, assert_eq_vec, expr_from_projection, join, number, only, table,
3637
table_alias, TestedDialects,
@@ -8080,3 +8081,16 @@ fn test_release_savepoint() {
80808081

80818082
one_statement_parses_to("RELEASE test1", "RELEASE SAVEPOINT test1");
80828083
}
8084+
8085+
#[test]
8086+
fn test_buffer_reuse() {
8087+
let d = GenericDialect {};
8088+
let q = "INSERT INTO customer WITH foo AS (SELECT 1) SELECT * FROM foo UNION VALUES (1)";
8089+
let mut buf = Vec::new();
8090+
Tokenizer::new(&d, q)
8091+
.tokenize_with_location_into_buf(&mut buf)
8092+
.unwrap();
8093+
let mut p = Parser::new(&d).with_tokens_with_locations(buf);
8094+
p.parse_statements().unwrap();
8095+
let _ = p.into_tokens();
8096+
}

0 commit comments

Comments (0)