Skip to content

Commit 589f832

Browse files
author
Альберт Скальт
committed
add ability to map tokens during tokenization
This patch adds a method to map tokens with a provided mapper during tokenization. This way, tokens can be replaced without an additional pass.
1 parent e4c5500 commit 589f832

1 file changed

Lines changed: 49 additions & 2 deletions

File tree

src/tokenizer.rs

Lines changed: 49 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -925,7 +925,18 @@ impl<'a> Tokenizer<'a> {
925925
/// Tokenize the statement and produce a vector of tokens with location information
926926
pub fn tokenize_with_location(&mut self) -> Result<Vec<TokenWithSpan>, TokenizerError> {
927927
let mut tokens: Vec<TokenWithSpan> = vec![];
928-
self.tokenize_with_location_into_buf(&mut tokens)
928+
self.tokenize_with_location_into_buf(&mut tokens, |token| token)
929+
.map(|_| tokens)
930+
}
931+
932+
/// Tokenize the statement and produce a vector of tokens, mapping each token
933+
/// with provided `mapper`
934+
pub fn tokenize_map(
935+
&mut self,
936+
mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan,
937+
) -> Result<Vec<TokenWithSpan>, TokenizerError> {
938+
let mut tokens: Vec<TokenWithSpan> = vec![];
939+
self.tokenize_with_location_into_buf(&mut tokens, mapper)
929940
.map(|_| tokens)
930941
}
931942

@@ -934,6 +945,7 @@ impl<'a> Tokenizer<'a> {
934945
pub fn tokenize_with_location_into_buf(
935946
&mut self,
936947
buf: &mut Vec<TokenWithSpan>,
948+
mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan,
937949
) -> Result<(), TokenizerError> {
938950
let mut state = State {
939951
peekable: self.query.chars().peekable(),
@@ -945,7 +957,8 @@ impl<'a> Tokenizer<'a> {
945957
while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? {
946958
let span = location.span_to(state.location());
947959

948-
buf.push(TokenWithSpan { token, span });
960+
let token = TokenWithSpan { token, span };
961+
buf.push(mapper(token));
949962

950963
location = state.location();
951964
}
@@ -2590,6 +2603,40 @@ mod tests {
25902603
compare(expected, tokens);
25912604
}
25922605

2606+
#[test]
2607+
fn tokenize_with_mapper() {
2608+
let sql = String::from("SELECT ?");
2609+
let dialect = GenericDialect {};
2610+
let mut param_num = 1;
2611+
2612+
let tokens = Tokenizer::new(&dialect, &sql)
2613+
.tokenize_map(|mut token_span| {
2614+
token_span.token = match token_span.token {
2615+
Token::Placeholder(n) => Token::Placeholder(if n == "?" {
2616+
let ret = format!("${}", param_num);
2617+
param_num += 1;
2618+
ret
2619+
} else {
2620+
n
2621+
}),
2622+
token => token,
2623+
};
2624+
token_span
2625+
})
2626+
.unwrap()
2627+
.into_iter()
2628+
.map(|t| t.token)
2629+
.collect();
2630+
2631+
let expected = vec![
2632+
Token::make_keyword("SELECT"),
2633+
Token::Whitespace(Whitespace::Space),
2634+
Token::Placeholder("$1".to_string()),
2635+
];
2636+
2637+
compare(expected, tokens);
2638+
}
2639+
25932640
#[test]
25942641
fn tokenize_clickhouse_double_equal() {
25952642
let sql = String::from("SELECT foo=='1'");

0 commit comments

Comments
 (0)