Skip to content

Commit cdc7f6b

Browse files
committed
smarter number literal parsing with state machine for scientific notation
1 parent 5190fa3 commit cdc7f6b

1 file changed

Lines changed: 55 additions & 13 deletions

File tree

src/tokenizer.rs

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -382,20 +382,9 @@ impl<'a> Tokenizer<'a> {
382382
}
383383
}
384384
// numbers
385-
'0'..='9' => {
385+
'0'..='9' | '.' => {
386386
// TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal
387-
let s = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.'));
388-
Ok(Some(Token::Number(s)))
389-
}
390-
'.' => {
391-
let dot = self.consume_and_return(chars, Token::Period).unwrap();
392-
// try and see if this is a number
393-
let s = peeking_take_while(chars, |ch| matches!(ch, '0'..='9'));
394-
if s.len() > 0 {
395-
Ok(Some(Token::Number(format!(".{}", s))))
396-
} else {
397-
Ok(dot)
398-
}
387+
Ok(Some(consume_number_literal_or_dot(chars, ch)))
399388
}
400389
// punctuation
401390
'(' => self.consume_and_return(chars, Token::LParen),
@@ -635,6 +624,59 @@ fn peeking_take_while(
635624
s
636625
}
637626

627+
/// handle parsing numbers, including scientific notation
628+
/// https://docs.snowflake.com/en/sql-reference/data-types-numeric.html
629+
fn consume_number_literal_or_dot(chars: &mut Peekable<Chars<'_>>, first: char) -> Token {
630+
let mut s = String::new();
631+
chars.next(); // consume
632+
s.push(first);
633+
#[derive(PartialEq)]
634+
enum NumState {
635+
WholeNum, // we look for digits or . or e
636+
Decimal, // we look for digits or e
637+
ExponentStart, // we look for either a +- sign or digits
638+
Exponent, // we only look for digits
639+
}
640+
let mut num_state = if first == '.' {
641+
NumState::Decimal
642+
} else {
643+
NumState::WholeNum
644+
};
645+
let mut is_second_char = true;
646+
while let Some(&ch) = chars.peek() {
647+
if num_state == NumState::Decimal && is_second_char && !matches!(ch, '0'..='9') {
648+
return Token::Period;
649+
}
650+
let add_to_string = match num_state {
651+
NumState::WholeNum | NumState::Decimal => match ch {
652+
'0'..='9' => true,
653+
'.' if num_state == NumState::WholeNum => {
654+
num_state = NumState::Decimal;
655+
true
656+
}
657+
'e' | 'E' => {
658+
num_state = NumState::ExponentStart;
659+
true
660+
}
661+
_ => false,
662+
},
663+
NumState::ExponentStart => {
664+
num_state = NumState::Exponent;
665+
matches!(ch, '0'..='9' | '-' | '+')
666+
}
667+
NumState::Exponent => matches!(ch, '0'..='9'),
668+
};
669+
if add_to_string {
670+
chars.next(); // consume
671+
s.push(ch);
672+
} else {
673+
break;
674+
}
675+
is_second_char = false;
676+
}
677+
Token::Number(s)
678+
}
679+
638680
#[cfg(test)]
639681
mod tests {
640682
use super::super::dialect::GenericDialect;

0 commit comments

Comments
 (0)