Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ast/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,7 @@ pub enum ExactNumberInfo {
/// Only precision information, e.g. `DECIMAL(10)`
Precision(u64),
/// Precision and scale information, e.g. `DECIMAL(10,2)`
PrecisionAndScale(u64, u64),
PrecisionAndScale(u64, i64),
}

impl fmt::Display for ExactNumberInfo {
Expand Down
84 changes: 80 additions & 4 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11213,7 +11213,7 @@ impl<'a> Parser<'a> {
if self.consume_token(&Token::LParen) {
let precision = self.parse_literal_uint()?;
let scale = if self.consume_token(&Token::Comma) {
Some(self.parse_literal_uint()?)
Some(self.parse_signed_integer()?)
} else {
None
};
Expand All @@ -11229,6 +11229,30 @@ impl<'a> Parser<'a> {
}
}

/// Parse an optionally signed integer literal.
fn parse_signed_integer(&mut self) -> Result<i64, ParserError> {
let next_token = self.next_token();
let (sign, number_token) = match next_token.token {
Token::Minus => {
let number_token = self.next_token();
(-1, number_token)
}
Token::Plus => {
let number_token = self.next_token();
(1, number_token)
}
_ => (1, next_token),
};
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was the pattern in the previous comment example insufficient? I think the current approach to optionally multiply by negative makes things slightly longer to follow what the code is trying to achieve.

fyi note ideally we use next_token_ref since that avoid cloning, especially given that parse::<i64> does not require an owned value

Copy link
Copy Markdown
Contributor Author

@IndexSeek IndexSeek Aug 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I tried to implement your suggested pattern, it failed on test cases with explicit + signs (like NUMERIC(10,+5)) because parse_literal_uint() expected a number token. I should have posted my issue earlier.

I actually went back and tested that PostgreSQL doesn't even take that (ERROR: type modifiers must be simple constants or identifiers), so I was overcomplicating it and I will remove this test case.

fyi note ideally we use next_token_ref since that avoid cloning, especially given that parse:: does not require an owned value

I did not find next_token_ref but it looks that advance_token() + get_current_token() will give us this behavior (#1618)?

Here's what I am working with right now after removing that explicit "+" test:

    fn parse_signed_integer(&mut self) -> Result<i64, ParserError> {
        if !self.consume_token(&Token::Minus) {
            return i64::try_from(self.parse_literal_uint()?)
                .map_err(|_| ParserError::ParserError("Integer overflow".to_string()));
        }

        self.advance_token();
        let next_token = self.get_current_token();
        match &next_token.token {
            Token::Number(s, _) => {
                let positive_value = Self::parse::<i64>(s.clone(), next_token.span.start)?;
                Ok(-positive_value)
            }
            _ => self.expected_ref("literal int", next_token),
        }
    }

I believe this should only clone the string data and not the tokens now.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah that makes sense, I confused peek_token_ref and was hoping to avoid the clone entirely but looking at Self::parse now I see its not currently set up to take in references so the current approach to clone the string sounds reasonable! Here's a couple minor updates to your example I think should do what we intended?

    fn parse_signed_integer(&mut self) -> Result<i64, ParserError> {
        if self.consume_token(&Token::Minus) {
            return i64::try_from(self.parse_literal_uint()?)
                .map(|v| -v)
                .or_else(|_| self.expected_ref("i64 literal", self.peek_token_ref()))
        }

        let _ = self.consume_token(&Token::Plus);
        self.advance_token();
        let next_token = self.get_current_token();
        match &next_token.token {
            Token::Number(s, _) => Self::parse::<i64>(s.clone(), next_token.span.start),
            _ => self.expected_ref("literal int", next_token),
        }
    }

Copy link
Copy Markdown
Contributor Author

@IndexSeek IndexSeek Aug 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion! It does read cleaner not starting with negation, looking back, opening with if !self.consume_token(&Token::Minus) felt a bit strange.

I like how your solution still handles the + sign, but I noticed a few issues with the logic flow. I think that with an optional + token on let _ = self.consume_token(&Token::Plus); followed by self.advance_token() and then self.get_current_token() could advance twice and potentially skip the actual number token.

I did take a look at peek_token_ref and was wondering if we could use it like so?

    fn parse_signed_integer(&mut self) -> Result<i64, ParserError> {
        let is_negative = self.consume_token(&Token::Minus);

        if !is_negative {
            let _ = self.consume_token(&Token::Plus);
        }

        let current_token = self.peek_token_ref();
        match &current_token.token {
            Token::Number(s, _) => {
                let s = s.clone();
                let span_start = current_token.span.start;
                self.advance_token();
                let value = Self::parse::<i64>(s, span_start)?;
                Ok(if is_negative { -value } else { value })
            }
            _ => self.expected_ref("number", current_token),
        }
    }

I think that should avoid us needing to consume the token until we know we have a number.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we still parse the "+" character or error out? I know I originally added a test for it but I'm not sure any dialects support a "+" token in the scale definition for numeric types.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah the last snippet sounds reasonable to me! I think we should still parse the + character since the idea is that the api can be used in other contexts beyond the numeric type scale

Copy link
Copy Markdown
Contributor Author

@IndexSeek IndexSeek Aug 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds good to me! I also added back that test that was explicitly checking to ensure that "+" would work and be removed when converted back to a string, but we can remove those lines if it is redundant.


match number_token.token {
Token::Number(s, _) => {
let value = Self::parse::<i64>(s, number_token.span.start)?;
Ok(sign * value)
}
_ => self.expected("number", number_token),
}
}

pub fn parse_optional_type_modifiers(&mut self) -> Result<Option<Vec<String>>, ParserError> {
if self.consume_token(&Token::LParen) {
let mut modifiers = Vec::new();
Expand Down Expand Up @@ -17069,7 +17093,7 @@ mod tests {
use crate::ast::{
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, ObjectName, TimezoneInfo,
};
use crate::dialect::{AnsiDialect, GenericDialect};
use crate::dialect::{AnsiDialect, GenericDialect, PostgreSqlDialect};
use crate::test_utils::TestedDialects;

macro_rules! test_parse_data_type {
Expand Down Expand Up @@ -17275,8 +17299,11 @@ mod tests {
#[test]
fn test_ansii_exact_numeric_types() {
// Exact numeric types: <https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type>
let dialect =
TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]);
let dialect = TestedDialects::new(vec![
Box::new(GenericDialect {}),
Box::new(AnsiDialect {}),
Box::new(PostgreSqlDialect {}),
]);

test_parse_data_type!(dialect, "NUMERIC", DataType::Numeric(ExactNumberInfo::None));

Expand Down Expand Up @@ -17319,6 +17346,55 @@ mod tests {
"DEC(2,10)",
DataType::Dec(ExactNumberInfo::PrecisionAndScale(2, 10))
);

// Test negative scale values.
test_parse_data_type!(
dialect,
"NUMERIC(10,-2)",
DataType::Numeric(ExactNumberInfo::PrecisionAndScale(10, -2))
);

test_parse_data_type!(
dialect,
"DECIMAL(1000,-10)",
DataType::Decimal(ExactNumberInfo::PrecisionAndScale(1000, -10))
);

test_parse_data_type!(
dialect,
"DEC(5,-1000)",
DataType::Dec(ExactNumberInfo::PrecisionAndScale(5, -1000))
);

// Additional negative scale test cases
test_parse_data_type!(
dialect,
"NUMERIC(10,-5)",
DataType::Numeric(ExactNumberInfo::PrecisionAndScale(10, -5))
);

test_parse_data_type!(
dialect,
"DECIMAL(20,-10)",
DataType::Decimal(ExactNumberInfo::PrecisionAndScale(20, -10))
);

test_parse_data_type!(
dialect,
"DEC(5,-2)",
DataType::Dec(ExactNumberInfo::PrecisionAndScale(5, -2))
);

// Test positive scale with explicit plus sign
dialect.run_parser_method("NUMERIC(10,+5)", |parser| {
let data_type = parser.parse_data_type().unwrap();
assert_eq!(
DataType::Numeric(ExactNumberInfo::PrecisionAndScale(10, 5)),
data_type
);
// Note: Explicit '+' sign is not preserved in output, which is correct
assert_eq!("NUMERIC(10,5)", data_type.to_string());
});
}

#[test]
Expand Down