Skip to content

Commit cb1cb49

Browse files
committed
Support PARTITIONED BY with optional column types
Databricks allows partition columns to be specified without types when they reference columns already defined in the table specification, e.g. `CREATE TABLE t (col1 STRING, col2 INT) PARTITIONED BY (col1)` or `CREATE TABLE t (name STRING) PARTITIONED BY (year INT, month INT)`. This change introduces `parse_column_def_for_partition()`, which makes the data type optional by checking whether the next token is a comma or closing paren (indicating that no type follows the column name).
1 parent 839fd80 commit cb1cb49

2 files changed

Lines changed: 94 additions & 1 deletion

File tree

src/parser/mod.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7880,14 +7880,41 @@ impl<'a> Parser<'a> {
78807880
pub fn parse_hive_distribution(&mut self) -> Result<HiveDistributionStyle, ParserError> {
78817881
if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) {
78827882
self.expect_token(&Token::LParen)?;
7883-
let columns = self.parse_comma_separated(Parser::parse_column_def)?;
7883+
let columns = self.parse_comma_separated(Parser::parse_column_def_for_partition)?;
78847884
self.expect_token(&Token::RParen)?;
78857885
Ok(HiveDistributionStyle::PARTITIONED { columns })
78867886
} else {
78877887
Ok(HiveDistributionStyle::NONE)
78887888
}
78897889
}
78907890

7891+
/// Parse column definition for PARTITIONED BY clause.
7892+
///
7893+
/// Databricks allows partition columns without types when referencing
7894+
/// columns already defined in the table specification:
7895+
/// ```sql
7896+
/// CREATE TABLE t (col1 STRING, col2 INT) PARTITIONED BY (col1)
7897+
/// CREATE TABLE t (col1 STRING) PARTITIONED BY (col2 INT)
7898+
/// ```
7899+
///
7900+
/// See [Databricks](https://docs.databricks.com/en/sql/language-manual/sql-ref-partition.html)
7901+
fn parse_column_def_for_partition(&mut self) -> Result<ColumnDef, ParserError> {
7902+
let name = self.parse_identifier()?;
7903+
7904+
// Check if the next token indicates there's no type specified
7905+
// (comma or closing paren means end of this column definition)
7906+
let data_type = match self.peek_token().token {
7907+
Token::Comma | Token::RParen => DataType::Unspecified,
7908+
_ => self.parse_data_type()?,
7909+
};
7910+
7911+
Ok(ColumnDef {
7912+
name,
7913+
data_type,
7914+
options: vec![],
7915+
})
7916+
}
7917+
78917918
/// Parse Hive formats.
78927919
pub fn parse_hive_formats(&mut self) -> Result<Option<HiveFormat>, ParserError> {
78937920
let mut hive_format: Option<HiveFormat> = None;

tests/sqlparser_databricks.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,3 +456,69 @@ fn parse_optimize_table() {
456456
ParserError::ParserError("Expected: an expression, found: )".to_string())
457457
);
458458
}
459+
460+
#[test]
fn parse_create_table_partitioned_by() {
    // Databricks allows PARTITIONED BY with just column names (referencing existing columns)
    // https://docs.databricks.com/en/sql/language-manual/sql-ref-partition.html

    // Round-trip checks: without types, with types, and a mix of both.
    let round_trips = [
        "CREATE TABLE t (col1 STRING, col2 INT) PARTITIONED BY (col1)",
        "CREATE TABLE t (col1 STRING, col2 INT, col3 DATE) PARTITIONED BY (col1, col2)",
        "CREATE TABLE t (name STRING) PARTITIONED BY (year INT, month INT)",
        "CREATE TABLE t (id INT, name STRING) PARTITIONED BY (region, year INT)",
    ];
    for sql in round_trips {
        databricks().verified_stmt(sql);
    }

    // AST shape: a partition column given without a type parses as DataType::Unspecified.
    let stmt = databricks().verified_stmt("CREATE TABLE t (col1 STRING) PARTITIONED BY (col1)");
    if let Statement::CreateTable(CreateTable {
        name,
        columns,
        hive_distribution,
        ..
    }) = stmt
    {
        assert_eq!(name.to_string(), "t");
        assert_eq!(columns.len(), 1);
        assert_eq!(columns[0].name.to_string(), "col1");
        if let HiveDistributionStyle::PARTITIONED { columns: partition_cols } = hive_distribution {
            assert_eq!(partition_cols.len(), 1);
            assert_eq!(partition_cols[0].name.to_string(), "col1");
            assert_eq!(partition_cols[0].data_type, DataType::Unspecified);
        } else {
            unreachable!();
        }
    } else {
        unreachable!();
    }

    // AST shape: a partition column given with a type keeps that type.
    let stmt = databricks().verified_stmt("CREATE TABLE t (name STRING) PARTITIONED BY (year INT)");
    if let Statement::CreateTable(CreateTable { hive_distribution, .. }) = stmt {
        if let HiveDistributionStyle::PARTITIONED { columns: partition_cols } = hive_distribution {
            assert_eq!(partition_cols.len(), 1);
            assert_eq!(partition_cols[0].name.to_string(), "year");
            assert_eq!(partition_cols[0].data_type, DataType::Int(None));
        } else {
            unreachable!();
        }
    } else {
        unreachable!();
    }
}

0 commit comments

Comments
 (0)