Skip to content

Commit cb1cb49

Browse files
committed
Support PARTITIONED BY with optional column types
Databricks allows partition columns to be specified without types when they reference columns already defined in the table specification, e.g. `CREATE TABLE t (col1 STRING, col2 INT) PARTITIONED BY (col1)` or `CREATE TABLE t (name STRING) PARTITIONED BY (year INT, month INT)`. This change introduces `parse_column_def_for_partition()`, which makes the data type optional by checking whether the next token is a comma or closing paren (indicating that no type follows the column name).
1 parent 839fd80 commit cb1cb49

2 files changed

Lines changed: 94 additions & 1 deletion

File tree

src/parser/mod.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7880,14 +7880,41 @@ impl<'a> Parser<'a> {
78807880
pub fn parse_hive_distribution(&mut self) -> Result<HiveDistributionStyle, ParserError> {
78817881
if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) {
78827882
self.expect_token(&Token::LParen)?;
7883-
let columns = self.parse_comma_separated(Parser::parse_column_def)?;
7883+
let columns = self.parse_comma_separated(Parser::parse_column_def_for_partition)?;
78847884
self.expect_token(&Token::RParen)?;
78857885
Ok(HiveDistributionStyle::PARTITIONED { columns })
78867886
} else {
78877887
Ok(HiveDistributionStyle::NONE)
78887888
}
78897889
}
78907890

7891+
/// Parse column definition for PARTITIONED BY clause.
7892+
///
7893+
/// Databricks allows partition columns without types when referencing
7894+
/// columns already defined in the table specification:
7895+
/// ```sql
7896+
/// CREATE TABLE t (col1 STRING, col2 INT) PARTITIONED BY (col1)
7897+
/// CREATE TABLE t (col1 STRING) PARTITIONED BY (col2 INT)
7898+
/// ```
7899+
///
7900+
/// See [Databricks](https://docs.databricks.com/en/sql/language-manual/sql-ref-partition.html)
7901+
fn parse_column_def_for_partition(&mut self) -> Result<ColumnDef, ParserError> {
7902+
let name = self.parse_identifier()?;
7903+
7904+
// Check if the next token indicates there's no type specified
7905+
// (comma or closing paren means end of this column definition)
7906+
let data_type = match self.peek_token().token {
7907+
Token::Comma | Token::RParen => DataType::Unspecified,
7908+
_ => self.parse_data_type()?,
7909+
};
7910+
7911+
Ok(ColumnDef {
7912+
name,
7913+
data_type,
7914+
options: vec![],
7915+
})
7916+
}
7917+
78917918
/// Parse Hive formats.
78927919
pub fn parse_hive_formats(&mut self) -> Result<Option<HiveFormat>, ParserError> {
78937920
let mut hive_format: Option<HiveFormat> = None;

tests/sqlparser_databricks.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,3 +456,69 @@ fn parse_optimize_table() {
456456
ParserError::ParserError("Expected: an expression, found: )".to_string())
457457
);
458458
}
459+
460+
#[test]
fn parse_create_table_partitioned_by() {
    // Databricks allows PARTITIONED BY with just column names (referencing existing columns)
    // https://docs.databricks.com/en/sql/language-manual/sql-ref-partition.html

    // Round-trip checks: without types, with types, and a mix of both.
    let round_trips = [
        "CREATE TABLE t (col1 STRING, col2 INT) PARTITIONED BY (col1)",
        "CREATE TABLE t (col1 STRING, col2 INT, col3 DATE) PARTITIONED BY (col1, col2)",
        "CREATE TABLE t (name STRING) PARTITIONED BY (year INT, month INT)",
        "CREATE TABLE t (id INT, name STRING) PARTITIONED BY (region, year INT)",
    ];
    for sql in round_trips {
        databricks().verified_stmt(sql);
    }

    // AST shape: a partition column given without a type parses as DataType::Unspecified.
    let stmt = databricks().verified_stmt("CREATE TABLE t (col1 STRING) PARTITIONED BY (col1)");
    if let Statement::CreateTable(CreateTable {
        name,
        columns,
        hive_distribution,
        ..
    }) = stmt
    {
        assert_eq!(name.to_string(), "t");
        assert_eq!(columns.len(), 1);
        assert_eq!(columns[0].name.to_string(), "col1");
        if let HiveDistributionStyle::PARTITIONED { columns: partition_cols } = hive_distribution {
            assert_eq!(partition_cols.len(), 1);
            assert_eq!(partition_cols[0].name.to_string(), "col1");
            assert_eq!(partition_cols[0].data_type, DataType::Unspecified);
        } else {
            unreachable!();
        }
    } else {
        unreachable!();
    }

    // AST shape: a partition column given with a type keeps that type.
    let stmt = databricks().verified_stmt("CREATE TABLE t (name STRING) PARTITIONED BY (year INT)");
    if let Statement::CreateTable(CreateTable { hive_distribution, .. }) = stmt {
        if let HiveDistributionStyle::PARTITIONED { columns: partition_cols } = hive_distribution {
            assert_eq!(partition_cols.len(), 1);
            assert_eq!(partition_cols[0].name.to_string(), "year");
            assert_eq!(partition_cols[0].data_type, DataType::Int(None));
        } else {
            unreachable!();
        }
    } else {
        unreachable!();
    }
}

0 commit comments

Comments
 (0)