Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/ast/ddl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3203,6 +3203,7 @@ impl fmt::Display for CreateTable {
Some(HiveIOFormat::FileFormat { format }) if !self.external => {
write!(f, " STORED AS {format}")?
}
Some(HiveIOFormat::Using { format }) => write!(f, " USING {format}")?,
_ => (),
}
if let Some(serde_properties) = serde_properties.as_ref() {
Expand Down
9 changes: 9 additions & 0 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8658,6 +8658,15 @@ pub enum HiveIOFormat {
/// The file format used for storage.
format: FileFormat,
},
/// `USING <format>` syntax used by Spark SQL.
///
/// Example: `CREATE TABLE t (i INT) USING PARQUET`
///
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-datasource.html>
Using {
/// The data source or format name, e.g. `parquet`, `delta`, `csv`.
format: Ident,
},
}

#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Default)]
Expand Down
39 changes: 39 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ mod oracle;
mod postgresql;
mod redshift;
mod snowflake;
mod spark;
mod sqlite;

use core::any::{Any, TypeId};
Expand All @@ -51,6 +52,7 @@ pub use self::postgresql::PostgreSqlDialect;
pub use self::redshift::RedshiftSqlDialect;
pub use self::snowflake::parse_snowflake_stage_name;
pub use self::snowflake::SnowflakeDialect;
pub use self::spark::SparkSqlDialect;
pub use self::sqlite::SQLiteDialect;

/// Macro for streamlining the creation of derived `Dialect` objects.
Expand Down Expand Up @@ -1727,6 +1729,42 @@ pub trait Dialect: Debug + Any {
fn supports_xml_expressions(&self) -> bool {
false
}

/// Returns true if the dialect supports `USING <format>` in `CREATE TABLE`.
///
/// Example:
/// ```sql
/// CREATE TABLE t (i INT) USING PARQUET
/// ```
///
/// [Spark SQL](https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-datasource.html)
fn supports_create_table_using(&self) -> bool {
false
}

/// Returns true if the dialect treats `LONG` as an alias for `BIGINT`.
///
/// Example:
/// ```sql
/// CREATE TABLE t (id LONG)
/// ```
///
/// [Spark SQL](https://spark.apache.org/docs/latest/sql-ref-datatypes.html)
fn supports_long_type_as_bigint(&self) -> bool {
false
}

/// Returns true if the dialect supports `MAP<K, V>` angle-bracket syntax for the MAP data type.
///
/// Example:
/// ```sql
/// CREATE TABLE t (m MAP<STRING, INT>)
/// ```
///
/// [Spark SQL](https://spark.apache.org/docs/latest/sql-ref-datatypes.html)
fn supports_map_literal_with_angle_brackets(&self) -> bool {
false
}
}

/// Operators for which precedence must be defined.
Expand Down Expand Up @@ -1801,6 +1839,7 @@ pub fn dialect_from_str(dialect_name: impl AsRef<str>) -> Option<Box<dyn Dialect
"ansi" => Some(Box::new(AnsiDialect {})),
"duckdb" => Some(Box::new(DuckDbDialect {})),
"databricks" => Some(Box::new(DatabricksDialect {})),
"spark" | "sparksql" => Some(Box::new(SparkSqlDialect {})),
"oracle" => Some(Box::new(OracleDialect {})),
_ => None,
}
Expand Down
145 changes: 145 additions & 0 deletions src/dialect/spark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#[cfg(not(feature = "std"))]
use alloc::boxed::Box;

use crate::ast::{BinaryOperator, Expr};
use crate::dialect::Dialect;
use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError};

/// A [`Dialect`] for [Apache Spark SQL](https://spark.apache.org/docs/latest/sql-ref.html).
///
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax.html>.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SparkSqlDialect;

impl Dialect for SparkSqlDialect {
    /// Spark delimits quoted identifiers with backticks.
    ///
    /// See <https://spark.apache.org/docs/latest/sql-ref-identifier.html>
    fn is_delimited_identifier_start(&self, ch: char) -> bool {
        ch == '`'
    }

    /// Unquoted identifiers start with an ASCII letter or underscore.
    fn is_identifier_start(&self, ch: char) -> bool {
        ch.is_ascii_alphabetic() || ch == '_'
    }

    /// Subsequent identifier characters also allow ASCII digits.
    fn is_identifier_part(&self, ch: char) -> bool {
        ch.is_ascii_alphanumeric() || ch == '_'
    }

    /// See <https://spark.apache.org/docs/latest/sql-ref-functions-builtin-agg.html>
    fn supports_filter_during_aggregation(&self) -> bool {
        true
    }

    /// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
    fn supports_group_by_expr(&self) -> bool {
        true
    }

    /// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
    fn supports_group_by_with_modifier(&self) -> bool {
        true
    }

    /// See <https://spark.apache.org/docs/latest/sql-ref-functions-builtin-higher-order-func.html>
    fn supports_lambda_functions(&self) -> bool {
        true
    }

    /// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select.html>
    fn supports_select_wildcard_except(&self) -> bool {
        true
    }

    /// See <https://spark.apache.org/docs/latest/sql-ref-datatypes.html>
    fn supports_struct_literal(&self) -> bool {
        true
    }

    /// Spark allows `/* ... /* ... */ ... */` style nested block comments.
    fn supports_nested_comments(&self) -> bool {
        true
    }

    /// See <https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-datasource.html>
    fn supports_create_table_using(&self) -> bool {
        true
    }

    /// `LONG` is an alias for `BIGINT` in Spark SQL.
    ///
    /// See <https://spark.apache.org/docs/latest/sql-ref-datatypes.html>
    fn supports_long_type_as_bigint(&self) -> bool {
        true
    }

    /// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select.html>
    fn supports_values_as_table_factor(&self) -> bool {
        true
    }

    /// Interval expressions must carry an explicit qualifier (e.g. `INTERVAL 1 DAY`).
    fn require_interval_qualifier(&self) -> bool {
        true
    }

    /// `!` may be used as a logical NOT prefix operator.
    fn supports_bang_not_operator(&self) -> bool {
        true
    }

    /// e.g. `SELECT ... AS (a, b)` multi-column aliases.
    fn supports_select_item_multi_column_alias(&self) -> bool {
        true
    }

    /// A CTE may omit the `AS` keyword before its query body.
    fn supports_cte_without_as(&self) -> bool {
        true
    }

    /// See <https://spark.apache.org/docs/latest/sql-ref-datatypes.html>
    fn supports_map_literal_with_angle_brackets(&self) -> bool {
        true
    }

    /// Parse the `DIV` keyword as integer division.
    ///
    /// Example: `SELECT 10 DIV 3` returns `3`.
    ///
    /// See <https://spark.apache.org/docs/latest/sql-ref-functions-builtin-math.html>
    fn parse_infix(
        &self,
        parser: &mut Parser,
        expr: &Expr,
        _precedence: u8,
    ) -> Option<Result<Expr, ParserError>> {
        // Only handle the DIV keyword; everything else falls back to the
        // default infix parsing.
        if !parser.parse_keyword(Keyword::DIV) {
            return None;
        }
        // NOTE(review): the right-hand side is parsed with `parse_expr`, i.e.
        // without the incoming precedence — presumably intentional (matches the
        // Databricks/Hive pattern), but worth confirming for `a DIV b * c`.
        let right = match parser.parse_expr() {
            Ok(rhs) => Box::new(rhs),
            Err(e) => return Some(Err(e)),
        };
        Some(Ok(Expr::BinaryOp {
            left: Box::new(expr.clone()),
            op: BinaryOperator::MyIntegerDivide,
            right,
        }))
    }
}
28 changes: 26 additions & 2 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8311,6 +8311,7 @@ impl<'a> Parser<'a> {
Keyword::STORED,
Keyword::LOCATION,
Keyword::WITH,
Keyword::USING,
]) {
Some(Keyword::ROW) => {
hive_format
Expand Down Expand Up @@ -8350,6 +8351,16 @@ impl<'a> Parser<'a> {
break;
}
}
Some(Keyword::USING) if self.dialect.supports_create_table_using() => {
let format = self.parse_identifier()?;
hive_format.get_or_insert_with(HiveFormat::default).storage =
Some(HiveIOFormat::Using { format });
}
Some(Keyword::USING) => {
// USING is not a table format keyword in this dialect; put it back
self.prev_token();
break;
}
None => break,
_ => break,
}
Expand Down Expand Up @@ -12475,6 +12486,9 @@ impl<'a> Parser<'a> {
Keyword::TINYBLOB => Ok(DataType::TinyBlob),
Keyword::MEDIUMBLOB => Ok(DataType::MediumBlob),
Keyword::LONGBLOB => Ok(DataType::LongBlob),
Keyword::LONG if self.dialect.supports_long_type_as_bigint() => {
Ok(DataType::BigInt(None))
}
Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)),
Keyword::BIT => {
if self.parse_keyword(Keyword::VARYING) {
Expand Down Expand Up @@ -12609,8 +12623,7 @@ impl<'a> Parser<'a> {
let field_defs = self.parse_duckdb_struct_type_def()?;
Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses))
}
Keyword::STRUCT if dialect_is!(dialect is BigQueryDialect | DatabricksDialect | GenericDialect) =>
{
Keyword::STRUCT if self.dialect.supports_struct_literal() => {
self.prev_token();
let (field_defs, _trailing_bracket) =
self.parse_struct_type_def(Self::parse_struct_field_def)?;
Expand All @@ -12631,6 +12644,17 @@ impl<'a> Parser<'a> {
Keyword::LOWCARDINALITY if dialect_is!(dialect is ClickHouseDialect | GenericDialect) => {
Ok(self.parse_sub_type(DataType::LowCardinality)?)
}
Keyword::MAP if self.dialect.supports_map_literal_with_angle_brackets() => {
self.expect_token(&Token::Lt)?;
let key_data_type = self.parse_data_type()?;
self.expect_token(&Token::Comma)?;
let (value_data_type, _trailing_bracket) = self.parse_data_type_helper()?;
trailing_bracket = self.expect_closing_angle_bracket(_trailing_bracket)?;
Ok(DataType::Map(
Box::new(key_data_type),
Box::new(value_data_type),
))
}
Keyword::MAP if dialect_is!(dialect is ClickHouseDialect | GenericDialect) => {
self.prev_token();
let (key_data_type, value_data_type) = self.parse_click_house_map_def()?;
Expand Down
Loading
Loading