Skip to content

Commit 53dcc82

Browse files
andygrove and claude authored
Spark SQL: Add SparkSqlDialect (#2305)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 9f04ebe commit 53dcc82

File tree

6 files changed

+628
-92
lines changed

6 files changed

+628
-92
lines changed

src/ast/ddl.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3203,6 +3203,7 @@ impl fmt::Display for CreateTable {
32033203
Some(HiveIOFormat::FileFormat { format }) if !self.external => {
32043204
write!(f, " STORED AS {format}")?
32053205
}
3206+
Some(HiveIOFormat::Using { format }) => write!(f, " USING {format}")?,
32063207
_ => (),
32073208
}
32083209
if let Some(serde_properties) = serde_properties.as_ref() {

src/ast/mod.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8658,6 +8658,15 @@ pub enum HiveIOFormat {
86588658
/// The file format used for storage.
86598659
format: FileFormat,
86608660
},
8661+
/// `USING <format>` syntax used by Spark SQL.
8662+
///
8663+
/// Example: `CREATE TABLE t (i INT) USING PARQUET`
8664+
///
8665+
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-datasource.html>
8666+
Using {
8667+
/// The data source or format name, e.g. `parquet`, `delta`, `csv`.
8668+
format: Ident,
8669+
},
86618670
}
86628671

86638672
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Default)]

src/dialect/mod.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ mod oracle;
2828
mod postgresql;
2929
mod redshift;
3030
mod snowflake;
31+
mod spark;
3132
mod sqlite;
3233

3334
use core::any::{Any, TypeId};
@@ -51,6 +52,7 @@ pub use self::postgresql::PostgreSqlDialect;
5152
pub use self::redshift::RedshiftSqlDialect;
5253
pub use self::snowflake::parse_snowflake_stage_name;
5354
pub use self::snowflake::SnowflakeDialect;
55+
pub use self::spark::SparkSqlDialect;
5456
pub use self::sqlite::SQLiteDialect;
5557

5658
/// Macro for streamlining the creation of derived `Dialect` objects.
@@ -1727,6 +1729,42 @@ pub trait Dialect: Debug + Any {
17271729
fn supports_xml_expressions(&self) -> bool {
17281730
false
17291731
}
1732+
1733+
    /// Returns true if the dialect supports `USING <format>` in `CREATE TABLE`.
    ///
    /// Example:
    /// ```sql
    /// CREATE TABLE t (i INT) USING PARQUET
    /// ```
    ///
    /// [Spark SQL](https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-datasource.html)
    fn supports_create_table_using(&self) -> bool {
        // Default is false: only dialects that opt in (e.g. Spark SQL) accept
        // the `USING <format>` data-source clause.
        false
    }
1744+
1745+
    /// Returns true if the dialect treats `LONG` as an alias for `BIGINT`.
    ///
    /// Example:
    /// ```sql
    /// CREATE TABLE t (id LONG)
    /// ```
    ///
    /// [Spark SQL](https://spark.apache.org/docs/latest/sql-ref-datatypes.html)
    fn supports_long_type_as_bigint(&self) -> bool {
        // Default is false: most dialects have no `LONG` integer type,
        // so the keyword is not remapped to `BIGINT` unless a dialect opts in.
        false
    }
1756+
1757+
    /// Returns true if the dialect supports `MAP<K, V>` angle-bracket syntax for the MAP data type.
    ///
    /// Example:
    /// ```sql
    /// CREATE TABLE t (m MAP<STRING, INT>)
    /// ```
    ///
    /// [Spark SQL](https://spark.apache.org/docs/latest/sql-ref-datatypes.html)
    fn supports_map_literal_with_angle_brackets(&self) -> bool {
        // Default is false: angle-bracket MAP syntax is dialect-specific
        // (e.g. Spark/Hive) and must be explicitly enabled.
        false
    }
17301768
}
17311769

17321770
/// Operators for which precedence must be defined.
@@ -1801,6 +1839,7 @@ pub fn dialect_from_str(dialect_name: impl AsRef<str>) -> Option<Box<dyn Dialect
18011839
"ansi" => Some(Box::new(AnsiDialect {})),
18021840
"duckdb" => Some(Box::new(DuckDbDialect {})),
18031841
"databricks" => Some(Box::new(DatabricksDialect {})),
1842+
"spark" | "sparksql" => Some(Box::new(SparkSqlDialect {})),
18041843
"oracle" => Some(Box::new(OracleDialect {})),
18051844
_ => None,
18061845
}

src/dialect/spark.rs

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#[cfg(not(feature = "std"))]
19+
use alloc::boxed::Box;
20+
21+
use crate::ast::{BinaryOperator, Expr};
22+
use crate::dialect::Dialect;
23+
use crate::keywords::Keyword;
24+
use crate::parser::{Parser, ParserError};
25+
26+
/// A [`Dialect`] for [Apache Spark SQL](https://spark.apache.org/docs/latest/sql-ref.html).
///
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax.html>.
///
/// A unit struct: it carries no configuration; all Spark-specific behavior
/// is provided by its `Dialect` trait implementation.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SparkSqlDialect;
32+
33+
impl Dialect for SparkSqlDialect {
34+
// See https://spark.apache.org/docs/latest/sql-ref-identifier.html
35+
fn is_delimited_identifier_start(&self, ch: char) -> bool {
36+
matches!(ch, '`')
37+
}
38+
39+
fn is_identifier_start(&self, ch: char) -> bool {
40+
matches!(ch, 'a'..='z' | 'A'..='Z' | '_')
41+
}
42+
43+
fn is_identifier_part(&self, ch: char) -> bool {
44+
matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
45+
}
46+
47+
/// See <https://spark.apache.org/docs/latest/sql-ref-functions-builtin-agg.html>
48+
fn supports_filter_during_aggregation(&self) -> bool {
49+
true
50+
}
51+
52+
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
53+
fn supports_group_by_expr(&self) -> bool {
54+
true
55+
}
56+
57+
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
58+
fn supports_group_by_with_modifier(&self) -> bool {
59+
true
60+
}
61+
62+
/// See <https://spark.apache.org/docs/latest/sql-ref-functions-builtin-higher-order-func.html>
63+
fn supports_lambda_functions(&self) -> bool {
64+
true
65+
}
66+
67+
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select.html>
68+
fn supports_select_wildcard_except(&self) -> bool {
69+
true
70+
}
71+
72+
/// See <https://spark.apache.org/docs/latest/sql-ref-datatypes.html>
73+
fn supports_struct_literal(&self) -> bool {
74+
true
75+
}
76+
77+
fn supports_nested_comments(&self) -> bool {
78+
true
79+
}
80+
81+
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-datasource.html>
82+
fn supports_create_table_using(&self) -> bool {
83+
true
84+
}
85+
86+
/// `LONG` is an alias for `BIGINT` in Spark SQL.
87+
///
88+
/// See <https://spark.apache.org/docs/latest/sql-ref-datatypes.html>
89+
fn supports_long_type_as_bigint(&self) -> bool {
90+
true
91+
}
92+
93+
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select.html>
94+
fn supports_values_as_table_factor(&self) -> bool {
95+
true
96+
}
97+
98+
fn require_interval_qualifier(&self) -> bool {
99+
true
100+
}
101+
102+
fn supports_bang_not_operator(&self) -> bool {
103+
true
104+
}
105+
106+
fn supports_select_item_multi_column_alias(&self) -> bool {
107+
true
108+
}
109+
110+
fn supports_cte_without_as(&self) -> bool {
111+
true
112+
}
113+
114+
/// See <https://spark.apache.org/docs/latest/sql-ref-datatypes.html>
115+
fn supports_map_literal_with_angle_brackets(&self) -> bool {
116+
true
117+
}
118+
119+
/// Parse the `DIV` keyword as integer division.
120+
///
121+
/// Example: `SELECT 10 DIV 3` returns `3`.
122+
///
123+
/// See <https://spark.apache.org/docs/latest/sql-ref-functions-builtin-math.html>
124+
fn parse_infix(
125+
&self,
126+
parser: &mut Parser,
127+
expr: &Expr,
128+
_precedence: u8,
129+
) -> Option<Result<Expr, ParserError>> {
130+
if parser.parse_keyword(Keyword::DIV) {
131+
let left = Box::new(expr.clone());
132+
let right = Box::new(match parser.parse_expr() {
133+
Ok(expr) => expr,
134+
Err(e) => return Some(Err(e)),
135+
});
136+
Some(Ok(Expr::BinaryOp {
137+
left,
138+
op: BinaryOperator::MyIntegerDivide,
139+
right,
140+
}))
141+
} else {
142+
None
143+
}
144+
}
145+
}

0 commit comments

Comments
 (0)