Skip to content

Commit 6acaa9c

Browse files
Working on unnest support for snowflake
1 parent 8d95d48 commit 6acaa9c

5 files changed

Lines changed: 417 additions & 17 deletions

File tree

datafusion/sql/src/unparser/ast.rs

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -432,11 +432,11 @@ pub struct RelationBuilder {
432432
}
433433

434434
#[derive(Clone)]
435-
#[expect(clippy::large_enum_variant)]
436435
enum TableFactorBuilder {
437436
Table(TableRelationBuilder),
438437
Derived(DerivedRelationBuilder),
439438
Unnest(UnnestRelationBuilder),
439+
Flatten(FlattenRelationBuilder),
440440
Empty,
441441
}
442442

@@ -458,6 +458,11 @@ impl RelationBuilder {
458458
self
459459
}
460460

461+
pub fn flatten(&mut self, value: FlattenRelationBuilder) -> &mut Self {
462+
self.relation = Some(TableFactorBuilder::Flatten(value));
463+
self
464+
}
465+
461466
pub fn empty(&mut self) -> &mut Self {
462467
self.relation = Some(TableFactorBuilder::Empty);
463468
self
@@ -474,6 +479,9 @@ impl RelationBuilder {
474479
Some(TableFactorBuilder::Unnest(ref mut rel_builder)) => {
475480
rel_builder.alias = value;
476481
}
482+
Some(TableFactorBuilder::Flatten(ref mut rel_builder)) => {
483+
rel_builder.alias = value;
484+
}
477485
Some(TableFactorBuilder::Empty) => (),
478486
None => (),
479487
}
@@ -484,6 +492,7 @@ impl RelationBuilder {
484492
Some(TableFactorBuilder::Table(ref value)) => Some(value.build()?),
485493
Some(TableFactorBuilder::Derived(ref value)) => Some(value.build()?),
486494
Some(TableFactorBuilder::Unnest(ref value)) => Some(value.build()?),
495+
Some(TableFactorBuilder::Flatten(ref value)) => Some(value.build()?),
487496
Some(TableFactorBuilder::Empty) => None,
488497
None => return Err(Into::into(UninitializedFieldError::from("relation"))),
489498
})
@@ -688,6 +697,94 @@ impl Default for UnnestRelationBuilder {
688697
}
689698
}
690699

700+
/// Table alias given to FLATTEN table factors when no other alias is chosen.
///
/// Snowflake needs an alias on the FLATTEN relation so that its output columns
/// can be referenced in qualified form (e.g. `_unnest.VALUE`).
pub const FLATTEN_DEFAULT_ALIAS: &str = "_unnest";
703+
704+
/// Builds a `LATERAL FLATTEN(INPUT => expr, OUTER => bool)` table factor
705+
/// for Snowflake-style unnesting.
706+
#[derive(Clone)]
707+
pub struct FlattenRelationBuilder {
708+
pub alias: Option<ast::TableAlias>,
709+
/// The input expression to flatten (e.g. a column reference).
710+
pub input_expr: Option<ast::Expr>,
711+
/// Whether to preserve rows for NULL/empty inputs (Snowflake `OUTER` param).
712+
pub outer: bool,
713+
}
714+
715+
impl FlattenRelationBuilder {
716+
pub fn alias(&mut self, value: Option<ast::TableAlias>) -> &mut Self {
717+
self.alias = value;
718+
self
719+
}
720+
721+
pub fn input_expr(&mut self, value: ast::Expr) -> &mut Self {
722+
self.input_expr = Some(value);
723+
self
724+
}
725+
726+
pub fn outer(&mut self, value: bool) -> &mut Self {
727+
self.outer = value;
728+
self
729+
}
730+
731+
pub fn build(&self) -> Result<ast::TableFactor, BuilderError> {
732+
let input = self.input_expr.clone().ok_or_else(|| {
733+
BuilderError::from(UninitializedFieldError::from("input_expr"))
734+
})?;
735+
736+
let mut args = vec![ast::FunctionArg::Named {
737+
name: ast::Ident::new("INPUT"),
738+
arg: ast::FunctionArgExpr::Expr(input),
739+
operator: ast::FunctionArgOperator::RightArrow,
740+
}];
741+
742+
if self.outer {
743+
args.push(ast::FunctionArg::Named {
744+
name: ast::Ident::new("OUTER"),
745+
arg: ast::FunctionArgExpr::Expr(ast::Expr::Value(
746+
ast::Value::Boolean(true).into(),
747+
)),
748+
operator: ast::FunctionArgOperator::RightArrow,
749+
});
750+
}
751+
752+
Ok(ast::TableFactor::Function {
753+
lateral: true,
754+
name: ast::ObjectName::from(vec![ast::Ident::new("FLATTEN")]),
755+
args,
756+
alias: self.alias.clone(),
757+
})
758+
}
759+
760+
/// Returns the alias name for this FLATTEN relation.
761+
/// Used to build qualified column references like `alias.VALUE`.
762+
pub fn alias_name(&self) -> &str {
763+
self.alias
764+
.as_ref()
765+
.map(|a| a.name.value.as_str())
766+
.unwrap_or(FLATTEN_DEFAULT_ALIAS)
767+
}
768+
769+
fn create_empty() -> Self {
770+
Self {
771+
alias: Some(ast::TableAlias {
772+
name: ast::Ident::new(FLATTEN_DEFAULT_ALIAS),
773+
columns: vec![],
774+
explicit: true,
775+
}),
776+
input_expr: None,
777+
outer: false,
778+
}
779+
}
780+
}
781+
782+
impl Default for FlattenRelationBuilder {
783+
fn default() -> Self {
784+
Self::create_empty()
785+
}
786+
}
787+
691788
/// Runtime error when a `build()` method is called and one or more required fields
692789
/// do not have a value.
693790
#[derive(Debug, Clone)]

datafusion/sql/src/unparser/dialect.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,15 @@ pub trait Dialect: Send + Sync {
206206
false
207207
}
208208

209+
/// Unparse the unnest plan as `LATERAL FLATTEN(INPUT => expr, ...)`.
210+
///
211+
/// Snowflake uses FLATTEN as a table function instead of the SQL-standard UNNEST.
212+
/// When this returns `true`, the unparser emits
213+
/// `LATERAL FLATTEN(INPUT => <col>, OUTER => <bool>)` in the FROM clause.
214+
fn unnest_as_lateral_flatten(&self) -> bool {
215+
false
216+
}
217+
209218
/// Allows the dialect to override column alias unparsing if the dialect has specific rules.
210219
/// Returns None if the default unparsing should be used, or Some(String) if there is
211220
/// a custom implementation for the alias.
@@ -664,6 +673,59 @@ impl BigQueryDialect {
664673
}
665674
}
666675

676+
/// Unparser dialect targeting Snowflake SQL.
///
/// How it differs from the default dialect:
/// - identifiers are quoted with double quotes
/// - `NULLS FIRST`/`NULLS LAST` are supported in `ORDER BY`
/// - empty select lists (`SELECT FROM t`) are not supported
/// - column aliases in table alias definitions are not supported
///   (Snowflake accepts the syntax but silently ignores the renames in join contexts)
/// - `UNNEST` plans are unparsed as `LATERAL FLATTEN(INPUT => expr, ...)`
pub struct SnowflakeDialect {}
686+
687+
#[expect(clippy::new_without_default)]
688+
impl SnowflakeDialect {
689+
#[must_use]
690+
pub fn new() -> Self {
691+
Self {}
692+
}
693+
}
694+
695+
impl Dialect for SnowflakeDialect {
696+
fn identifier_quote_style(&self, _: &str) -> Option<char> {
697+
Some('"')
698+
}
699+
700+
fn supports_nulls_first_in_sort(&self) -> bool {
701+
true
702+
}
703+
704+
fn supports_empty_select_list(&self) -> bool {
705+
false
706+
}
707+
708+
fn supports_column_alias_in_table_alias(&self) -> bool {
709+
false
710+
}
711+
712+
fn timestamp_cast_dtype(
713+
&self,
714+
_time_unit: &TimeUnit,
715+
tz: &Option<Arc<str>>,
716+
) -> ast::DataType {
717+
if tz.is_some() {
718+
ast::DataType::Timestamp(None, TimezoneInfo::WithTimeZone)
719+
} else {
720+
ast::DataType::Timestamp(None, TimezoneInfo::None)
721+
}
722+
}
723+
724+
fn unnest_as_lateral_flatten(&self) -> bool {
725+
true
726+
}
727+
}
728+
667729
pub struct CustomDialect {
668730
identifier_quote_style: Option<char>,
669731
supports_nulls_first_in_sort: bool,
@@ -686,6 +748,7 @@ pub struct CustomDialect {
686748
window_func_support_window_frame: bool,
687749
full_qualified_col: bool,
688750
unnest_as_table_factor: bool,
751+
unnest_as_lateral_flatten: bool,
689752
}
690753

691754
impl Default for CustomDialect {
@@ -715,6 +778,7 @@ impl Default for CustomDialect {
715778
window_func_support_window_frame: true,
716779
full_qualified_col: false,
717780
unnest_as_table_factor: false,
781+
unnest_as_lateral_flatten: false,
718782
}
719783
}
720784
}
@@ -829,6 +893,10 @@ impl Dialect for CustomDialect {
829893
fn unnest_as_table_factor(&self) -> bool {
830894
self.unnest_as_table_factor
831895
}
896+
897+
fn unnest_as_lateral_flatten(&self) -> bool {
898+
self.unnest_as_lateral_flatten
899+
}
832900
}
833901

834902
/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
@@ -867,6 +935,7 @@ pub struct CustomDialectBuilder {
867935
window_func_support_window_frame: bool,
868936
full_qualified_col: bool,
869937
unnest_as_table_factor: bool,
938+
unnest_as_lateral_flatten: bool,
870939
}
871940

872941
impl Default for CustomDialectBuilder {
@@ -902,6 +971,7 @@ impl CustomDialectBuilder {
902971
window_func_support_window_frame: true,
903972
full_qualified_col: false,
904973
unnest_as_table_factor: false,
974+
unnest_as_lateral_flatten: false,
905975
}
906976
}
907977

@@ -929,6 +999,7 @@ impl CustomDialectBuilder {
929999
window_func_support_window_frame: self.window_func_support_window_frame,
9301000
full_qualified_col: self.full_qualified_col,
9311001
unnest_as_table_factor: self.unnest_as_table_factor,
1002+
unnest_as_lateral_flatten: self.unnest_as_lateral_flatten,
9321003
}
9331004
}
9341005

@@ -1075,4 +1146,12 @@ impl CustomDialectBuilder {
10751146
self.unnest_as_table_factor = unnest_as_table_factor;
10761147
self
10771148
}
1149+
1150+
pub fn with_unnest_as_lateral_flatten(
1151+
mut self,
1152+
unnest_as_lateral_flatten: bool,
1153+
) -> Self {
1154+
self.unnest_as_lateral_flatten = unnest_as_lateral_flatten;
1155+
self
1156+
}
10781157
}

0 commit comments

Comments
 (0)