Skip to content

Commit 54d8a1d

Browse files
More snowflake dialect fixes for unnes
1 parent 909d8c6 commit 54d8a1d

2 files changed

Lines changed: 150 additions & 16 deletions

File tree

datafusion/sql/src/unparser/plan.rs

Lines changed: 73 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ use crate::utils::UNNEST_PLACEHOLDER;
4545
use datafusion_common::{
4646
Column, DataFusionError, Result, ScalarValue, TableReference, assert_or_internal_err,
4747
internal_err, not_impl_err,
48-
tree_node::{TransformedResult, TreeNode},
48+
tree_node::{TransformedResult, TreeNode, TreeNodeRecursion},
4949
};
5050
use datafusion_expr::expr::{OUTER_REFERENCE_COLUMN_PREFIX, UNNEST_COLUMN_PREFIX};
5151
use datafusion_expr::{
@@ -420,17 +420,33 @@ impl Unparser<'_> {
420420
.select_to_sql_recursively(&new_plan, query, select, relation);
421421
}
422422

423-
// Projection can be top-level plan for unnest relation
423+
// Projection can be top-level plan for unnest relation.
424424
// The projection generated by the `RecursiveUnnestRewriter` from a UNNEST relation will have
425425
// only one expression, which is the placeholder column generated by the rewriter.
426-
let unnest_input_type = if p.expr.len() == 1 {
427-
Self::check_unnest_placeholder_with_outer_ref(&p.expr[0])
426+
//
427+
// Two cases:
428+
// - "bare": the expression IS the placeholder (+ aliases):
429+
// `__unnest_placeholder(...) AS item`
430+
// - "wrapped": the placeholder is inside a function call:
431+
// `json_as_text(__unnest_placeholder(...), 'type') AS target_type`
432+
let (unnest_input_type, placeholder_is_bare) = if p.expr.len() == 1 {
433+
if let Some(t) =
434+
Self::check_unnest_placeholder_with_outer_ref(&p.expr[0])
435+
{
436+
(Some(t), true)
437+
} else if let Some(t) =
438+
Self::find_unnest_placeholder_in_expr(&p.expr[0])
439+
{
440+
(Some(t), false)
441+
} else {
442+
(None, false)
443+
}
428444
} else {
429-
None
445+
(None, false)
430446
};
431447
// Extract the outermost user alias (e.g. "c1" from `UNNEST(...) AS c1`).
432448
// Internal aliases like "UNNEST(...)" are not user aliases.
433-
let user_alias = if unnest_input_type.is_some() {
449+
let user_alias = if placeholder_is_bare && unnest_input_type.is_some() {
434450
Self::extract_unnest_user_alias(&p.expr[0])
435451
} else {
436452
None
@@ -472,11 +488,15 @@ impl Unparser<'_> {
472488
relation.flatten(flatten_relation);
473489

474490
if !select.already_projected() {
475-
let value_expr = self.build_flatten_value_select_item(
476-
&alias_name,
477-
user_alias.as_deref(),
478-
);
479-
select.projection(vec![value_expr]);
491+
if placeholder_is_bare {
492+
let value_expr = self.build_flatten_value_select_item(
493+
&alias_name,
494+
user_alias.as_deref(),
495+
);
496+
select.projection(vec![value_expr]);
497+
} else {
498+
self.reconstruct_select_statement(plan, p, select)?;
499+
}
480500
}
481501

482502
return self.select_to_sql_recursively(
@@ -509,11 +529,15 @@ impl Unparser<'_> {
509529
flatten.outer(unnest.options.preserve_nulls);
510530

511531
if !select.already_projected() {
512-
let value_expr = self.build_flatten_value_select_item(
513-
&alias_name,
514-
user_alias.as_deref(),
515-
);
516-
select.projection(vec![value_expr]);
532+
if placeholder_is_bare {
533+
let value_expr = self.build_flatten_value_select_item(
534+
&alias_name,
535+
user_alias.as_deref(),
536+
);
537+
select.projection(vec![value_expr]);
538+
} else {
539+
self.reconstruct_select_statement(plan, p, select)?;
540+
}
517541
}
518542

519543
// Recurse into the Unnest → inner source to set the primary
@@ -1356,6 +1380,39 @@ impl Unparser<'_> {
13561380
None
13571381
}
13581382

1383+
/// Recursively search the expression tree for an unnest placeholder column.
1384+
///
1385+
/// Unlike [`Self::check_unnest_placeholder_with_outer_ref`] which only
1386+
/// matches when the placeholder IS the expression (modulo aliases), this
1387+
/// function finds placeholders buried inside function calls or other
1388+
/// transformations. For example:
1389+
///
1390+
/// ```text
1391+
/// Alias("target_type",
1392+
/// json_as_text(
1393+
/// Column("__unnest_placeholder(...)"), ← found here
1394+
/// Literal("type")))
1395+
/// ```
1396+
fn find_unnest_placeholder_in_expr(expr: &Expr) -> Option<UnnestInputType> {
1397+
let mut result = None;
1398+
let _ = expr.apply(|e| {
1399+
if let Expr::Column(Column { name, .. }) = e
1400+
&& let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER)
1401+
{
1402+
result = if prefix
1403+
.starts_with(&format!("({OUTER_REFERENCE_COLUMN_PREFIX}("))
1404+
{
1405+
Some(UnnestInputType::OuterReference)
1406+
} else {
1407+
Some(UnnestInputType::Scalar)
1408+
};
1409+
return Ok(TreeNodeRecursion::Stop);
1410+
}
1411+
Ok(TreeNodeRecursion::Continue)
1412+
});
1413+
result
1414+
}
1415+
13591416
/// Extract the outermost user-provided alias from an unnest expression.
13601417
/// Returns `None` if the outermost alias is DataFusion's internal display
13611418
/// name (e.g. `UNNEST(make_array(...))`), or if there is no alias at all.

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3283,6 +3283,83 @@ fn snowflake_flatten_limit_between_projection_and_unnest_with_subquery_alias()
32833283
Ok(())
32843284
}
32853285

3286+
#[test]
3287+
fn snowflake_flatten_composed_expression_wrapping_unnest() -> Result<(), DataFusionError>
3288+
{
3289+
// Build: Projection(CAST(placeholder AS Int64) AS item_id) → Unnest → Projection → TableScan
3290+
// The outer Projection wraps the unnest output in a function call.
3291+
// The FLATTEN code path must detect the placeholder inside the function
3292+
// and still emit LATERAL FLATTEN.
3293+
let schema = Schema::new(vec![Field::new(
3294+
"items",
3295+
DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))),
3296+
true,
3297+
)]);
3298+
3299+
let plan = table_scan(Some("source"), &schema, None)?
3300+
.project(vec![col("items").alias("__unnest_placeholder(items)")])?
3301+
.unnest_column("__unnest_placeholder(items)")?
3302+
.project(vec![
3303+
cast(col("__unnest_placeholder(items)"), DataType::Int64).alias("item_id"),
3304+
])?
3305+
.build()?;
3306+
3307+
let snowflake = SnowflakeDialect::new();
3308+
let unparser = Unparser::new(&snowflake);
3309+
let result = unparser.plan_to_sql(&plan)?;
3310+
let actual = result.to_string();
3311+
3312+
// Must contain LATERAL FLATTEN despite the placeholder being inside CAST
3313+
assert!(
3314+
actual.contains("LATERAL FLATTEN"),
3315+
"Expected LATERAL FLATTEN in SQL, got: {actual}"
3316+
);
3317+
assert!(
3318+
actual.contains("CAST"),
3319+
"Expected CAST in SQL, got: {actual}"
3320+
);
3321+
Ok(())
3322+
}
3323+
3324+
#[test]
3325+
fn snowflake_flatten_composed_expression_with_limit() -> Result<(), DataFusionError> {
3326+
// Combines both bugs: composed expression + Limit between Projection and Unnest
3327+
// Build: Projection(CAST(placeholder AS Int64) AS item_id) → Limit → Unnest → Projection → TableScan
3328+
let schema = Schema::new(vec![Field::new(
3329+
"items",
3330+
DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))),
3331+
true,
3332+
)]);
3333+
3334+
let plan = table_scan(Some("source"), &schema, None)?
3335+
.project(vec![col("items").alias("__unnest_placeholder(items)")])?
3336+
.unnest_column("__unnest_placeholder(items)")?
3337+
.limit(0, Some(5))?
3338+
.project(vec![
3339+
cast(col("__unnest_placeholder(items)"), DataType::Int64).alias("item_id"),
3340+
])?
3341+
.build()?;
3342+
3343+
let snowflake = SnowflakeDialect::new();
3344+
let unparser = Unparser::new(&snowflake);
3345+
let result = unparser.plan_to_sql(&plan)?;
3346+
let actual = result.to_string();
3347+
3348+
assert!(
3349+
actual.contains("LATERAL FLATTEN"),
3350+
"Expected LATERAL FLATTEN in SQL, got: {actual}"
3351+
);
3352+
assert!(
3353+
actual.contains("CAST"),
3354+
"Expected CAST in SQL, got: {actual}"
3355+
);
3356+
assert!(
3357+
actual.contains("LIMIT 5"),
3358+
"Expected LIMIT 5 in SQL, got: {actual}"
3359+
);
3360+
Ok(())
3361+
}
3362+
32863363
#[test]
32873364
fn snowflake_unnest_through_subquery_alias() -> Result<(), DataFusionError> {
32883365
// Build: Projection → Unnest → SubqueryAlias → Projection → TableScan

0 commit comments

Comments
 (0)