Skip to content

Commit 455fc83

Browse files
Unnest fixes for multi-arguments
1 parent c083c6c commit 455fc83

2 files changed

Lines changed: 103 additions & 14 deletions

File tree

datafusion/sql/src/unparser/plan.rs

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -421,20 +421,25 @@ impl Unparser<'_> {
421421
}
422422

423423
// Projection can be top-level plan for unnest relation.
424-
// The projection generated by the `RecursiveUnnestRewriter`
425-
// will have exactly one expression referencing the unnest
426-
// placeholder column. The placeholder may be at the top level
427-
// ("bare": `__unnest_placeholder(...) AS item`) or wrapped
428-
// inside a function call ("wrapped":
429-
// `json_as_text(__unnest_placeholder(...), 'type') AS t`).
430-
let (unnest_input_type, placeholder_is_bare) = if p.expr.len() == 1 {
431-
match Self::find_unnest_placeholder(&p.expr[0]) {
432-
Some((t, is_bare)) => (Some(t), is_bare),
433-
None => (None, false),
434-
}
435-
} else {
436-
(None, false)
437-
};
424+
// At least one expression will reference the unnest
425+
// placeholder column. The placeholder may be:
426+
// - "bare": the sole expression IS the placeholder (possibly wrapped in aliases)
427+
// - "wrapped": inside a function call, or one of several exprs
428+
// When bare, we emit `_unnest."VALUE" [AS alias]`.
429+
// Otherwise `reconstruct_select_statement` renders all
430+
// expressions and rewrites the placeholder via
431+
// `unproject_unnest_expr_as_flatten_value`.
432+
let (unnest_input_type, placeholder_is_bare) = p
433+
.expr
434+
.iter()
435+
.find_map(Self::find_unnest_placeholder)
436+
.map(|(t, is_bare)| {
437+
// Only bare when there is a single expression that
438+
// IS the placeholder — multi-expression projections
439+
// always need reconstruct_select_statement.
440+
(Some(t), is_bare && p.expr.len() == 1)
441+
})
442+
.unwrap_or((None, false));
438443
// Extract the outermost user alias (e.g. "c1" from `UNNEST(...) AS c1`).
439444
// Internal aliases like "UNNEST(...)" are not user aliases.
440445
let user_alias = if placeholder_is_bare && unnest_input_type.is_some() {
@@ -524,8 +529,11 @@ impl Unparser<'_> {
524529
return Ok(());
525530
}
526531
// Standard UNNEST table factor path (BigQuery, etc.).
532+
// Only fires for single-expression projections — multi-expr
533+
// falls through to reconstruct_select_statement.
527534
if self.dialect.unnest_as_table_factor()
528535
&& unnest_input_type.is_some()
536+
&& p.expr.len() == 1
529537
&& user_alias.is_none() // Skip if user alias present — fall through to reconstruct_select_statement which preserves aliases
530538
&& let Some((unnest, unnest_plan)) =
531539
self.peel_to_unnest_with_modifiers(p.input.as_ref(), query)?

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3360,6 +3360,87 @@ fn snowflake_flatten_composed_expression_with_limit() -> Result<(), DataFusionEr
33603360
Ok(())
33613361
}
33623362

3363+
#[test]
fn snowflake_flatten_multi_expression_projection() -> Result<(), DataFusionError> {
    // Plan under test:
    //   Projection([CAST(placeholder AS Int64) AS a, CAST(placeholder AS Utf8) AS b])
    //     → Unnest → Projection → TableScan
    // The outer Projection has TWO expressions and both reference the unnest
    // placeholder. The FLATTEN code path must fire even when p.expr.len() > 1.
    let schema = Schema::new(vec![Field::new(
        "items",
        DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))),
        true,
    )]);

    let plan = table_scan(Some("source"), &schema, None)?
        .project(vec![col("items").alias("__unnest_placeholder(items)")])?
        .unnest_column("__unnest_placeholder(items)")?
        .project(vec![
            cast(col("__unnest_placeholder(items)"), DataType::Int64).alias("a"),
            cast(col("__unnest_placeholder(items)"), DataType::Utf8).alias("b"),
        ])?
        .build()?;

    let snowflake = SnowflakeDialect::new();
    let unparser = Unparser::new(&snowflake);
    let result = unparser.plan_to_sql(&plan)?;
    let actual = result.to_string();

    assert!(
        actual.contains("LATERAL FLATTEN"),
        "Expected LATERAL FLATTEN in SQL, got: {actual}"
    );

    // Both projected expressions are casts, so the rendered SQL must contain
    // one CAST per expression. A plain `contains("CAST")` would still pass if
    // one of the two expressions were silently dropped or left un-cast.
    let cast_count = actual.matches("CAST").count();
    assert!(
        cast_count >= 2,
        "Expected a CAST for each of the 2 projected expressions, found {cast_count} in SQL: {actual}"
    );

    // Each user alias must survive the rewrite.
    assert!(
        actual.contains(r#"AS "a""#),
        "Expected alias 'a' in SQL, got: {actual}"
    );
    assert!(
        actual.contains(r#"AS "b""#),
        "Expected alias 'b' in SQL, got: {actual}"
    );
    Ok(())
}
3408+
3409+
#[test]
fn snowflake_flatten_multi_expression_with_limit() -> Result<(), DataFusionError> {
    // Same two-expression projection over an unnest placeholder as the
    // non-limit variant, but with a Limit node sitting between the outer
    // Projection and the Unnest.
    const PLACEHOLDER: &str = "__unnest_placeholder(items)";

    let element_field = Field::new_list_field(DataType::Utf8, true);
    let list_schema = Schema::new(vec![Field::new(
        "items",
        DataType::List(Arc::new(element_field)),
        true,
    )]);

    // Two casts of the same placeholder column, each with its own alias.
    let outer_exprs = vec![
        cast(col(PLACEHOLDER), DataType::Int64).alias("a"),
        cast(col(PLACEHOLDER), DataType::Utf8).alias("b"),
    ];

    let logical_plan = table_scan(Some("source"), &list_schema, None)?
        .project(vec![col("items").alias(PLACEHOLDER)])?
        .unnest_column(PLACEHOLDER)?
        .limit(0, Some(10))?
        .project(outer_exprs)?
        .build()?;

    let dialect = SnowflakeDialect::new();
    let actual = Unparser::new(&dialect)
        .plan_to_sql(&logical_plan)?
        .to_string();

    // The rendered SQL must keep both the FLATTEN relation and the limit.
    for expected in ["LATERAL FLATTEN", "LIMIT 10"] {
        assert!(
            actual.contains(expected),
            "Expected {expected} in SQL, got: {actual}"
        );
    }
    Ok(())
}
3443+
33633444
#[test]
33643445
fn snowflake_unnest_through_subquery_alias() -> Result<(), DataFusionError> {
33653446
// Build: Projection → Unnest → SubqueryAlias → Projection → TableScan

0 commit comments

Comments
 (0)