Skip to content

Commit 5f1d805

Browse files
Few more fixes
1 parent 455fc83 commit 5f1d805

2 files changed

Lines changed: 116 additions & 146 deletions

File tree

datafusion/sql/src/unparser/plan.rs

Lines changed: 104 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -298,29 +298,15 @@ impl Unparser<'_> {
298298
let items = exprs
299299
.iter()
300300
.map(|e| {
301-
if use_flatten {
302-
// For Snowflake FLATTEN: rewrite UNNEST output columns
303-
// to `_unnest."VALUE"`. After unproject_unnest_expr_as_flatten_value
304-
// runs, the expression is either:
305-
// - bare Column with UNNEST prefix (outer ref case)
306-
// - Alias { name: "UNNEST(...)", expr: Column(_unnest.VALUE) }
307-
// (the inner column is already rewritten but the alias preserves
308-
// the internal UNNEST display name)
309-
let is_unnest_col = match e {
310-
Expr::Column(col) => col
311-
.name
312-
.starts_with(&format!("{UNNEST_COLUMN_PREFIX}(")),
313-
Expr::Alias(Alias { name, .. }) => {
314-
name.starts_with(&format!("{UNNEST_COLUMN_PREFIX}("))
315-
}
316-
_ => false,
317-
};
318-
if is_unnest_col {
319-
return Ok(self.build_flatten_value_select_item(
320-
FLATTEN_DEFAULT_ALIAS,
321-
None,
322-
));
323-
}
301+
// After unproject_unnest_expr_as_flatten_value, an
302+
// internal UNNEST display-name alias may still wrap
303+
// the rewritten _unnest.VALUE column. Replace it
304+
// with the bare FLATTEN VALUE select item.
305+
if use_flatten && Self::has_internal_unnest_alias(e) {
306+
return Ok(self.build_flatten_value_select_item(
307+
FLATTEN_DEFAULT_ALIAS,
308+
None,
309+
));
324310
}
325311
self.select_item_to_sql(e)
326312
})
@@ -421,38 +407,47 @@ impl Unparser<'_> {
421407
}
422408

423409
// Projection can be top-level plan for unnest relation.
424-
// At least one expression will reference the unnest
425-
// placeholder column. The placeholder may be:
426-
// - "bare": the sole expression IS the placeholder (+ aliases)
427-
// - "wrapped": inside a function call, or one of several exprs
428-
// When bare, we emit `_unnest."VALUE" [AS alias]`.
429-
// Otherwise `reconstruct_select_statement` renders all
430-
// expressions and rewrites the placeholder via
431-
// `unproject_unnest_expr_as_flatten_value`.
432-
let (unnest_input_type, placeholder_is_bare) = p
433-
.expr
434-
.iter()
435-
.find_map(Self::find_unnest_placeholder)
436-
.map(|(t, is_bare)| {
437-
// Only bare when there is a single expression that
438-
// IS the placeholder — multi-expression projections
439-
// always need reconstruct_select_statement.
440-
(Some(t), is_bare && p.expr.len() == 1)
441-
})
442-
.unwrap_or((None, false));
443-
// Extract the outermost user alias (e.g. "c1" from `UNNEST(...) AS c1`).
444-
// Internal aliases like "UNNEST(...)" are not user aliases.
445-
let user_alias = if placeholder_is_bare && unnest_input_type.is_some() {
446-
Self::extract_unnest_user_alias(&p.expr[0])
447-
} else {
448-
None
449-
};
450-
// Snowflake LATERAL FLATTEN path.
410+
// The projection generated by the `RecursiveUnnestRewriter`
411+
// will have at least one expression referencing an unnest
412+
// placeholder column.
413+
let unnest_input_type: Option<UnnestInputType> =
414+
p.expr.iter().find_map(Self::find_unnest_placeholder);
415+
416+
// --- UNNEST table factor path (BigQuery, etc.) ---
417+
// Only fires for a single bare-placeholder projection.
418+
// Uses peel_to_unnest_with_modifiers (rather than matching
419+
// p.input directly) to handle Limit/Sort between Projection
420+
// and Unnest.
421+
if self.dialect.unnest_as_table_factor()
422+
&& p.expr.len() == 1
423+
&& Self::is_bare_unnest_placeholder(&p.expr[0])
424+
&& let Some((unnest, unnest_plan)) =
425+
self.peel_to_unnest_with_modifiers(p.input.as_ref(), query)?
426+
&& let Some(unnest_relation) =
427+
self.try_unnest_to_table_factor_sql(unnest)?
428+
{
429+
relation.unnest(unnest_relation);
430+
return self.select_to_sql_recursively(
431+
unnest_plan,
432+
query,
433+
select,
434+
relation,
435+
);
436+
}
437+
438+
// --- Snowflake LATERAL FLATTEN path ---
451439
// `peel_to_unnest_with_modifiers` walks through any
452-
// intermediate Limit/Sort nodes, applies their modifiers to
453-
// the query, and returns the Unnest + the LogicalPlan ref to
454-
// recurse into (bypassing the normal Limit/Sort handlers that
455-
// would create a derived subquery).
440+
// intermediate Limit/Sort nodes (the optimizer can insert
441+
// these between the Projection and the Unnest), applies
442+
// their modifiers to the query, and returns the Unnest +
443+
// the LogicalPlan ref to recurse into. This bypasses the
444+
// normal Limit/Sort handlers which would wrap the subtree
445+
// in a derived subquery.
446+
// SELECT rendering is delegated to
447+
// `reconstruct_select_statement`, which rewrites
448+
// placeholder columns to `"_unnest"."VALUE"` via
449+
// `unproject_unnest_expr_as_flatten_value` — this works
450+
// for bare, wrapped, and multi-expression projections.
456451
if self.dialect.unnest_as_lateral_flatten()
457452
&& unnest_input_type.is_some()
458453
&& let Some((unnest, unnest_plan)) =
@@ -469,28 +464,17 @@ impl Unparser<'_> {
469464
))
470465
})?;
471466

472-
// Set the SELECT projection.
467+
// An outer plan (e.g. a wrapping Projection) may have
468+
// already set SELECT columns; only set them once.
473469
if !select.already_projected() {
474-
if placeholder_is_bare {
475-
let value_expr = self.build_flatten_value_select_item(
476-
flatten.alias_name(),
477-
user_alias.as_deref(),
478-
);
479-
select.projection(vec![value_expr]);
480-
} else {
481-
// Composed expression wrapping the placeholder:
482-
// reconstruct_select_statement rewrites the
483-
// placeholder column to `_unnest."VALUE"` via
484-
// unproject_unnest_expr_as_flatten_value.
485-
self.reconstruct_select_statement(plan, p, select)?;
486-
}
470+
self.reconstruct_select_statement(plan, p, select)?;
487471
}
488472

489473
if matches!(
490474
inner_projection.input.as_ref(),
491475
LogicalPlan::EmptyRelation(_)
492476
) {
493-
// Inline array case (e.g. UNNEST([1,2,3])):
477+
// Inline array (e.g. UNNEST([1,2,3])):
494478
// FLATTEN is the sole FROM source.
495479
relation.flatten(flatten);
496480
return self.select_to_sql_recursively(
@@ -502,9 +486,8 @@ impl Unparser<'_> {
502486
}
503487

504488
// Non-empty source (table, subquery, etc.):
505-
// Recurse into the Unnest → inner source to set the
506-
// primary FROM relation, then add the FLATTEN as a
507-
// CROSS JOIN.
489+
// recurse to set the primary FROM, then attach FLATTEN
490+
// as a CROSS JOIN.
508491
self.select_to_sql_recursively(unnest_plan, query, select, relation)?;
509492

510493
let flatten_factor = flatten.build().map_err(|e| {
@@ -528,26 +511,6 @@ impl Unparser<'_> {
528511

529512
return Ok(());
530513
}
531-
// Standard UNNEST table factor path (BigQuery, etc.).
532-
// Only fires for single-expression projections — multi-expr
533-
// falls through to reconstruct_select_statement.
534-
if self.dialect.unnest_as_table_factor()
535-
&& unnest_input_type.is_some()
536-
&& p.expr.len() == 1
537-
&& user_alias.is_none() // Skip if user alias present — fall through to reconstruct_select_statement which preserves aliases
538-
&& let Some((unnest, unnest_plan)) =
539-
self.peel_to_unnest_with_modifiers(p.input.as_ref(), query)?
540-
&& let Some(unnest_relation) =
541-
self.try_unnest_to_table_factor_sql(unnest)?
542-
{
543-
relation.unnest(unnest_relation);
544-
return self.select_to_sql_recursively(
545-
unnest_plan,
546-
query,
547-
select,
548-
relation,
549-
);
550-
}
551514

552515
// If it's a unnest projection, we should provide the table column alias
553516
// to provide a column name for the unnest relation.
@@ -1151,24 +1114,22 @@ impl Unparser<'_> {
11511114
}
11521115
}
11531116
LogicalPlan::Unnest(unnest) => {
1154-
if self.dialect.unnest_as_lateral_flatten()
1155-
&& !unnest.struct_type_columns.is_empty()
1156-
{
1157-
return not_impl_err!(
1158-
"Snowflake FLATTEN cannot unparse struct unnest: \
1159-
DataFusion expands struct fields into columns (horizontal), \
1160-
but Snowflake FLATTEN expands them into rows (vertical). \
1161-
Columns: {:?}",
1117+
if !unnest.struct_type_columns.is_empty() {
1118+
if self.dialect.unnest_as_lateral_flatten() {
1119+
return not_impl_err!(
1120+
"Snowflake FLATTEN cannot unparse struct unnest: \
1121+
DataFusion expands struct fields into columns (horizontal), \
1122+
but Snowflake FLATTEN expands them into rows (vertical). \
1123+
Columns: {:?}",
1124+
unnest.struct_type_columns
1125+
);
1126+
}
1127+
return internal_err!(
1128+
"Struct type columns are not currently supported in UNNEST: {:?}",
11621129
unnest.struct_type_columns
11631130
);
11641131
}
11651132

1166-
assert_or_internal_err!(
1167-
unnest.struct_type_columns.is_empty(),
1168-
"Struct type columns are not currently supported in UNNEST: {:?}",
1169-
unnest.struct_type_columns
1170-
);
1171-
11721133
// For Snowflake FLATTEN: if the relation hasn't been set yet
11731134
// (UNNEST was in SELECT clause, not FROM clause), set the FLATTEN
11741135
// relation here so the FROM clause is emitted.
@@ -1293,27 +1254,13 @@ impl Unparser<'_> {
12931254
}
12941255
}
12951256

1296-
/// Search an expression for an unnest placeholder column reference.
1257+
/// Search an expression tree for an unnest placeholder column reference.
12971258
///
1298-
/// Returns both the [`UnnestInputType`] and whether the placeholder is
1299-
/// "bare" (the expression IS the placeholder, modulo aliases) or
1300-
/// "wrapped" (the placeholder is inside a function call or other
1301-
/// transformation).
1302-
///
1303-
/// Examples:
1304-
/// - `Alias("item", Column("__unnest_placeholder(...)"))` → `Some((Scalar, true))`
1305-
/// - `Alias("t", Cast(Column("__unnest_placeholder(...)"), Int64))` → `Some((Scalar, false))`
1306-
/// - `Column("unrelated")` → `None`
1307-
fn find_unnest_placeholder(expr: &Expr) -> Option<(UnnestInputType, bool)> {
1308-
// Fast path: check if the expression IS the placeholder (peel aliases).
1309-
let mut inner = expr;
1310-
while let Expr::Alias(Alias { expr, .. }) = inner {
1311-
inner = expr.as_ref();
1312-
}
1313-
if let Some(t) = Self::classify_placeholder_column(inner) {
1314-
return Some((t, true));
1315-
}
1316-
// Slow path: walk the full expression tree.
1259+
/// Returns the [`UnnestInputType`] if any sub-expression is a column
1260+
/// whose name starts with `__unnest_placeholder`. The placeholder may
1261+
/// be at the top level (bare), inside a function call, or one of several
1262+
/// expressions — this function finds it regardless.
1263+
fn find_unnest_placeholder(expr: &Expr) -> Option<UnnestInputType> {
13171264
let mut result = None;
13181265
let _ = expr.apply(|e| {
13191266
if let Some(t) = Self::classify_placeholder_column(e) {
@@ -1322,7 +1269,22 @@ impl Unparser<'_> {
13221269
}
13231270
Ok(TreeNodeRecursion::Continue)
13241271
});
1325-
result.map(|t| (t, false))
1272+
result
1273+
}
1274+
1275+
/// Returns true if `expr` is a placeholder column, optionally wrapped
1276+
/// in a single alias (the rewriter's internal `UNNEST(...)` name).
1277+
/// Does NOT match when a user alias wraps the internal alias
1278+
/// (e.g. `Alias("c1", Alias("UNNEST(...)", Column(placeholder)))`),
1279+
/// so the table-factor path correctly falls through to
1280+
/// `reconstruct_select_statement` which preserves user aliases.
1281+
fn is_bare_unnest_placeholder(expr: &Expr) -> bool {
1282+
// Peel at most one alias layer (the rewriter's internal name).
1283+
let inner = match expr {
1284+
Expr::Alias(Alias { expr, .. }) => expr.as_ref(),
1285+
other => other,
1286+
};
1287+
Self::classify_placeholder_column(inner).is_some()
13261288
}
13271289

13281290
/// If `expr` is a `Column` whose name starts with `__unnest_placeholder`,
@@ -1340,17 +1302,23 @@ impl Unparser<'_> {
13401302
None
13411303
}
13421304

1343-
/// Extract the outermost user-provided alias from an unnest expression.
1344-
/// Returns `None` if the outermost alias is DataFusion's internal display
1345-
/// name (e.g. `UNNEST(make_array(...))`), or if there is no alias at all.
1346-
fn extract_unnest_user_alias(expr: &Expr) -> Option<String> {
1347-
if let Expr::Alias(Alias { name, .. }) = expr {
1348-
// Internal aliases start with "UNNEST(" — user aliases don't.
1349-
if !name.starts_with(&format!("{UNNEST_COLUMN_PREFIX}(")) {
1350-
return Some(name.clone());
1305+
/// Check whether an expression carries an internal `UNNEST(...)` display
1306+
/// name as its column name or outermost alias. After
1307+
/// [`unproject_unnest_expr_as_flatten_value`] rewrites the placeholder
1308+
/// column to `_unnest.VALUE`, the internal alias may still linger
1309+
/// (e.g. `Alias("UNNEST(make_array(...))", Column("_unnest.VALUE"))`).
1310+
/// Callers use this to replace the expression with a clean
1311+
/// `_unnest."VALUE"` select item.
1312+
fn has_internal_unnest_alias(expr: &Expr) -> bool {
1313+
match expr {
1314+
Expr::Column(col) => {
1315+
col.name.starts_with(&format!("{UNNEST_COLUMN_PREFIX}("))
1316+
}
1317+
Expr::Alias(Alias { name, .. }) => {
1318+
name.starts_with(&format!("{UNNEST_COLUMN_PREFIX}("))
13511319
}
1320+
_ => false,
13521321
}
1353-
None
13541322
}
13551323

13561324
fn try_unnest_to_table_factor_sql(

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3119,22 +3119,24 @@ fn snowflake_flatten_unnest_from_subselect() -> Result<(), DataFusionError> {
31193119
}
31203120

31213121
/// Dummy scalar UDF for testing — takes a string and returns List<Int64>.
3122+
/// Simulates any UDF that extracts an array from a column (e.g. parsing
3123+
/// JSON, splitting a delimited string, etc.).
31223124
#[derive(Debug, PartialEq, Eq, Hash)]
3123-
struct JsonGetArrayUdf {
3125+
struct ExtractArrayUdf {
31243126
signature: Signature,
31253127
}
31263128

3127-
impl JsonGetArrayUdf {
3129+
impl ExtractArrayUdf {
31283130
fn new() -> Self {
31293131
Self {
31303132
signature: Signature::exact(vec![DataType::Utf8], Volatility::Immutable),
31313133
}
31323134
}
31333135
}
31343136

3135-
impl ScalarUDFImpl for JsonGetArrayUdf {
3137+
impl ScalarUDFImpl for ExtractArrayUdf {
31363138
fn name(&self) -> &str {
3137-
"json_get_array"
3139+
"extract_array"
31383140
}
31393141
fn signature(&self) -> &Signature {
31403142
&self.signature
@@ -3152,10 +3154,10 @@ impl ScalarUDFImpl for JsonGetArrayUdf {
31523154

31533155
#[test]
31543156
fn snowflake_flatten_unnest_udf_result() -> Result<(), DataFusionError> {
3155-
// UNNEST on a UDF result: json_get_array(col) returns List<Int64>,
3156-
// then UNNEST flattens it. This simulates a common Snowflake pattern
3157-
// where a UDF parses JSON into an array, then FLATTEN expands it.
3158-
let sql = "SELECT UNNEST(json_get_array(j1_string)) AS items FROM j1 LIMIT 5";
3157+
// UNNEST on a UDF result: extract_array(col) returns List<Int64>,
3158+
// then UNNEST flattens it. This exercises the path where the FLATTEN
3159+
// INPUT is a UDF call rather than a bare column reference.
3160+
let sql = "SELECT UNNEST(extract_array(j1_string)) AS items FROM j1 LIMIT 5";
31593161

31603162
let statement = Parser::new(&GenericDialect {})
31613163
.try_with_sql(sql)?
@@ -3164,7 +3166,7 @@ fn snowflake_flatten_unnest_udf_result() -> Result<(), DataFusionError> {
31643166
let state = MockSessionState::default()
31653167
.with_aggregate_function(max_udaf())
31663168
.with_aggregate_function(min_udaf())
3167-
.with_scalar_function(Arc::new(ScalarUDF::new_from_impl(JsonGetArrayUdf::new())))
3169+
.with_scalar_function(Arc::new(ScalarUDF::new_from_impl(ExtractArrayUdf::new())))
31683170
.with_expr_planner(Arc::new(CoreFunctionPlanner::default()))
31693171
.with_expr_planner(Arc::new(NestedFunctionPlanner))
31703172
.with_expr_planner(Arc::new(FieldAccessPlanner));
@@ -3180,7 +3182,7 @@ fn snowflake_flatten_unnest_udf_result() -> Result<(), DataFusionError> {
31803182
let result = unparser.plan_to_sql(&plan)?;
31813183
let actual = result.to_string();
31823184

3183-
insta::assert_snapshot!(actual, @r#"SELECT "_unnest"."VALUE" AS "items" FROM "j1" CROSS JOIN LATERAL FLATTEN(INPUT => json_get_array("j1"."j1_string")) AS "_unnest" LIMIT 5"#);
3185+
insta::assert_snapshot!(actual, @r#"SELECT "_unnest"."VALUE" AS "items" FROM "j1" CROSS JOIN LATERAL FLATTEN(INPUT => extract_array("j1"."j1_string")) AS "_unnest" LIMIT 5"#);
31843186
Ok(())
31853187
}
31863188

0 commit comments

Comments
 (0)