Skip to content

Commit c083c6c

Browse files
Quoting fix
1 parent 54d8a1d commit c083c6c

3 files changed

Lines changed: 103 additions & 184 deletions

File tree

datafusion/sql/src/unparser/ast.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -769,7 +769,7 @@ impl FlattenRelationBuilder {
769769
fn create_empty() -> Self {
770770
Self {
771771
alias: Some(ast::TableAlias {
772-
name: ast::Ident::new(FLATTEN_DEFAULT_ALIAS),
772+
name: ast::Ident::with_quote('"', FLATTEN_DEFAULT_ALIAS),
773773
columns: vec![],
774774
explicit: true,
775775
}),

datafusion/sql/src/unparser/plan.rs

Lines changed: 93 additions & 174 deletions
Original file line numberDiff line numberDiff line change
@@ -421,25 +421,16 @@ impl Unparser<'_> {
421421
}
422422

423423
// Projection can be top-level plan for unnest relation.
424-
// The projection generated by the `RecursiveUnnestRewriter` from a UNNEST relation will have
425-
// only one expression, which is the placeholder column generated by the rewriter.
426-
//
427-
// Two cases:
428-
// - "bare": the expression IS the placeholder (+ aliases):
429-
// `__unnest_placeholder(...) AS item`
430-
// - "wrapped": the placeholder is inside a function call:
431-
// `json_as_text(__unnest_placeholder(...), 'type') AS target_type`
424+
// The projection generated by the `RecursiveUnnestRewriter`
425+
// will have exactly one expression referencing the unnest
426+
// placeholder column. The placeholder may be at the top level
427+
// ("bare": `__unnest_placeholder(...) AS item`) or wrapped
428+
// inside a function call ("wrapped":
429+
// `json_as_text(__unnest_placeholder(...), 'type') AS t`).
432430
let (unnest_input_type, placeholder_is_bare) = if p.expr.len() == 1 {
433-
if let Some(t) =
434-
Self::check_unnest_placeholder_with_outer_ref(&p.expr[0])
435-
{
436-
(Some(t), true)
437-
} else if let Some(t) =
438-
Self::find_unnest_placeholder_in_expr(&p.expr[0])
439-
{
440-
(Some(t), false)
441-
} else {
442-
(None, false)
431+
match Self::find_unnest_placeholder(&p.expr[0]) {
432+
Some((t, is_bare)) => (Some(t), is_bare),
433+
None => (None, false),
443434
}
444435
} else {
445436
(None, false)
@@ -451,16 +442,19 @@ impl Unparser<'_> {
451442
} else {
452443
None
453444
};
445+
// Snowflake LATERAL FLATTEN path.
446+
// `peel_to_unnest_with_modifiers` walks through any
447+
// intermediate Limit/Sort nodes, applies their modifiers to
448+
// the query, and returns the Unnest + the LogicalPlan ref to
449+
// recurse into (bypassing the normal Limit/Sort handlers that
450+
// would create a derived subquery).
454451
if self.dialect.unnest_as_lateral_flatten()
455452
&& unnest_input_type.is_some()
456-
&& let Some(unnest) = Self::peel_to_unnest(p.input.as_ref())
457-
&& let Some(flatten_relation) =
453+
&& let Some((unnest, unnest_plan)) =
454+
self.peel_to_unnest_with_modifiers(p.input.as_ref(), query)?
455+
&& let Some(flatten) =
458456
self.try_unnest_to_lateral_flatten_sql(unnest)?
459457
{
460-
let alias_name = flatten_relation.alias_name().to_string();
461-
462-
// Check if the Unnest source is a real query (subselect)
463-
// vs an inline array (EmptyRelation).
464458
let inner_projection =
465459
Self::peel_to_inner_projection(unnest.input.as_ref())
466460
.ok_or_else(|| {
@@ -470,35 +464,30 @@ impl Unparser<'_> {
470464
))
471465
})?;
472466

473-
// Apply any intermediate Limit/Sort modifiers and get
474-
// a reference to the Unnest LogicalPlan node for recursion.
475-
// This bypasses the Limit/Sort handlers (which would create
476-
// unwanted derived subqueries when already_projected is set).
477-
let unnest_plan = self.apply_transparent_and_find_unnest_plan(
478-
p.input.as_ref(),
479-
query,
480-
)?;
467+
// Set the SELECT projection.
468+
if !select.already_projected() {
469+
if placeholder_is_bare {
470+
let value_expr = self.build_flatten_value_select_item(
471+
flatten.alias_name(),
472+
user_alias.as_deref(),
473+
);
474+
select.projection(vec![value_expr]);
475+
} else {
476+
// Composed expression wrapping the placeholder:
477+
// reconstruct_select_statement rewrites the
478+
// placeholder column to `_unnest."VALUE"` via
479+
// unproject_unnest_expr_as_flatten_value.
480+
self.reconstruct_select_statement(plan, p, select)?;
481+
}
482+
}
481483

482484
if matches!(
483485
inner_projection.input.as_ref(),
484486
LogicalPlan::EmptyRelation(_)
485487
) {
486488
// Inline array case (e.g. UNNEST([1,2,3])):
487489
// FLATTEN is the sole FROM source.
488-
relation.flatten(flatten_relation);
489-
490-
if !select.already_projected() {
491-
if placeholder_is_bare {
492-
let value_expr = self.build_flatten_value_select_item(
493-
&alias_name,
494-
user_alias.as_deref(),
495-
);
496-
select.projection(vec![value_expr]);
497-
} else {
498-
self.reconstruct_select_statement(plan, p, select)?;
499-
}
500-
}
501-
490+
relation.flatten(flatten);
502491
return self.select_to_sql_recursively(
503492
unnest_plan,
504493
query,
@@ -508,41 +497,9 @@ impl Unparser<'_> {
508497
}
509498

510499
// Non-empty source (table, subquery, etc.):
511-
// The FLATTEN references columns/expressions from the source.
512-
// Convert the inner Projection's expression to SQL for the
513-
// FLATTEN INPUT, then store the FLATTEN to be added as a
514-
// CROSS JOIN after the source relation is processed.
515-
let first_expr = inner_projection.expr.first().ok_or_else(|| {
516-
DataFusionError::Internal(
517-
"Unnest inner projection has no expressions".to_string(),
518-
)
519-
})?;
520-
// Strip the __unnest_placeholder alias to get the raw expression
521-
let raw_expr = match first_expr {
522-
Expr::Alias(Alias { expr, .. }) => expr.as_ref(),
523-
other => other,
524-
};
525-
let input_sql = self.expr_to_sql(raw_expr)?;
526-
527-
let mut flatten = FlattenRelationBuilder::default();
528-
flatten.input_expr(input_sql);
529-
flatten.outer(unnest.options.preserve_nulls);
530-
531-
if !select.already_projected() {
532-
if placeholder_is_bare {
533-
let value_expr = self.build_flatten_value_select_item(
534-
&alias_name,
535-
user_alias.as_deref(),
536-
);
537-
select.projection(vec![value_expr]);
538-
} else {
539-
self.reconstruct_select_statement(plan, p, select)?;
540-
}
541-
}
542-
543-
// Recurse into the Unnest → inner source to set the primary
544-
// relation (table scan, subquery, etc.), then add FLATTEN
545-
// as a CROSS JOIN.
500+
// Recurse into the Unnest → inner source to set the
501+
// primary FROM relation, then add the FLATTEN as a
502+
// CROSS JOIN.
546503
self.select_to_sql_recursively(unnest_plan, query, select, relation)?;
547504

548505
let flatten_factor = flatten.build().map_err(|e| {
@@ -559,26 +516,23 @@ impl Unparser<'_> {
559516
from.push_join(cross_join);
560517
select.push_from(from);
561518
} else {
562-
// No existing FROM — create one with just the FLATTEN
563519
let mut twj = TableWithJoinsBuilder::default();
564520
twj.push_join(cross_join);
565521
select.push_from(twj);
566522
}
567523

568524
return Ok(());
569525
}
526+
// Standard UNNEST table factor path (BigQuery, etc.).
570527
if self.dialect.unnest_as_table_factor()
571528
&& unnest_input_type.is_some()
572529
&& user_alias.is_none() // Skip if user alias present — fall through to reconstruct_select_statement which preserves aliases
573-
&& let Some(unnest) = Self::peel_to_unnest(p.input.as_ref())
530+
&& let Some((unnest, unnest_plan)) =
531+
self.peel_to_unnest_with_modifiers(p.input.as_ref(), query)?
574532
&& let Some(unnest_relation) =
575533
self.try_unnest_to_table_factor_sql(unnest)?
576534
{
577535
relation.unnest(unnest_relation);
578-
let unnest_plan = self.apply_transparent_and_find_unnest_plan(
579-
p.input.as_ref(),
580-
query,
581-
)?;
582536
return self.select_to_sql_recursively(
583537
unnest_plan,
584538
query,
@@ -1262,59 +1216,40 @@ impl Unparser<'_> {
12621216
}
12631217
}
12641218

1265-
/// Walk through "transparent" plan nodes (Limit, Sort) to find an Unnest.
1266-
///
1267-
/// The DataFusion optimizer may insert Limit or Sort between the outer
1268-
/// Projection and the Unnest node. These nodes modify result quantity or
1269-
/// ordering but do not change the plan shape for unnest detection. The
1270-
/// normal recursion in [`Self::select_to_sql_recursively`] handles their
1271-
/// SQL rendering (LIMIT/OFFSET/ORDER BY); this helper only needs to
1272-
/// locate the Unnest so the FLATTEN / table-factor code path can fire.
1273-
fn peel_to_unnest(plan: &LogicalPlan) -> Option<&Unnest> {
1274-
match plan {
1275-
LogicalPlan::Unnest(unnest) => Some(unnest),
1276-
LogicalPlan::Limit(limit) => Self::peel_to_unnest(limit.input.as_ref()),
1277-
LogicalPlan::Sort(sort) => Self::peel_to_unnest(sort.input.as_ref()),
1278-
_ => None,
1279-
}
1280-
}
1281-
1282-
/// Walk through "transparent" plan nodes (SubqueryAlias, Limit, Sort) to
1283-
/// find the inner Projection that feeds an Unnest node.
1219+
/// Walk through transparent nodes (SubqueryAlias) to find the inner
1220+
/// Projection that feeds an Unnest node.
12841221
///
1285-
/// The inner Projection is created by the `RecursiveUnnestRewriter` and
1286-
/// contains the array expression that the Unnest operates on. A
1287-
/// `SubqueryAlias` (e.g. from a virtual/passthrough table) may wrap the
1288-
/// Projection; Limit/Sort are also handled for robustness.
1222+
/// The inner Projection is created atomically by the
1223+
/// `RecursiveUnnestRewriter` and contains the array expression that the
1224+
/// Unnest operates on. A `SubqueryAlias` (e.g. from a virtual/passthrough
1225+
/// table) may wrap the Projection.
12891226
fn peel_to_inner_projection(plan: &LogicalPlan) -> Option<&Projection> {
12901227
match plan {
12911228
LogicalPlan::Projection(p) => Some(p),
12921229
LogicalPlan::SubqueryAlias(alias) => {
12931230
Self::peel_to_inner_projection(alias.input.as_ref())
12941231
}
1295-
LogicalPlan::Limit(limit) => {
1296-
Self::peel_to_inner_projection(limit.input.as_ref())
1297-
}
1298-
LogicalPlan::Sort(sort) => {
1299-
Self::peel_to_inner_projection(sort.input.as_ref())
1300-
}
13011232
_ => None,
13021233
}
13031234
}
13041235

13051236
/// Walk through transparent nodes (Limit, Sort) between the outer
13061237
/// Projection and the Unnest, applying their SQL modifiers (LIMIT,
1307-
/// OFFSET, ORDER BY) to the query builder. Returns a reference to the
1308-
/// Unnest `LogicalPlan` node so the caller can recurse into it directly,
1309-
/// bypassing the intermediate handlers that would otherwise create
1310-
/// unwanted derived subqueries.
1311-
fn apply_transparent_and_find_unnest_plan<'a>(
1238+
/// OFFSET, ORDER BY) to the query builder. Returns the `Unnest` node
1239+
/// and a reference to the enclosing `LogicalPlan` for recursion, or
1240+
/// `Ok(None)` if no Unnest is found.
1241+
///
1242+
/// By processing Limit/Sort inline and then recursing into the Unnest
1243+
/// plan directly, we bypass the normal Limit/Sort handlers which would
1244+
/// create unwanted derived subqueries (since `already_projected` is
1245+
/// set at the point this is called).
1246+
fn peel_to_unnest_with_modifiers<'a>(
13121247
&self,
13131248
plan: &'a LogicalPlan,
13141249
query: &mut Option<QueryBuilder>,
1315-
) -> Result<&'a LogicalPlan> {
1250+
) -> Result<Option<(&'a Unnest, &'a LogicalPlan)>> {
13161251
match plan {
1317-
LogicalPlan::Unnest(_) => Ok(plan),
1252+
LogicalPlan::Unnest(unnest) => Ok(Some((unnest, plan))),
13181253
LogicalPlan::Limit(limit) => {
13191254
if let Some(fetch) = &limit.fetch
13201255
&& let Some(q) = query.as_mut()
@@ -1329,7 +1264,7 @@ impl Unparser<'_> {
13291264
value: self.expr_to_sql(skip)?,
13301265
}));
13311266
}
1332-
self.apply_transparent_and_find_unnest_plan(limit.input.as_ref(), query)
1267+
self.peel_to_unnest_with_modifiers(limit.input.as_ref(), query)
13331268
}
13341269
LogicalPlan::Sort(sort) => {
13351270
let Some(query_ref) = query.as_mut() else {
@@ -1344,32 +1279,49 @@ impl Unparser<'_> {
13441279
))));
13451280
}
13461281
query_ref.order_by(self.sorts_to_sql(&sort.expr)?);
1347-
self.apply_transparent_and_find_unnest_plan(sort.input.as_ref(), query)
1348-
}
1349-
other => {
1350-
internal_err!("Unexpected node between Projection and Unnest: {other:?}")
1282+
self.peel_to_unnest_with_modifiers(sort.input.as_ref(), query)
13511283
}
1284+
_ => Ok(None),
13521285
}
13531286
}
13541287

1355-
/// Try to find the placeholder column name generated by `RecursiveUnnestRewriter`.
1288+
/// Search an expression for an unnest placeholder column reference.
13561289
///
1357-
/// - If the column is a placeholder column match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))`,
1358-
/// it means it is a scalar column, return [UnnestInputType::Scalar].
1359-
/// - If the column is a placeholder column match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(outer_ref(...)))")`,
1360-
/// it means it is an outer reference column, return [UnnestInputType::OuterReference].
1361-
/// - If the column is not a placeholder column, return [None].
1290+
/// Returns both the [`UnnestInputType`] and whether the placeholder is
1291+
/// "bare" (the expression IS the placeholder, modulo aliases) or
1292+
/// "wrapped" (the placeholder is inside a function call or other
1293+
/// transformation).
13621294
///
1363-
/// `outer_ref` is the display result of [Expr::OuterReferenceColumn]
1364-
fn check_unnest_placeholder_with_outer_ref(expr: &Expr) -> Option<UnnestInputType> {
1365-
// Peel through all Alias layers to find the inner Column.
1366-
// The expression may have multiple aliases, e.g.:
1367-
// Alias("items", Alias("UNNEST(...)", Column("__unnest_placeholder(...)")))
1295+
/// Examples:
1296+
/// - `Alias("item", Column("__unnest_placeholder(...)"))` → `Some((Scalar, true))`
1297+
/// - `Alias("t", Cast(Column("__unnest_placeholder(...)"), Int64))` → `Some((Scalar, false))`
1298+
/// - `Column("unrelated")` → `None`
1299+
fn find_unnest_placeholder(expr: &Expr) -> Option<(UnnestInputType, bool)> {
1300+
// Fast path: check if the expression IS the placeholder (peel aliases).
13681301
let mut inner = expr;
13691302
while let Expr::Alias(Alias { expr, .. }) = inner {
13701303
inner = expr.as_ref();
13711304
}
1372-
if let Expr::Column(Column { name, .. }) = inner
1305+
if let Some(t) = Self::classify_placeholder_column(inner) {
1306+
return Some((t, true));
1307+
}
1308+
// Slow path: walk the full expression tree.
1309+
let mut result = None;
1310+
let _ = expr.apply(|e| {
1311+
if let Some(t) = Self::classify_placeholder_column(e) {
1312+
result = Some(t);
1313+
return Ok(TreeNodeRecursion::Stop);
1314+
}
1315+
Ok(TreeNodeRecursion::Continue)
1316+
});
1317+
result.map(|t| (t, false))
1318+
}
1319+
1320+
/// If `expr` is a `Column` whose name starts with `__unnest_placeholder`,
1321+
/// classify it as [`UnnestInputType::OuterReference`] or
1322+
/// [`UnnestInputType::Scalar`].
1323+
fn classify_placeholder_column(expr: &Expr) -> Option<UnnestInputType> {
1324+
if let Expr::Column(Column { name, .. }) = expr
13731325
&& let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER)
13741326
{
13751327
if prefix.starts_with(&format!("({OUTER_REFERENCE_COLUMN_PREFIX}(")) {
@@ -1380,39 +1332,6 @@ impl Unparser<'_> {
13801332
None
13811333
}
13821334

1383-
/// Recursively search the expression tree for an unnest placeholder column.
1384-
///
1385-
/// Unlike [`Self::check_unnest_placeholder_with_outer_ref`] which only
1386-
/// matches when the placeholder IS the expression (modulo aliases), this
1387-
/// function finds placeholders buried inside function calls or other
1388-
/// transformations. For example:
1389-
///
1390-
/// ```text
1391-
/// Alias("target_type",
1392-
/// json_as_text(
1393-
/// Column("__unnest_placeholder(...)"), ← found here
1394-
/// Literal("type")))
1395-
/// ```
1396-
fn find_unnest_placeholder_in_expr(expr: &Expr) -> Option<UnnestInputType> {
1397-
let mut result = None;
1398-
let _ = expr.apply(|e| {
1399-
if let Expr::Column(Column { name, .. }) = e
1400-
&& let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER)
1401-
{
1402-
result = if prefix
1403-
.starts_with(&format!("({OUTER_REFERENCE_COLUMN_PREFIX}("))
1404-
{
1405-
Some(UnnestInputType::OuterReference)
1406-
} else {
1407-
Some(UnnestInputType::Scalar)
1408-
};
1409-
return Ok(TreeNodeRecursion::Stop);
1410-
}
1411-
Ok(TreeNodeRecursion::Continue)
1412-
});
1413-
result
1414-
}
1415-
14161335
/// Extract the outermost user-provided alias from an unnest expression.
14171336
/// Returns `None` if the outermost alias is DataFusion's internal display
14181337
/// name (e.g. `UNNEST(make_array(...))`), or if there is no alias at all.
@@ -1463,7 +1382,7 @@ impl Unparser<'_> {
14631382
user_alias: Option<&str>,
14641383
) -> ast::SelectItem {
14651384
let compound = ast::Expr::CompoundIdentifier(vec![
1466-
self.new_ident_without_quote_style(flatten_alias.to_string()),
1385+
self.new_ident_quoted_if_needs(flatten_alias.to_string()),
14671386
Ident::with_quote('"', "VALUE"),
14681387
]);
14691388
match user_alias {

0 commit comments

Comments
 (0)