@@ -421,25 +421,16 @@ impl Unparser<'_> {
421421 }
422422
423423 // Projection can be top-level plan for unnest relation.
424- // The projection generated by the `RecursiveUnnestRewriter` from a UNNEST relation will have
425- // only one expression, which is the placeholder column generated by the rewriter.
426- //
427- // Two cases:
428- // - "bare": the expression IS the placeholder (+ aliases):
429- // `__unnest_placeholder(...) AS item`
430- // - "wrapped": the placeholder is inside a function call:
431- // `json_as_text(__unnest_placeholder(...), 'type') AS target_type`
424+ // The projection generated by the `RecursiveUnnestRewriter`
425+ // will have exactly one expression referencing the unnest
426+ // placeholder column. The placeholder may be at the top level
427+ // ("bare": `__unnest_placeholder(...) AS item`) or wrapped
428+ // inside a function call ("wrapped":
429+ // `json_as_text(__unnest_placeholder(...), 'type') AS t`).
432430 let ( unnest_input_type, placeholder_is_bare) = if p. expr . len ( ) == 1 {
433- if let Some ( t) =
434- Self :: check_unnest_placeholder_with_outer_ref ( & p. expr [ 0 ] )
435- {
436- ( Some ( t) , true )
437- } else if let Some ( t) =
438- Self :: find_unnest_placeholder_in_expr ( & p. expr [ 0 ] )
439- {
440- ( Some ( t) , false )
441- } else {
442- ( None , false )
431+ match Self :: find_unnest_placeholder ( & p. expr [ 0 ] ) {
432+ Some ( ( t, is_bare) ) => ( Some ( t) , is_bare) ,
433+ None => ( None , false ) ,
443434 }
444435 } else {
445436 ( None , false )
@@ -451,16 +442,19 @@ impl Unparser<'_> {
451442 } else {
452443 None
453444 } ;
445+ // Snowflake LATERAL FLATTEN path.
446+ // `peel_to_unnest_with_modifiers` walks through any
447+ // intermediate Limit/Sort nodes, applies their modifiers to
448+ // the query, and returns the Unnest + the LogicalPlan ref to
449+ // recurse into (bypassing the normal Limit/Sort handlers that
450+ // would create a derived subquery).
454451 if self . dialect . unnest_as_lateral_flatten ( )
455452 && unnest_input_type. is_some ( )
456- && let Some ( unnest) = Self :: peel_to_unnest ( p. input . as_ref ( ) )
457- && let Some ( flatten_relation) =
453+ && let Some ( ( unnest, unnest_plan) ) =
454+ self . peel_to_unnest_with_modifiers ( p. input . as_ref ( ) , query) ?
455+ && let Some ( flatten) =
458456 self . try_unnest_to_lateral_flatten_sql ( unnest) ?
459457 {
460- let alias_name = flatten_relation. alias_name ( ) . to_string ( ) ;
461-
462- // Check if the Unnest source is a real query (subselect)
463- // vs an inline array (EmptyRelation).
464458 let inner_projection =
465459 Self :: peel_to_inner_projection ( unnest. input . as_ref ( ) )
466460 . ok_or_else ( || {
@@ -470,35 +464,30 @@ impl Unparser<'_> {
470464 ) )
471465 } ) ?;
472466
473- // Apply any intermediate Limit/Sort modifiers and get
474- // a reference to the Unnest LogicalPlan node for recursion.
475- // This bypasses the Limit/Sort handlers (which would create
476- // unwanted derived subqueries when already_projected is set).
477- let unnest_plan = self . apply_transparent_and_find_unnest_plan (
478- p. input . as_ref ( ) ,
479- query,
480- ) ?;
467+ // Set the SELECT projection.
468+ if !select. already_projected ( ) {
469+ if placeholder_is_bare {
470+ let value_expr = self . build_flatten_value_select_item (
471+ flatten. alias_name ( ) ,
472+ user_alias. as_deref ( ) ,
473+ ) ;
474+ select. projection ( vec ! [ value_expr] ) ;
475+ } else {
476+ // Composed expression wrapping the placeholder:
477+ // reconstruct_select_statement rewrites the
478+ // placeholder column to `_unnest."VALUE"` via
479+ // unproject_unnest_expr_as_flatten_value.
480+ self . reconstruct_select_statement ( plan, p, select) ?;
481+ }
482+ }
481483
482484 if matches ! (
483485 inner_projection. input. as_ref( ) ,
484486 LogicalPlan :: EmptyRelation ( _)
485487 ) {
486488 // Inline array case (e.g. UNNEST([1,2,3])):
487489 // FLATTEN is the sole FROM source.
488- relation. flatten ( flatten_relation) ;
489-
490- if !select. already_projected ( ) {
491- if placeholder_is_bare {
492- let value_expr = self . build_flatten_value_select_item (
493- & alias_name,
494- user_alias. as_deref ( ) ,
495- ) ;
496- select. projection ( vec ! [ value_expr] ) ;
497- } else {
498- self . reconstruct_select_statement ( plan, p, select) ?;
499- }
500- }
501-
490+ relation. flatten ( flatten) ;
502491 return self . select_to_sql_recursively (
503492 unnest_plan,
504493 query,
@@ -508,41 +497,9 @@ impl Unparser<'_> {
508497 }
509498
510499 // Non-empty source (table, subquery, etc.):
511- // The FLATTEN references columns/expressions from the source.
512- // Convert the inner Projection's expression to SQL for the
513- // FLATTEN INPUT, then store the FLATTEN to be added as a
514- // CROSS JOIN after the source relation is processed.
515- let first_expr = inner_projection. expr . first ( ) . ok_or_else ( || {
516- DataFusionError :: Internal (
517- "Unnest inner projection has no expressions" . to_string ( ) ,
518- )
519- } ) ?;
520- // Strip the __unnest_placeholder alias to get the raw expression
521- let raw_expr = match first_expr {
522- Expr :: Alias ( Alias { expr, .. } ) => expr. as_ref ( ) ,
523- other => other,
524- } ;
525- let input_sql = self . expr_to_sql ( raw_expr) ?;
526-
527- let mut flatten = FlattenRelationBuilder :: default ( ) ;
528- flatten. input_expr ( input_sql) ;
529- flatten. outer ( unnest. options . preserve_nulls ) ;
530-
531- if !select. already_projected ( ) {
532- if placeholder_is_bare {
533- let value_expr = self . build_flatten_value_select_item (
534- & alias_name,
535- user_alias. as_deref ( ) ,
536- ) ;
537- select. projection ( vec ! [ value_expr] ) ;
538- } else {
539- self . reconstruct_select_statement ( plan, p, select) ?;
540- }
541- }
542-
543- // Recurse into the Unnest → inner source to set the primary
544- // relation (table scan, subquery, etc.), then add FLATTEN
545- // as a CROSS JOIN.
500+ // Recurse into the Unnest → inner source to set the
501+ // primary FROM relation, then add the FLATTEN as a
502+ // CROSS JOIN.
546503 self . select_to_sql_recursively ( unnest_plan, query, select, relation) ?;
547504
548505 let flatten_factor = flatten. build ( ) . map_err ( |e| {
@@ -559,26 +516,23 @@ impl Unparser<'_> {
559516 from. push_join ( cross_join) ;
560517 select. push_from ( from) ;
561518 } else {
562- // No existing FROM — create one with just the FLATTEN
563519 let mut twj = TableWithJoinsBuilder :: default ( ) ;
564520 twj. push_join ( cross_join) ;
565521 select. push_from ( twj) ;
566522 }
567523
568524 return Ok ( ( ) ) ;
569525 }
526+ // Standard UNNEST table factor path (BigQuery, etc.).
570527 if self . dialect . unnest_as_table_factor ( )
571528 && unnest_input_type. is_some ( )
572529 && user_alias. is_none ( ) // Skip if user alias present — fall through to reconstruct_select_statement which preserves aliases
573- && let Some ( unnest) = Self :: peel_to_unnest ( p. input . as_ref ( ) )
530+ && let Some ( ( unnest, unnest_plan) ) =
531+ self . peel_to_unnest_with_modifiers ( p. input . as_ref ( ) , query) ?
574532 && let Some ( unnest_relation) =
575533 self . try_unnest_to_table_factor_sql ( unnest) ?
576534 {
577535 relation. unnest ( unnest_relation) ;
578- let unnest_plan = self . apply_transparent_and_find_unnest_plan (
579- p. input . as_ref ( ) ,
580- query,
581- ) ?;
582536 return self . select_to_sql_recursively (
583537 unnest_plan,
584538 query,
@@ -1262,59 +1216,40 @@ impl Unparser<'_> {
12621216 }
12631217 }
12641218
1265- /// Walk through "transparent" plan nodes (Limit, Sort) to find an Unnest.
1266- ///
1267- /// The DataFusion optimizer may insert Limit or Sort between the outer
1268- /// Projection and the Unnest node. These nodes modify result quantity or
1269- /// ordering but do not change the plan shape for unnest detection. The
1270- /// normal recursion in [`Self::select_to_sql_recursively`] handles their
1271- /// SQL rendering (LIMIT/OFFSET/ORDER BY); this helper only needs to
1272- /// locate the Unnest so the FLATTEN / table-factor code path can fire.
1273- fn peel_to_unnest ( plan : & LogicalPlan ) -> Option < & Unnest > {
1274- match plan {
1275- LogicalPlan :: Unnest ( unnest) => Some ( unnest) ,
1276- LogicalPlan :: Limit ( limit) => Self :: peel_to_unnest ( limit. input . as_ref ( ) ) ,
1277- LogicalPlan :: Sort ( sort) => Self :: peel_to_unnest ( sort. input . as_ref ( ) ) ,
1278- _ => None ,
1279- }
1280- }
1281-
1282- /// Walk through "transparent" plan nodes (SubqueryAlias, Limit, Sort) to
1283- /// find the inner Projection that feeds an Unnest node.
1219+ /// Walk through transparent nodes (SubqueryAlias) to find the inner
1220+ /// Projection that feeds an Unnest node.
12841221 ///
1285- /// The inner Projection is created by the `RecursiveUnnestRewriter` and
1286- /// contains the array expression that the Unnest operates on. A
1287- /// `SubqueryAlias` (e.g. from a virtual/passthrough table) may wrap the
1288- /// Projection; Limit/Sort are also handled for robustness .
1222+ /// The inner Projection is created atomically by the
1223+ /// `RecursiveUnnestRewriter` and contains the array expression that the
1224+ /// Unnest operates on. A `SubqueryAlias` (e.g. from a virtual/passthrough
1225+ /// table) may wrap the Projection .
12891226 fn peel_to_inner_projection ( plan : & LogicalPlan ) -> Option < & Projection > {
12901227 match plan {
12911228 LogicalPlan :: Projection ( p) => Some ( p) ,
12921229 LogicalPlan :: SubqueryAlias ( alias) => {
12931230 Self :: peel_to_inner_projection ( alias. input . as_ref ( ) )
12941231 }
1295- LogicalPlan :: Limit ( limit) => {
1296- Self :: peel_to_inner_projection ( limit. input . as_ref ( ) )
1297- }
1298- LogicalPlan :: Sort ( sort) => {
1299- Self :: peel_to_inner_projection ( sort. input . as_ref ( ) )
1300- }
13011232 _ => None ,
13021233 }
13031234 }
13041235
13051236 /// Walk through transparent nodes (Limit, Sort) between the outer
13061237 /// Projection and the Unnest, applying their SQL modifiers (LIMIT,
1307- /// OFFSET, ORDER BY) to the query builder. Returns a reference to the
1308- /// Unnest `LogicalPlan` node so the caller can recurse into it directly,
1309- /// bypassing the intermediate handlers that would otherwise create
1310- /// unwanted derived subqueries.
1311- fn apply_transparent_and_find_unnest_plan < ' a > (
1238+ /// OFFSET, ORDER BY) to the query builder. Returns the `Unnest` node
1239+ /// and a reference to the enclosing `LogicalPlan` for recursion, or
1240+ /// `Ok(None)` if no Unnest is found.
1241+ ///
1242+ /// By processing Limit/Sort inline and then recursing into the Unnest
1243+ /// plan directly, we bypass the normal Limit/Sort handlers which would
1244+ /// create unwanted derived subqueries (since `already_projected` is
1245+ /// set at the point this is called).
1246+ fn peel_to_unnest_with_modifiers < ' a > (
13121247 & self ,
13131248 plan : & ' a LogicalPlan ,
13141249 query : & mut Option < QueryBuilder > ,
1315- ) -> Result < & ' a LogicalPlan > {
1250+ ) -> Result < Option < ( & ' a Unnest , & ' a LogicalPlan ) > > {
13161251 match plan {
1317- LogicalPlan :: Unnest ( _ ) => Ok ( plan) ,
1252+ LogicalPlan :: Unnest ( unnest ) => Ok ( Some ( ( unnest , plan) ) ) ,
13181253 LogicalPlan :: Limit ( limit) => {
13191254 if let Some ( fetch) = & limit. fetch
13201255 && let Some ( q) = query. as_mut ( )
@@ -1329,7 +1264,7 @@ impl Unparser<'_> {
13291264 value : self . expr_to_sql ( skip) ?,
13301265 } ) ) ;
13311266 }
1332- self . apply_transparent_and_find_unnest_plan ( limit. input . as_ref ( ) , query)
1267+ self . peel_to_unnest_with_modifiers ( limit. input . as_ref ( ) , query)
13331268 }
13341269 LogicalPlan :: Sort ( sort) => {
13351270 let Some ( query_ref) = query. as_mut ( ) else {
@@ -1344,32 +1279,49 @@ impl Unparser<'_> {
13441279 ) ) ) ) ;
13451280 }
13461281 query_ref. order_by ( self . sorts_to_sql ( & sort. expr ) ?) ;
1347- self . apply_transparent_and_find_unnest_plan ( sort. input . as_ref ( ) , query)
1348- }
1349- other => {
1350- internal_err ! ( "Unexpected node between Projection and Unnest: {other:?}" )
1282+ self . peel_to_unnest_with_modifiers ( sort. input . as_ref ( ) , query)
13511283 }
1284+ _ => Ok ( None ) ,
13521285 }
13531286 }
13541287
1355- /// Try to find the placeholder column name generated by `RecursiveUnnestRewriter` .
1288+ /// Search an expression for an unnest placeholder column reference .
13561289 ///
1357- /// - If the column is a placeholder column match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))`,
1358- /// it means it is a scalar column, return [UnnestInputType::Scalar].
1359- /// - If the column is a placeholder column match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(outer_ref(...)))")`,
1360- /// it means it is an outer reference column, return [UnnestInputType::OuterReference].
1361- /// - If the column is not a placeholder column, return [None].
1290+ /// Returns both the [`UnnestInputType`] and whether the placeholder is
1291+ /// "bare" (the expression IS the placeholder, modulo aliases) or
1292+ /// "wrapped" (the placeholder is inside a function call or other
1293+ /// transformation).
13621294 ///
1363- /// `outer_ref` is the display result of [Expr::OuterReferenceColumn]
1364- fn check_unnest_placeholder_with_outer_ref ( expr : & Expr ) -> Option < UnnestInputType > {
1365- // Peel through all Alias layers to find the inner Column.
1366- // The expression may have multiple aliases, e.g.:
1367- // Alias("items", Alias("UNNEST(...)", Column("__unnest_placeholder(...)")))
1295+ /// Examples:
1296+ /// - `Alias("item", Column("__unnest_placeholder(...)"))` → `Some((Scalar, true))`
1297+ /// - `Alias("t", Cast(Column("__unnest_placeholder(...)"), Int64))` → `Some((Scalar, false))`
1298+ /// - `Column("unrelated")` → `None`
1299+ fn find_unnest_placeholder ( expr : & Expr ) -> Option < ( UnnestInputType , bool ) > {
1300+ // Fast path: check if the expression IS the placeholder (peel aliases).
13681301 let mut inner = expr;
13691302 while let Expr :: Alias ( Alias { expr, .. } ) = inner {
13701303 inner = expr. as_ref ( ) ;
13711304 }
1372- if let Expr :: Column ( Column { name, .. } ) = inner
1305+ if let Some ( t) = Self :: classify_placeholder_column ( inner) {
1306+ return Some ( ( t, true ) ) ;
1307+ }
1308+ // Slow path: walk the full expression tree.
1309+ let mut result = None ;
1310+ let _ = expr. apply ( |e| {
1311+ if let Some ( t) = Self :: classify_placeholder_column ( e) {
1312+ result = Some ( t) ;
1313+ return Ok ( TreeNodeRecursion :: Stop ) ;
1314+ }
1315+ Ok ( TreeNodeRecursion :: Continue )
1316+ } ) ;
1317+ result. map ( |t| ( t, false ) )
1318+ }
1319+
1320+ /// If `expr` is a `Column` whose name starts with `__unnest_placeholder`,
1321+ /// classify it as [`UnnestInputType::OuterReference`] or
1322+ /// [`UnnestInputType::Scalar`].
1323+ fn classify_placeholder_column ( expr : & Expr ) -> Option < UnnestInputType > {
1324+ if let Expr :: Column ( Column { name, .. } ) = expr
13731325 && let Some ( prefix) = name. strip_prefix ( UNNEST_PLACEHOLDER )
13741326 {
13751327 if prefix. starts_with ( & format ! ( "({OUTER_REFERENCE_COLUMN_PREFIX}(" ) ) {
@@ -1380,39 +1332,6 @@ impl Unparser<'_> {
13801332 None
13811333 }
13821334
1383- /// Recursively search the expression tree for an unnest placeholder column.
1384- ///
1385- /// Unlike [`Self::check_unnest_placeholder_with_outer_ref`] which only
1386- /// matches when the placeholder IS the expression (modulo aliases), this
1387- /// function finds placeholders buried inside function calls or other
1388- /// transformations. For example:
1389- ///
1390- /// ```text
1391- /// Alias("target_type",
1392- /// json_as_text(
1393- /// Column("__unnest_placeholder(...)"), ← found here
1394- /// Literal("type")))
1395- /// ```
1396- fn find_unnest_placeholder_in_expr ( expr : & Expr ) -> Option < UnnestInputType > {
1397- let mut result = None ;
1398- let _ = expr. apply ( |e| {
1399- if let Expr :: Column ( Column { name, .. } ) = e
1400- && let Some ( prefix) = name. strip_prefix ( UNNEST_PLACEHOLDER )
1401- {
1402- result = if prefix
1403- . starts_with ( & format ! ( "({OUTER_REFERENCE_COLUMN_PREFIX}(" ) )
1404- {
1405- Some ( UnnestInputType :: OuterReference )
1406- } else {
1407- Some ( UnnestInputType :: Scalar )
1408- } ;
1409- return Ok ( TreeNodeRecursion :: Stop ) ;
1410- }
1411- Ok ( TreeNodeRecursion :: Continue )
1412- } ) ;
1413- result
1414- }
1415-
14161335 /// Extract the outermost user-provided alias from an unnest expression.
14171336 /// Returns `None` if the outermost alias is DataFusion's internal display
14181337 /// name (e.g. `UNNEST(make_array(...))`), or if there is no alias at all.
@@ -1463,7 +1382,7 @@ impl Unparser<'_> {
14631382 user_alias : Option < & str > ,
14641383 ) -> ast:: SelectItem {
14651384 let compound = ast:: Expr :: CompoundIdentifier ( vec ! [
1466- self . new_ident_without_quote_style ( flatten_alias. to_string( ) ) ,
1385+ self . new_ident_quoted_if_needs ( flatten_alias. to_string( ) ) ,
14671386 Ident :: with_quote( '"' , "VALUE" ) ,
14681387 ] ) ;
14691388 match user_alias {
0 commit comments