Skip to content

Commit 203a8cf

Browse files
adriangbclaude
andcommitted
fix: handle complex projections in ordering validation
Previously, `get_projected_output_ordering` used `ordered_column_indices_from_projection` which was all-or-nothing: if any expression in the projection wasn't a simple Column, it returned None for the entire projection — even if the sort columns themselves were simple column refs. Replace it with `resolve_sort_column_projection` which only requires sort-column positions to resolve to simple Columns. Each ordering is now independently evaluated: orderings on simple column refs get validated with statistics even when other projection expressions are complex. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent bfc012e commit 203a8cf

1 file changed

Lines changed: 56 additions & 51 deletions

File tree

datafusion/datasource/src/file_scan_config.rs

Lines changed: 56 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1319,19 +1319,33 @@ impl DisplayAs for FileScanConfig {
13191319
}
13201320
}
13211321

1322-
/// Get the indices of columns in a projection if the projection is a simple
1323-
/// list of columns.
1324-
/// If there are any expressions other than columns, returns None.
1325-
fn ordered_column_indices_from_projection(
1322+
/// Build a projection index mapping for the sort columns in `ordering`.
1323+
///
1324+
/// Returns a `Vec<usize>` of the same length as `projection`, where each entry
1325+
/// maps a projected-schema column index to the corresponding table-schema column
1326+
/// index. Only the entries referenced by sort columns in `ordering` are required
1327+
/// to be simple `Column` expressions; non-sort-column positions are filled with a
1328+
/// placeholder (0) since they will never be accessed by `MinMaxStatistics`.
1329+
///
1330+
/// Returns `None` if any sort column is not a simple `Column` reference in the
1331+
/// projected ordering, or if its corresponding projection expression is not a
1332+
/// simple `Column`.
1333+
fn resolve_sort_column_projection(
1334+
ordering: &LexOrdering,
13261335
projection: &ProjectionExprs,
13271336
) -> Option<Vec<usize>> {
1328-
projection
1329-
.expr_iter()
1330-
.map(|e| {
1331-
let index = e.as_any().downcast_ref::<Column>()?.index();
1332-
Some(index)
1333-
})
1334-
.collect::<Option<Vec<usize>>>()
1337+
let proj_slice = projection.as_ref();
1338+
let mut indices = vec![0usize; proj_slice.len()];
1339+
1340+
for sort_expr in ordering.iter() {
1341+
let col = sort_expr.expr.as_any().downcast_ref::<Column>()?;
1342+
let proj_idx = col.index();
1343+
let proj_expr = proj_slice.get(proj_idx)?;
1344+
let table_col = proj_expr.expr.as_any().downcast_ref::<Column>()?;
1345+
indices[proj_idx] = table_col.index();
1346+
}
1347+
1348+
Some(indices)
13351349
}
13361350

13371351
/// Check whether a given ordering is valid for all file groups by verifying
@@ -1445,47 +1459,38 @@ fn get_projected_output_ordering(
14451459
let projected_orderings =
14461460
project_orderings(&base_config.output_ordering, projected_schema);
14471461

1448-
let indices = base_config
1449-
.file_source
1450-
.projection()
1451-
.as_ref()
1452-
.map(|p| ordered_column_indices_from_projection(p));
1453-
1454-
match indices {
1455-
Some(Some(indices)) => {
1456-
// Simple column projection — validate with statistics
1457-
validate_orderings(
1458-
&projected_orderings,
1459-
projected_schema,
1460-
&base_config.file_groups,
1461-
Some(indices.as_slice()),
1462-
)
1463-
}
1464-
None => {
1465-
// No projection — validate with statistics (no remapping needed)
1466-
validate_orderings(
1467-
&projected_orderings,
1468-
projected_schema,
1469-
&base_config.file_groups,
1470-
None,
1471-
)
1472-
}
1473-
Some(None) => {
1474-
// Complex projection (expressions, not simple columns) — can't
1475-
// determine column indices for statistics. Still valid if all
1476-
// file groups have at most one file.
1477-
if base_config.file_groups.iter().all(|g| g.len() <= 1) {
1478-
projected_orderings
1479-
} else {
1480-
debug!(
1481-
"Skipping specified output orderings. \
1482-
Some file groups couldn't be determined to be sorted: {:?}",
1483-
base_config.file_groups
1484-
);
1485-
vec![]
1462+
let projection = base_config.file_source.projection();
1463+
1464+
projected_orderings
1465+
.into_iter()
1466+
.filter(|ordering| match projection.as_ref() {
1467+
None => {
1468+
// No projection — validate directly with statistics
1469+
is_ordering_valid_for_file_groups(
1470+
&base_config.file_groups,
1471+
ordering,
1472+
projected_schema,
1473+
None,
1474+
)
14861475
}
1487-
}
1488-
}
1476+
Some(proj) => match resolve_sort_column_projection(ordering, proj) {
1477+
Some(indices) => {
1478+
// All sort columns resolved — validate with statistics
1479+
is_ordering_valid_for_file_groups(
1480+
&base_config.file_groups,
1481+
ordering,
1482+
projected_schema,
1483+
Some(&indices),
1484+
)
1485+
}
1486+
None => {
1487+
// Some sort column is a complex expression — can't
1488+
// look up statistics. Fall back to single-file check.
1489+
base_config.file_groups.iter().all(|g| g.len() <= 1)
1490+
}
1491+
},
1492+
})
1493+
.collect()
14891494
}
14901495

14911496
/// Convert type to a type suitable for use as a `ListingTable`

0 commit comments

Comments
 (0)