Skip to content

Commit 416c830

Browse files
adriangbclaude
andcommitted
fix: handle complex projections in ordering validation
Previously, `get_projected_output_ordering` used `ordered_column_indices_from_projection` which was all-or-nothing: if any expression in the projection wasn't a simple Column, it returned None for the entire projection — even if the sort columns themselves were simple column refs. Replace it with `resolve_sort_column_projection` which only requires sort-column positions to resolve to simple Columns. Each ordering is now independently evaluated: orderings on simple column refs get validated with statistics even when other projection expressions are complex. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 18af518 commit 416c830

1 file changed

Lines changed: 56 additions & 51 deletions

File tree

datafusion/datasource/src/file_scan_config.rs

Lines changed: 56 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,19 +1341,33 @@ impl DisplayAs for FileScanConfig {
13411341
}
13421342
}
13431343

1344-
/// Get the indices of columns in a projection if the projection is a simple
1345-
/// list of columns.
1346-
/// If there are any expressions other than columns, returns None.
1347-
fn ordered_column_indices_from_projection(
1344+
/// Build a projection index mapping for the sort columns in `ordering`.
1345+
///
1346+
/// Returns a `Vec<usize>` of the same length as `projection`, where each entry
1347+
/// maps a projected-schema column index to the corresponding table-schema column
1348+
/// index. Only the entries referenced by sort columns in `ordering` are required
1349+
/// to be simple `Column` expressions; non-sort-column positions are filled with a
1350+
/// placeholder (0) since they will never be accessed by `MinMaxStatistics`.
1351+
///
1352+
/// Returns `None` if any sort column is not a simple `Column` reference in the
1353+
/// projected ordering, or if its corresponding projection expression is not a
1354+
/// simple `Column`.
1355+
fn resolve_sort_column_projection(
1356+
ordering: &LexOrdering,
13481357
projection: &ProjectionExprs,
13491358
) -> Option<Vec<usize>> {
1350-
projection
1351-
.expr_iter()
1352-
.map(|e| {
1353-
let index = e.as_any().downcast_ref::<Column>()?.index();
1354-
Some(index)
1355-
})
1356-
.collect::<Option<Vec<usize>>>()
1359+
let proj_slice = projection.as_ref();
1360+
let mut indices = vec![0usize; proj_slice.len()];
1361+
1362+
for sort_expr in ordering.iter() {
1363+
let col = sort_expr.expr.as_any().downcast_ref::<Column>()?;
1364+
let proj_idx = col.index();
1365+
let proj_expr = proj_slice.get(proj_idx)?;
1366+
let table_col = proj_expr.expr.as_any().downcast_ref::<Column>()?;
1367+
indices[proj_idx] = table_col.index();
1368+
}
1369+
1370+
Some(indices)
13571371
}
13581372

13591373
/// Check whether a given ordering is valid for all file groups by verifying
@@ -1467,47 +1481,38 @@ fn get_projected_output_ordering(
14671481
let projected_orderings =
14681482
project_orderings(&base_config.output_ordering, projected_schema);
14691483

1470-
let indices = base_config
1471-
.file_source
1472-
.projection()
1473-
.as_ref()
1474-
.map(|p| ordered_column_indices_from_projection(p));
1475-
1476-
match indices {
1477-
Some(Some(indices)) => {
1478-
// Simple column projection — validate with statistics
1479-
validate_orderings(
1480-
&projected_orderings,
1481-
projected_schema,
1482-
&base_config.file_groups,
1483-
Some(indices.as_slice()),
1484-
)
1485-
}
1486-
None => {
1487-
// No projection — validate with statistics (no remapping needed)
1488-
validate_orderings(
1489-
&projected_orderings,
1490-
projected_schema,
1491-
&base_config.file_groups,
1492-
None,
1493-
)
1494-
}
1495-
Some(None) => {
1496-
// Complex projection (expressions, not simple columns) — can't
1497-
// determine column indices for statistics. Still valid if all
1498-
// file groups have at most one file.
1499-
if base_config.file_groups.iter().all(|g| g.len() <= 1) {
1500-
projected_orderings
1501-
} else {
1502-
debug!(
1503-
"Skipping specified output orderings. \
1504-
Some file groups couldn't be determined to be sorted: {:?}",
1505-
base_config.file_groups
1506-
);
1507-
vec![]
1484+
let projection = base_config.file_source.projection();
1485+
1486+
projected_orderings
1487+
.into_iter()
1488+
.filter(|ordering| match projection.as_ref() {
1489+
None => {
1490+
// No projection — validate directly with statistics
1491+
is_ordering_valid_for_file_groups(
1492+
&base_config.file_groups,
1493+
ordering,
1494+
projected_schema,
1495+
None,
1496+
)
15081497
}
1509-
}
1510-
}
1498+
Some(proj) => match resolve_sort_column_projection(ordering, proj) {
1499+
Some(indices) => {
1500+
// All sort columns resolved — validate with statistics
1501+
is_ordering_valid_for_file_groups(
1502+
&base_config.file_groups,
1503+
ordering,
1504+
projected_schema,
1505+
Some(&indices),
1506+
)
1507+
}
1508+
None => {
1509+
// Some sort column is a complex expression — can't
1510+
// look up statistics. Fall back to single-file check.
1511+
base_config.file_groups.iter().all(|g| g.len() <= 1)
1512+
}
1513+
},
1514+
})
1515+
.collect()
15111516
}
15121517

15131518
/// Convert type to a type suitable for use as a `ListingTable`

0 commit comments

Comments
 (0)