Skip to content

Commit c89e7ba

Browse files
adriangbclaude
andcommitted
fix: handle complex projections in ordering validation
Previously, `get_projected_output_ordering` used `ordered_column_indices_from_projection` which was all-or-nothing: if any expression in the projection wasn't a simple Column, it returned None for the entire projection — even if the sort columns themselves were simple column refs. Replace it with `resolve_sort_column_projection` which only requires sort-column positions to resolve to simple Columns. Each ordering is now independently evaluated: orderings on simple column refs get validated with statistics even when other projection expressions are complex. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c919054 commit c89e7ba

1 file changed

Lines changed: 56 additions & 51 deletions

File tree

datafusion/datasource/src/file_scan_config.rs

Lines changed: 56 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,19 +1331,33 @@ impl DisplayAs for FileScanConfig {
13311331
}
13321332
}
13331333

1334-
/// Get the indices of columns in a projection if the projection is a simple
1335-
/// list of columns.
1336-
/// If there are any expressions other than columns, returns None.
1337-
fn ordered_column_indices_from_projection(
1334+
/// Build a projection index mapping for the sort columns in `ordering`.
1335+
///
1336+
/// Returns a `Vec<usize>` of the same length as `projection`, where each entry
1337+
/// maps a projected-schema column index to the corresponding table-schema column
1338+
/// index. Only the entries referenced by sort columns in `ordering` are required
1339+
/// to be simple `Column` expressions; non-sort-column positions are filled with a
1340+
/// placeholder (0) since they will never be accessed by `MinMaxStatistics`.
1341+
///
1342+
/// Returns `None` if any sort column is not a simple `Column` reference in the
1343+
/// projected ordering, or if its corresponding projection expression is not a
1344+
/// simple `Column`.
1345+
fn resolve_sort_column_projection(
1346+
ordering: &LexOrdering,
13381347
projection: &ProjectionExprs,
13391348
) -> Option<Vec<usize>> {
1340-
projection
1341-
.expr_iter()
1342-
.map(|e| {
1343-
let index = e.as_any().downcast_ref::<Column>()?.index();
1344-
Some(index)
1345-
})
1346-
.collect::<Option<Vec<usize>>>()
1349+
let proj_slice = projection.as_ref();
1350+
let mut indices = vec![0usize; proj_slice.len()];
1351+
1352+
for sort_expr in ordering.iter() {
1353+
let col = sort_expr.expr.as_any().downcast_ref::<Column>()?;
1354+
let proj_idx = col.index();
1355+
let proj_expr = proj_slice.get(proj_idx)?;
1356+
let table_col = proj_expr.expr.as_any().downcast_ref::<Column>()?;
1357+
indices[proj_idx] = table_col.index();
1358+
}
1359+
1360+
Some(indices)
13471361
}
13481362

13491363
/// Check whether a given ordering is valid for all file groups by verifying
@@ -1457,47 +1471,38 @@ fn get_projected_output_ordering(
14571471
let projected_orderings =
14581472
project_orderings(&base_config.output_ordering, projected_schema);
14591473

1460-
let indices = base_config
1461-
.file_source
1462-
.projection()
1463-
.as_ref()
1464-
.map(|p| ordered_column_indices_from_projection(p));
1465-
1466-
match indices {
1467-
Some(Some(indices)) => {
1468-
// Simple column projection — validate with statistics
1469-
validate_orderings(
1470-
&projected_orderings,
1471-
projected_schema,
1472-
&base_config.file_groups,
1473-
Some(indices.as_slice()),
1474-
)
1475-
}
1476-
None => {
1477-
// No projection — validate with statistics (no remapping needed)
1478-
validate_orderings(
1479-
&projected_orderings,
1480-
projected_schema,
1481-
&base_config.file_groups,
1482-
None,
1483-
)
1484-
}
1485-
Some(None) => {
1486-
// Complex projection (expressions, not simple columns) — can't
1487-
// determine column indices for statistics. Still valid if all
1488-
// file groups have at most one file.
1489-
if base_config.file_groups.iter().all(|g| g.len() <= 1) {
1490-
projected_orderings
1491-
} else {
1492-
debug!(
1493-
"Skipping specified output orderings. \
1494-
Some file groups couldn't be determined to be sorted: {:?}",
1495-
base_config.file_groups
1496-
);
1497-
vec![]
1474+
let projection = base_config.file_source.projection();
1475+
1476+
projected_orderings
1477+
.into_iter()
1478+
.filter(|ordering| match projection.as_ref() {
1479+
None => {
1480+
// No projection — validate directly with statistics
1481+
is_ordering_valid_for_file_groups(
1482+
&base_config.file_groups,
1483+
ordering,
1484+
projected_schema,
1485+
None,
1486+
)
14981487
}
1499-
}
1500-
}
1488+
Some(proj) => match resolve_sort_column_projection(ordering, proj) {
1489+
Some(indices) => {
1490+
// All sort columns resolved — validate with statistics
1491+
is_ordering_valid_for_file_groups(
1492+
&base_config.file_groups,
1493+
ordering,
1494+
projected_schema,
1495+
Some(&indices),
1496+
)
1497+
}
1498+
None => {
1499+
// Some sort column is a complex expression — can't
1500+
// look up statistics. Fall back to single-file check.
1501+
base_config.file_groups.iter().all(|g| g.len() <= 1)
1502+
}
1503+
},
1504+
})
1505+
.collect()
15011506
}
15021507

15031508
/// Convert type to a type suitable for use as a `ListingTable`

0 commit comments

Comments
 (0)