Skip to content

Commit ec941ff

Browse files
committed
fix: cumulative prune only without WHERE to avoid under-returning rows
1 parent 583f4db commit ec941ff

1 file changed

Lines changed: 15 additions & 4 deletions

File tree

datafusion/datasource-parquet/src/opener.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,11 +1232,22 @@ impl RowGroupsPrunedParquetOpen {
12321232

12331233
// TopK cumulative pruning: after reorder + reverse, the RGs are in
12341234
// optimal order. Accumulate rows from the front until >= K, prune rest.
1235-
// For sort pushdown: always safe (sorted = non-overlapping).
1236-
// For non-sort-pushdown: safe only if reordered RGs are non-overlapping
1237-
// (verified by checking max[i] <= min[i+1] in the reordered sequence).
1235+
//
1236+
// Only safe when predicate is DynamicFilter-only (no WHERE clause).
1237+
// With WHERE, raw num_rows overestimates qualifying rows — cumulative
1238+
// prune may keep too few RGs, returning fewer than K results.
1239+
//
1240+
// Additionally requires either sort pushdown (guaranteed non-overlapping)
1241+
// or verified non-overlap from statistics.
1242+
let is_pure_dynamic_filter = prepared.predicate.as_ref().is_some_and(|p| {
1243+
let any_ref: &dyn std::any::Any = p.as_ref();
1244+
any_ref
1245+
.downcast_ref::<DynamicFilterPhysicalExpr>()
1246+
.is_some()
1247+
});
12381248
let has_sort_pushdown = prepared.sort_order_for_reorder.is_some();
1239-
if let Some(predicate) = &prepared.predicate
1249+
if is_pure_dynamic_filter
1250+
&& let Some(predicate) = &prepared.predicate
12401251
&& let Some(df) = find_dynamic_filter(predicate)
12411252
&& let Some(fetch) = df.fetch()
12421253
&& (has_sort_pushdown

0 commit comments

Comments
 (0)