File tree Expand file tree Collapse file tree
datafusion/datasource-parquet/src Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -1232,11 +1232,22 @@ impl RowGroupsPrunedParquetOpen {
12321232
12331233 // TopK cumulative pruning: after reorder + reverse, the RGs are in
12341234 // optimal order. Accumulate rows from the front until >= K, prune rest.
1235- // For sort pushdown: always safe (sorted = non-overlapping).
1236- // For non-sort-pushdown: safe only if reordered RGs are non-overlapping
1237- // (verified by checking max[i] <= min[i+1] in the reordered sequence).
1235+ //
1236+ // Only safe when predicate is DynamicFilter-only (no WHERE clause).
1237+ // With WHERE, raw num_rows overestimates qualifying rows — cumulative
1238+ // prune may keep too few RGs, returning fewer than K results.
1239+ //
1240+ // Additionally requires either sort pushdown (guaranteed non-overlapping)
1241+ // or verified non-overlap from statistics.
1242+ let is_pure_dynamic_filter = prepared. predicate . as_ref ( ) . is_some_and ( |p| {
1243+ let any_ref: & dyn std:: any:: Any = p. as_ref ( ) ;
1244+ any_ref
1245+ . downcast_ref :: < DynamicFilterPhysicalExpr > ( )
1246+ . is_some ( )
1247+ } ) ;
12381248 let has_sort_pushdown = prepared. sort_order_for_reorder . is_some ( ) ;
1239- if let Some ( predicate) = & prepared. predicate
1249+ if is_pure_dynamic_filter
1250+ && let Some ( predicate) = & prepared. predicate
12401251 && let Some ( df) = find_dynamic_filter ( predicate)
12411252 && let Some ( fetch) = df. fetch ( )
12421253 && ( has_sort_pushdown
You can’t perform that action at this time.
0 commit comments