From ebe07a1dc0c19ddbdb9dd52c9658f1cbaa591bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Sat, 4 Apr 2026 22:44:05 +0200 Subject: [PATCH] Skip per-row filter evaluation when all row groups are fully matched When statistics prove that every remaining row group fully satisfies the filter predicate, skip attaching the row filter to the Parquet decoder entirely. This avoids unnecessary per-row filter evaluation for queries like `WHERE col <> 0` or `WHERE col <> ''` when min/max statistics show the filter is trivially true for all row groups. Co-Authored-By: Claude Opus 4.6 (1M context) --- datafusion/datasource-parquet/src/opener.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs index 6621706c35c81..c380a0d4b1c9d 100644 --- a/datafusion/datasource-parquet/src/opener.rs +++ b/datafusion/datasource-parquet/src/opener.rs @@ -1048,6 +1048,15 @@ impl RowGroupsPrunedParquetOpen { row_groups.prune_by_limit(limit, rg_metadata, &prepared.file_metrics); } + // If all remaining row groups are fully matched by the predicate + // (i.e., statistics prove every row satisfies the filter), skip + // per-row filter evaluation entirely. + let all_fully_matched = row_filter.is_some() + && row_groups + .row_group_indexes() + .all(|idx| row_groups.is_fully_matched()[idx]); + let row_filter = if all_fully_matched { None } else { row_filter }; + // Page index pruning: if all data on individual pages can // be ruled using page metadata, rows from other columns // with that range can be skipped as well.