@@ -751,15 +751,30 @@ config_namespace! {
751751 /// parquet reader setting. 0 means no caching.
752752 pub max_predicate_cache_size: Option <usize >, default = None
753753
754- /// (reading) Minimum filter effectiveness threshold for adaptive filter
755- /// pushdown.
756- /// Only filters that filter out at least this fraction of rows will be
757- /// promoted to row filters during adaptive filter pushdown.
758- /// A value of 1.0 means only filters that filter out all rows will be
759- /// promoted. A value of 0.0 means all filters will be promoted.
760- /// Because there can be a high I/O cost to pushing down ineffective filters,
761- /// recommended values are in the range [0.8, 0.95], depending on random I/O costs.
762- pub filter_effectiveness_threshold: f64 , default = 0.8
754+ /// (reading) Minimum bytes/sec throughput for adaptive filter pushdown.
755+ /// Filters that achieve at least this throughput (bytes_saved / eval_time)
756+ /// are promoted to row filters.
757+ /// The default, `f64::INFINITY`, means no filters are promoted (feature disabled).
758+ /// A value of `0.0` means all filters are pushed as row filters (no adaptive logic).
759+ pub filter_pushdown_min_bytes_per_sec: f64 , default = f64 :: INFINITY
760+
761+ /// (reading) Correlation ratio threshold for grouping filters.
762+ /// The ratio is P(A ∧ B) / (P(A) * P(B)):
763+ /// 1.0 = independent (keep separate for late materialization benefit)
764+ /// 1.5 = filters co-pass 50% more often than chance (default threshold)
765+ /// 2.0 = filters co-pass twice as often as chance (conservative)
766+ /// Higher values mean less grouping: more late materialization, but more overhead.
767+ /// Lower values mean more grouping: less overhead, but less late materialization.
768+ /// Set to f64::MAX to disable grouping entirely.
769+ pub filter_correlation_threshold: f64 , default = 1.5
770+
771+ /// (reading) Minimum rows of post-scan evaluation before statistics-based
772+ /// optimization activates. During collection, all filters are evaluated
773+ /// as post-scan to gather accurate marginal and joint selectivity statistics.
774+ /// Used for BOTH individual filter effectiveness decisions AND correlation-
775+ /// based grouping. Larger values = more accurate estimates, longer collection.
776+ /// Set to 0 to disable the collection phase entirely.
777+ pub filter_statistics_collection_min_rows: u64 , default = 10_000
763778
764779 // The following options affect writing to parquet files
765780 // and map to parquet::file::properties::WriterProperties
0 commit comments