feat: gate multi-distinct COUNT rewrite behind session config (default off)

yash · yash · commit 274eeb6a1255 · 2026-03-23T10:55:26.000+05:30
✅ Add datafusion.optimizer.enable_multi_distinct_count_rewrite (default false).

✅ `MultiDistinctCountRewrite` is a no-op when the option is disabled; add `OptimizerContext::with_enable_multi_distinct_count_rewrite` for tests.

✅ SQL integration tests enable the flag via a session helper; add unit test `skips_rewrite_when_config_disabled`.

✅ Document option in user-guide configs.md.

❌ Does not change rewrite semantics when enabled.

Made-with: Cursor
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
@@ -969,6 +969,11 @@ config_namespace! {
         /// predicate push down.
         pub filter_null_join_keys: bool, default = false
 
+        /// When `true`, rewrite one grouped aggregate that has multiple `COUNT(DISTINCT …)` into
+        /// joins of per-distinct sub-aggregates (can lower peak memory; adds join work). Default
+        /// `false` until workload benchmarks justify enabling broadly.
+        pub enable_multi_distinct_count_rewrite: bool, default = false
+
         /// Should DataFusion repartition data using the aggregate keys to execute aggregates
         /// in parallel using the provided `target_partitions` level
         pub repartition_aggregations: bool, default = true
diff --git a/datafusion/core/tests/sql/aggregates/multi_distinct_count_rewrite.rs b/datafusion/core/tests/sql/aggregates/multi_distinct_count_rewrite.rs
@@ -20,11 +20,20 @@
 use super::*;
 use arrow::array::{Float64Array, Int32Array, StringArray};
 use datafusion::common::test_util::batches_to_sort_string;
+use datafusion::execution::config::SessionConfig;
+use datafusion::execution::context::SessionContext;
 use datafusion_catalog::MemTable;
 
+fn session_with_multi_distinct_count_rewrite() -> SessionContext {
+    SessionContext::new_with_config(SessionConfig::new().set_bool(
+        "datafusion.optimizer.enable_multi_distinct_count_rewrite",
+        true, // opt in: this option defaults to `false`
+    ))
+}
+
 #[tokio::test]
 async fn multi_count_distinct_matches_expected_with_nulls() -> Result<()> {
-    let ctx = SessionContext::new();
+    let ctx = session_with_multi_distinct_count_rewrite();
     let schema = Arc::new(Schema::new(vec![
         Field::new("g", DataType::Int32, false),
         Field::new("b", DataType::Utf8, true),
@@ -60,7 +69,7 @@ async fn multi_count_distinct_matches_expected_with_nulls() -> Result<()> {
 /// `COUNT(*)` + two `COUNT(DISTINCT …)` per group (BI-style); must match non-rewritten semantics.
 #[tokio::test]
 async fn multi_count_distinct_with_count_star_matches_expected() -> Result<()> {
-    let ctx = SessionContext::new();
+    let ctx = session_with_multi_distinct_count_rewrite();
     let schema = Arc::new(Schema::new(vec![
         Field::new("g", DataType::Int32, false),
         Field::new("b", DataType::Int32, false),
@@ -96,7 +105,7 @@ async fn multi_count_distinct_with_count_star_matches_expected() -> Result<()> {
 /// Multiple `GROUP BY` keys: join must align on all keys.
 #[tokio::test]
 async fn multi_count_distinct_two_group_keys_matches_expected() -> Result<()> {
-    let ctx = SessionContext::new();
+    let ctx = session_with_multi_distinct_count_rewrite();
     let schema = Arc::new(Schema::new(vec![
         Field::new("g1", DataType::Int32, false),
         Field::new("g2", DataType::Int32, false),
@@ -136,7 +145,7 @@ async fn multi_count_distinct_two_group_keys_matches_expected() -> Result<()> {
 /// Two `COUNT(DISTINCT …)` so the rewrite applies; semantics match plain aggregation.
 #[tokio::test]
 async fn multi_count_distinct_lower_matches_expected_case_collapsing() -> Result<()> {
-    let ctx = SessionContext::new();
+    let ctx = session_with_multi_distinct_count_rewrite();
     let schema = Arc::new(Schema::new(vec![
         Field::new("g", DataType::Int32, false),
         Field::new("b", DataType::Utf8, false),
@@ -173,7 +182,7 @@ async fn multi_count_distinct_lower_matches_expected_case_collapsing() -> Result
 /// Exercises the same “expression in distinct, not raw column” path as `CAST` in the rule.
 #[tokio::test]
 async fn multi_count_distinct_cast_float_to_int_collapses_nearby_values() -> Result<()> {
-    let ctx = SessionContext::new();
+    let ctx = session_with_multi_distinct_count_rewrite();
     let schema = Arc::new(Schema::new(vec![
         Field::new("g", DataType::Int32, false),
         Field::new("x", DataType::Float64, false),
diff --git a/datafusion/optimizer/src/multi_distinct_count_rewrite.rs b/datafusion/optimizer/src/multi_distinct_count_rewrite.rs
@@ -113,6 +113,14 @@ impl OptimizerRule for MultiDistinctCountRewrite {
         plan: LogicalPlan,
         config: &dyn OptimizerConfig,
     ) -> Result<Transformed<LogicalPlan>> {
+        if !config
+            .options()
+            .optimizer
+            .enable_multi_distinct_count_rewrite
+        {
+            return Ok(Transformed::no(plan));
+        }
+
         let LogicalPlan::Aggregate(Aggregate {
             input,
             aggr_expr,
@@ -351,17 +359,27 @@ mod tests {
     use datafusion_expr::{Expr, col};
     use datafusion_functions_aggregate::expr_fn::{count, count_distinct};
 
-    fn optimize_with_rule(
+    fn optimize_with_rule_config(
         plan: LogicalPlan,
         rule: Arc<dyn OptimizerRule + Send + Sync>,
+        enable_multi_distinct_count_rewrite: bool, // forwarded to the optimizer context below
     ) -> Result<LogicalPlan> {
         Optimizer::with_rules(vec![rule]).optimize(
             plan,
-            &OptimizerContext::new(),
+            &OptimizerContext::new().with_enable_multi_distinct_count_rewrite(
+                enable_multi_distinct_count_rewrite,
+            ),
             |_, _| {},
         )
     }
 
+    fn optimize_with_rule(
+        plan: LogicalPlan,
+        rule: Arc<dyn OptimizerRule + Send + Sync>,
+    ) -> Result<LogicalPlan> {
+        optimize_with_rule_config(plan, rule, true) // same as above, with the rewrite enabled
+    }
+
     #[test]
     fn rewrites_two_count_distinct() -> Result<()> {
         let table_scan = test_table_scan()?;
@@ -585,6 +603,25 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn skips_rewrite_when_config_disabled() -> Result<()> {
+        let table_scan = test_table_scan()?;
+        let plan = LogicalPlanBuilder::from(table_scan)
+            .aggregate(
+                vec![col("a")],
+                vec![count_distinct(col("b")), count_distinct(col("c"))],
+            )?
+            .build()?;
+        let before = plan.display_indent_schema().to_string(); // snapshot before optimizing
+        let optimized = optimize_with_rule_config(
+            plan,
+            Arc::new(MultiDistinctCountRewrite::new()),
+            false, // run the rule with the config flag off
+        )?;
+        assert_eq!(before, optimized.display_indent_schema().to_string());
+        Ok(())
+    }
+
     #[test]
     fn does_not_rewrite_mixed_agg() -> Result<()> {
         let table_scan = test_table_scan()?;
diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs
@@ -218,6 +218,14 @@ impl OptimizerContext {
         Arc::make_mut(&mut self.options).optimizer.max_passes = v as usize;
         self
     }
+
+    /// Enable or disable [`crate::multi_distinct_count_rewrite::MultiDistinctCountRewrite`] (default off).
+    pub fn with_enable_multi_distinct_count_rewrite(mut self, enable: bool) -> Self {
+        Arc::make_mut(&mut self.options)
+            .optimizer
+            .enable_multi_distinct_count_rewrite = enable;
+        self
+    }
 }
 
 impl Default for OptimizerContext {
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md
@@ -144,6 +144,7 @@ The following configuration settings are available:
 | datafusion.optimizer.enable_aggregate_dynamic_filter_pushdown           | true                      | When set to true, the optimizer will attempt to push down Aggregate dynamic filters into the file scan phase.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
 | datafusion.optimizer.enable_dynamic_filter_pushdown                     | true                      | When set to true attempts to push down dynamic filters generated by operators (TopK, Join & Aggregate) into the file scan phase. For example, for a query such as `SELECT * FROM t ORDER BY timestamp DESC LIMIT 10`, the optimizer will attempt to push down the current top 10 timestamps that the TopK operator references into the file scans. This means that if we already have 10 timestamps in the year 2025 any files that only have timestamps in the year 2024 can be skipped / pruned at various stages in the scan. The config will suppress `enable_join_dynamic_filter_pushdown`, `enable_topk_dynamic_filter_pushdown` & `enable_aggregate_dynamic_filter_pushdown` So if you disable `enable_topk_dynamic_filter_pushdown`, then enable `enable_dynamic_filter_pushdown`, the `enable_topk_dynamic_filter_pushdown` will be overridden.                                                                                                                                                                                                                                                                                                                                                                                     |
 | datafusion.optimizer.filter_null_join_keys                              | false                     | When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
+| datafusion.optimizer.enable_multi_distinct_count_rewrite              | false                     | When set to true, the optimizer may rewrite a single aggregate with multiple `COUNT(DISTINCT …)` (with `GROUP BY`) into joins of per-distinct sub-aggregates. This can reduce peak memory but adds join work; default off until benchmarks support enabling broadly.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        |
 | datafusion.optimizer.repartition_aggregations                           | true                      | Should DataFusion repartition data using the aggregate keys to execute aggregates in parallel using the provided `target_partitions` level                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
 | datafusion.optimizer.repartition_file_min_size                          | 10485760                  | Minimum total files size in bytes to perform file scan repartitioning.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
 | datafusion.optimizer.repartition_joins                                  | true                      | Should DataFusion repartition data using the join keys to execute joins in parallel using the provided `target_partitions` level                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |

Original file line number	Diff line number	Diff line change
`@@ -218,6 +218,14 @@ impl OptimizerContext {`
`218`	`218`	`Arc::make_mut(&mut self.options).optimizer.max_passes = v as usize;`
`219`	`219`	`self`
`220`	`220`	`}`
	`221`	`+`
	`222`	`` + /// Enable [`crate::multi_distinct_count_rewrite::MultiDistinctCountRewrite`] (default off). ``
	`223`	`+ pub fn with_enable_multi_distinct_count_rewrite(mut self, enable: bool) -> Self {`
	`224`	`+ Arc::make_mut(&mut self.options)`
	`225`	`+ .optimizer`
	`226`	`+ .enable_multi_distinct_count_rewrite = enable;`
	`227`	`+ self`
	`228`	`+ }`
`221`	`229`	`}`
`222`	`230`
`223`	`231`	`impl Default for OptimizerContext {`