@@ -2626,6 +2626,67 @@ mod tests {
26262626 /// an explicit projection must not panic when `try_swapping_with_projection`
26272627 /// attempts to swap the two nodes.
26282628 ///
2629+ /// Verifies that `ExpressionAnalyzerRegistry` computes selectivity for OR predicates
2630+ /// using inclusion-exclusion, which interval arithmetic cannot represent (a union of
2631+ /// two disjoint intervals is not a single interval).
2632+ ///
2633+ /// For `(a = 42 OR b = 5)` with NDV_a=10, NDV_b=5 on 1000 rows:
2634+ /// - Without ExpressionAnalyzer: default 20% selectivity -> 200 rows
2635+ /// - With ExpressionAnalyzer: P(a=42) + P(b=5) - P(a=42)*P(b=5) = 0.1 + 0.2 - 0.02 = 0.28 -> 280 rows
2636+ #[ tokio:: test]
2637+ async fn test_filter_statistics_expression_analyzer_selectivity_or_predicate ( ) -> Result < ( ) > {
2638+ let schema = Schema :: new ( vec ! [
2639+ Field :: new( "a" , DataType :: Int64 , false ) ,
2640+ Field :: new( "b" , DataType :: Int64 , false ) ,
2641+ ] ) ;
2642+ let input = Arc :: new ( StatisticsExec :: new (
2643+ Statistics {
2644+ num_rows : Precision :: Inexact ( 1000 ) ,
2645+ total_byte_size : Precision :: Absent ,
2646+ column_statistics : vec ! [
2647+ ColumnStatistics {
2648+ distinct_count: Precision :: Inexact ( 10 ) ,
2649+ ..Default :: default ( )
2650+ } ,
2651+ ColumnStatistics {
2652+ distinct_count: Precision :: Inexact ( 5 ) ,
2653+ ..Default :: default ( )
2654+ } ,
2655+ ] ,
2656+ } ,
2657+ schema. clone ( ) ,
2658+ ) ) ;
2659+ // (a = 42 OR b = 5): OR is not expressible as a single interval
2660+ let predicate = Arc :: new ( BinaryExpr :: new (
2661+ Arc :: new ( BinaryExpr :: new (
2662+ Arc :: new ( Column :: new ( "a" , 0 ) ) ,
2663+ Operator :: Eq ,
2664+ Arc :: new ( Literal :: new ( ScalarValue :: Int64 ( Some ( 42 ) ) ) ) ,
2665+ ) ) ,
2666+ Operator :: Or ,
2667+ Arc :: new ( BinaryExpr :: new (
2668+ Arc :: new ( Column :: new ( "b" , 1 ) ) ,
2669+ Operator :: Eq ,
2670+ Arc :: new ( Literal :: new ( ScalarValue :: Int64 ( Some ( 5 ) ) ) ) ,
2671+ ) ) ,
2672+ ) ) ;
2673+
2674+ // Without ExpressionAnalyzer: default 20% selectivity -> 200 rows
2675+ let filter = Arc :: new ( FilterExec :: try_new ( predicate. clone ( ) , input as _ ) ?) ;
2676+ let stats = filter. partition_statistics ( None ) ?;
2677+ assert_eq ! ( stats. num_rows, Precision :: Inexact ( 200 ) ) ;
2678+
2679+ // With ExpressionAnalyzer: inclusion-exclusion -> 0.1 + 0.2 - 0.02 = 0.28 -> 280 rows
2680+ let registry = Arc :: new ( ExpressionAnalyzerRegistry :: new ( ) ) ;
2681+ let filter_with_registry = filter
2682+ . with_expression_analyzer_registry ( & registry)
2683+ . expect ( "registry should be injectable when not already set" ) ;
2684+ let stats_with_registry = filter_with_registry. partition_statistics ( None ) ?;
2685+ assert_eq ! ( stats_with_registry. num_rows, Precision :: Inexact ( 280 ) ) ;
2686+
2687+ Ok ( ( ) )
2688+ }
2689+
26292690 /// Before the fix, `FilterExecBuilder::from(self)` copied the old projection
26302691 /// (e.g. `[0, 1, 2]`) from the FilterExec. After `.with_input` replaced the
26312692 /// input with the narrower ProjectionExec (2 columns), `.build()` tried to
0 commit comments