@@ -52,6 +52,27 @@ impl DefaultExpressionAnalyzer {
5252 . and_then ( |idx| input_stats. column_statistics . get ( idx) )
5353 }
5454
55+ /// Resolve NDV for a binary expression: try direct column stats first,
56+ /// then fall back to the registry for arbitrary expressions
57+ fn resolve_ndv (
58+ left : & Arc < dyn PhysicalExpr > ,
59+ right : & Arc < dyn PhysicalExpr > ,
60+ input_stats : & Statistics ,
61+ registry : & ExpressionAnalyzerRegistry ,
62+ ) -> Option < usize > {
63+ Self :: get_column_stats ( left, input_stats)
64+ . or_else ( || Self :: get_column_stats ( right, input_stats) )
65+ . and_then ( |s| s. distinct_count . get_value ( ) )
66+ . filter ( |& & ndv| ndv > 0 )
67+ . copied ( )
68+ . or_else ( || {
69+ let l = registry. get_distinct_count ( left, input_stats) ;
70+ let r = registry. get_distinct_count ( right, input_stats) ;
71+ l. max ( r)
72+ } )
73+ . filter ( |& n| n > 0 )
74+ }
75+
5576 /// Recursive selectivity estimation through the registry chain
5677 fn estimate_selectivity_recursive (
5778 & self ,
@@ -103,24 +124,26 @@ impl ExpressionAnalyzer for DefaultExpressionAnalyzer {
103124
104125 // Equality: selectivity = 1/NDV
105126 Operator :: Eq => {
106- let ndv = Self :: get_column_stats ( binary. left ( ) , input_stats)
107- . or_else ( || Self :: get_column_stats ( binary. right ( ) , input_stats) )
108- . and_then ( |s| s. distinct_count . get_value ( ) )
109- . filter ( |& & ndv| ndv > 0 ) ;
110- if let Some ( ndv) = ndv {
111- return AnalysisResult :: Computed ( 1.0 / ( * ndv as f64 ) ) ;
127+ if let Some ( ndv) = Self :: resolve_ndv (
128+ binary. left ( ) ,
129+ binary. right ( ) ,
130+ input_stats,
131+ registry,
132+ ) {
133+ return AnalysisResult :: Computed ( 1.0 / ( ndv as f64 ) ) ;
112134 }
113135 0.1 // Default equality selectivity
114136 }
115137
116138 // Inequality: selectivity = 1 - 1/NDV
117139 Operator :: NotEq => {
118- let ndv = Self :: get_column_stats ( binary. left ( ) , input_stats)
119- . or_else ( || Self :: get_column_stats ( binary. right ( ) , input_stats) )
120- . and_then ( |s| s. distinct_count . get_value ( ) )
121- . filter ( |& & ndv| ndv > 0 ) ;
122- if let Some ( ndv) = ndv {
123- return AnalysisResult :: Computed ( 1.0 - ( 1.0 / ( * ndv as f64 ) ) ) ;
140+ if let Some ( ndv) = Self :: resolve_ndv (
141+ binary. left ( ) ,
142+ binary. right ( ) ,
143+ input_stats,
144+ registry,
145+ ) {
146+ return AnalysisResult :: Computed ( 1.0 - ( 1.0 / ( ndv as f64 ) ) ) ;
124147 }
125148 0.9
126149 }
0 commit comments