@@ -119,43 +119,116 @@ fn test_equality_selectivity_column_on_right() {
119119}
120120
#[test]
fn test_equality_selectivity_no_ndv_delegates() {
    // Guard for `column = literal` when the column has no distinct_count:
    // resolve_ndv must return None so the predicate delegates. Falling back to
    // the literal's NDV (1) as the denominator would instead produce
    // selectivity = 1.0.
    let stats_without_ndv = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Absent,
        column_statistics: vec![ColumnStatistics::default()],
    };

    // a = 42
    let column: Arc<dyn PhysicalExpr> = Arc::new(Column::new("a", 0));
    let literal: Arc<dyn PhysicalExpr> = Arc::new(Literal::new(ScalarValue::Int32(Some(42))));
    let predicate: Arc<dyn PhysicalExpr> =
        Arc::new(BinaryExpr::new(column, Operator::Eq, literal));

    let registry = ExpressionAnalyzerRegistry::new();
    let estimate = registry.get_selectivity(&predicate, &stats_without_ndv);
    assert_eq!(estimate, None);
}
139+
#[test]
fn test_and_selectivity() {
    // Local builders so each predicate below reads like the SQL it models.
    let column = |name: &str, index: usize| -> Arc<dyn PhysicalExpr> {
        Arc::new(Column::new(name, index))
    };
    let int32 = |value: i32| -> Arc<dyn PhysicalExpr> {
        Arc::new(Literal::new(ScalarValue::Int32(Some(value))))
    };
    let binary = |lhs: Arc<dyn PhysicalExpr>,
                  op: Operator,
                  rhs: Arc<dyn PhysicalExpr>|
     -> Arc<dyn PhysicalExpr> { Arc::new(BinaryExpr::new(lhs, op, rhs)) };

    let registry = ExpressionAnalyzerRegistry::new();

    // Case 1: both children have NDV, so AND can be computed.
    //   a = 42 AND b = 10: 1/100 * 1/50 = 0.0002
    let stats_with_ndv = make_stats_with_ndvs(1000, &[100, 50]);
    let known_conjunction = binary(
        binary(column("a", 0), Operator::Eq, int32(42)),
        Operator::And,
        binary(column("b", 1), Operator::Eq, int32(10)),
    );
    let estimate = registry
        .get_selectivity(&known_conjunction, &stats_with_ndv)
        .unwrap();
    assert!((estimate - 0.0002).abs() < 1e-6); // 0.01 * 0.02

    // Case 2: when a child has no NDV (column stats absent), its selectivity
    // is unknown, so AND cannot produce an estimate and must delegate.
    let stats_without_ndv = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Absent,
        column_statistics: vec![ColumnStatistics::default()],
    };
    let unknown_conjunction = binary(
        // c = 1: column c has no distinct_count, so resolve_ndv returns None -> Delegate
        binary(column("c", 0), Operator::Eq, int32(1)),
        Operator::And,
        // c > 5: no range selectivity without statistics -> Delegate
        binary(column("c", 0), Operator::Gt, int32(5)),
    );
    assert_eq!(
        registry.get_selectivity(&unknown_conjunction, &stats_without_ndv),
        None
    );
}
140186
#[test]
fn test_or_selectivity() {
    // Local builders so each predicate below reads like the SQL it models.
    let column = |name: &str, index: usize| -> Arc<dyn PhysicalExpr> {
        Arc::new(Column::new(name, index))
    };
    let int32 = |value: i32| -> Arc<dyn PhysicalExpr> {
        Arc::new(Literal::new(ScalarValue::Int32(Some(value))))
    };
    let binary = |lhs: Arc<dyn PhysicalExpr>,
                  op: Operator,
                  rhs: Arc<dyn PhysicalExpr>|
     -> Arc<dyn PhysicalExpr> { Arc::new(BinaryExpr::new(lhs, op, rhs)) };

    let registry = ExpressionAnalyzerRegistry::new();

    // Case 1: both children have NDV, so OR can use inclusion-exclusion.
    //   a = 42 OR b = 10: 0.01 + 0.02 - 0.0002 = 0.0298
    let stats_with_ndv = make_stats_with_ndvs(1000, &[100, 50]);
    let known_disjunction = binary(
        binary(column("a", 0), Operator::Eq, int32(42)),
        Operator::Or,
        binary(column("b", 1), Operator::Eq, int32(10)),
    );
    let estimate = registry
        .get_selectivity(&known_disjunction, &stats_with_ndv)
        .unwrap();
    assert!((estimate - 0.0298).abs() < 1e-6); // 0.01 + 0.02 - 0.01*0.02

    // Case 2: when a child has no NDV (column stats absent), its selectivity
    // is unknown, so OR cannot produce an estimate and must delegate.
    let stats_without_ndv = Statistics {
        num_rows: Precision::Exact(1000),
        total_byte_size: Precision::Absent,
        column_statistics: vec![ColumnStatistics::default()],
    };
    let unknown_disjunction = binary(
        // c = 1: column c has no distinct_count, so resolve_ndv returns None -> Delegate
        binary(column("c", 0), Operator::Eq, int32(1)),
        Operator::Or,
        // c > 5: no range selectivity without statistics -> Delegate
        binary(column("c", 0), Operator::Gt, int32(5)),
    );
    assert_eq!(
        registry.get_selectivity(&unknown_disjunction, &stats_without_ndv),
        None
    );
}
160233
161234#[ test]
0 commit comments