@@ -23,34 +23,37 @@ use datafusion_common::{ColumnStatistics, ScalarValue, Statistics};
2323use datafusion_expr:: Operator ;
2424use std:: sync:: Arc ;
2525
26- fn make_stats_with_ndv ( num_rows : usize , ndv : usize ) -> Statistics {
26+ fn make_stats_with_ndvs ( num_rows : usize , ndvs : & [ usize ] ) -> Statistics {
2727 Statistics {
2828 num_rows : Precision :: Exact ( num_rows) ,
2929 total_byte_size : Precision :: Absent ,
30- column_statistics : vec ! [ ColumnStatistics {
31- null_count: Precision :: Exact ( 0 ) ,
32- max_value: Precision :: Absent ,
33- min_value: Precision :: Absent ,
34- sum_value: Precision :: Absent ,
35- distinct_count: Precision :: Exact ( ndv) ,
36- byte_size: Precision :: Absent ,
37- } ] ,
30+ column_statistics : ndvs
31+ . iter ( )
32+ . map ( |& ndv| ColumnStatistics {
33+ null_count : Precision :: Exact ( 0 ) ,
34+ max_value : Precision :: Absent ,
35+ min_value : Precision :: Absent ,
36+ sum_value : Precision :: Absent ,
37+ distinct_count : Precision :: Exact ( ndv) ,
38+ byte_size : Precision :: Absent ,
39+ } )
40+ . collect ( ) ,
3841 }
3942}
4043
4144// NDV tests
4245
4346#[ test]
4447fn test_column_ndv ( ) {
45- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
48+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
4649 let col = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
4750 let registry = ExpressionAnalyzerRegistry :: new ( ) ;
4851 assert_eq ! ( registry. get_distinct_count( & col, & stats) , Some ( 100 ) ) ;
4952}
5053
5154#[ test]
5255fn test_literal_ndv ( ) {
53- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
56+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
5457 let lit =
5558 Arc :: new ( Literal :: new ( ScalarValue :: Int32 ( Some ( 42 ) ) ) ) as Arc < dyn PhysicalExpr > ;
5659 let registry = ExpressionAnalyzerRegistry :: new ( ) ;
@@ -59,7 +62,7 @@ fn test_literal_ndv() {
5962
6063#[ test]
6164fn test_arithmetic_ndv ( ) {
62- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
65+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
6366 let col = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
6467 let lit =
6568 Arc :: new ( Literal :: new ( ScalarValue :: Int64 ( Some ( 1 ) ) ) ) as Arc < dyn PhysicalExpr > ;
@@ -91,7 +94,7 @@ fn test_arithmetic_ndv() {
9194
9295#[ test]
9396fn test_equality_selectivity ( ) {
94- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
97+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
9598 let col = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
9699 let lit =
97100 Arc :: new ( Literal :: new ( ScalarValue :: Int32 ( Some ( 42 ) ) ) ) as Arc < dyn PhysicalExpr > ;
@@ -104,7 +107,7 @@ fn test_equality_selectivity() {
104107
105108#[ test]
106109fn test_equality_selectivity_column_on_right ( ) {
107- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
110+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
108111 let col = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
109112 let lit =
110113 Arc :: new ( Literal :: new ( ScalarValue :: Int32 ( Some ( 42 ) ) ) ) as Arc < dyn PhysicalExpr > ;
@@ -117,7 +120,7 @@ fn test_equality_selectivity_column_on_right() {
117120
118121#[ test]
119122fn test_and_selectivity ( ) {
120- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
123+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
121124 let col = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
122125 let lit1 =
123126 Arc :: new ( Literal :: new ( ScalarValue :: Int32 ( Some ( 42 ) ) ) ) as Arc < dyn PhysicalExpr > ;
@@ -137,7 +140,7 @@ fn test_and_selectivity() {
137140
138141#[ test]
139142fn test_or_selectivity ( ) {
140- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
143+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
141144 let col = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
142145 let lit1 =
143146 Arc :: new ( Literal :: new ( ScalarValue :: Int32 ( Some ( 42 ) ) ) ) as Arc < dyn PhysicalExpr > ;
@@ -157,7 +160,7 @@ fn test_or_selectivity() {
157160
158161#[ test]
159162fn test_not_selectivity ( ) {
160- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
163+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
161164 let col = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
162165 let lit =
163166 Arc :: new ( Literal :: new ( ScalarValue :: Int32 ( Some ( 42 ) ) ) ) as Arc < dyn PhysicalExpr > ;
@@ -172,7 +175,7 @@ fn test_not_selectivity() {
172175
173176#[ test]
174177fn test_equality_selectivity_expression_eq_literal ( ) {
175- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
178+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
176179 let col = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
177180 let one =
178181 Arc :: new ( Literal :: new ( ScalarValue :: Int32 ( Some ( 1 ) ) ) ) as Arc < dyn PhysicalExpr > ;
@@ -191,7 +194,7 @@ fn test_equality_selectivity_expression_eq_literal() {
191194
192195#[ test]
193196fn test_inequality_selectivity_expression_neq_literal ( ) {
194- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
197+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
195198 let col = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
196199 let one =
197200 Arc :: new ( Literal :: new ( ScalarValue :: Int32 ( Some ( 1 ) ) ) ) as Arc < dyn PhysicalExpr > ;
@@ -208,6 +211,55 @@ fn test_inequality_selectivity_expression_neq_literal() {
208211 assert ! ( ( sel - 0.99 ) . abs( ) < 0.001 ) ;
209212}
210213
214+ // Tests for resolve_ndv symmetry (column-vs-column and column-vs-expression)
215+
216+ #[ test]
217+ fn test_equality_selectivity_column_eq_column_symmetric ( ) {
218+ // a = b and b = a must give the same selectivity regardless of operand order
219+ let stats = make_stats_with_ndvs ( 1000 , & [ 50 , 200 ] ) ;
220+ let col_a = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
221+ let col_b = Arc :: new ( Column :: new ( "b" , 1 ) ) as Arc < dyn PhysicalExpr > ;
222+
223+ let registry = ExpressionAnalyzerRegistry :: new ( ) ;
224+
225+ let eq_ab = Arc :: new ( BinaryExpr :: new (
226+ Arc :: clone ( & col_a) ,
227+ Operator :: Eq ,
228+ Arc :: clone ( & col_b) ,
229+ ) ) as Arc < dyn PhysicalExpr > ;
230+ let eq_ba = Arc :: new ( BinaryExpr :: new (
231+ Arc :: clone ( & col_b) ,
232+ Operator :: Eq ,
233+ Arc :: clone ( & col_a) ,
234+ ) ) as Arc < dyn PhysicalExpr > ;
235+
236+ let sel_ab = registry. get_selectivity ( & eq_ab, & stats) . unwrap ( ) ;
237+ let sel_ba = registry. get_selectivity ( & eq_ba, & stats) . unwrap ( ) ;
238+
239+ assert_eq ! ( sel_ab, sel_ba) ;
240+ }
241+
242+ #[ test]
243+ fn test_equality_selectivity_column_eq_expression_uses_max_ndv ( ) {
244+ // col = (expr) should use max(ndv(col), ndv(expr)), not just ndv(col)
245+ // Here col has ndv=10 and expr=(b+1) has ndv=200 from column b
246+ let stats = make_stats_with_ndvs ( 1000 , & [ 10 , 200 ] ) ;
247+ let col_a = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
248+ let col_b = Arc :: new ( Column :: new ( "b" , 1 ) ) as Arc < dyn PhysicalExpr > ;
249+ let one =
250+ Arc :: new ( Literal :: new ( ScalarValue :: Int64 ( Some ( 1 ) ) ) ) as Arc < dyn PhysicalExpr > ;
251+ let b_plus_1 =
252+ Arc :: new ( BinaryExpr :: new ( col_b, Operator :: Plus , one) ) as Arc < dyn PhysicalExpr > ;
253+
254+ let eq =
255+ Arc :: new ( BinaryExpr :: new ( col_a, Operator :: Eq , b_plus_1) ) as Arc < dyn PhysicalExpr > ;
256+
257+ let registry = ExpressionAnalyzerRegistry :: new ( ) ;
258+ let sel = registry. get_selectivity ( & eq, & stats) . unwrap ( ) ;
259+ // max(ndv(a)=10, ndv(b+1)=200) = 200, so selectivity = 1/200
260+ assert_eq ! ( sel, 1.0 / 200.0 ) ;
261+ }
262+
211263// Min/max tests
212264
213265#[ test]
@@ -235,7 +287,7 @@ fn test_column_min_max() {
235287
236288#[ test]
237289fn test_literal_min_max ( ) {
238- let stats = make_stats_with_ndv ( 100 , 10 ) ;
290+ let stats = make_stats_with_ndvs ( 100 , & [ 10 ] ) ;
239291 let lit =
240292 Arc :: new ( Literal :: new ( ScalarValue :: Int32 ( Some ( 42 ) ) ) ) as Arc < dyn PhysicalExpr > ;
241293 let registry = ExpressionAnalyzerRegistry :: new ( ) ;
@@ -271,7 +323,7 @@ fn test_column_null_fraction() {
271323
272324#[ test]
273325fn test_literal_null_fraction ( ) {
274- let stats = make_stats_with_ndv ( 100 , 10 ) ;
326+ let stats = make_stats_with_ndvs ( 100 , & [ 10 ] ) ;
275327 let registry = ExpressionAnalyzerRegistry :: new ( ) ;
276328
277329 let lit =
@@ -301,7 +353,7 @@ impl ExpressionAnalyzer for FixedSelectivityAnalyzer {
301353
302354#[ test]
303355fn test_custom_analyzer_overrides_default ( ) {
304- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
356+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
305357 let col = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
306358 let lit =
307359 Arc :: new ( Literal :: new ( ScalarValue :: Int32 ( Some ( 42 ) ) ) ) as Arc < dyn PhysicalExpr > ;
@@ -336,7 +388,7 @@ impl ExpressionAnalyzer for ColumnAOnlyAnalyzer {
336388
337389#[ test]
338390fn test_custom_analyzer_delegates_to_default ( ) {
339- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
391+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
340392 let col_a = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
341393 let col_b = Arc :: new ( Column :: new ( "b" , 0 ) ) as Arc < dyn PhysicalExpr > ;
342394 let lit =
@@ -359,7 +411,7 @@ fn test_custom_analyzer_delegates_to_default() {
359411
360412#[ test]
361413fn test_with_analyzers_and_default ( ) {
362- let stats = make_stats_with_ndv ( 1000 , 100 ) ;
414+ let stats = make_stats_with_ndvs ( 1000 , & [ 100 ] ) ;
363415 let col = Arc :: new ( Column :: new ( "a" , 0 ) ) as Arc < dyn PhysicalExpr > ;
364416
365417 let registry =
0 commit comments