1515// specific language governing permissions and limitations
1616// under the License.
1717
//! Optimized primitive type filters for InList expressions
//!
//! This module provides high-performance membership testing for Arrow primitive types.
1921
2022use arrow:: array:: {
2123 Array , ArrayRef , AsArray , BooleanArray , downcast_array, downcast_dictionary_array,
@@ -26,8 +28,114 @@ use arrow::datatypes::*;
2628use datafusion_common:: { HashSet , Result , exec_datafusion_err} ;
2729use std:: hash:: { Hash , Hasher } ;
2830
31+ use super :: result:: { build_in_list_result, handle_dictionary} ;
2932use super :: static_filter:: StaticFilter ;
3033
// =============================================================================
// BITMAP FILTERS (O(1) lookup for u8/u16 via bit test)
// =============================================================================
37+
/// Trait for bitmap storage (stack-allocated for u8, heap-allocated for u16).
///
/// A storage value is a fixed-size set of bits addressed by a `usize` index.
/// The valid index range is defined by each implementation.
pub(crate) trait BitmapStorage: Send + Sync {
    /// Creates a storage value with every bit cleared.
    fn new_zeroed() -> Self;

    /// Sets the bit at `index`.
    fn set_bit(&mut self, index: usize);

    /// Returns `true` if the bit at `index` is set.
    fn get_bit(&self, index: usize) -> bool;
}
44+
45+ impl BitmapStorage for [ u64 ; 4 ] {
46+ #[ inline]
47+ fn new_zeroed ( ) -> Self {
48+ [ 0u64 ; 4 ]
49+ }
50+ #[ inline]
51+ fn set_bit ( & mut self , index : usize ) {
52+ self [ index / 64 ] |= 1u64 << ( index % 64 ) ;
53+ }
54+ #[ inline( always) ]
55+ fn get_bit ( & self , index : usize ) -> bool {
56+ ( self [ index / 64 ] >> ( index % 64 ) ) & 1 != 0
57+ }
58+ }
59+
60+ /// Configuration trait for bitmap filters.
61+ pub ( crate ) trait BitmapFilterConfig : Send + Sync + ' static {
62+ type Native : ArrowNativeType + Copy + Send + Sync ;
63+ type ArrowType : ArrowPrimitiveType < Native = Self :: Native > ;
64+ type Storage : BitmapStorage ;
65+
66+ fn to_index ( v : Self :: Native ) -> usize ;
67+ }
68+
69+ /// Config for u8 bitmap (256 bits = 32 bytes, fits in cache line).
70+ pub ( crate ) enum U8Config { }
71+ impl BitmapFilterConfig for U8Config {
72+ type Native = u8 ;
73+ type ArrowType = UInt8Type ;
74+ type Storage = [ u64 ; 4 ] ;
75+
76+ #[ inline( always) ]
77+ fn to_index ( v : u8 ) -> usize {
78+ v as usize
79+ }
80+ }
81+
82+ /// Bitmap filter for O(1) set membership via single bit test.
83+ ///
84+ /// For small integer types (u8/u16), bitmap lookup outperforms both branchless
85+ /// and hashed approaches at all list sizes.
86+ pub ( crate ) struct BitmapFilter < C : BitmapFilterConfig > {
87+ null_count : usize ,
88+ bits : C :: Storage ,
89+ }
90+
91+ impl < C : BitmapFilterConfig > BitmapFilter < C > {
92+ pub ( crate ) fn try_new ( in_array : & ArrayRef ) -> Result < Self > {
93+ let prim_array =
94+ in_array. as_primitive_opt :: < C :: ArrowType > ( ) . ok_or_else ( || {
95+ exec_datafusion_err ! ( "BitmapFilter: expected primitive array" )
96+ } ) ?;
97+ let mut bits = C :: Storage :: new_zeroed ( ) ;
98+ for v in prim_array. iter ( ) . flatten ( ) {
99+ bits. set_bit ( C :: to_index ( v) ) ;
100+ }
101+ Ok ( Self {
102+ null_count : prim_array. null_count ( ) ,
103+ bits,
104+ } )
105+ }
106+
107+ #[ inline( always) ]
108+ fn check ( & self , needle : C :: Native ) -> bool {
109+ self . bits . get_bit ( C :: to_index ( needle) )
110+ }
111+ }
112+
113+ impl < C : BitmapFilterConfig > StaticFilter for BitmapFilter < C > {
114+ fn null_count ( & self ) -> usize {
115+ self . null_count
116+ }
117+
118+ fn contains ( & self , v : & dyn Array , negated : bool ) -> Result < BooleanArray > {
119+ handle_dictionary ! ( self , v, negated) ;
120+ let v = v. as_primitive_opt :: < C :: ArrowType > ( ) . ok_or_else ( || {
121+ exec_datafusion_err ! ( "BitmapFilter: expected primitive array" )
122+ } ) ?;
123+ let input_values = v. values ( ) ;
124+ Ok ( build_in_list_result (
125+ v. len ( ) ,
126+ v. nulls ( ) ,
127+ self . null_count > 0 ,
128+ negated,
129+ #[ inline( always) ]
130+ |i| self . check ( unsafe { * input_values. get_unchecked ( i) } ) ,
131+ ) )
132+ }
133+ }
134+
// =============================================================================
// LEGACY FILTERS (to be replaced by optimized ones in subsequent commits)
// =============================================================================
138+
/// Wrapper for f32 that implements Hash and Eq using bit comparison.
#[derive(Clone, Copy)]
pub(crate) struct OrderedFloat32(pub(crate) f32);
@@ -177,7 +285,6 @@ primitive_static_filter!(Int8StaticFilter, Int8Type);
177285primitive_static_filter ! ( Int16StaticFilter , Int16Type ) ;
178286primitive_static_filter ! ( Int32StaticFilter , Int32Type ) ;
179287primitive_static_filter ! ( Int64StaticFilter , Int64Type ) ;
180- primitive_static_filter ! ( UInt8StaticFilter , UInt8Type ) ;
181288primitive_static_filter ! ( UInt16StaticFilter , UInt16Type ) ;
182289primitive_static_filter ! ( UInt32StaticFilter , UInt32Type ) ;
183290primitive_static_filter ! ( UInt64StaticFilter , UInt64Type ) ;
0 commit comments