|
18 | 18 | use std::sync::Arc; |
19 | 19 |
|
20 | 20 | use arrow::array::ArrayRef; |
21 | | -use arrow::datatypes::DataType; |
22 | | -use datafusion_common::Result; |
| 21 | +use arrow::datatypes::*; |
| 22 | +use datafusion_common::{Result, exec_datafusion_err}; |
23 | 23 |
|
24 | 24 | use super::array_static_filter::ArrayStaticFilter; |
25 | 25 | use super::primitive_filter::*; |
26 | 26 | use super::static_filter::StaticFilter; |
27 | | -use super::transform::make_bitmap_filter; |
| 27 | +use super::transform::{make_bitmap_filter, make_branchless_filter}; |
28 | 28 |
|
| 29 | +// ============================================================================= |
| 30 | +// LOOKUP STRATEGY THRESHOLDS (tuned via microbenchmarks) |
| 31 | +// ============================================================================= |
| 32 | + |
/// Largest list length for which 4-byte primitives (Int32, UInt32, Float32)
/// use the branchless lookup.
const BRANCHLESS_MAX_4B: usize = 32;

/// Largest list length for which 8-byte primitives (Int64, UInt64, Float64)
/// use the branchless lookup.
const BRANCHLESS_MAX_8B: usize = 16;

/// Largest list length for which 16-byte types (Decimal128) use the
/// branchless lookup.
const BRANCHLESS_MAX_16B: usize = 4;
| 41 | + |
| 42 | +// ============================================================================= |
| 43 | +// FILTER STRATEGY SELECTION |
| 44 | +// ============================================================================= |
| 45 | + |
/// Lookup strategy chosen for a given data type and list size.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FilterStrategy {
    /// Bitmap filter (O(1) bit test) for 1-byte values such as u8.
    Bitmap1B,
    /// Bitmap filter (O(1) bit test) for 2-byte values such as u16.
    Bitmap2B,
    /// Branchless OR-chain, used for small lists.
    Branchless,
    /// Fallback to the generic (non-bitmap, non-branchless) filters.
    Generic,
}
| 57 | + |
| 58 | +/// Determines the optimal lookup strategy based on data type and list size. |
| 59 | +fn select_strategy(dt: &DataType, len: usize) -> FilterStrategy { |
| 60 | + match dt.primitive_width() { |
| 61 | + Some(1) => FilterStrategy::Bitmap1B, |
| 62 | + Some(2) => FilterStrategy::Bitmap2B, |
| 63 | + Some(4) => { |
| 64 | + if len <= BRANCHLESS_MAX_4B { |
| 65 | + FilterStrategy::Branchless |
| 66 | + } else { |
| 67 | + FilterStrategy::Generic |
| 68 | + } |
| 69 | + } |
| 70 | + Some(8) => { |
| 71 | + if len <= BRANCHLESS_MAX_8B { |
| 72 | + FilterStrategy::Branchless |
| 73 | + } else { |
| 74 | + FilterStrategy::Generic |
| 75 | + } |
| 76 | + } |
| 77 | + Some(16) => { |
| 78 | + if len <= BRANCHLESS_MAX_16B { |
| 79 | + FilterStrategy::Branchless |
| 80 | + } else { |
| 81 | + FilterStrategy::Generic |
| 82 | + } |
| 83 | + } |
| 84 | + _ => FilterStrategy::Generic, |
| 85 | + } |
| 86 | +} |
| 87 | + |
| 88 | +// ============================================================================= |
| 89 | +// FILTER INSTANTIATION |
| 90 | +// ============================================================================= |
| 91 | + |
| 92 | +/// Creates the optimal static filter for the given array. |
29 | 93 | pub(super) fn instantiate_static_filter( |
30 | 94 | in_array: ArrayRef, |
31 | 95 | ) -> Result<Arc<dyn StaticFilter + Send + Sync>> { |
32 | | - match in_array.data_type() { |
33 | | - DataType::Int8 | DataType::UInt8 => make_bitmap_filter::<U8Config>(&in_array), |
34 | | - DataType::Int16 | DataType::UInt16 => make_bitmap_filter::<U16Config>(&in_array), |
35 | | - DataType::Int32 => Ok(Arc::new(Int32StaticFilter::try_new(&in_array)?)), |
36 | | - DataType::Int64 => Ok(Arc::new(Int64StaticFilter::try_new(&in_array)?)), |
37 | | - DataType::UInt32 => Ok(Arc::new(UInt32StaticFilter::try_new(&in_array)?)), |
38 | | - DataType::UInt64 => Ok(Arc::new(UInt64StaticFilter::try_new(&in_array)?)), |
39 | | - // Float primitive types (use ordered wrappers for Hash/Eq) |
40 | | - DataType::Float32 => Ok(Arc::new(Float32StaticFilter::try_new(&in_array)?)), |
41 | | - DataType::Float64 => Ok(Arc::new(Float64StaticFilter::try_new(&in_array)?)), |
42 | | - _ => { |
43 | | - /* fall through to generic implementation for unsupported types (Struct, etc.) */ |
44 | | - Ok(Arc::new(ArrayStaticFilter::try_new(in_array)?)) |
45 | | - } |
| 96 | + use FilterStrategy::*; |
| 97 | + |
| 98 | + let len = in_array.len(); |
| 99 | + let dt = in_array.data_type(); |
| 100 | + let strategy = select_strategy(dt, len); |
| 101 | + |
| 102 | + match (dt, strategy) { |
| 103 | + // Bitmap filters for 1-byte and 2-byte types |
| 104 | + (_, Bitmap1B) => make_bitmap_filter::<U8Config>(&in_array), |
| 105 | + (_, Bitmap2B) => make_bitmap_filter::<U16Config>(&in_array), |
| 106 | + |
| 107 | + // Branchless filters for small lists of primitives |
| 108 | + (_, Branchless) => dispatch_branchless(&in_array).ok_or_else(|| { |
| 109 | + exec_datafusion_err!( |
| 110 | + "Branchless strategy selected but no filter for {:?}", |
| 111 | + dt |
| 112 | + ) |
| 113 | + })?, |
| 114 | + |
| 115 | + // Fallback for larger primitive lists or complex types. |
| 116 | + (_, Generic) => match dt { |
| 117 | + DataType::Int32 => Ok(Arc::new(Int32StaticFilter::try_new(&in_array)?)), |
| 118 | + DataType::Int64 => Ok(Arc::new(Int64StaticFilter::try_new(&in_array)?)), |
| 119 | + DataType::UInt32 => Ok(Arc::new(UInt32StaticFilter::try_new(&in_array)?)), |
| 120 | + DataType::UInt64 => Ok(Arc::new(UInt64StaticFilter::try_new(&in_array)?)), |
| 121 | + DataType::Float32 => Ok(Arc::new(Float32StaticFilter::try_new(&in_array)?)), |
| 122 | + DataType::Float64 => Ok(Arc::new(Float64StaticFilter::try_new(&in_array)?)), |
| 123 | + _ => Ok(Arc::new(ArrayStaticFilter::try_new(in_array)?)), |
| 124 | + }, |
| 125 | + } |
| 126 | +} |
| 127 | + |
| 128 | +// ============================================================================= |
| 129 | +// TYPE DISPATCH |
| 130 | +// ============================================================================= |
| 131 | + |
| 132 | +fn dispatch_branchless( |
| 133 | + arr: &ArrayRef, |
| 134 | +) -> Option<Result<Arc<dyn StaticFilter + Send + Sync>>> { |
| 135 | + // Dispatch to width-specific branchless filter. |
| 136 | + match arr.data_type().primitive_width() { |
| 137 | + Some(4) => Some(make_branchless_filter::<UInt32Type>(arr, 4)), |
| 138 | + Some(8) => Some(make_branchless_filter::<UInt64Type>(arr, 8)), |
| 139 | + Some(16) => Some(make_branchless_filter::<Decimal128Type>(arr, 16)), |
| 140 | + _ => None, |
46 | 141 | } |
47 | 142 | } |
0 commit comments