Skip to content

Commit 18f47cf

Browse files
Implement Zero-Copy Reinterpretation and enable Int8/Int16 Bitmaps
Introduces zero-copy buffer reinterpretation to allow signed integers and other 1 or 2-byte primitive types (e.g. Float16) to use the high-performance bitmap filters. Triggers for all types with 1-byte or 2-byte width.
1 parent 46dc242 commit 18f47cf

5 files changed

Lines changed: 215 additions & 8 deletions

File tree

datafusion/physical-expr/src/expressions/in_list.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ mod primitive_filter;
4141
mod result;
4242
mod static_filter;
4343
mod strategy;
44+
mod transform;
4445

4546
use static_filter::StaticFilter;
4647
use strategy::instantiate_static_filter;

datafusion/physical-expr/src/expressions/in_list/primitive_filter.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,19 @@ impl<C: BitmapFilterConfig> BitmapFilter<C> {
136136
fn check(&self, needle: C::Native) -> bool {
137137
self.bits.get_bit(C::to_index(needle))
138138
}
139+
140+
/// Check membership using a raw values slice (zero-copy path for type reinterpretation).
141+
#[inline]
142+
pub(crate) fn contains_slice(
143+
&self,
144+
values: &[C::Native],
145+
nulls: Option<&NullBuffer>,
146+
negated: bool,
147+
) -> BooleanArray {
148+
build_in_list_result(values.len(), nulls, self.null_count > 0, negated, |i| {
149+
self.check(unsafe { *values.get_unchecked(i) })
150+
})
151+
}
139152
}
140153

141154
impl<C: BitmapFilterConfig> StaticFilter for BitmapFilter<C> {
@@ -340,9 +353,6 @@ macro_rules! primitive_static_filter {
340353
};
341354
}
342355

343-
// Generate specialized filters for all integer primitive types
344-
primitive_static_filter!(Int8StaticFilter, Int8Type);
345-
primitive_static_filter!(Int16StaticFilter, Int16Type);
346356
primitive_static_filter!(Int32StaticFilter, Int32Type);
347357
primitive_static_filter!(Int64StaticFilter, Int64Type);
348358
primitive_static_filter!(UInt32StaticFilter, UInt32Type);

datafusion/physical-expr/src/expressions/in_list/result.rs

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
//! from IN list membership tests, handling null propagation correctly
2222
//! according to SQL three-valued logic.
2323
24+
#![expect(dead_code)]
25+
2426
use arrow::array::BooleanArray;
2527
use arrow::buffer::{BooleanBuffer, NullBuffer};
2628

@@ -48,6 +50,9 @@ use arrow::buffer::{BooleanBuffer, NullBuffer};
4850
/// This version computes contains for ALL positions (including nulls), then applies
4951
/// null masking via bitmap operations. This is optimal for cheap contains checks
5052
/// (like DirectProbeFilter) where the branch overhead exceeds the check cost.
53+
///
54+
/// For expensive contains checks (like ByteViewMaskedFilter with string comparison),
55+
/// use `build_in_list_result_with_null_shortcircuit` instead.
5156
#[inline]
5257
pub(crate) fn build_in_list_result<C>(
5358
len: usize,
@@ -66,6 +71,106 @@ where
6671
build_result_from_contains(needle_nulls, haystack_has_nulls, negated, contains_buf)
6772
}
6873

74+
/// Builds a BooleanArray result with null short-circuit (optimized for expensive contains).
75+
///
76+
/// Unlike `build_in_list_result`, this version checks nulls INSIDE the loop and
77+
/// skips the contains check for null positions. This is optimal for expensive
78+
/// contains checks (like ByteViewMaskedFilter with hash lookup + string comparison) where
79+
/// skipping lookups outweighs the branch overhead.
80+
///
81+
/// The shortcircuit is only applied when `needle_null_count > 0` - if there are
82+
/// no actual nulls, we avoid the branch overhead entirely.
83+
///
84+
/// Use this for: ByteViewMaskedFilter, Utf8TwoStageFilter (string/binary types)
85+
/// Use `build_in_list_result` for: DirectProbeFilter, BranchlessFilter (primitive types)
86+
#[inline]
87+
pub(crate) fn build_in_list_result_with_null_shortcircuit<C>(
88+
len: usize,
89+
needle_nulls: Option<&NullBuffer>,
90+
needle_null_count: usize,
91+
haystack_has_nulls: bool,
92+
negated: bool,
93+
mut contains: C,
94+
) -> BooleanArray
95+
where
96+
C: FnMut(usize) -> bool,
97+
{
98+
// When null_count=0, treat as no validity buffer to avoid extra work.
99+
// The validity buffer might exist but have all bits set to true.
100+
let effective_nulls = needle_nulls.filter(|_| needle_null_count > 0);
101+
102+
match effective_nulls {
103+
Some(nulls) => {
104+
// Has nulls: check validity inside loop to skip expensive contains()
105+
let contains_buf =
106+
BooleanBuffer::collect_bool(len, |i| nulls.is_valid(i) && contains(i));
107+
build_result_from_contains_premasked(
108+
Some(nulls),
109+
haystack_has_nulls,
110+
negated,
111+
contains_buf,
112+
)
113+
}
114+
None => {
115+
// No nulls: compute contains for all positions without branch overhead
116+
let contains_buf = BooleanBuffer::collect_bool(len, contains);
117+
// Use premasked path since contains_buf is "trivially premasked" (no nulls to mask)
118+
build_result_from_contains_premasked(
119+
None,
120+
haystack_has_nulls,
121+
negated,
122+
contains_buf,
123+
)
124+
}
125+
}
126+
}
127+
128+
/// Builds result from a contains buffer that was pre-masked at null positions.
129+
///
130+
/// This is used by `build_in_list_result_with_null_shortcircuit` where the
131+
/// contains buffer already has `false` at null positions due to the short-circuit.
132+
///
133+
/// Since contains_buf is pre-masked (false at null positions), we can simplify:
134+
/// - `valid & contains_buf` = `contains_buf` (already 0 where valid is 0)
135+
/// - XOR can replace AND+NOT for the negated case: `valid ^ contains = valid & !contains`
136+
#[inline]
137+
fn build_result_from_contains_premasked(
138+
needle_nulls: Option<&NullBuffer>,
139+
haystack_has_nulls: bool,
140+
negated: bool,
141+
contains_buf: BooleanBuffer,
142+
) -> BooleanArray {
143+
match (needle_nulls, haystack_has_nulls, negated) {
144+
// Haystack has nulls: result is null unless value is found
145+
(_, true, false) => {
146+
// contains_buf is already masked (false at null positions)
147+
BooleanArray::new(contains_buf.clone(), Some(NullBuffer::new(contains_buf)))
148+
}
149+
(Some(v), true, true) => {
150+
// NOT IN with nulls: true if valid and not found, null if found or needle null
151+
// XOR: valid ^ contains = 1 iff valid=1 and contains=0 (not found)
152+
BooleanArray::new(
153+
v.inner() ^ &contains_buf,
154+
Some(NullBuffer::new(contains_buf)),
155+
)
156+
}
157+
(None, true, true) => {
158+
BooleanArray::new(!&contains_buf, Some(NullBuffer::new(contains_buf)))
159+
}
160+
// Haystack has no nulls: result validity follows needle validity
161+
(Some(v), false, false) => {
162+
// contains_buf is already masked, just use needle validity for nulls
163+
BooleanArray::new(contains_buf, Some(v.clone()))
164+
}
165+
(Some(v), false, true) => {
166+
// Need AND because !contains_buf is 1 at null positions
167+
BooleanArray::new(v.inner() & &(!&contains_buf), Some(v.clone()))
168+
}
169+
(None, false, false) => BooleanArray::new(contains_buf, None),
170+
(None, false, true) => BooleanArray::new(!&contains_buf, None),
171+
}
172+
}
173+
69174
/// Builds a BooleanArray result from a pre-computed contains buffer.
70175
///
71176
/// This version does NOT assume contains_buf is pre-masked at null positions.

datafusion/physical-expr/src/expressions/in_list/strategy.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,16 @@ use datafusion_common::Result;
2424
use super::array_static_filter::ArrayStaticFilter;
2525
use super::primitive_filter::*;
2626
use super::static_filter::StaticFilter;
27+
use super::transform::make_bitmap_filter;
2728

2829
pub(super) fn instantiate_static_filter(
2930
in_array: ArrayRef,
3031
) -> Result<Arc<dyn StaticFilter + Send + Sync>> {
3132
match in_array.data_type() {
32-
// Integer primitive types
33-
DataType::Int8 => Ok(Arc::new(Int8StaticFilter::try_new(&in_array)?)),
34-
DataType::Int16 => Ok(Arc::new(Int16StaticFilter::try_new(&in_array)?)),
33+
DataType::Int8 | DataType::UInt8 => make_bitmap_filter::<U8Config>(&in_array),
34+
DataType::Int16 | DataType::UInt16 => make_bitmap_filter::<U16Config>(&in_array),
3535
DataType::Int32 => Ok(Arc::new(Int32StaticFilter::try_new(&in_array)?)),
3636
DataType::Int64 => Ok(Arc::new(Int64StaticFilter::try_new(&in_array)?)),
37-
DataType::UInt8 => Ok(Arc::new(BitmapFilter::<U8Config>::try_new(&in_array)?)),
38-
DataType::UInt16 => Ok(Arc::new(BitmapFilter::<U16Config>::try_new(&in_array)?)),
3937
DataType::UInt32 => Ok(Arc::new(UInt32StaticFilter::try_new(&in_array)?)),
4038
DataType::UInt64 => Ok(Arc::new(UInt64StaticFilter::try_new(&in_array)?)),
4139
// Float primitive types (use ordered wrappers for Hash/Eq)
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Type transformation utilities for InList filters
19+
//!
20+
//! This module provides type reinterpretation for optimizing filter dispatch.
21+
//! For equality comparison, only the bit pattern matters, so we can:
22+
//! - Reinterpret signed integers as unsigned (Int32 → UInt32)
23+
//! - Reinterpret floats as unsigned integers (Float64 → UInt64)
24+
//!
25+
//! This allows using a single filter implementation (e.g., for UInt64) to handle
26+
//! multiple types (Int64, Float64, Timestamp, Duration) that share the same
27+
//! byte width, reducing code duplication.
28+
29+
use std::sync::Arc;
30+
31+
use arrow::array::{Array, ArrayRef, BooleanArray, PrimitiveArray};
32+
use arrow::buffer::ScalarBuffer;
33+
use arrow::datatypes::ArrowPrimitiveType;
34+
use datafusion_common::Result;
35+
36+
use super::primitive_filter::{BitmapFilter, BitmapFilterConfig};
37+
use super::result::handle_dictionary;
38+
use super::static_filter::StaticFilter;
39+
40+
// =============================================================================
41+
// REINTERPRETING FILTERS (zero-copy type conversion)
42+
// =============================================================================
43+
44+
/// Reinterpreting filter for bitmap lookups (u8/u16).
45+
struct ReinterpretedBitmap<C: BitmapFilterConfig> {
46+
inner: BitmapFilter<C>,
47+
}
48+
49+
impl<C: BitmapFilterConfig> StaticFilter for ReinterpretedBitmap<C> {
50+
fn null_count(&self) -> usize {
51+
self.inner.null_count()
52+
}
53+
54+
fn contains(&self, v: &dyn Array, negated: bool) -> Result<BooleanArray> {
55+
handle_dictionary!(self, v, negated);
56+
57+
let data = v.to_data();
58+
let values: &[C::Native] = data.buffer::<C::Native>(0);
59+
60+
Ok(self.inner.contains_slice(values, data.nulls(), negated))
61+
}
62+
}
63+
64+
/// Reinterprets any primitive-like array as the target primitive type T by extracting
65+
/// the underlying buffer.
66+
///
67+
/// This is a zero-copy operation that works for all primitive types (Int*, UInt*, Float*,
68+
/// Timestamp*, Date*, Duration*, etc.) by directly accessing the underlying buffer,
69+
/// ignoring any metadata like timezones or precision/scale.
70+
#[inline]
71+
pub(crate) fn reinterpret_any_primitive_to<T: ArrowPrimitiveType>(
72+
array: &dyn Array,
73+
) -> ArrayRef {
74+
let values = array.to_data().buffers()[0].clone();
75+
let buffer: ScalarBuffer<T::Native> = values.into();
76+
Arc::new(PrimitiveArray::<T>::new(buffer, array.nulls().cloned()))
77+
}
78+
79+
/// Creates a bitmap filter for u8/u16 types, reinterpreting if needed.
80+
pub(crate) fn make_bitmap_filter<C>(
81+
in_array: &ArrayRef,
82+
) -> Result<Arc<dyn StaticFilter + Send + Sync>>
83+
where
84+
C: BitmapFilterConfig,
85+
{
86+
if in_array.data_type() == &C::ArrowType::DATA_TYPE {
87+
return Ok(Arc::new(BitmapFilter::<C>::try_new(in_array)?));
88+
}
89+
90+
let reinterpreted = reinterpret_any_primitive_to::<C::ArrowType>(in_array.as_ref());
91+
let inner = BitmapFilter::<C>::try_new(&reinterpreted)?;
92+
Ok(Arc::new(ReinterpretedBitmap { inner }))
93+
}

0 commit comments

Comments
 (0)