Skip to content

Commit 0b8616e

Browse files
Dandandan and claude committed
Add in-place bitwise AND/OR for boolean arrays
When evaluating boolean AND/OR expressions, try to reuse the left buffer in-place via Buffer::into_mutable. If the left buffer is shared, try the right buffer (AND/OR are commutative). Falls back to standard and_kleene/or_kleene when both buffers are shared or when nulls are present. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4c6f444 commit 0b8616e

1 file changed

Lines changed: 102 additions & 2 deletions

File tree

  • datafusion/physical-expr/src/expressions

datafusion/physical-expr/src/expressions/binary.rs

Lines changed: 102 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use std::hash::Hash;
2323
use std::{any::Any, sync::Arc};
2424

2525
use arrow::array::*;
26+
use arrow::buffer::BooleanBuffer;
2627
use arrow::compute::kernels::boolean::{and_kleene, or_kleene};
2728
use arrow::compute::kernels::concat_elements::concat_elements_utf8;
2829
use arrow::compute::{SlicesIterator, cast, filter_record_batch};
@@ -165,6 +166,105 @@ fn boolean_op(
165166
op(ll, rr).map(|t| Arc::new(t) as _)
166167
}
167168

169+
/// Selects which boolean connective the in-place fast path applies.
///
/// Passing the operation as a value lets the AND and OR code paths share a
/// single implementation instead of being duplicated per operator.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum BoolOp {
    /// Logical conjunction (`left AND right`).
    And,
    /// Logical disjunction (`left OR right`).
    Or,
}
175+
176+
/// Try in-place bitwise AND/OR on boolean arrays when neither side has nulls
177+
/// and both have zero offset. Tries left first, then right.
178+
/// Falls back to the standard kleene kernel otherwise.
179+
fn boolean_op_inplace(left: ArrayRef, right: ArrayRef, op: BoolOp) -> Result<ArrayRef> {
180+
// Only optimize the non-null, zero-offset case
181+
if left.null_count() != 0 || right.null_count() != 0 || left.len() != right.len() {
182+
let kleene_fn = match op {
183+
BoolOp::And => and_kleene,
184+
BoolOp::Or => or_kleene,
185+
};
186+
return Ok(boolean_op(&left, &right, kleene_fn)?);
187+
}
188+
189+
let left_bool = as_boolean_array(&left)
190+
.expect("boolean_op_inplace failed to downcast left array");
191+
let right_bool = as_boolean_array(&right)
192+
.expect("boolean_op_inplace failed to downcast right array");
193+
194+
if left_bool.offset() != 0 || right_bool.offset() != 0 {
195+
let kleene_fn = match op {
196+
BoolOp::And => and_kleene,
197+
BoolOp::Or => or_kleene,
198+
};
199+
return Ok(boolean_op(&left, &right, kleene_fn)?);
200+
}
201+
202+
let len = left_bool.len();
203+
let byte_len = len.div_ceil(8);
204+
205+
// Try left first
206+
let other_bytes = right_bool.values().inner().as_slice();
207+
let left_clone = left_bool.clone();
208+
drop(left);
209+
let (left_values, _nulls) = left_clone.into_parts();
210+
match left_values.into_inner().into_mutable() {
211+
Ok(mut mutable) => {
212+
apply_bool_assign(mutable.as_slice_mut(), other_bytes, byte_len, op);
213+
Ok(Arc::new(BooleanArray::new(
214+
BooleanBuffer::new(mutable.into(), 0, len),
215+
None,
216+
)))
217+
}
218+
Err(left_buf) => {
219+
// Left buffer shared — try right
220+
let left_bytes = left_buf.as_slice();
221+
let right_clone = right_bool.clone();
222+
drop(right);
223+
let (right_values, _nulls) = right_clone.into_parts();
224+
match right_values.into_inner().into_mutable() {
225+
Ok(mut mutable) => {
226+
// AND/OR are commutative, so we can swap operands
227+
apply_bool_assign(mutable.as_slice_mut(), left_bytes, byte_len, op);
228+
Ok(Arc::new(BooleanArray::new(
229+
BooleanBuffer::new(mutable.into(), 0, len),
230+
None,
231+
)))
232+
}
233+
Err(right_buf) => {
234+
// Both shared — fall back
235+
let left_arr =
236+
BooleanArray::new(BooleanBuffer::new(left_buf, 0, len), None);
237+
let right_arr =
238+
BooleanArray::new(BooleanBuffer::new(right_buf, 0, len), None);
239+
let kleene_fn = match op {
240+
BoolOp::And => and_kleene,
241+
BoolOp::Or => or_kleene,
242+
};
243+
Ok(boolean_op(&left_arr, &right_arr, kleene_fn)?)
244+
}
245+
}
246+
}
247+
}
248+
}
249+
250+
#[inline]
251+
fn apply_bool_assign(dst: &mut [u8], src: &[u8], byte_len: usize, op: BoolOp) {
252+
match op {
253+
BoolOp::And => {
254+
dst[..byte_len]
255+
.iter_mut()
256+
.zip(&src[..byte_len])
257+
.for_each(|(d, s)| *d &= s);
258+
}
259+
BoolOp::Or => {
260+
dst[..byte_len]
261+
.iter_mut()
262+
.zip(&src[..byte_len])
263+
.for_each(|(d, s)| *d |= s);
264+
}
265+
}
266+
}
267+
168268
/// Returns true if both operands are Date types (Date32 or Date64)
169269
/// Used to detect Date - Date operations which should return Int64 (days difference)
170270
fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool {
@@ -693,7 +793,7 @@ impl BinaryExpr {
693793
| NotLikeMatch | NotILikeMatch => unreachable!(),
694794
And => {
695795
if left_data_type == &DataType::Boolean {
696-
Ok(boolean_op(&left, &right, and_kleene)?)
796+
boolean_op_inplace(left, right, BoolOp::And)
697797
} else {
698798
internal_err!(
699799
"Cannot evaluate binary expression {:?} with types {:?} and {:?}",
@@ -705,7 +805,7 @@ impl BinaryExpr {
705805
}
706806
Or => {
707807
if left_data_type == &DataType::Boolean {
708-
Ok(boolean_op(&left, &right, or_kleene)?)
808+
boolean_op_inplace(left, right, BoolOp::Or)
709809
} else {
710810
internal_err!(
711811
"Cannot evaluate binary expression {:?} with types {:?} and {:?}",

0 commit comments

Comments
 (0)