Skip to content

Commit f70c9e7

Browse files
Dandandanclaude
andcommitted
Add in-place bitwise AND/OR for boolean arrays
When evaluating boolean AND/OR expressions without nulls, use BooleanBuffer's BitAndAssign/BitOrAssign which internally attempts Buffer::into_mutable() for in-place mutation. Falls back to and_kleene/or_kleene when nulls are present. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0b8616e commit f70c9e7

2 files changed

Lines changed: 21 additions & 81 deletions

File tree

datafusion/physical-expr-common/src/datum.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ where
365365
}
366366

367367
/// Try to apply arithmetic in-place on `right` array with a scalar `left`.
368-
/// The operation is `result[i] = op(scalar_left, right[i])`, stored in `right`'s buffer.
368+
/// The operation is `result\[i\] = op(scalar_left, right\[i\])`, stored in `right`'s buffer.
369369
/// Returns `Ok(result)` on success, or `Err(right)` if in-place not possible.
370370
fn try_apply_inplace_scalar_rhs(
371371
right: ArrayRef,
@@ -379,7 +379,7 @@ fn try_apply_inplace_scalar_rhs(
379379
}
380380

381381
/// Try to apply arithmetic in-place on `right` array using values from `left` array.
382-
/// The operation is `result[i] = op(left[i], right[i])`, stored in `right`'s buffer.
382+
/// The operation is `result\[i\] = op(left\[i\], right\[i\])`, stored in `right`'s buffer.
383383
/// Returns `Ok(result)` on success, or `Err(right)` if in-place not possible.
384384
fn try_apply_inplace_array_rhs(
385385
left: &ArrayRef,
@@ -392,7 +392,7 @@ fn try_apply_inplace_array_rhs(
392392
dispatch_inplace_binary!(right, left, op, try_inplace_binary_rhs)
393393
}
394394

395-
/// Attempt in-place mutation on the right PrimitiveArray: result[i] = op(scalar, right[i]).
395+
/// Attempt in-place mutation on the right PrimitiveArray: result\[i\] = op(scalar, right\[i\]).
396396
fn try_inplace_unary_rhs<T: ArrowPrimitiveType>(
397397
array: ArrayRef,
398398
scalar: T::Native,
@@ -431,7 +431,7 @@ where
431431
}
432432
}
433433

434-
/// Attempt in-place mutation on the right PrimitiveArray: result[i] = op(left[i], right[i]).
434+
/// Attempt in-place mutation on the right PrimitiveArray: result\[i\] = op(left\[i\], right\[i\]).
435435
/// Note: parameter order is (right_owned, left_ref) to match the dispatch_inplace_binary macro.
436436
fn try_inplace_binary_rhs<T: ArrowPrimitiveType>(
437437
right: ArrayRef,
@@ -473,7 +473,7 @@ where
473473
let right_slice = builder.values_slice_mut();
474474
let left_values = left_primitive.values();
475475

476-
// Note: op(left[i], right[i]) — left is the first operand
476+
// Note: op(left\[i\], right\[i\]) — left is the first operand
477477
right_slice
478478
.iter_mut()
479479
.zip(left_values.iter())

datafusion/physical-expr/src/expressions/binary.rs

Lines changed: 16 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ use std::hash::Hash;
2323
use std::{any::Any, sync::Arc};
2424

2525
use arrow::array::*;
26-
use arrow::buffer::BooleanBuffer;
2726
use arrow::compute::kernels::boolean::{and_kleene, or_kleene};
2827
use arrow::compute::kernels::concat_elements::concat_elements_utf8;
2928
use arrow::compute::{SlicesIterator, cast, filter_record_batch};
@@ -173,11 +172,12 @@ enum BoolOp {
173172
Or,
174173
}
175174

176-
/// Try in-place bitwise AND/OR on boolean arrays when neither side has nulls
177-
/// and both have zero offset. Tries left first, then right.
178-
/// Falls back to the standard kleene kernel otherwise.
179-
fn boolean_op_inplace(left: ArrayRef, right: ArrayRef, op: BoolOp) -> Result<ArrayRef> {
180-
// Only optimize the non-null, zero-offset case
175+
/// Try in-place bitwise AND/OR on boolean arrays when neither side has nulls.
176+
/// Uses `BooleanBuffer`'s `BitAndAssign`/`BitOrAssign` which internally attempts
177+
/// `Buffer::into_mutable()` for in-place mutation, falling back to allocation if shared.
178+
/// Falls back to standard kleene kernel when nulls are present.
179+
fn boolean_op_inplace(left: ArrayRef, right: &ArrayRef, op: BoolOp) -> Result<ArrayRef> {
180+
// Only optimize the non-null case; kleene logic is needed when nulls are present
181181
if left.null_count() != 0 || right.null_count() != 0 || left.len() != right.len() {
182182
let kleene_fn = match op {
183183
BoolOp::And => and_kleene,
@@ -191,78 +191,18 @@ fn boolean_op_inplace(left: ArrayRef, right: ArrayRef, op: BoolOp) -> Result<Arr
191191
let right_bool = as_boolean_array(&right)
192192
.expect("boolean_op_inplace failed to downcast right array");
193193

194-
if left_bool.offset() != 0 || right_bool.offset() != 0 {
195-
let kleene_fn = match op {
196-
BoolOp::And => and_kleene,
197-
BoolOp::Or => or_kleene,
198-
};
199-
return Ok(boolean_op(&left, &right, kleene_fn)?);
200-
}
201-
202-
let len = left_bool.len();
203-
let byte_len = len.div_ceil(8);
204-
205-
// Try left first
206-
let other_bytes = right_bool.values().inner().as_slice();
207-
let left_clone = left_bool.clone();
194+
let right_values = right_bool.values();
195+
// Clone left BooleanBuffer (cheap Arc clone), then drop ArrayRef to
196+
// reduce refcount so BitAndAssign/BitOrAssign can mutate in-place.
197+
let mut left_values = left_bool.values().clone();
208198
drop(left);
209-
let (left_values, _nulls) = left_clone.into_parts();
210-
match left_values.into_inner().into_mutable() {
211-
Ok(mut mutable) => {
212-
apply_bool_assign(mutable.as_slice_mut(), other_bytes, byte_len, op);
213-
Ok(Arc::new(BooleanArray::new(
214-
BooleanBuffer::new(mutable.into(), 0, len),
215-
None,
216-
)))
217-
}
218-
Err(left_buf) => {
219-
// Left buffer shared — try right
220-
let left_bytes = left_buf.as_slice();
221-
let right_clone = right_bool.clone();
222-
drop(right);
223-
let (right_values, _nulls) = right_clone.into_parts();
224-
match right_values.into_inner().into_mutable() {
225-
Ok(mut mutable) => {
226-
// AND/OR are commutative, so we can swap operands
227-
apply_bool_assign(mutable.as_slice_mut(), left_bytes, byte_len, op);
228-
Ok(Arc::new(BooleanArray::new(
229-
BooleanBuffer::new(mutable.into(), 0, len),
230-
None,
231-
)))
232-
}
233-
Err(right_buf) => {
234-
// Both shared — fall back
235-
let left_arr =
236-
BooleanArray::new(BooleanBuffer::new(left_buf, 0, len), None);
237-
let right_arr =
238-
BooleanArray::new(BooleanBuffer::new(right_buf, 0, len), None);
239-
let kleene_fn = match op {
240-
BoolOp::And => and_kleene,
241-
BoolOp::Or => or_kleene,
242-
};
243-
Ok(boolean_op(&left_arr, &right_arr, kleene_fn)?)
244-
}
245-
}
246-
}
247-
}
248-
}
249199

250-
#[inline]
251-
fn apply_bool_assign(dst: &mut [u8], src: &[u8], byte_len: usize, op: BoolOp) {
252200
match op {
253-
BoolOp::And => {
254-
dst[..byte_len]
255-
.iter_mut()
256-
.zip(&src[..byte_len])
257-
.for_each(|(d, s)| *d &= s);
258-
}
259-
BoolOp::Or => {
260-
dst[..byte_len]
261-
.iter_mut()
262-
.zip(&src[..byte_len])
263-
.for_each(|(d, s)| *d |= s);
264-
}
201+
BoolOp::And => left_values &= right_values,
202+
BoolOp::Or => left_values |= right_values,
265203
}
204+
205+
Ok(Arc::new(BooleanArray::new(left_values, None)))
266206
}
267207

268208
/// Returns true if both operands are Date types (Date32 or Date64)
@@ -793,7 +733,7 @@ impl BinaryExpr {
793733
| NotLikeMatch | NotILikeMatch => unreachable!(),
794734
And => {
795735
if left_data_type == &DataType::Boolean {
796-
boolean_op_inplace(left, right, BoolOp::And)
736+
boolean_op_inplace(left, &right, BoolOp::And)
797737
} else {
798738
internal_err!(
799739
"Cannot evaluate binary expression {:?} with types {:?} and {:?}",
@@ -805,7 +745,7 @@ impl BinaryExpr {
805745
}
806746
Or => {
807747
if left_data_type == &DataType::Boolean {
808-
boolean_op_inplace(left, right, BoolOp::Or)
748+
boolean_op_inplace(left, &right, BoolOp::Or)
809749
} else {
810750
internal_err!(
811751
"Cannot evaluate binary expression {:?} with types {:?} and {:?}",

0 commit comments

Comments
 (0)