Skip to content

Commit acde8fe

Browse files
Dandandan and claude committed
Simplify apply_null_mask and restore equal_rows_arr empty behavior
- Use NullBuffer::union + build_unchecked in apply_null_mask
- Remove redundant null_count guard at call site
- Accept Option<&NullBuffer> instead of &UInt64Array
- Restore equal_rows_arr to return empty arrays instead of error

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent aefebec commit acde8fe

1 file changed

Lines changed: 15 additions & 24 deletions

File tree

  • datafusion/physical-plan/src/joins

datafusion/physical-plan/src/joins/utils.rs

Lines changed: 15 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -992,23 +992,20 @@ fn new_empty_schema_batch(schema: &Schema, row_count: usize) -> Result<RecordBat
992992
)?)
993993
}
994994

995-
/// Apply null mask from build_indices onto a result array.
995+
/// Apply null mask from index nulls onto a result array.
996996
/// Used for outer joins where unmatched rows are represented as null indices.
997-
fn apply_null_mask(result: ArrayRef, build_indices: &UInt64Array) -> Result<ArrayRef> {
998-
if let Some(idx_nulls) = build_indices.nulls() {
999-
let data = result.to_data();
1000-
let combined_nulls = if let Some(existing) = data.nulls() {
1001-
NullBuffer::new(existing.inner() & idx_nulls.inner())
1002-
} else {
1003-
idx_nulls.clone()
1004-
};
1005-
Ok(arrow::array::make_array(
1006-
data.into_builder()
1007-
.null_bit_buffer(Some(combined_nulls.into_inner().into_inner()))
1008-
.build()?,
1009-
))
1010-
} else {
1011-
Ok(result)
997+
fn apply_null_mask(result: ArrayRef, index_nulls: Option<&NullBuffer>) -> ArrayRef {
998+
let combined = NullBuffer::union(result.nulls(), index_nulls);
999+
// SAFETY: We only modify the null buffer, which is the union of the existing nulls
1000+
// and the index nulls. All other array data (buffers, offsets, child data) is unchanged.
1001+
unsafe {
1002+
arrow::array::make_array(
1003+
result
1004+
.into_data()
1005+
.into_builder()
1006+
.nulls(combined)
1007+
.build_unchecked(),
1008+
)
10121009
}
10131010
}
10141011

@@ -1105,11 +1102,7 @@ pub(crate) fn build_batch_from_indices(
11051102
let result = compute::interleave(&arrays, il_indices)?;
11061103
// Apply null mask from build_indices (for outer joins where
11071104
// unmatched rows are represented as null indices)
1108-
if build_indices.null_count() > 0 {
1109-
apply_null_mask(result, build_indices)?
1110-
} else {
1111-
result
1112-
}
1105+
apply_null_mask(result, build_indices.nulls())
11131106
}
11141107
BuildGather::AllNull => {
11151108
// All build indices are null (outer join with no matches)
@@ -1870,9 +1863,7 @@ pub(super) fn equal_rows_arr(
18701863
null_equality: NullEquality,
18711864
) -> Result<(UInt64Array, UInt32Array)> {
18721865
if left_arrays_per_batch.is_empty() || right_arrays.is_empty() {
1873-
return Err(DataFusionError::Internal(
1874-
"At least one array should be provided for both left and right".to_string(),
1875-
));
1866+
return Ok((Vec::<u64>::new().into(), Vec::<u32>::new().into()));
18761867
}
18771868

18781869
let num_keys = right_arrays.len();

0 commit comments

Comments
 (0)