diff --git a/datafusion/datasource-parquet/src/metadata.rs b/datafusion/datasource-parquet/src/metadata.rs index 77e29cf35cd5b..99e10409f8c8d 100644 --- a/datafusion/datasource-parquet/src/metadata.rs +++ b/datafusion/datasource-parquet/src/metadata.rs @@ -495,12 +495,10 @@ fn summarize_column_statistics( // handle the common special case when all row groups have exact statistics let exactness = &is_max_value_exact_stat; - if !exactness.is_empty() - && exactness.null_count() == 0 - && exactness.true_count() == exactness.len() + if !exactness.is_empty() && exactness.null_count() == 0 && !exactness.has_false() { accumulators.is_max_value_exact[logical_schema_index] = Some(true); - } else if exactness.true_count() == 0 { + } else if !exactness.has_true() { accumulators.is_max_value_exact[logical_schema_index] = Some(false); } else { let val = max_acc.evaluate()?; @@ -514,12 +512,10 @@ fn summarize_column_statistics( // handle the common special case when all row groups have exact statistics let exactness = &is_min_value_exact_stat; - if !exactness.is_empty() - && exactness.null_count() == 0 - && exactness.true_count() == exactness.len() + if !exactness.is_empty() && exactness.null_count() == 0 && !exactness.has_false() { accumulators.is_min_value_exact[logical_schema_index] = Some(true); - } else if exactness.true_count() == 0 { + } else if !exactness.has_true() { accumulators.is_min_value_exact[logical_schema_index] = Some(false); } else { let val = min_acc.evaluate()?; @@ -650,7 +646,7 @@ fn has_any_exact_match( let scalar_array = value.to_scalar().ok()?; let eq_mask = eq(&scalar_array, &array).ok()?; let combined_mask = and(&eq_mask, exactness).ok()?; - Some(combined_mask.true_count() > 0) + Some(combined_mask.has_true()) } /// Wrapper to implement [`FileMetadata`] for [`ParquetMetaData`]. diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 5eebd7b7268cd..721921309f892 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -337,7 +337,7 @@ fn array_has_dispatch_for_array<'a>( let is_nested = arr.data_type().is_nested(); let needle_row = Scalar::new(needle.slice(i, 1)); let eq_array = compare_with_eq(&arr, &needle_row, is_nested)?; - boolean_builder.append_value(eq_array.true_count() > 0); + boolean_builder.append_value(eq_array.has_true()); } Ok(Arc::new(boolean_builder.finish())) diff --git a/datafusion/functions-nested/src/replace.rs b/datafusion/functions-nested/src/replace.rs index 3de66810fcaf0..7effd13b696ad 100644 --- a/datafusion/functions-nested/src/replace.rs +++ b/datafusion/functions-nested/src/replace.rs @@ -347,7 +347,7 @@ fn general_replace( let mut counter = 0; // All elements are false, no need to replace, just copy original data - if eq_array.false_count() == eq_array.len() { + if !eq_array.has_true() { mutable.extend( original_idx.to_usize().unwrap(), start.to_usize().unwrap(), diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 35cfed228121e..58a5393e6619a 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -793,17 +793,14 @@ impl CaseBody { } }?; - // `true_count` ignores `true` values where the validity bit is not set, so there's - // no need to call `prep_null_mask_filter`. - let when_true_count = when_value.true_count(); - - // If the 'when' predicate did not match any rows, continue to the next branch immediately - if when_true_count == 0 { + // If the 'when' predicate did not match any rows, continue to the next branch immediately. + // `has_true` short-circuits on the first true value, avoiding a full popcount. + if !when_value.has_true() { continue; } // If the 'when' predicate matched all remaining rows, there is no need to filter - if when_true_count == remainder_batch.num_rows() { + if when_value.true_count() == remainder_batch.num_rows() { let then_expression = &self.when_then_expr[i].1; let then_value = then_expression.evaluate(&remainder_batch)?; result_builder.add_branch_result(&remainder_rows, then_value)?; @@ -882,17 +879,14 @@ impl CaseBody { internal_datafusion_err!("WHEN expression did not return a BooleanArray") })?; - // `true_count` ignores `true` values where the validity bit is not set, so there's - // no need to call `prep_null_mask_filter`. - let when_true_count = when_value.true_count(); - - // If the 'when' predicate did not match any rows, continue to the next branch immediately - if when_true_count == 0 { + // If the 'when' predicate did not match any rows, continue to the next branch immediately. + // `has_true` short-circuits on the first true value, avoiding a full popcount. + if !when_value.has_true() { continue; } // If the 'when' predicate matched all remaining rows, there is no need to filter - if when_true_count == remainder_batch.num_rows() { + if when_value.true_count() == remainder_batch.num_rows() { let then_expression = &self.when_then_expr[i].1; let then_value = then_expression.evaluate(&remainder_batch)?; result_builder.add_branch_result(&remainder_rows, then_value)?; diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 1582556b01e11..f8ef6b2b2bc5e 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -1672,7 +1672,7 @@ impl NestedLoopJoinStream { return Ok(None); } - if cur_right_bitmap.true_count() == 0 { + if !cur_right_bitmap.has_true() { // If none of the pairs has passed the join predicate/filter Ok(None) } else { @@ -2259,7 +2259,7 @@ fn build_unmatched_batch( not(&batch_bitmap)? }; - if bitmap.true_count() == 0 { + if !bitmap.has_true() { return Ok(None); } diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/filter.rs b/datafusion/physical-plan/src/joins/sort_merge_join/filter.rs index 3a5f0d0f0598f..4fc6cccaa8838 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/filter.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/filter.rs @@ -331,7 +331,7 @@ pub fn filter_record_batch_by_join_type( .unwrap(); // All rows passed the filter — no null-joining needed - if kept_corrected.true_count() == kept_corrected.len() { + if !kept_corrected.has_false() { return Ok(kept_batch); } diff --git a/datafusion/spark/src/function/array/array_contains.rs b/datafusion/spark/src/function/array/array_contains.rs index e0f99f7dd326e..9ae23468dad02 100644 --- a/datafusion/spark/src/function/array/array_contains.rs +++ b/datafusion/spark/src/function/array/array_contains.rs @@ -84,7 +84,7 @@ fn apply_spark_null_semantics( haystack_arg: &ColumnarValue, ) -> Result { // happy path - if result.false_count() == 0 || haystack_arg.data_type() == DataType::Null { + if !result.has_false() || haystack_arg.data_type() == DataType::Null { return Ok(result.clone()); }