Skip to content

Commit d28a03c

Browse files
authored
Adjust case_when DivideByZeroProtection benchmark so that "percentage of zeroes" corresponds to "number of times protection is needed" (#20105)
## Which issue does this PR close? - Related to #11570. ## Rationale for this change The `case_when` microbenchmark that covers the pattern `CASE WHEN d != 0 THEN n / d ELSE NULL END` pattern is parameterised over the percentage of zeroes in the `d` column. The benchmark uses the condition `d > 0` rather than `d != 0` though which is a bit misleading. In the '0% zeroes' run one would expect the else branch to never be taken, but because slightly less than 50% of the `d` values is negative, it's still taken 50% of the time. This PR adjust the benchmark to use `d != 0` instead. ## What changes are included in this PR? - Adjust the divide by zero benchmark to use `d != 0` as condition - Remove the duplicate benchmark, the div-by-zero variant is sufficient to compare changes across branches - Add a couple of SLTs to cover the `CASE` pattern ## Are these changes tested? Manual testing ## Are there any user-facing changes? No
1 parent 7388eed commit d28a03c

2 files changed

Lines changed: 22 additions & 30 deletions

File tree

datafusion/physical-expr/benches/case_when.rs

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,6 @@ fn benchmark_divide_by_zero_protection(c: &mut Criterion, batch_size: usize) {
564564

565565
let numerator_col = col("numerator", &batch.schema()).unwrap();
566566
let divisor_col = col("divisor", &batch.schema()).unwrap();
567-
let divisor_copy_col = col("divisor_copy", &batch.schema()).unwrap();
568567

569568
// DivideByZeroProtection: WHEN condition checks `divisor_col > 0` and division
570569
// uses `divisor_col` as divisor. Since the checked column matches the divisor,
@@ -578,35 +577,7 @@ fn benchmark_divide_by_zero_protection(c: &mut Criterion, batch_size: usize) {
578577
|b| {
579578
let when = Arc::new(BinaryExpr::new(
580579
Arc::clone(&divisor_col),
581-
Operator::Gt,
582-
lit(0i32),
583-
));
584-
let then = Arc::new(BinaryExpr::new(
585-
Arc::clone(&numerator_col),
586-
Operator::Divide,
587-
Arc::clone(&divisor_col),
588-
));
589-
let else_null: Arc<dyn PhysicalExpr> = lit(ScalarValue::Int32(None));
590-
let expr =
591-
Arc::new(case(None, vec![(when, then)], Some(else_null)).unwrap());
592-
593-
b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap()))
594-
},
595-
);
596-
597-
// ExpressionOrExpression: WHEN condition checks `divisor_copy_col > 0` but
598-
// division uses `divisor_col` as divisor. Since the checked column does NOT
599-
// match the divisor, this falls back to ExpressionOrExpression evaluation.
600-
group.bench_function(
601-
format!(
602-
"{} rows, {}% zeros: ExpressionOrExpression",
603-
batch_size,
604-
(zero_percentage * 100.0) as i32
605-
),
606-
|b| {
607-
let when = Arc::new(BinaryExpr::new(
608-
Arc::clone(&divisor_copy_col),
609-
Operator::Gt,
580+
Operator::NotEq,
610581
lit(0i32),
611582
));
612583
let then = Arc::new(BinaryExpr::new(

datafusion/sqllogictest/test_files/case.slt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,27 @@ a
621621
b
622622
c
623623

624+
query I
625+
SELECT CASE WHEN d != 0 THEN n / d ELSE NULL END FROM (VALUES (1, 1), (1, 0), (1, -1)) t(n,d)
626+
----
627+
1
628+
NULL
629+
-1
630+
631+
query I
632+
SELECT CASE WHEN d > 0 THEN n / d ELSE NULL END FROM (VALUES (1, 1), (1, 0), (1, -1)) t(n,d)
633+
----
634+
1
635+
NULL
636+
NULL
637+
638+
query I
639+
SELECT CASE WHEN d < 0 THEN n / d ELSE NULL END FROM (VALUES (1, 1), (1, 0), (1, -1)) t(n,d)
640+
----
641+
NULL
642+
NULL
643+
-1
644+
624645
# EvalMethod::WithExpression using subset of all selected columns in case expression
625646
query III
626647
SELECT CASE a1 WHEN 1 THEN a1 WHEN 2 THEN a2 WHEN 3 THEN b END, b, c

0 commit comments

Comments
 (0)