Skip to content

Commit 293a880

Browse files
authored
Avoid creating new RecordBatches to simplify expressions (#20534)
## Which issue does this PR close? - part of #19795 - follow-on to #20234 ## Rationale for this change While reviewing #20234 from @AdamGS, I wondered why we were creating new `RecordBatch`es to simplify expressions. I looked into it a bit, and I think we can avoid a bunch of small allocations / deallocations. ## What changes are included in this PR? 1. Create the dummy batch once and reuse it. ## Are these changes tested? Yes, by CI. I will also run benchmarks on it. ## Are there any user-facing changes? No, this is entirely internal.
1 parent 4a7330f commit 293a880

2 files changed

Lines changed: 20 additions & 8 deletions

File tree

datafusion/physical-expr/src/simplifier/const_evaluator.rs

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ use arrow::array::new_null_array;
2323
use arrow::datatypes::{DataType, Field, Schema};
2424
use arrow::record_batch::RecordBatch;
2525
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion};
26-
use datafusion_common::{Result, ScalarValue};
26+
use datafusion_common::{Result, ScalarValue, internal_datafusion_err};
2727
use datafusion_expr_common::columnar_value::ColumnarValue;
2828

2929
use crate::PhysicalExpr;
@@ -53,7 +53,7 @@ pub fn simplify_const_expr(
5353
}
5454

5555
// Evaluate the expression
56-
match expr.evaluate(&batch) {
56+
match expr.evaluate(batch) {
5757
Ok(ColumnarValue::Scalar(scalar)) => {
5858
Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
5959
}
@@ -146,11 +146,23 @@ pub(crate) fn simplify_const_expr_immediate(
146146
/// that only contain literals, the batch content is irrelevant.
147147
///
148148
/// This is the same approach used in the logical expression `ConstEvaluator`.
149-
pub(crate) fn create_dummy_batch() -> Result<RecordBatch> {
150-
// RecordBatch requires at least one column
151-
let dummy_schema = Arc::new(Schema::new(vec![Field::new("_", DataType::Null, true)]));
152-
let col = new_null_array(&DataType::Null, 1);
153-
Ok(RecordBatch::try_new(dummy_schema, vec![col])?)
149+
pub(crate) fn create_dummy_batch() -> Result<&'static RecordBatch> {
150+
static DUMMY_BATCH: std::sync::OnceLock<Result<RecordBatch>> =
151+
std::sync::OnceLock::new();
152+
DUMMY_BATCH
153+
.get_or_init(|| {
154+
// RecordBatch requires at least one column
155+
let dummy_schema =
156+
Arc::new(Schema::new(vec![Field::new("_", DataType::Null, true)]));
157+
let col = new_null_array(&DataType::Null, 1);
158+
Ok(RecordBatch::try_new(dummy_schema, vec![col])?)
159+
})
160+
.as_ref()
161+
.map_err(|e| {
162+
internal_datafusion_err!(
163+
"Failed to create dummy batch for constant expression evaluation: {e}"
164+
)
165+
})
154166
}
155167

156168
fn can_evaluate_as_constant(expr: &Arc<dyn PhysicalExpr>) -> bool {

datafusion/physical-expr/src/simplifier/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ impl<'a> PhysicalExprSimplifier<'a> {
6969
let rewritten = not::simplify_not_expr(node, schema)?
7070
.transform_data(|node| unwrap_cast_in_comparison(node, schema))?
7171
.transform_data(|node| {
72-
const_evaluator::simplify_const_expr_immediate(node, &batch)
72+
const_evaluator::simplify_const_expr_immediate(node, batch)
7373
})?;
7474

7575
#[cfg(debug_assertions)]

0 commit comments

Comments
 (0)