Skip to content

Commit 4a7330f

Browse files
AdamGSalamb
andauthored
Add a builder to SimplifyContext to avoid allocating default values (#21092)
## Which issue does this PR close? - Closes #. ## Rationale for this change This is a follow up to #21084, where @blaginin realized that allocating `ConfigOptions` has this surprising side effect. Reading through how its used I realized that on mode "real" code paths (and in many tests), DataFusion ends up allocating the default values of `SimplifyContext` just to immediately drop them and override them with pre-existing clone-able data. ## What changes are included in this PR? Adds a new type `SimplifyContextBuilder` and `SimplifyContext::builder` ## Are these changes tested? Includes a couple of tests to make sure the builder makes sense, in addition to many existing tests. ## Are there any user-facing changes? As noted above, new type and a new function on an existing type. <!-- If there are any breaking changes to public APIs, please add the `api change` label. --> --------- Signed-off-by: Adam Gutglick <adamgsal@gmail.com> Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent ec7c9ab commit 4a7330f

15 files changed

Lines changed: 222 additions & 64 deletions

File tree

datafusion-examples/examples/query_planning/expr_api.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,10 @@ fn simplify_demo() -> Result<()> {
175175
// the ExecutionProps carries information needed to simplify
176176
// expressions, such as the current time (to evaluate `now()`
177177
// correctly)
178-
let context = SimplifyContext::default()
178+
let context = SimplifyContext::builder()
179179
.with_schema(schema)
180-
.with_current_time();
180+
.with_current_time()
181+
.build();
181182
let simplifier = ExprSimplifier::new(context);
182183

183184
// And then call the simplify_expr function:
@@ -192,9 +193,10 @@ fn simplify_demo() -> Result<()> {
192193

193194
// here are some other examples of what DataFusion is capable of
194195
let schema = Schema::new(vec![make_field("i", DataType::Int64)]).to_dfschema_ref()?;
195-
let context = SimplifyContext::default()
196+
let context = SimplifyContext::builder()
196197
.with_schema(Arc::clone(&schema))
197-
.with_current_time();
198+
.with_current_time()
199+
.build();
198200
let simplifier = ExprSimplifier::new(context);
199201

200202
// basic arithmetic simplification
@@ -554,9 +556,10 @@ fn type_coercion_demo() -> Result<()> {
554556
assert!(physical_expr.evaluate(&batch).is_ok());
555557

556558
// 2. Type coercion with `ExprSimplifier::coerce`.
557-
let context = SimplifyContext::default()
559+
let context = SimplifyContext::builder()
558560
.with_schema(Arc::new(df_schema.clone()))
559-
.with_current_time();
561+
.with_current_time()
562+
.build();
560563
let simplifier = ExprSimplifier::new(context);
561564
let coerced_expr = simplifier.coerce(expr.clone(), &df_schema)?;
562565
let physical_expr = datafusion::physical_expr::create_physical_expr(

datafusion/core/src/execution/context/mod.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,12 +1488,13 @@ impl SessionContext {
14881488
})?;
14891489

14901490
let state = self.state.read();
1491-
let context = SimplifyContext::default()
1491+
let context = SimplifyContext::builder()
14921492
.with_schema(Arc::clone(prepared.plan.schema()))
14931493
.with_config_options(Arc::clone(state.config_options()))
14941494
.with_query_execution_start_time(
14951495
state.execution_props().query_execution_start_time,
1496-
);
1496+
)
1497+
.build();
14971498
let simplifier = ExprSimplifier::new(context);
14981499

14991500
// Only allow literals as parameters for now.

datafusion/core/src/execution/session_state.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -743,12 +743,13 @@ impl SessionState {
743743
df_schema: &DFSchema,
744744
) -> datafusion_common::Result<Arc<dyn PhysicalExpr>> {
745745
let config_options = self.config_options();
746-
let simplify_context = SimplifyContext::default()
746+
let simplify_context = SimplifyContext::builder()
747747
.with_schema(Arc::new(df_schema.clone()))
748748
.with_config_options(Arc::clone(config_options))
749749
.with_query_execution_start_time(
750750
self.execution_props().query_execution_start_time,
751-
);
751+
)
752+
.build();
752753
let simplifier = ExprSimplifier::new(simplify_context);
753754
// apply type coercion here to ensure types match
754755
let mut expr = simplifier.coerce(expr, df_schema)?;
@@ -1835,11 +1836,12 @@ impl ContextProvider for SessionContextProvider<'_> {
18351836
.get(name)
18361837
.cloned()
18371838
.ok_or_else(|| plan_datafusion_err!("table function '{name}' not found"))?;
1838-
let simplify_context = SimplifyContext::default()
1839+
let simplify_context = SimplifyContext::builder()
18391840
.with_config_options(Arc::clone(self.state.config_options()))
18401841
.with_query_execution_start_time(
18411842
self.state.execution_props().query_execution_start_time,
1842-
);
1843+
)
1844+
.build();
18431845
let simplifier = ExprSimplifier::new(simplify_context);
18441846
let schema = DFSchema::empty();
18451847
let args = args

datafusion/core/src/test_util/parquet.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,9 @@ impl TestParquetFile {
166166
let df_schema = Arc::clone(&self.schema).to_dfschema_ref()?;
167167

168168
// run coercion on the filters to coerce types etc.
169-
let context = SimplifyContext::default().with_schema(Arc::clone(&df_schema));
169+
let context = SimplifyContext::builder()
170+
.with_schema(Arc::clone(&df_schema))
171+
.build();
170172
if let Some(filter) = maybe_filter {
171173
let simplifier = ExprSimplifier::new(context);
172174
let filter = simplifier.coerce(filter, &df_schema).unwrap();

datafusion/core/tests/expr_api/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,9 @@ fn create_simplified_expr_test(expr: Expr, expected_expr: &str) {
421421
let df_schema = DFSchema::try_from(batch.schema()).unwrap();
422422

423423
// Simplify the expression first
424-
let simplify_context = SimplifyContext::default().with_schema(Arc::new(df_schema));
424+
let simplify_context = SimplifyContext::builder()
425+
.with_schema(Arc::new(df_schema))
426+
.build();
425427
let simplifier = ExprSimplifier::new(simplify_context).with_max_cycles(10);
426428
let simplified = simplifier.simplify(expr).unwrap();
427429
create_expr_test(simplified, expected_expr);

datafusion/core/tests/expr_api/simplification.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,10 @@ fn test_evaluate_with_start_time(
8888
expected_expr: Expr,
8989
date_time: &DateTime<Utc>,
9090
) {
91-
let context = SimplifyContext::default()
91+
let context = SimplifyContext::builder()
9292
.with_schema(schema())
93-
.with_query_execution_start_time(Some(*date_time));
93+
.with_query_execution_start_time(Some(*date_time))
94+
.build();
9495
let simplifier = ExprSimplifier::new(context);
9596
let simplified_expr = simplifier
9697
.simplify(input_expr.clone())
@@ -153,9 +154,10 @@ fn to_timestamp_expr(arg: impl Into<String>) -> Expr {
153154

154155
#[test]
155156
fn basic() {
156-
let context = SimplifyContext::default()
157+
let context = SimplifyContext::builder()
157158
.with_schema(schema())
158-
.with_query_execution_start_time(Some(Utc::now()));
159+
.with_query_execution_start_time(Some(Utc::now()))
160+
.build();
159161

160162
// The `Expr` is a core concept in DataFusion, and DataFusion can
161163
// help simplify it.
@@ -171,7 +173,7 @@ fn basic() {
171173

172174
#[test]
173175
fn fold_and_simplify() {
174-
let context = SimplifyContext::default().with_schema(schema());
176+
let context = SimplifyContext::builder().with_schema(schema()).build();
175177

176178
// What will it do with the expression `concat('foo', 'bar') == 'foobar')`?
177179
let expr = concat(vec![lit("foo"), lit("bar")]).eq(lit("foobar"));
@@ -565,7 +567,9 @@ fn expr_test_schema() -> DFSchemaRef {
565567
}
566568

567569
fn test_simplify(input_expr: Expr, expected_expr: Expr) {
568-
let context = SimplifyContext::default().with_schema(expr_test_schema());
570+
let context = SimplifyContext::builder()
571+
.with_schema(expr_test_schema())
572+
.build();
569573
let simplifier = ExprSimplifier::new(context);
570574
let simplified_expr = simplifier
571575
.simplify(input_expr.clone())
@@ -581,9 +585,10 @@ fn test_simplify_with_cycle_count(
581585
expected_expr: Expr,
582586
expected_count: u32,
583587
) {
584-
let context = SimplifyContext::default()
588+
let context = SimplifyContext::builder()
585589
.with_schema(expr_test_schema())
586-
.with_query_execution_start_time(Some(Utc::now()));
590+
.with_query_execution_start_time(Some(Utc::now()))
591+
.build();
587592
let simplifier = ExprSimplifier::new(context);
588593
let (simplified_expr, count) = simplifier
589594
.simplify_with_cycle_count_transformed(input_expr.clone())

datafusion/expr/src/simplify.rs

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,14 @@ pub struct SimplifyContext {
4040
config_options: Arc<ConfigOptions>,
4141
}
4242

43+
/// Builder for [`SimplifyContext`].
44+
#[derive(Debug, Default)]
45+
pub struct SimplifyContextBuilder {
46+
schema: Option<DFSchemaRef>,
47+
query_execution_start_time: Option<DateTime<Utc>>,
48+
config_options: Option<Arc<ConfigOptions>>,
49+
}
50+
4351
impl Default for SimplifyContext {
4452
fn default() -> Self {
4553
Self {
@@ -51,18 +59,35 @@ impl Default for SimplifyContext {
5159
}
5260

5361
impl SimplifyContext {
62+
/// Returns a builder for [`SimplifyContext`].
63+
pub fn builder() -> SimplifyContextBuilder {
64+
SimplifyContextBuilder::default()
65+
}
66+
67+
#[deprecated(
68+
since = "54.0.0",
69+
note = "Use SimplifyContextBuilder if you intend to use non-default values."
70+
)]
5471
/// Set the [`ConfigOptions`] for this context
5572
pub fn with_config_options(mut self, config_options: Arc<ConfigOptions>) -> Self {
5673
self.config_options = config_options;
5774
self
5875
}
5976

77+
#[deprecated(
78+
since = "54.0.0",
79+
note = "Use SimplifyContextBuilder if you intend to use non-default values."
80+
)]
6081
/// Set the schema for this context
6182
pub fn with_schema(mut self, schema: DFSchemaRef) -> Self {
6283
self.schema = schema;
6384
self
6485
}
6586

87+
#[deprecated(
88+
since = "54.0.0",
89+
note = "Use SimplifyContextBuilder if you intend to use non-default values."
90+
)]
6691
/// Set the query execution start time
6792
pub fn with_query_execution_start_time(
6893
mut self,
@@ -72,6 +97,10 @@ impl SimplifyContext {
7297
self
7398
}
7499

100+
#[deprecated(
101+
since = "54.0.0",
102+
note = "Use SimplifyContextBuilder if you intend to use non-default values."
103+
)]
75104
/// Set the query execution start to the current time
76105
pub fn with_current_time(mut self) -> Self {
77106
self.query_execution_start_time = Some(Utc::now());
@@ -110,6 +139,46 @@ impl SimplifyContext {
110139
}
111140
}
112141

142+
impl SimplifyContextBuilder {
143+
/// Set the [`ConfigOptions`] for this context.
144+
pub fn with_config_options(mut self, config_options: Arc<ConfigOptions>) -> Self {
145+
self.config_options = Some(config_options);
146+
self
147+
}
148+
149+
/// Set the schema for this context.
150+
pub fn with_schema(mut self, schema: DFSchemaRef) -> Self {
151+
self.schema = Some(schema);
152+
self
153+
}
154+
155+
/// Set the query execution start time.
156+
pub fn with_query_execution_start_time(
157+
mut self,
158+
query_execution_start_time: Option<DateTime<Utc>>,
159+
) -> Self {
160+
self.query_execution_start_time = query_execution_start_time;
161+
self
162+
}
163+
164+
/// Set the query execution start to the current time.
165+
pub fn with_current_time(mut self) -> Self {
166+
self.query_execution_start_time = Some(Utc::now());
167+
self
168+
}
169+
170+
/// Build a [`SimplifyContext`], filling in any unspecified fields with defaults.
171+
pub fn build(self) -> SimplifyContext {
172+
SimplifyContext {
173+
schema: self.schema.unwrap_or_else(|| Arc::new(DFSchema::empty())),
174+
query_execution_start_time: self.query_execution_start_time,
175+
config_options: self
176+
.config_options
177+
.unwrap_or_else(|| Arc::new(ConfigOptions::default())),
178+
}
179+
}
180+
}
181+
113182
/// Was the expression simplified?
114183
#[derive(Debug)]
115184
pub enum ExprSimplifyResult {
@@ -119,3 +188,38 @@ pub enum ExprSimplifyResult {
119188
/// are return unmodified.
120189
Original(Vec<Expr>),
121190
}
191+
192+
#[cfg(test)]
193+
mod tests {
194+
use super::*;
195+
196+
#[test]
197+
fn simplify_context_builder_builds_default_context() {
198+
let context = SimplifyContext::builder().build();
199+
let default_options = ConfigOptions::default();
200+
201+
assert_eq!(context.schema().as_ref(), &DFSchema::empty());
202+
assert_eq!(context.query_execution_start_time(), None);
203+
assert_eq!(
204+
context.config_options().optimizer.max_passes,
205+
default_options.optimizer.max_passes
206+
);
207+
}
208+
209+
#[test]
210+
fn simplify_context_builder_uses_overrides() {
211+
let schema = Arc::new(DFSchema::empty());
212+
let config_options = Arc::new(ConfigOptions::default());
213+
let current_time = Utc::now();
214+
215+
let context = SimplifyContext::builder()
216+
.with_schema(Arc::clone(&schema))
217+
.with_config_options(Arc::clone(&config_options))
218+
.with_query_execution_start_time(Some(current_time))
219+
.build();
220+
221+
assert_eq!(context.schema().as_ref(), schema.as_ref());
222+
assert_eq!(context.query_execution_start_time(), Some(current_time));
223+
assert!(Arc::ptr_eq(context.config_options(), &config_options));
224+
}
225+
}

datafusion/functions-nested/src/array_has.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -999,6 +999,7 @@ mod tests {
999999
DataFusionError, ScalarValue, config::ConfigOptions,
10001000
utils::SingleRowListArrayBuilder,
10011001
};
1002+
use datafusion_expr::simplify::SimplifyContext;
10021003
use datafusion_expr::{
10031004
ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl, col, lit,
10041005
simplify::ExprSimplifyResult,
@@ -1017,7 +1018,7 @@ mod tests {
10171018
.build_list_scalar());
10181019
let needle = col("c");
10191020

1020-
let context = datafusion_expr::simplify::SimplifyContext::default();
1021+
let context = SimplifyContext::default();
10211022

10221023
let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) =
10231024
ArrayHas::new().simplify(vec![haystack, needle.clone()], &context)
@@ -1040,7 +1041,7 @@ mod tests {
10401041
let haystack = make_array(vec![lit(1), lit(2), lit(3)]);
10411042
let needle = col("c");
10421043

1043-
let context = datafusion_expr::simplify::SimplifyContext::default();
1044+
let context = SimplifyContext::default();
10441045

10451046
let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) =
10461047
ArrayHas::new().simplify(vec![haystack, needle.clone()], &context)
@@ -1063,7 +1064,7 @@ mod tests {
10631064
let haystack = Expr::Literal(ScalarValue::Null, None);
10641065
let needle = col("c");
10651066

1066-
let context = datafusion_expr::simplify::SimplifyContext::default();
1067+
let context = SimplifyContext::default();
10671068
let Ok(ExprSimplifyResult::Simplified(simplified)) =
10681069
ArrayHas::new().simplify(vec![haystack, needle], &context)
10691070
else {
@@ -1080,7 +1081,7 @@ mod tests {
10801081
let haystack = Expr::Literal(ScalarValue::List(Arc::new(haystack)), None);
10811082
let needle = col("c");
10821083

1083-
let context = datafusion_expr::simplify::SimplifyContext::default();
1084+
let context = SimplifyContext::default();
10841085
let Ok(ExprSimplifyResult::Simplified(simplified)) =
10851086
ArrayHas::new().simplify(vec![haystack, needle], &context)
10861087
else {
@@ -1095,7 +1096,7 @@ mod tests {
10951096
let haystack = col("c1");
10961097
let needle = col("c2");
10971098

1098-
let context = datafusion_expr::simplify::SimplifyContext::default();
1099+
let context = SimplifyContext::default();
10991100

11001101
let Ok(ExprSimplifyResult::Original(args)) =
11011102
ArrayHas::new().simplify(vec![haystack, needle.clone()], &context)

datafusion/functions/src/datetime/current_time.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,11 @@ mod tests {
151151
Some(tz.to_string())
152152
};
153153
let schema = Arc::new(DFSchema::empty());
154-
SimplifyContext::default()
154+
SimplifyContext::builder()
155155
.with_schema(schema)
156156
.with_config_options(Arc::new(config))
157157
.with_query_execution_start_time(Some(start_time))
158+
.build()
158159
}
159160

160161
#[test]

0 commit comments

Comments
 (0)