Skip to content

Commit 03d17e8

Browse files
alamb2010YOUY01
andauthored
Improve documentation for AggregateUdfImpl::simplify and WindowUDFImpl::simplify (#20712)
## Rationale for this change While working on #20665 I was somewhat confused about how the AggregateUDF simplify worked. So I added some more clarifying comments ## What changes are included in this PR? 1. Improve documentation strings ## Are these changes tested? Yes by CI ## Are there any user-facing changes? Better API docs --------- Co-authored-by: Yongting You <2010youy01@gmail.com>
1 parent 631c918 commit 03d17e8

5 files changed

Lines changed: 58 additions & 51 deletions

File tree

datafusion/expr/src/expr.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,7 +1010,7 @@ impl WindowFunctionDefinition {
10101010
}
10111011
}
10121012

1013-
/// Return the inner window simplification function, if any
1013+
/// Returns this window function's simplification hook, if any.
10141014
///
10151015
/// See [`WindowFunctionSimplification`] for more information
10161016
pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
@@ -1097,7 +1097,7 @@ impl WindowFunction {
10971097
}
10981098
}
10991099

1100-
/// Return the inner window simplification function, if any
1100+
/// Returns this window function's simplification hook, if any.
11011101
///
11021102
/// See [`WindowFunctionSimplification`] for more information
11031103
pub fn simplify(&self) -> Option<WindowFunctionSimplification> {

datafusion/expr/src/function.rs

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ pub use datafusion_functions_aggregate_common::accumulator::{
2727
AccumulatorArgs, AccumulatorFactoryFunction, StateFieldsArgs,
2828
};
2929

30+
use crate::expr::{AggregateFunction, WindowFunction};
31+
use crate::simplify::SimplifyContext;
3032
pub use datafusion_functions_window_common::expr::ExpressionArgs;
3133
pub use datafusion_functions_window_common::field::WindowUDFFieldArgs;
3234
pub use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
@@ -64,28 +66,22 @@ pub type PartitionEvaluatorFactory =
6466
pub type StateTypeFunction =
6567
Arc<dyn Fn(&DataType) -> Result<Arc<Vec<DataType>>> + Send + Sync>;
6668

67-
/// [crate::udaf::AggregateUDFImpl::simplify] simplifier closure
68-
/// A closure with two arguments:
69-
/// * 'aggregate_function': [crate::expr::AggregateFunction] for which simplified has been invoked
70-
/// * 'info': [crate::simplify::SimplifyContext]
69+
/// Type alias for [crate::udaf::AggregateUDFImpl::simplify].
7170
///
72-
/// Closure returns simplified [Expr] or an error.
73-
pub type AggregateFunctionSimplification = Box<
74-
dyn Fn(
75-
crate::expr::AggregateFunction,
76-
&crate::simplify::SimplifyContext,
77-
) -> Result<Expr>,
78-
>;
71+
/// This closure is invoked with:
72+
/// * `aggregate_function`: [AggregateFunction] with already simplified arguments
73+
/// * `info`: [SimplifyContext]
74+
///
75+
/// It returns a simplified [Expr] or an error.
76+
pub type AggregateFunctionSimplification =
77+
Box<dyn Fn(AggregateFunction, &SimplifyContext) -> Result<Expr>>;
7978

80-
/// [crate::udwf::WindowUDFImpl::simplify] simplifier closure
81-
/// A closure with two arguments:
82-
/// * 'window_function': [crate::expr::WindowFunction] for which simplified has been invoked
83-
/// * 'info': [crate::simplify::SimplifyContext]
79+
/// Type alias for [crate::udwf::WindowUDFImpl::simplify].
80+
///
81+
/// This closure is invoked with:
82+
/// * `window_function`: [WindowFunction] with already simplified arguments
83+
/// * `info`: [SimplifyContext]
8484
///
85-
/// Closure returns simplified [Expr] or an error.
86-
pub type WindowFunctionSimplification = Box<
87-
dyn Fn(
88-
crate::expr::WindowFunction,
89-
&crate::simplify::SimplifyContext,
90-
) -> Result<Expr>,
91-
>;
85+
/// It returns a simplified [Expr] or an error.
86+
pub type WindowFunctionSimplification =
87+
Box<dyn Fn(WindowFunction, &SimplifyContext) -> Result<Expr>>;

datafusion/expr/src/udaf.rs

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ impl AggregateUDF {
294294
self.inner.reverse_expr()
295295
}
296296

297-
/// Do the function rewrite
297+
/// Returns this aggregate function's simplification hook, if any.
298298
///
299299
/// See [`AggregateUDFImpl::simplify`] for more details.
300300
pub fn simplify(&self) -> Option<AggregateFunctionSimplification> {
@@ -651,26 +651,34 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync {
651651
AggregateOrderSensitivity::HardRequirement
652652
}
653653

654-
/// Optionally apply per-UDaF simplification / rewrite rules.
654+
/// Returns an optional hook for simplifying this user-defined aggregate.
655655
///
656-
/// This can be used to apply function specific simplification rules during
657-
/// optimization (e.g. `arrow_cast` --> `Expr::Cast`). The default
658-
/// implementation does nothing.
656+
/// Use this hook to apply function-specific rewrites during optimization.
657+
/// The default implementation returns `None`.
659658
///
660-
/// Note that DataFusion handles simplifying arguments and "constant
661-
/// folding" (replacing a function call with constant arguments such as
662-
/// `my_add(1,2) --> 3` ). Thus, there is no need to implement such
663-
/// optimizations manually for specific UDFs.
659+
/// For example, `percentile_cont(x, 0.0)` and `percentile_cont(x, 1.0)` can
660+
/// be rewritten to `MIN(x)` or `MAX(x)` depending on the `ORDER BY`
661+
/// direction.
662+
///
663+
/// DataFusion already simplifies arguments and performs constant folding
664+
/// (for example, `my_add(1, 2) -> 3`). For nested expressions, the optimizer
665+
/// runs simplification in multiple passes, so arguments are typically
666+
/// simplified before this hook is invoked. As a result, UDF implementations
667+
/// usually do not need to handle argument simplification themselves.
668+
///
669+
/// See configuration `datafusion.optimizer.max_passes` for details on how many
670+
/// optimization passes may be applied.
664671
///
665672
/// # Returns
666673
///
667-
/// [None] if simplify is not defined or,
674+
/// `None` if simplify is not defined.
668675
///
669-
/// Or, a closure with two arguments:
670-
/// * 'aggregate_function': [AggregateFunction] for which simplified has been invoked
671-
/// * 'info': [crate::simplify::SimplifyContext]
676+
/// Or, a closure ([`AggregateFunctionSimplification`]) invoked with:
677+
/// * `aggregate_function`: [AggregateFunction] with already simplified
678+
/// arguments
679+
/// * `info`: [crate::simplify::SimplifyContext]
672680
///
673-
/// closure returns simplified [Expr] or an error.
681+
/// The closure returns a simplified [Expr] or an error.
674682
///
675683
/// # Notes
676684
///

datafusion/expr/src/udf.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ impl ScalarUDF {
217217
self.inner.return_field_from_args(args)
218218
}
219219

220-
/// Do the function rewrite
220+
/// Returns this scalar function's simplification result.
221221
///
222222
/// See [`ScalarUDFImpl::simplify`] for more details.
223223
pub fn simplify(

datafusion/expr/src/udwf.rs

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ impl WindowUDF {
157157
self.inner.signature()
158158
}
159159

160-
/// Do the function rewrite
160+
/// Returns this window function's simplification hook, if any.
161161
///
162162
/// See [`WindowUDFImpl::simplify`] for more details.
163163
pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
@@ -344,25 +344,28 @@ pub trait WindowUDFImpl: Debug + DynEq + DynHash + Send + Sync {
344344
partition_evaluator_args: PartitionEvaluatorArgs,
345345
) -> Result<Box<dyn PartitionEvaluator>>;
346346

347-
/// Optionally apply per-UDWF simplification / rewrite rules.
347+
/// Returns an optional hook for simplifying this user-defined window
348+
/// function.
348349
///
349-
/// This can be used to apply function specific simplification rules during
350-
/// optimization. The default implementation does nothing.
350+
/// Use this hook to apply function-specific rewrites during optimization.
351+
/// The default implementation returns `None`.
351352
///
352-
/// Note that DataFusion handles simplifying arguments and "constant
353-
/// folding" (replacing a function call with constant arguments such as
354-
/// `my_add(1,2) --> 3` ). Thus, there is no need to implement such
355-
/// optimizations manually for specific UDFs.
353+
/// DataFusion already simplifies arguments and performs constant folding
354+
/// (for example, `my_add(1, 2) -> 3`), so there is usually no need to
355+
/// implement those optimizations manually for specific UDFs.
356356
///
357357
/// Example:
358358
/// `advanced_udwf.rs`: <https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/udf/advanced_udwf.rs>
359359
///
360360
/// # Returns
361-
/// [None] if simplify is not defined or,
361+
/// `None` if simplify is not defined.
362362
///
363-
/// Or, a closure with two arguments:
364-
/// * 'window_function': [crate::expr::WindowFunction] for which simplified has been invoked
365-
/// * 'info': [crate::simplify::SimplifyContext]
363+
/// Or, a closure ([`WindowFunctionSimplification`]) invoked with:
364+
/// * `window_function`: [WindowFunction] with already simplified
365+
/// arguments
366+
/// * `info`: [crate::simplify::SimplifyContext]
367+
///
368+
/// The closure returns a simplified [Expr] or an error.
366369
///
367370
/// # Notes
368371
/// The returned expression must have the same schema as the original

0 commit comments

Comments
 (0)