Skip to content

Commit 96a3f32

Browse files
committed
Refactor cast expression handling in planner
Expose shared cast_with_target_field helper to validate and build field-aware CastExprs in one place. Update planner to directly call this helper, removing the need for temporary type-only casts. Add regression tests to cover standard casts, metadata-bearing casts, and same-type semantic-preserving casts.
1 parent 2e03cca commit 96a3f32

3 files changed

Lines changed: 73 additions & 17 deletions

File tree

datafusion/physical-expr/src/expressions/cast.rs

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,12 @@ impl CastExpr {
201201
}
202202
}
203203

204+
fn is_default_target_field(target_field: &FieldRef) -> bool {
205+
target_field.name().is_empty()
206+
&& target_field.is_nullable()
207+
&& target_field.metadata().is_empty()
208+
}
209+
204210
pub(crate) fn is_order_preserving_cast_family(
205211
source_type: &DataType,
206212
target_type: &DataType,
@@ -315,23 +321,57 @@ pub fn cast_with_options(
315321
input_schema: &Schema,
316322
cast_type: DataType,
317323
cast_options: Option<CastOptions<'static>>,
324+
) -> Result<Arc<dyn PhysicalExpr>> {
325+
cast_with_target_field(
326+
expr,
327+
input_schema,
328+
cast_type.into_nullable_field_ref(),
329+
cast_options,
330+
)
331+
}
332+
333+
/// Return a PhysicalExpression representing `expr` casted to `target_field`,
334+
/// preserving any explicit field semantics such as name, nullability, and
335+
/// metadata.
336+
pub fn cast_with_target_field(
337+
expr: Arc<dyn PhysicalExpr>,
338+
input_schema: &Schema,
339+
target_field: FieldRef,
340+
cast_options: Option<CastOptions<'static>>,
318341
) -> Result<Arc<dyn PhysicalExpr>> {
319342
let expr_type = expr.data_type(input_schema)?;
320-
if expr_type == cast_type {
321-
Ok(Arc::clone(&expr))
322-
} else if requires_nested_struct_cast(&expr_type, &cast_type) {
323-
if can_cast_named_struct_types(&expr_type, &cast_type) {
343+
let cast_type = target_field.data_type();
344+
if expr_type == *cast_type {
345+
if is_default_target_field(&target_field) {
346+
return Ok(Arc::clone(&expr));
347+
}
348+
349+
Ok(Arc::new(CastExpr::new_with_target_field(
350+
expr,
351+
target_field,
352+
cast_options,
353+
)))
354+
} else if requires_nested_struct_cast(&expr_type, cast_type) {
355+
if can_cast_named_struct_types(&expr_type, cast_type) {
324356
// Allow casts involving structs (including nested inside Lists, Dictionaries,
325357
// etc.) that pass name-based compatibility validation. This validation is
326358
// applied at planning time (now) to fail fast, rather than deferring errors
327359
// to execution time. The name-based casting logic will be executed at runtime
328360
// via ColumnarValue::cast_to.
329-
Ok(Arc::new(CastExpr::new(expr, cast_type, cast_options)))
361+
Ok(Arc::new(CastExpr::new_with_target_field(
362+
expr,
363+
target_field,
364+
cast_options,
365+
)))
330366
} else {
331367
not_impl_err!("Unsupported CAST from {expr_type} to {cast_type}")
332368
}
333-
} else if can_cast_types(&expr_type, &cast_type) {
334-
Ok(Arc::new(CastExpr::new(expr, cast_type, cast_options)))
369+
} else if can_cast_types(&expr_type, cast_type) {
370+
Ok(Arc::new(CastExpr::new_with_target_field(
371+
expr,
372+
target_field,
373+
cast_options,
374+
)))
335375
} else {
336376
not_impl_err!("Unsupported CAST from {expr_type} to {cast_type}")
337377
}

datafusion/physical-expr/src/expressions/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ pub use crate::aggregate::stats::StatsType;
4141

4242
pub use binary::{BinaryExpr, binary, similar_to};
4343
pub use case::{CaseExpr, case};
44-
pub use cast::{CastExpr, cast};
44+
pub use cast::{CastExpr, cast, cast_with_target_field};
4545
pub use cast_column::CastColumnExpr;
4646
pub use column::{Column, col, with_new_schema};
4747
pub use datafusion_expr::utils::format_state_name;

datafusion/physical-expr/src/planner.rs

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -289,17 +289,12 @@ pub fn create_physical_expr(
289289
Ok(expressions::case(expr, when_then_expr, else_expr)?)
290290
}
291291
Expr::Cast(Cast { expr, field }) => {
292-
let expr = create_physical_expr(expr, input_dfschema, execution_props)?;
293-
294-
// Reuse the standard CAST validation path, but preserve the logical
295-
// target field instead of lowering to a type-only physical cast.
296-
expressions::cast(Arc::clone(&expr), input_schema, field.data_type().clone())?;
297-
298-
Ok(Arc::new(expressions::CastExpr::new_with_target_field(
299-
expr,
292+
expressions::cast_with_target_field(
293+
create_physical_expr(expr, input_dfschema, execution_props)?,
294+
input_schema,
300295
Arc::clone(field),
301296
None,
302-
)))
297+
)
303298
}
304299
Expr::TryCast(TryCast { expr, field }) => {
305300
if !field.metadata().is_empty() {
@@ -497,6 +492,27 @@ mod tests {
497492
Ok(())
498493
}
499494

495+
#[test]
496+
fn test_cast_lowering_preserves_standard_cast_semantics() -> Result<()> {
497+
let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
498+
let df_schema = DFSchema::try_from(schema.clone())?;
499+
let cast_expr = Expr::Cast(Cast::new(Box::new(col("a")), DataType::Int64));
500+
501+
let physical =
502+
create_physical_expr(&cast_expr, &df_schema, &ExecutionProps::new())?;
503+
let cast = physical
504+
.as_any()
505+
.downcast_ref::<expressions::CastExpr>()
506+
.expect("planner should lower ordinary CAST to CastExpr");
507+
508+
assert_eq!(cast.cast_type(), &DataType::Int64);
509+
assert_eq!(physical.return_field(&schema)?.name(), "a");
510+
assert_eq!(physical.return_field(&schema)?.data_type(), &DataType::Int64);
511+
assert!(!physical.nullable(&schema)?);
512+
513+
Ok(())
514+
}
515+
500516
#[test]
501517
fn test_cast_lowering_preserves_same_type_field_semantics() -> Result<()> {
502518
let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);

0 commit comments

Comments
 (0)