@@ -35,11 +35,10 @@ use datafusion_common::{
3535} ;
3636use datafusion_functions:: core:: getfield:: GetFieldFunc ;
3737use datafusion_physical_expr:: PhysicalExprSimplifier ;
38- use datafusion_physical_expr:: expressions:: CastColumnExpr ;
3938use datafusion_physical_expr:: projection:: { ProjectionExprs , Projector } ;
4039use datafusion_physical_expr:: {
4140 ScalarFunctionExpr ,
42- expressions:: { self , Column } ,
41+ expressions:: { self , CastExpr , Column } ,
4342} ;
4443use datafusion_physical_expr_common:: physical_expr:: PhysicalExpr ;
4544use itertools:: Itertools ;
@@ -440,7 +439,7 @@ impl DefaultPhysicalExprAdapterRewriter {
440439 // TODO: add optimization to move the cast from the column to literal expressions in the case of `col = 123`
441440 // since that's much cheaper to evaluate.
442441 // See https://github.com/apache/datafusion/issues/15780#issuecomment-2824716928
443- self . create_cast_column_expr ( resolved_column, physical_field, logical_field)
442+ self . create_cast_expr ( resolved_column, physical_field, logical_field)
444443 }
445444
446445 /// Resolves a logical column to the corresponding physical column and field.
@@ -476,12 +475,12 @@ impl DefaultPhysicalExprAdapterRewriter {
476475 ) ) )
477476 }
478477
479- /// Validates type compatibility and creates a CastColumnExpr if needed.
478+ /// Validates type compatibility and creates a field-aware CastExpr if needed.
480479 ///
481480 /// Checks whether the physical field can be cast to the logical field type,
482- /// handling both struct and scalar types. Returns a CastColumnExpr with the
483- /// appropriate configuration.
484- fn create_cast_column_expr (
481+ /// handling both struct and scalar types. Returns a CastExpr with the
482+ /// appropriate logical target field configuration.
483+ fn create_cast_expr (
485484 & self ,
486485 column : Column ,
487486 physical_field : FieldRef ,
@@ -513,9 +512,15 @@ impl DefaultPhysicalExprAdapterRewriter {
513512 }
514513 }
515514
516- let cast_expr = Arc :: new ( CastColumnExpr :: new (
515+ let physical_column_index = self . physical_file_schema . index_of ( column. name ( ) ) ?;
516+ let column = if column. index ( ) == physical_column_index {
517+ column
518+ } else {
519+ Column :: new_with_schema ( column. name ( ) , self . physical_file_schema . as_ref ( ) ) ?
520+ } ;
521+
522+ let cast_expr = Arc :: new ( CastExpr :: new_with_target_field (
517523 Arc :: new ( column) ,
518- physical_field,
519524 Arc :: new ( logical_field. clone ( ) ) ,
520525 None ,
521526 ) ) ;
@@ -669,7 +674,7 @@ mod tests {
669674 use arrow:: datatypes:: { DataType , Field , Fields , Schema , SchemaRef } ;
670675 use datafusion_common:: { Result , ScalarValue , assert_contains, record_batch} ;
671676 use datafusion_expr:: Operator ;
672- use datafusion_physical_expr:: expressions:: { Column , Literal , col, lit} ;
677+ use datafusion_physical_expr:: expressions:: { CastExpr , Column , Literal , col, lit} ;
673678 use datafusion_physical_expr_common:: physical_expr:: PhysicalExpr ;
674679 use itertools:: Itertools ;
675680 use std:: sync:: Arc ;
@@ -702,7 +707,7 @@ mod tests {
702707 let result = adapter. rewrite ( column_expr) . unwrap ( ) ;
703708
704709 // Should be wrapped in a cast expression
705- assert ! ( result. as_any( ) . downcast_ref:: <CastColumnExpr >( ) . is_some( ) ) ;
710+ assert ! ( result. as_any( ) . downcast_ref:: <CastExpr >( ) . is_some( ) ) ;
706711 }
707712
708713 #[ test]
@@ -723,8 +728,8 @@ mod tests {
723728 let result = adapter. rewrite ( Arc :: new ( Column :: new ( "a" , 0 ) ) ) ?;
724729 let cast = result
725730 . as_any ( )
726- . downcast_ref :: < CastColumnExpr > ( )
727- . expect ( "Expected CastColumnExpr " ) ;
731+ . downcast_ref :: < CastExpr > ( )
732+ . expect ( "Expected CastExpr " ) ;
728733
729734 assert_eq ! ( cast. target_field( ) . data_type( ) , & DataType :: Int64 ) ;
730735 assert ! ( !cast. target_field( ) . is_nullable( ) ) ;
@@ -736,8 +741,10 @@ mod tests {
736741 Some ( "1" )
737742 ) ;
738743
739- // Ensure the expression reports the logical nullability regardless of input schema
740- assert ! ( !result. nullable( physical_schema. as_ref( ) ) ?) ;
744+ // Runtime nullability follows the child expression, but the logical
745+ // target field nullability is still preserved via return_field().
746+ assert ! ( result. nullable( physical_schema. as_ref( ) ) ?) ;
747+ assert ! ( !result. return_field( physical_schema. as_ref( ) ) ?. is_nullable( ) ) ;
741748
742749 Ok ( ( ) )
743750 }
@@ -772,9 +779,8 @@ mod tests {
772779 println ! ( "Rewritten expression: {result}" ) ;
773780
774781 let expected = expressions:: BinaryExpr :: new (
775- Arc :: new ( CastColumnExpr :: new (
782+ Arc :: new ( CastExpr :: new_with_target_field (
776783 Arc :: new ( Column :: new ( "a" , 0 ) ) ,
777- Arc :: new ( Field :: new ( "a" , DataType :: Int32 , false ) ) ,
778784 Arc :: new ( Field :: new ( "a" , DataType :: Int64 , false ) ) ,
779785 None ,
780786 ) ) ,
@@ -860,17 +866,6 @@ mod tests {
860866
861867 let result = adapter. rewrite ( column_expr) . unwrap ( ) ;
862868
863- let physical_struct_fields: Fields = vec ! [
864- Field :: new( "id" , DataType :: Int32 , false ) ,
865- Field :: new( "name" , DataType :: Utf8 , true ) ,
866- ]
867- . into ( ) ;
868- let physical_field = Arc :: new ( Field :: new (
869- "data" ,
870- DataType :: Struct ( physical_struct_fields) ,
871- false ,
872- ) ) ;
873-
874869 let logical_struct_fields: Fields = vec ! [
875870 Field :: new( "id" , DataType :: Int64 , false ) ,
876871 Field :: new( "name" , DataType :: Utf8View , true ) ,
@@ -882,9 +877,8 @@ mod tests {
882877 false ,
883878 ) ) ;
884879
885- let expected = Arc :: new ( CastColumnExpr :: new (
880+ let expected = Arc :: new ( CastExpr :: new_with_target_field (
886881 Arc :: new ( Column :: new ( "data" , 0 ) ) ,
887- physical_field,
888882 logical_field,
889883 None ,
890884 ) ) as Arc < dyn PhysicalExpr > ;
@@ -1558,11 +1552,11 @@ mod tests {
15581552
15591553 let result = adapter. rewrite ( column_expr) . unwrap ( ) ;
15601554
1561- // Should be a CastColumnExpr
1555+ // Should be a CastExpr
15621556 let cast_expr = result
15631557 . as_any ( )
1564- . downcast_ref :: < CastColumnExpr > ( )
1565- . expect ( "Expected CastColumnExpr " ) ;
1558+ . downcast_ref :: < CastExpr > ( )
1559+ . expect ( "Expected CastExpr " ) ;
15661560
15671561 // Verify the inner column points to the correct physical index (1)
15681562 let inner_col = cast_expr
@@ -1581,7 +1575,7 @@ mod tests {
15811575 }
15821576
15831577 #[ test]
1584- fn test_create_cast_column_expr_uses_name_lookup_not_column_index ( ) {
1578+ fn test_create_cast_expr_uses_name_lookup_not_column_index ( ) {
15851579 // Physical schema has column `a` at index 1; index 0 is an incompatible type.
15861580 let physical_schema = Arc :: new ( Schema :: new ( vec ! [
15871581 Field :: new( "b" , DataType :: Binary , true ) ,
@@ -1601,7 +1595,7 @@ mod tests {
16011595 // Deliberately provide the wrong index for column `a`.
16021596 // Regression: this must still resolve against physical field `a` by name.
16031597 let transformed = rewriter
1604- . create_cast_column_expr (
1598+ . create_cast_expr (
16051599 Column :: new ( "a" , 0 ) ,
16061600 Arc :: new ( physical_schema. field_with_name ( "a" ) . unwrap ( ) . clone ( ) ) ,
16071601 logical_schema. field_with_name ( "a" ) . unwrap ( ) ,
@@ -1611,11 +1605,16 @@ mod tests {
16111605 let cast_expr = transformed
16121606 . data
16131607 . as_any ( )
1614- . downcast_ref :: < CastColumnExpr > ( )
1615- . expect ( "Expected CastColumnExpr " ) ;
1608+ . downcast_ref :: < CastExpr > ( )
1609+ . expect ( "Expected CastExpr " ) ;
16161610
1617- assert_eq ! ( cast_expr. input_field( ) . name( ) , "a" ) ;
1618- assert_eq ! ( cast_expr. input_field( ) . data_type( ) , & DataType :: Int32 ) ;
1611+ let inner_col = cast_expr
1612+ . expr ( )
1613+ . as_any ( )
1614+ . downcast_ref :: < Column > ( )
1615+ . expect ( "Expected inner Column" ) ;
1616+ assert_eq ! ( inner_col. name( ) , "a" ) ;
1617+ assert_eq ! ( inner_col. index( ) , 1 ) ;
16191618 assert_eq ! ( cast_expr. target_field( ) . data_type( ) , & DataType :: Int64 ) ;
16201619 }
16211620}
0 commit comments