1616// under the License.
1717
1818use parking_lot:: RwLock ;
19- use std:: { fmt:: Display , hash:: Hash , sync:: Arc } ;
19+ use std:: {
20+ fmt:: Display ,
21+ hash:: Hash ,
22+ sync:: {
23+ Arc , LazyLock ,
24+ atomic:: { AtomicU64 , Ordering } ,
25+ } ,
26+ } ;
2027use tokio:: sync:: watch;
2128
2229use crate :: PhysicalExpr ;
@@ -26,9 +33,8 @@ use datafusion_common::{
2633 tree_node:: { Transformed , TransformedResult , TreeNode } ,
2734} ;
2835use datafusion_expr:: ColumnarValue ;
29- use datafusion_physical_expr_common:: physical_expr:: {
30- DynHash , PhysicalExprId , expr_id_from_arc,
31- } ;
36+ use datafusion_physical_expr_common:: physical_expr:: DynHash ;
37+ use rand:: random;
3238
3339/// State of a dynamic filter, tracking both updates and completion.
3440#[ derive( Debug , Clone , Copy , PartialEq , Eq ) ]
@@ -59,6 +65,7 @@ impl FilterState {
5965/// [Dynamic Filters: Passing Information Between Operators During Execution for 25x Faster Queries blog]: https://datafusion.apache.org/blog/2025/09/10/dynamic-filters
6066#[ derive( Debug ) ]
6167pub struct DynamicFilterPhysicalExpr {
68+ expression_id : u64 ,
6269 /// The original children of this PhysicalExpr, if any.
6370 /// This is necessary because the dynamic filter may be initialized with a placeholder (e.g. `lit(true)`)
6471 /// and later remapped to the actual expressions that are being filtered.
@@ -93,6 +100,7 @@ struct Inner {
93100/// An atomic snapshot of a [`DynamicFilterPhysicalExpr`] used to reconstruct the expression during
94101/// serialization / deserialization.
95102pub struct DynamicFilterSnapshot {
103+ expression_id : u64 ,
96104 children : Vec < Arc < dyn PhysicalExpr > > ,
97105 remapped_children : Option < Vec < Arc < dyn PhysicalExpr > > > ,
98106 // Inner state.
@@ -103,13 +111,15 @@ pub struct DynamicFilterSnapshot {
103111
104112impl DynamicFilterSnapshot {
105113 pub fn new (
114+ expression_id : u64 ,
106115 children : Vec < Arc < dyn PhysicalExpr > > ,
107116 remapped_children : Option < Vec < Arc < dyn PhysicalExpr > > > ,
108117 generation : u64 ,
109118 inner_expr : Arc < dyn PhysicalExpr > ,
110119 is_complete : bool ,
111120 ) -> Self {
112121 Self {
122+ expression_id,
113123 children,
114124 remapped_children,
115125 generation,
@@ -122,6 +132,10 @@ impl DynamicFilterSnapshot {
122132 & self . children
123133 }
124134
135+ pub fn expression_id ( & self ) -> u64 {
136+ self . expression_id
137+ }
138+
125139 pub fn remapped_children ( & self ) -> Option < & [ Arc < dyn PhysicalExpr > ] > {
126140 self . remapped_children . as_deref ( )
127141 }
@@ -143,7 +157,8 @@ impl Display for DynamicFilterSnapshot {
143157 fn fmt ( & self , f : & mut std:: fmt:: Formatter < ' _ > ) -> std:: fmt:: Result {
144158 write ! (
145159 f,
146- "DynamicFilterSnapshot {{ children: {:?}, remapped_children: {:?}, generation: {}, inner_expr: {:?}, is_complete: {} }}" ,
160+ "DynamicFilterSnapshot {{ expression_id: {}, children: {:?}, remapped_children: {:?}, generation: {}, inner_expr: {:?}, is_complete: {} }}" ,
161+ self . expression_id,
147162 self . children,
148163 self . remapped_children,
149164 self . generation,
@@ -156,6 +171,7 @@ impl Display for DynamicFilterSnapshot {
156171impl From < DynamicFilterSnapshot > for DynamicFilterPhysicalExpr {
157172 fn from ( snapshot : DynamicFilterSnapshot ) -> Self {
158173 let DynamicFilterSnapshot {
174+ expression_id,
159175 children,
160176 remapped_children,
161177 generation,
@@ -171,6 +187,7 @@ impl From<DynamicFilterSnapshot> for DynamicFilterPhysicalExpr {
171187 let ( state_watch, _) = watch:: channel ( state) ;
172188
173189 Self {
190+ expression_id,
174191 children,
175192 remapped_children,
176193 inner : Arc :: new ( RwLock :: new ( Inner {
@@ -193,6 +210,7 @@ impl From<&DynamicFilterPhysicalExpr> for DynamicFilterSnapshot {
193210 ( inner. generation , Arc :: clone ( & inner. expr ) , inner. is_complete )
194211 } ;
195212 DynamicFilterSnapshot {
213+ expression_id : expr. expression_id ,
196214 children : expr. children . clone ( ) ,
197215 remapped_children : expr. remapped_children . clone ( ) ,
198216 generation,
@@ -252,6 +270,12 @@ impl Display for DynamicFilterPhysicalExpr {
252270}
253271
254272impl DynamicFilterPhysicalExpr {
273+ pub fn new_expression_id ( ) -> u64 {
274+ static NEXT_EXPRESSION_ID : LazyLock < AtomicU64 > =
275+ LazyLock :: new ( || AtomicU64 :: new ( random :: < u64 > ( ) ) ) ;
276+ NEXT_EXPRESSION_ID . fetch_add ( 1 , Ordering :: Relaxed )
277+ }
278+
255279 /// Create a new [`DynamicFilterPhysicalExpr`]
256280 /// from an initial expression and a list of children.
257281 /// The list of children is provided separately because
@@ -285,6 +309,7 @@ impl DynamicFilterPhysicalExpr {
285309 ) -> Self {
286310 let ( state_watch, _) = watch:: channel ( FilterState :: InProgress { generation : 1 } ) ;
287311 Self {
312+ expression_id : Self :: new_expression_id ( ) ,
288313 children,
289314 remapped_children : None , // Initially no remapped children
290315 inner : Arc :: new ( RwLock :: new ( Inner :: new ( inner) ) ) ,
@@ -476,6 +501,7 @@ impl PhysicalExpr for DynamicFilterPhysicalExpr {
476501 children : Vec < Arc < dyn PhysicalExpr > > ,
477502 ) -> Result < Arc < dyn PhysicalExpr > > {
478503 Ok ( Arc :: new ( Self {
504+ expression_id : self . expression_id ,
479505 children : self . children . clone ( ) ,
480506 remapped_children : Some ( children) ,
481507 inner : Arc :: clone ( & self . inner ) ,
@@ -559,13 +585,8 @@ impl PhysicalExpr for DynamicFilterPhysicalExpr {
559585 self . inner . read ( ) . generation
560586 }
561587
562- fn expr_id ( self : Arc < Self > , salt : & [ u64 ] ) -> Option < PhysicalExprId > {
563- Some ( PhysicalExprId :: new (
564- // Capture the outer arc, which contains children and the expr.
565- expr_id_from_arc ( & self , salt) ,
566- // Capture the inner arc, which contains the expr only.
567- Some ( expr_id_from_arc ( & self . inner , salt) ) ,
568- ) )
588+ fn expression_id ( & self ) -> Option < u64 > {
589+ Some ( self . expression_id )
569590 }
570591}
571592
@@ -1033,7 +1054,7 @@ mod test {
10331054 }
10341055
10351056 #[ tokio:: test]
1036- async fn test_expr_id ( ) {
1057+ async fn test_expression_id ( ) {
10371058 let source_schema =
10381059 Arc :: new ( Schema :: new ( vec ! [ Field :: new( "a" , DataType :: Int32 , false ) ] ) ) ;
10391060 let col_a = col ( "a" , & source_schema) . unwrap ( ) ;
@@ -1056,38 +1077,24 @@ mod test {
10561077 )
10571078 . expect ( "reassign_expr_columns should succeed" ) ;
10581079
1059- let derived_expr_id = Arc :: clone ( & derived)
1060- . expr_id ( & [ ] )
1061- . expect ( "combined filter should have an expr_id " ) ;
1062- let source_expr_id = Arc :: clone ( & source)
1063- . expr_id ( & [ ] )
1064- . expect ( "source filter should have an expr_id " ) ;
1065- let source_clone_expr_id = Arc :: clone ( & source_clone)
1066- . expr_id ( & [ ] )
1067- . expect ( "source clone should have an expr_id " ) ;
1080+ let derived_expression_id = derived
1081+ . expression_id ( )
1082+ . expect ( "derived filter should have an expression id " ) ;
1083+ let source_expression_id = source
1084+ . expression_id ( )
1085+ . expect ( "source filter should have an expression id " ) ;
1086+ let source_clone_expression_id = source_clone
1087+ . expression_id ( )
1088+ . expect ( "source clone should have an expression id " ) ;
10681089
10691090 assert_eq ! (
1070- source_clone_expr_id. exact( ) ,
1071- source_expr_id. exact( ) ,
1072- "cloned filter should have the same exact id because the children are the same" ,
1073- ) ;
1074-
1075- assert_eq ! (
1076- source_clone_expr_id. shallow( ) ,
1077- source_expr_id. shallow( ) ,
1078- "cloned filter should have the same shallow id because the exprs are the same" ,
1079- ) ;
1080-
1081- assert_ne ! (
1082- derived_expr_id. exact( ) ,
1083- source_expr_id. exact( ) ,
1084- "filters should have different exact ids because the children are different" ,
1091+ source_clone_expression_id, source_expression_id,
1092+ "cloned filter should preserve its expression id" ,
10851093 ) ;
10861094
10871095 assert_eq ! (
1088- derived_expr_id. shallow( ) ,
1089- source_expr_id. shallow( ) ,
1090- "filters should have the same shallow id because the exprs are the same" ,
1096+ derived_expression_id, source_expression_id,
1097+ "derived filters should carry forward the source expression id" ,
10911098 ) ;
10921099 }
10931100}
0 commit comments