@@ -215,12 +215,8 @@ pub(super) struct JoinLeftData {
215215 /// Shared atomic flag indicating if any probe partition saw data (for null-aware anti/mark joins)
216216 /// This is shared across all probe partitions to provide global knowledge
217217 pub ( super ) probe_side_non_empty : AtomicBool ,
218- /// Shared atomic flag indicating if any probe partition saw NULL in join keys (for null-aware anti joins)
218+ /// Shared atomic flag indicating if any probe partition saw NULL in join keys
219219 pub ( super ) probe_side_has_null : AtomicBool ,
220- /// Shared atomic flag indicating if any build partition saw NULL in join keys (for null-aware mark joins)
221- pub ( super ) build_side_has_nulls : AtomicBool ,
222- /// Not sure how to use this yet
223- pub ( super ) build_side_is_empty : AtomicBool ,
224220}
225221
226222impl JoinLeftData {
@@ -409,15 +405,15 @@ impl HashJoinExecBuilder {
409405 // Validate null_aware flag
410406 if exec. null_aware {
411407 let join_type = exec. join_type ( ) ;
412- if !matches ! ( join_type, JoinType :: LeftAnti ) {
408+ if !matches ! ( join_type, JoinType :: LeftAnti | JoinType :: LeftMark ) {
413409 return plan_err ! (
414- "null_aware can only be true for LeftAnti joins, got {join_type}"
410+ "null_aware can only be true for LeftAnti or LeftMark joins, got {join_type}"
415411 ) ;
416412 }
417413 let on = exec. on ( ) ;
418414 if on. len ( ) != 1 {
419415 return plan_err ! (
420- "null_aware anti join only supports single column join key, got {} columns" ,
416+ "null_aware joins only support single column join key, got {} columns" ,
421417 on. len( )
422418 ) ;
423419 }
@@ -2079,9 +2075,6 @@ async fn collect_left_input(
20792075 bounds = None ;
20802076 }
20812077
2082- let build_side_has_nulls = batch. columns ( ) . iter ( ) . any ( |col| col. null_count ( ) > 0 ) ;
2083- let build_side_is_empty = batch. num_rows ( ) == 0 ;
2084-
20852078 let data = JoinLeftData {
20862079 map,
20872080 batch,
@@ -2093,8 +2086,6 @@ async fn collect_left_input(
20932086 membership,
20942087 probe_side_non_empty : AtomicBool :: new ( false ) ,
20952088 probe_side_has_null : AtomicBool :: new ( false ) ,
2096- build_side_has_nulls : AtomicBool :: new ( build_side_has_nulls) ,
2097- build_side_is_empty : AtomicBool :: new ( build_side_is_empty) ,
20982089 } ;
20992090
21002091 Ok ( data)
@@ -6067,7 +6058,7 @@ mod tests {
60676058 Ok ( ( ) )
60686059 }
60696060
6070- /// Test that null_aware validation rejects non-LeftAnti join types
6061+ /// Test that null_aware validation rejects unsupported join types
60716062 #[ tokio:: test]
60726063 async fn test_null_aware_validation_wrong_join_type ( ) {
60736064 let left =
@@ -6098,7 +6089,7 @@ mod tests {
60986089 result
60996090 . unwrap_err( )
61006091 . to_string( )
6101- . contains( "null_aware can only be true for LeftAnti joins" )
6092+ . contains( "null_aware can only be true for LeftAnti or LeftMark joins" )
61026093 ) ;
61036094 }
61046095
@@ -6138,8 +6129,114 @@ mod tests {
61386129 result
61396130 . unwrap_err( )
61406131 . to_string( )
6141- . contains( "null_aware anti join only supports single column join key" )
6132+ . contains( "null_aware joins only support single column join key" )
6133+ ) ;
6134+ }
6135+
6136+ /// Test a null-aware LeftMark join (single key pair c1 = c2) when the probe side contains a NULL key.
6137+ /// Expected mark values, as pinned by the inline snapshot at the end of the test (blank cell = NULL):
6138+ /// - matched rows (c1 = 1) => true
6139+ /// - unmatched non-NULL rows (c1 = 4) => NULL
6140+ /// - NULL build keys (c1 = NULL) with non-empty probe side => NULL
6141+ #[ apply( hash_join_exec_configs) ]
6142+ #[ tokio:: test]
6143+ async fn test_null_aware_left_mark_probe_null ( batch_size : usize ) -> Result < ( ) > {
6144+ let task_ctx = prepare_task_ctx ( batch_size, false ) ; // NOTE(review): meaning of the `false` argument is not visible here — confirm against prepare_task_ctx
6145+
6146+ let left = build_table_two_cols ( // left input; its rows are the ones that appear in the output
6147+ ( "c1" , & vec ! [ Some ( 1 ) , Some ( 4 ) , None ] ) , // join keys: 1 (also present in c2), 4 (absent from c2), NULL
6148+ ( "dummy" , & vec ! [ Some ( 10 ) , Some ( 40 ) , Some ( 0 ) ] ) , // payload values that tell the rows apart in the snapshot
6149+ ) ;
6150+
6151+ let right = build_table_two_cols ( // right input; only its c2 column takes part in the join
6152+ ( "c2" , & vec ! [ Some ( 1 ) , Some ( 2 ) , None ] ) , // join keys: 1, 2 and one NULL
6153+ ( "dummy" , & vec ! [ Some ( 100 ) , Some ( 200 ) , Some ( 300 ) ] ) ,
6154+ ) ;
6155+
6156+ let on = vec ! [ ( // one key pair only: left.c1 joined against right.c2
6157+ Arc :: new( Column :: new_with_schema( "c1" , & left. schema( ) ) ?) as _,
6158+ Arc :: new( Column :: new_with_schema( "c2" , & right. schema( ) ) ?) as _,
6159+ ) ] ;
6160+
6161+ let join = HashJoinExec :: try_new (
6162+ left,
6163+ right,
6164+ on,
6165+ None ,
6166+ & JoinType :: LeftMark ,
6167+ None ,
6168+ PartitionMode :: CollectLeft ,
6169+ NullEquality :: NullEqualsNothing ,
6170+ true , // null_aware = true
6171+ ) ?;
6172+
6173+ let stream = join. execute ( 0 , task_ctx) ?; // execute partition 0 of the join
6174+ let batches = common:: collect ( stream) . await ?; // await all output batches
6175+
6176+ allow_duplicates ! { // NOTE(review): presumably needed because #[apply] runs this body once per batch_size case — confirm
6177+ assert_snapshot!( batches_to_sort_string( & batches) , @r"
6178+ +----+-------+------+
6179+ | c1 | dummy | mark |
6180+ +----+-------+------+
6181+ | | 0 | |
6182+ | 1 | 10 | true |
6183+ | 4 | 40 | |
6184+ +----+-------+------+
6185+ " ) ;
6186+ }
6187+
6188+ Ok ( ( ) )
6189+ }
6190+
6191+ /// Test a null-aware LeftMark join (single key pair c1 = c2) when the probe side has no rows.
6192+ /// Expected: all rows are marked false (not NULL), including the row whose build key c1 is NULL.
6193+ #[ apply( hash_join_exec_configs) ]
6194+ #[ tokio:: test]
6195+ async fn test_null_aware_left_mark_empty_probe ( batch_size : usize ) -> Result < ( ) > {
6196+ let task_ctx = prepare_task_ctx ( batch_size, false ) ; // NOTE(review): meaning of the `false` argument is not visible here — confirm against prepare_task_ctx
6197+
6198+ let left = build_table_two_cols ( // left input; its rows are the ones that appear in the output
6199+ ( "c1" , & vec ! [ Some ( 1 ) , None ] ) , // join keys: one non-NULL value and one NULL
6200+ ( "dummy" , & vec ! [ Some ( 10 ) , Some ( 0 ) ] ) , // payload values that tell the rows apart in the snapshot
61426201 ) ;
6202+
6203+ let right = build_table_two_cols ( // right input: both columns are built from empty vectors, i.e. zero rows
6204+ ( "c2" , & Vec :: < Option < i32 > > :: new ( ) ) ,
6205+ ( "dummy" , & Vec :: < Option < i32 > > :: new ( ) ) ,
6206+ ) ;
6207+
6208+ let on = vec ! [ ( // one key pair only: left.c1 joined against right.c2
6209+ Arc :: new( Column :: new_with_schema( "c1" , & left. schema( ) ) ?) as _,
6210+ Arc :: new( Column :: new_with_schema( "c2" , & right. schema( ) ) ?) as _,
6211+ ) ] ;
6212+
6213+ let join = HashJoinExec :: try_new (
6214+ left,
6215+ right,
6216+ on,
6217+ None ,
6218+ & JoinType :: LeftMark ,
6219+ None ,
6220+ PartitionMode :: CollectLeft ,
6221+ NullEquality :: NullEqualsNothing ,
6222+ true , // null_aware = true
6223+ ) ?;
6224+
6225+ let stream = join. execute ( 0 , task_ctx) ?; // execute partition 0 of the join
6226+ let batches = common:: collect ( stream) . await ?; // await all output batches
6227+
6228+ allow_duplicates ! { // NOTE(review): presumably needed because #[apply] runs this body once per batch_size case — confirm
6229+ assert_snapshot!( batches_to_sort_string( & batches) , @r"
6230+ +----+-------+-------+
6231+ | c1 | dummy | mark |
6232+ +----+-------+-------+
6233+ | | 0 | false |
6234+ | 1 | 10 | false |
6235+ +----+-------+-------+
6236+ " ) ;
6237+ }
6238+
6239+ Ok ( ( ) )
61436240 }
61446241
61456242 #[ test]
0 commit comments