@@ -72,6 +72,37 @@ physical_plan
727204)------DataSourceExec: partitions=1, partition_sizes=[1]
737305)------DataSourceExec: partitions=1, partition_sizes=[1]
7474
75+ query IT rowsort
76+ SELECT id, value
77+ FROM outer_table
78+ WHERE (id NOT IN (SELECT id FROM inner_table_with_null)) IS NULL;
79+ ----
80+ 1 a
81+ 3 c
82+ 4 d
83+ NULL e
84+
85+ query TT
86+ EXPLAIN
87+ SELECT value
88+ FROM outer_table
89+ WHERE (id NOT IN (SELECT id FROM inner_table_with_null)) IS NULL;
90+ ----
91+ logical_plan
92+ 01)Projection: outer_table.value
93+ 02)--Filter: NOT __correlated_sq_1.mark IS NULL
94+ 03)----Projection: outer_table.value, __correlated_sq_1.mark
95+ 04)------LeftMark Join: outer_table.id = __correlated_sq_1.id
96+ 05)--------TableScan: outer_table projection=[id, value]
97+ 06)--------SubqueryAlias: __correlated_sq_1
98+ 07)----------TableScan: inner_table_with_null projection=[id]
99+ physical_plan
100+ 01)FilterExec: NOT mark@1 IS NULL, projection=[value@0]
101+ 02)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
102+ 03)----HashJoinExec: mode=CollectLeft, join_type=LeftMark, on=[(id@0, id@0)], projection=[value@1, mark@2]
103+ 04)------DataSourceExec: partitions=1, partition_sizes=[1]
104+ 05)------DataSourceExec: partitions=1, partition_sizes=[1]
105+
75106query T rowsort
76107SELECT value
77108FROM outer_table
121152d
122153e
123154
155+ query TT
156+ EXPLAIN
157+ SELECT value
158+ FROM outer_table
159+ WHERE (id NOT IN (SELECT id FROM empty_table)) IS TRUE;
160+ ----
161+ logical_plan
162+ 01)Projection: outer_table.value
163+ 02)--Filter: NOT __correlated_sq_1.mark IS TRUE
164+ 03)----Projection: outer_table.value, __correlated_sq_1.mark
165+ 04)------LeftMark Join: outer_table.id = __correlated_sq_1.id
166+ 05)--------TableScan: outer_table projection=[id, value]
167+ 06)--------SubqueryAlias: __correlated_sq_1
168+ 07)----------TableScan: empty_table projection=[id]
169+ physical_plan
170+ 01)FilterExec: NOT mark@1 IS NOT DISTINCT FROM true, projection=[value@0]
171+ 02)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
172+ 03)----HashJoinExec: mode=CollectLeft, join_type=LeftMark, on=[(id@0, id@0)], projection=[value@1, mark@2]
173+ 04)------DataSourceExec: partitions=1, partition_sizes=[1]
174+ 05)------DataSourceExec: partitions=1, partition_sizes=[0]
175+
124176###################################
125177## Sort-merge join null-aware mark
178+ ## As of this work, sort-merge join does not support null-aware semantics, so the queries below still end up using a hash join.
126179###################################
127180
128181statement ok
@@ -149,12 +202,20 @@ logical_plan
14920206)--------TableScan: inner_table_with_null projection=[id]
150203physical_plan
15120401)FilterExec: NOT mark@2 IS NULL, projection=[id@0, value@1]
152- 02)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1, maintains_sort_order=true
153- 03)----SortMergeJoinExec: join_type=LeftMark, on=[(id@0, id@0)]
154- 04)------SortExec: expr=[id@0 ASC], preserve_partitioning=[false]
155- 05)--------DataSourceExec: partitions=1, partition_sizes=[1]
156- 06)------SortExec: expr=[id@0 ASC], preserve_partitioning=[false]
157- 07)--------DataSourceExec: partitions=1, partition_sizes=[1]
205+ 02)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
206+ 03)----HashJoinExec: mode=CollectLeft, join_type=LeftMark, on=[(id@0, id@0)]
207+ 04)------DataSourceExec: partitions=1, partition_sizes=[1]
208+ 05)------DataSourceExec: partitions=1, partition_sizes=[1]
209+
210+ query IT rowsort
211+ SELECT id, value
212+ FROM outer_table
213+ WHERE (id NOT IN (SELECT id FROM inner_table_with_null)) IS NULL;
214+ ----
215+ 1 a
216+ 3 c
217+ 4 d
218+ NULL e
158219
159220query TT
160221EXPLAIN
@@ -173,12 +234,9 @@ logical_plan
173234physical_plan
17423501)FilterExec: NOT mark@1 IS NULL, projection=[value@0]
17523602)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
176- 03)----ProjectionExec: expr=[value@1 as value, mark@2 as mark]
177- 04)------SortMergeJoinExec: join_type=LeftMark, on=[(id@0, id@0)]
178- 05)--------SortExec: expr=[id@0 ASC], preserve_partitioning=[false]
179- 06)----------DataSourceExec: partitions=1, partition_sizes=[1]
180- 07)--------SortExec: expr=[id@0 ASC], preserve_partitioning=[false]
181- 08)----------DataSourceExec: partitions=1, partition_sizes=[1]
237+ 03)----HashJoinExec: mode=CollectLeft, join_type=LeftMark, on=[(id@0, id@0)], projection=[value@1, mark@2]
238+ 04)------DataSourceExec: partitions=1, partition_sizes=[1]
239+ 05)------DataSourceExec: partitions=1, partition_sizes=[1]
182240
183241query T rowsort
184242SELECT value
@@ -197,6 +255,27 @@ WHERE (id NOT IN (SELECT id FROM inner_table_no_null)) IS NULL;
197255----
198256e
199257
258+ query TT
259+ EXPLAIN
260+ SELECT value
261+ FROM outer_table
262+ WHERE (id NOT IN (SELECT id FROM inner_table_no_null)) IS NULL;
263+ ----
264+ logical_plan
265+ 01)Projection: outer_table.value
266+ 02)--Filter: NOT __correlated_sq_1.mark IS NULL
267+ 03)----Projection: outer_table.value, __correlated_sq_1.mark
268+ 04)------LeftMark Join: outer_table.id = __correlated_sq_1.id
269+ 05)--------TableScan: outer_table projection=[id, value]
270+ 06)--------SubqueryAlias: __correlated_sq_1
271+ 07)----------TableScan: inner_table_no_null projection=[id]
272+ physical_plan
273+ 01)FilterExec: NOT mark@1 IS NULL, projection=[value@0]
274+ 02)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
275+ 03)----HashJoinExec: mode=CollectLeft, join_type=LeftMark, on=[(id@0, id@0)], projection=[value@1, mark@2]
276+ 04)------DataSourceExec: partitions=1, partition_sizes=[1]
277+ 05)------DataSourceExec: partitions=1, partition_sizes=[1]
278+
200279####################################
201280## Nested loop mark join with NULLs
202281####################################
0 commit comments