diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 0609109ec6e58..d23c562672046 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -234,11 +234,11 @@ fn has_subquery(expr: &Expr) -> bool { /// /// ```text /// Projection: t1.a, t1.b -/// LeftSemi Join: Filter: t1.a = __correlated_sq_1.a AND t1.b = __correlated_sq_1.b AND t1.c > __correlated_sq_1.c -/// TableScan: t1 +/// RightSemi Join: Filter: t1.a = __correlated_sq_1.a AND t1.b = __correlated_sq_1.b AND t1.c > __correlated_sq_1.c /// SubqueryAlias: __correlated_sq_1 /// Projection: t2.a, t2.b, t2.c /// TableScan: t2 +/// TableScan: t1 /// ``` /// /// Given another query like: @@ -248,11 +248,11 @@ fn has_subquery(expr: &Expr) -> bool { /// /// ```text /// Projection: t1.id -/// LeftSemi Join: Filter: t1.id = __correlated_sq_1.id -/// TableScan: t1 +/// RightSemi Join: Filter: t1.id = __correlated_sq_1.id /// SubqueryAlias: __correlated_sq_1 /// Projection: t2.id /// TableScan: t2 +/// TableScan: t1 /// ``` fn build_join_top( query_info: &SubqueryInfo, @@ -275,7 +275,7 @@ fn build_join_top( let join_type = match query_info.negated { true => JoinType::LeftAnti, - false => JoinType::LeftSemi, + false => JoinType::RightSemi, }; let subquery = query_info.query.subquery.as_ref(); let subquery_alias = alias.next("__correlated_sq"); @@ -486,6 +486,12 @@ fn build_join( true, // null_aware )? .build()? + } else if join_type == JoinType::RightSemi { + // RightSemi outputs rows from the right input; put the outer query on the + // right so the join result still contains the outer rows. + LogicalPlanBuilder::from(sub_query_alias) + .join_on(left.clone(), join_type, Some(join_filter))? + .build()? } else { LogicalPlanBuilder::from(left.clone()) .join_on(sub_query_alias, join_type, Some(join_filter))? @@ -587,15 +593,15 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: test.b = __correlated_sq_2.c [a:UInt32, b:UInt32, c:UInt32] - LeftSemi Join: Filter: test.c = __correlated_sq_1.c [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] - SubqueryAlias: __correlated_sq_1 [c:UInt32] - Projection: sq_1.c [c:UInt32] - TableScan: sq_1 [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.b = __correlated_sq_2.c [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_2 [c:UInt32] Projection: sq_2.c [c:UInt32] TableScan: sq_2 [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.c = __correlated_sq_1.c [a:UInt32, b:UInt32, c:UInt32] + SubqueryAlias: __correlated_sq_1 [c:UInt32] + Projection: sq_1.c [c:UInt32] + TableScan: sq_1 [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -620,11 +626,11 @@ mod tests { @r" Projection: test.b [b:UInt32] Filter: test.a = UInt32(1) AND test.b < UInt32(30) [a:UInt32, b:UInt32, c:UInt32] - LeftSemi Join: Filter: test.c = __correlated_sq_1.c [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.c = __correlated_sq_1.c [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [c:UInt32] Projection: sq.c [c:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -648,15 +654,15 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: test.b = __correlated_sq_2.a [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.b = __correlated_sq_2.a [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_2 [a:UInt32] Projection: sq.a [a:UInt32] - LeftSemi Join: Filter: sq.a = __correlated_sq_1.c [a:UInt32, b:UInt32, c:UInt32] - TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: sq.a = __correlated_sq_1.c [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [c:UInt32] Projection: sq_nested.c [c:UInt32] TableScan: sq_nested [a:UInt32, b:UInt32, c:UInt32] + TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -687,15 +693,15 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_2.o_custkey [c_custkey:Int64, c_name:Utf8] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] - SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] - Projection: orders.o_custkey [o_custkey:Int64] - TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_2.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_2 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] + SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] + Projection: orders.o_custkey [o_custkey:Int64] + TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -735,15 +741,15 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_2.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_2.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_2 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] - LeftSemi Join: Filter: orders.o_orderkey = __correlated_sq_1.l_orderkey [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] - TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + RightSemi Join: Filter: orders.o_orderkey = __correlated_sq_1.l_orderkey [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] SubqueryAlias: __correlated_sq_1 [l_orderkey:Int64] Projection: lineitem.l_orderkey [l_orderkey:Int64] TableScan: lineitem [l_orderkey:Int64, l_partkey:Int64, l_suppkey:Int64, l_linenumber:Int32, l_quantity:Float64, l_extendedprice:Float64] + TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -771,12 +777,12 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] Filter: orders.o_orderkey = Int32(1) [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -803,11 +809,11 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -831,12 +837,12 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] Filter: orders.o_custkey = orders.o_custkey [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -863,11 +869,11 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey AND customer.c_custkey != __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey AND customer.c_custkey != __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -894,11 +900,11 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey AND customer.c_custkey < __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey AND customer.c_custkey < __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -926,11 +932,11 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey AND (customer.c_custkey = __correlated_sq_1.o_custkey OR __correlated_sq_1.o_orderkey = Int32(1)) [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey AND (customer.c_custkey = __correlated_sq_1.o_custkey OR __correlated_sq_1.o_orderkey = Int32(1)) [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64, o_orderkey:Int64] Projection: orders.o_custkey, orders.o_orderkey [o_custkey:Int64, o_orderkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -980,11 +986,11 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey + Int32(1) = __correlated_sq_1.o_custkey AND customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey + Int32(1) = __correlated_sq_1.o_custkey AND customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1011,11 +1017,11 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.orders.o_custkey + Int32(1) AND customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.orders.o_custkey + Int32(1) AND customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [orders.o_custkey + Int32(1):Int64, o_custkey:Int64] Projection: orders.o_custkey + Int32(1), orders.o_custkey [orders.o_custkey + Int32(1):Int64, o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1075,11 +1081,11 @@ mod tests { @r" Projection: customer.c_custkey [c_custkey:Int64] Filter: customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1103,11 +1109,11 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: test.c = __correlated_sq_1.c AND test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.c = __correlated_sq_1.c AND test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [c:UInt32, a:UInt32] Projection: sq.c, sq.a [c:UInt32, a:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1125,11 +1131,11 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: test.c = __correlated_sq_1.c [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.c = __correlated_sq_1.c [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [c:UInt32] Projection: sq.c [c:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1192,11 +1198,11 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: test.c = __correlated_sq_1.c [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.c = __correlated_sq_1.c [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [c:UInt32] Projection: sq.c [c:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1219,11 +1225,11 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq.c * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq.c * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [sq.c * UInt32(2):UInt32] Projection: sq.c * UInt32(2) [sq.c * UInt32(2):UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1251,12 +1257,12 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq.c * UInt32(2) AND test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq.c * UInt32(2) AND test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [sq.c * UInt32(2):UInt32, a:UInt32] Projection: sq.c * UInt32(2), sq.a [sq.c * UInt32(2):UInt32, a:UInt32] Filter: sq.a + UInt32(1) = sq.b [a:UInt32, b:UInt32, c:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1285,12 +1291,12 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq.c * UInt32(2) AND test.a + test.b = __correlated_sq_1.a + __correlated_sq_1.b [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq.c * UInt32(2) AND test.a + test.b = __correlated_sq_1.a + __correlated_sq_1.b [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [sq.c * UInt32(2):UInt32, a:UInt32, b:UInt32] Projection: sq.c * UInt32(2), sq.a, sq.b [sq.c * UInt32(2):UInt32, a:UInt32, b:UInt32] Filter: sq.a + UInt32(1) = sq.b [a:UInt32, b:UInt32, c:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1326,15 +1332,15 @@ mod tests { @r" Projection: test.b [b:UInt32] Filter: test.c > UInt32(1) [a:UInt32, b:UInt32, c:UInt32] - LeftSemi Join: Filter: test.c * UInt32(2) = __correlated_sq_2.sq2.c * UInt32(2) AND test.a > __correlated_sq_2.a [a:UInt32, b:UInt32, c:UInt32] - LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq1.c * UInt32(2) AND test.a > __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] - SubqueryAlias: __correlated_sq_1 [sq1.c * UInt32(2):UInt32, a:UInt32] - Projection: sq1.c * UInt32(2), sq1.a [sq1.c * UInt32(2):UInt32, a:UInt32] - TableScan: sq1 [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.c * UInt32(2) = __correlated_sq_2.sq2.c * UInt32(2) AND test.a > __correlated_sq_2.a [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_2 [sq2.c * UInt32(2):UInt32, a:UInt32] Projection: sq2.c * UInt32(2), sq2.a [sq2.c * UInt32(2):UInt32, a:UInt32] TableScan: sq2 [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq1.c * UInt32(2) AND test.a > __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] + SubqueryAlias: __correlated_sq_1 [sq1.c * UInt32(2):UInt32, a:UInt32] + Projection: sq1.c * UInt32(2), sq1.a [sq1.c * UInt32(2):UInt32, a:UInt32] + TableScan: sq1 [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1358,12 +1364,12 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: test.a = __correlated_sq_1.c [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.a = __correlated_sq_1.c [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [c:UInt32] Projection: test.c [c:UInt32] Filter: test.a > test.b [a:UInt32, b:UInt32, c:UInt32] TableScan: test [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1390,15 +1396,15 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: __correlated_sq_2.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8] - LeftSemi Join: Filter: __correlated_sq_1.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] - SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] - Projection: orders.o_custkey [o_custkey:Int64] - TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + RightSemi Join: Filter: __correlated_sq_2.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_2 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + RightSemi Join: Filter: __correlated_sq_1.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8] + SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] + Projection: orders.o_custkey [o_custkey:Int64] + TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1437,15 +1443,15 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: __correlated_sq_2.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: __correlated_sq_2.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_2 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] - LeftSemi Join: Filter: __correlated_sq_1.l_orderkey = orders.o_orderkey [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] - TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + RightSemi Join: Filter: __correlated_sq_1.l_orderkey = orders.o_orderkey [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] SubqueryAlias: __correlated_sq_1 [l_orderkey:Int64] Projection: lineitem.l_orderkey [l_orderkey:Int64] TableScan: lineitem [l_orderkey:Int64, l_partkey:Int64, l_suppkey:Int64, l_linenumber:Int32, l_quantity:Float64, l_extendedprice:Float64] + TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1473,12 +1479,12 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] Filter: orders.o_orderkey = Int32(1) [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1502,11 +1508,11 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = UInt32(1) [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = UInt32(1) [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1530,12 +1536,12 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: Boolean(true) [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: Boolean(true) [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] Filter: orders.o_custkey = orders.o_custkey [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1562,11 +1568,11 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey != __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey != __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1593,11 +1599,11 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey < __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey < __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1625,11 +1631,11 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey OR __correlated_sq_1.o_orderkey = Int32(1) [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey OR __correlated_sq_1.o_orderkey = Int32(1) [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64, o_orderkey:Int64] Projection: orders.o_custkey, orders.o_orderkey [o_custkey:Int64, o_orderkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1655,10 +1661,10 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1685,11 +1691,11 @@ mod tests { plan, @r" Projection: customer.c_custkey [c_custkey:Int64] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [orders.o_custkey + Int32(1):Int64, o_custkey:Int64] Projection: orders.o_custkey + Int32(1), orders.o_custkey [orders.o_custkey + Int32(1):Int64, o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1716,11 +1722,11 @@ mod tests { @r" Projection: customer.c_custkey [c_custkey:Int64] Filter: customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8] - LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] - TableScan: customer [c_custkey:Int64, c_name:Utf8] + RightSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8] SubqueryAlias: __correlated_sq_1 [o_custkey:Int64] Projection: orders.o_custkey [o_custkey:Int64] TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] + TableScan: customer [c_custkey:Int64, c_name:Utf8] " ) } @@ -1775,11 +1781,11 @@ mod tests { plan, @r" Projection: test.c [c:UInt32] - LeftSemi Join: Filter: test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [c:UInt32, a:UInt32] Projection: sq.c, sq.a [c:UInt32, a:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1797,11 +1803,11 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: Boolean(true) [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: Boolean(true) [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [c:UInt32] Projection: sq.c [c:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1857,15 +1863,15 @@ mod tests { @r" Projection: test.b [b:UInt32] Filter: test.c > UInt32(1) [a:UInt32, b:UInt32, c:UInt32] - LeftSemi Join: Filter: test.a = __correlated_sq_2.a [a:UInt32, b:UInt32, c:UInt32] - LeftSemi Join: Filter: test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] - SubqueryAlias: __correlated_sq_1 [c:UInt32, a:UInt32] - Projection: sq1.c, sq1.a [c:UInt32, a:UInt32] - TableScan: sq1 [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.a = __correlated_sq_2.a [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_2 [c:UInt32, a:UInt32] Projection: sq2.c, sq2.a [c:UInt32, a:UInt32] TableScan: sq2 [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32] + SubqueryAlias: __correlated_sq_1 [c:UInt32, a:UInt32] + Projection: sq1.c, sq1.a [c:UInt32, a:UInt32] + TableScan: sq1 [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1890,11 +1896,11 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [UInt32(1):UInt32, a:UInt32] Projection: UInt32(1), sq.a [UInt32(1):UInt32, a:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1918,12 +1924,12 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: Boolean(true) [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: Boolean(true) [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [c:UInt32] Projection: test.c [c:UInt32] Filter: test.a > test.b [a:UInt32, b:UInt32, c:UInt32] TableScan: test [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1949,12 +1955,12 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [c:UInt32, a:UInt32] Distinct: [c:UInt32, a:UInt32] Projection: sq.c, sq.a [c:UInt32, a:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -1980,12 +1986,12 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [sq.b + sq.c:UInt32, a:UInt32] Distinct: [sq.b + sq.c:UInt32, a:UInt32] Projection: sq.b + sq.c, sq.a [sq.b + sq.c:UInt32, a:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -2011,12 +2017,12 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [UInt32(1):UInt32, c:UInt32, a:UInt32] Distinct: [UInt32(1):UInt32, c:UInt32, a:UInt32] Projection: UInt32(1), sq.c, sq.a [UInt32(1):UInt32, c:UInt32, a:UInt32] TableScan: sq [a:UInt32, b:UInt32, c:UInt32] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -2046,11 +2052,11 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: Boolean(true) [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: Boolean(true) [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [arr:Int32;N] Unnest: lists[sq.arr|depth=1] structs[] [arr:Int32;N] TableScan: sq [arr:List(Int32);N] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -2081,11 +2087,11 @@ mod tests { plan, @r" Projection: test.b [b:UInt32] - LeftSemi Join: Filter: __correlated_sq_1.a = test.b [a:UInt32, b:UInt32, c:UInt32] - TableScan: test [a:UInt32, b:UInt32, c:UInt32] + RightSemi Join: Filter: __correlated_sq_1.a = test.b [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [a:UInt32;N] Unnest: lists[sq.a|depth=1] structs[] [a:UInt32;N] TableScan: sq [a:List(UInt32);N] + TableScan: test [a:UInt32, b:UInt32, c:UInt32] " ) } @@ -2115,11 +2121,11 @@ mod tests { plan, @r" Projection: TEST_A.B [B:UInt32] - LeftSemi Join: Filter: __correlated_sq_1.A = TEST_A.A [A:UInt32, B:UInt32] - TableScan: TEST_A [A:UInt32, B:UInt32] + RightSemi Join: Filter: __correlated_sq_1.A = TEST_A.A [A:UInt32, B:UInt32] SubqueryAlias: __correlated_sq_1 [Int32(1):Int32, A:UInt32] Projection: Int32(1), TEST_B.A [Int32(1):Int32, A:UInt32] TableScan: TEST_B [A:UInt32, B:UInt32] + TableScan: TEST_A [A:UInt32, B:UInt32] " ) } diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index ad7427acf92ac..fa7eb137a6dcb 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -210,13 +210,13 @@ fn semi_join_with_join_filter() -> Result<()> { format!("{plan}"), @r" Projection: test.col_utf8 - LeftSemi Join: test.col_int32 = __correlated_sq_1.col_int32 Filter: test.col_uint32 != __correlated_sq_1.col_uint32 - Filter: test.col_int32 IS NOT NULL - TableScan: test projection=[col_int32, col_uint32, col_utf8] + RightSemi Join: __correlated_sq_1.col_int32 = test.col_int32 Filter: test.col_uint32 != __correlated_sq_1.col_uint32 SubqueryAlias: __correlated_sq_1 SubqueryAlias: t2 Filter: test.col_int32 IS NOT NULL TableScan: test projection=[col_int32, col_uint32] + Filter: test.col_int32 IS NOT NULL + TableScan: test projection=[col_int32, col_uint32, col_utf8] " ); Ok(()) @@ -254,14 +254,14 @@ fn where_exists_distinct() -> Result<()> { assert_snapshot!( format!("{plan}"), @r" - LeftSemi Join: test.col_int32 = __correlated_sq_1.col_int32 - Filter: test.col_int32 IS NOT NULL - TableScan: test projection=[col_int32] + RightSemi Join: __correlated_sq_1.col_int32 = test.col_int32 SubqueryAlias: __correlated_sq_1 Aggregate: groupBy=[[t2.col_int32]], aggr=[[]] SubqueryAlias: t2 Filter: test.col_int32 IS NOT NULL TableScan: test projection=[col_int32] + Filter: test.col_int32 IS NOT NULL + TableScan: test projection=[col_int32] " ); @@ -518,14 +518,14 @@ fn select_correlated_predicate_subquery_with_uppercase_ident() { assert_snapshot!( format!("{plan}"), @r" - LeftSemi Join: test.col_int32 = __correlated_sq_1.COL_INT32 - Filter: test.col_int32 IS NOT NULL - TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64, col_ts_nano_none, col_ts_nano_utc] + RightSemi Join: __correlated_sq_1.COL_INT32 = test.col_int32 SubqueryAlias: __correlated_sq_1 SubqueryAlias: T1 Projection: test.col_int32 AS COL_INT32 Filter: test.col_int32 IS NOT NULL TableScan: test projection=[col_int32] + Filter: test.col_int32 IS NOT NULL + TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64, col_ts_nano_none, col_ts_nano_utc] " ); } diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index e9c1c0245d1c8..4449b98c64f58 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -1022,13 +1022,13 @@ query TT explain with cte as (select * from person) SELECT * FROM person WHERE EXISTS (SELECT * FROM cte WHERE id = person.id); ---- logical_plan -01)LeftSemi Join: person.id = __correlated_sq_1.id -02)--TableScan: person projection=[id, name] -03)--SubqueryAlias: __correlated_sq_1 -04)----SubqueryAlias: cte -05)------TableScan: person projection=[id] +01)RightSemi Join: __correlated_sq_1.id = person.id +02)--SubqueryAlias: __correlated_sq_1 +03)----SubqueryAlias: cte +04)------TableScan: person projection=[id] +05)--TableScan: person projection=[id, name] physical_plan -01)HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(id@0, id@0)] +01)HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(id@0, id@0)] 02)--DataSourceExec: partitions=1, partition_sizes=[0] 03)--DataSourceExec: partitions=1, partition_sizes=[0] diff --git a/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt b/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt index 179a611d37e1f..98d62b3f96e81 100644 --- a/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt +++ b/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt @@ -275,12 +275,12 @@ FROM left_parquet l WHERE l.id IN (SELECT r.id FROM right_parquet r); ---- logical_plan -01)LeftSemi Join: l.id = __correlated_sq_1.id -02)--SubqueryAlias: l -03)----TableScan: left_parquet projection=[id, data] -04)--SubqueryAlias: __correlated_sq_1 -05)----SubqueryAlias: r -06)------TableScan: right_parquet projection=[id] +01)RightSemi Join: __correlated_sq_1.id = l.id +02)--SubqueryAlias: __correlated_sq_1 +03)----SubqueryAlias: r +04)------TableScan: right_parquet projection=[id] +05)--SubqueryAlias: l +06)----TableScan: left_parquet projection=[id, data] physical_plan 01)HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(id@0, id@0)] 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id], file_type=parquet @@ -295,12 +295,12 @@ FROM right_parquet r WHERE r.id IN (SELECT l.id FROM left_parquet l); ---- logical_plan -01)LeftSemi Join: r.id = __correlated_sq_1.id -02)--SubqueryAlias: r -03)----TableScan: right_parquet projection=[id, info] -04)--SubqueryAlias: __correlated_sq_1 -05)----SubqueryAlias: l -06)------TableScan: left_parquet projection=[id] +01)RightSemi Join: __correlated_sq_1.id = r.id +02)--SubqueryAlias: __correlated_sq_1 +03)----SubqueryAlias: l +04)------TableScan: left_parquet projection=[id] +05)--SubqueryAlias: r +06)----TableScan: right_parquet projection=[id, info] physical_plan 01)HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(id@0, id@0)] 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet @@ -327,12 +327,12 @@ ORDER BY l.data DESC; ---- logical_plan 01)Sort: l.data DESC NULLS FIRST -02)--LeftSemi Join: l.id = __correlated_sq_1.id -03)----SubqueryAlias: l -04)------TableScan: left_parquet projection=[id, data] -05)----SubqueryAlias: __correlated_sq_1 -06)------SubqueryAlias: r -07)--------TableScan: right_parquet projection=[id] +02)--RightSemi Join: __correlated_sq_1.id = l.id +03)----SubqueryAlias: __correlated_sq_1 +04)------SubqueryAlias: r +05)--------TableScan: right_parquet projection=[id] +06)----SubqueryAlias: l +07)------TableScan: left_parquet projection=[id, data] physical_plan 01)HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(id@0, id@0)] 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id], file_type=parquet @@ -351,12 +351,12 @@ ORDER BY l.data DESC; ---- logical_plan 01)Sort: l.data DESC NULLS FIRST -02)--LeftSemi Join: l.id = __correlated_sq_1.id -03)----SubqueryAlias: l -04)------TableScan: left_parquet projection=[id, data] -05)----SubqueryAlias: __correlated_sq_1 -06)------SubqueryAlias: r -07)--------TableScan: right_parquet projection=[id] +02)--RightSemi Join: __correlated_sq_1.id = l.id +03)----SubqueryAlias: __correlated_sq_1 +04)------SubqueryAlias: r +05)--------TableScan: right_parquet projection=[id] +06)----SubqueryAlias: l +07)------TableScan: left_parquet projection=[id, data] physical_plan 01)HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(id@0, id@0)] 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id], file_type=parquet @@ -435,12 +435,12 @@ ORDER BY l.id LIMIT 2; ---- logical_plan 01)Sort: l.id ASC NULLS LAST, fetch=2 -02)--LeftSemi Join: l.id = __correlated_sq_1.id -03)----SubqueryAlias: l -04)------TableScan: left_parquet projection=[id, data] -05)----SubqueryAlias: __correlated_sq_1 -06)------SubqueryAlias: r -07)--------TableScan: right_parquet projection=[id] +02)--RightSemi Join: __correlated_sq_1.id = l.id +03)----SubqueryAlias: __correlated_sq_1 +04)------SubqueryAlias: r +05)--------TableScan: right_parquet projection=[id] +06)----SubqueryAlias: l +07)------TableScan: left_parquet projection=[id, data] physical_plan 01)SortExec: TopK(fetch=2), expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] 02)--HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(id@0, id@0)] diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 2e8a65385541e..6fb37b84b0f91 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -439,14 +439,14 @@ query TT explain select a from t1 where exists (select count(*) from t2); ---- logical_plan -01)LeftSemi Join: -02)--TableScan: t1 projection=[a] -03)--SubqueryAlias: __correlated_sq_1 -04)----EmptyRelation: rows=1 +01)RightSemi Join: +02)--SubqueryAlias: __correlated_sq_1 +03)----EmptyRelation: rows=1 +04)--TableScan: t1 projection=[a] physical_plan -01)NestedLoopJoinExec: join_type=LeftSemi -02)--DataSourceExec: partitions=1, partition_sizes=[0] -03)--PlaceholderRowExec +01)NestedLoopJoinExec: join_type=RightSemi +02)--PlaceholderRowExec +03)--DataSourceExec: partitions=1, partition_sizes=[0] statement ok drop table t1; diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 46d01f39a920b..5743769b6a094 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -1113,14 +1113,23 @@ physical_plan 01)┌───────────────────────────┐ 02)│ NestedLoopJoinExec │ 03)│ -------------------- ├──────────────┐ -04)│ join_type: LeftSemi │ │ +04)│ join_type: RightSemi │ │ 05)└─────────────┬─────────────┘ │ 06)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -07)│ DataSourceExec ││ PlaceholderRowExec │ -08)│ -------------------- ││ │ -09)│ files: 1 ││ │ -10)│ format: csv ││ │ -11)└───────────────────────────┘└───────────────────────────┘ +07)│ PlaceholderRowExec ││ RepartitionExec │ +08)│ ││ -------------------- │ +09)│ ││ partition_count(in->out): │ +10)│ ││ 1 -> 4 │ +11)│ ││ │ +12)│ ││ partitioning_scheme: │ +13)│ ││ RoundRobinBatch(4) │ +14)└───────────────────────────┘└─────────────┬─────────────┘ +15)-----------------------------┌─────────────┴─────────────┐ +16)-----------------------------│ DataSourceExec │ +17)-----------------------------│ -------------------- │ +18)-----------------------------│ files: 1 │ +19)-----------------------------│ format: csv │ +20)-----------------------------└───────────────────────────┘ # Query with cross join. query TT diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index b313424951532..1245c0d8a7d98 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -5137,10 +5137,10 @@ GROUP BY logical_plan 01)Projection: date_bin(IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 120000000000 }"),keywords_stream.ts,Utf8("2000-01-01")) AS ts_chunk, count(keywords_stream.keyword) AS alert_keyword_count 02)--Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 120000000000 }"), keywords_stream.ts, TimestampNanosecond(946684800000000000, None)) AS date_bin(IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 120000000000 }"),keywords_stream.ts,Utf8("2000-01-01"))]], aggr=[[count(keywords_stream.keyword)]] -03)----LeftSemi Join: keywords_stream.keyword = __correlated_sq_1.keyword -04)------TableScan: keywords_stream projection=[ts, keyword] -05)------SubqueryAlias: __correlated_sq_1 -06)--------TableScan: alert_keywords projection=[keyword] +03)----RightSemi Join: __correlated_sq_1.keyword = keywords_stream.keyword +04)------SubqueryAlias: __correlated_sq_1 +05)--------TableScan: alert_keywords projection=[keyword] +06)------TableScan: keywords_stream projection=[ts, keyword] physical_plan 01)ProjectionExec: expr=[date_bin(IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 120000000000 }"),keywords_stream.ts,Utf8("2000-01-01"))@0 as ts_chunk, count(keywords_stream.keyword)@1 as alert_keyword_count] 02)--AggregateExec: mode=Single, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 120000000000 }, ts@0, 946684800000000000) as date_bin(IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 120000000000 }"),keywords_stream.ts,Utf8("2000-01-01"))], aggr=[count(keywords_stream.keyword)] @@ -5634,7 +5634,7 @@ physical_plan statement ok reset datafusion.execution.batch_size; -# The SLT runner sets `target_partitions` to 4 instead of using the default, so +# The SLT runner sets `target_partitions` to 4 instead of using the default, so # reset it explicitly. statement ok set datafusion.execution.target_partitions = 4; diff --git a/datafusion/sqllogictest/test_files/join_limit_pushdown.slt b/datafusion/sqllogictest/test_files/join_limit_pushdown.slt index ea9c971065411..753e94c27fa0d 100644 --- a/datafusion/sqllogictest/test_files/join_limit_pushdown.slt +++ b/datafusion/sqllogictest/test_files/join_limit_pushdown.slt @@ -134,12 +134,12 @@ EXPLAIN SELECT t2.x FROM t2 WHERE EXISTS (SELECT 1 FROM t1 WHERE t1.a = t2.x) LI ---- logical_plan 01)Limit: skip=0, fetch=2 -02)--LeftSemi Join: t2.x = __correlated_sq_1.a -03)----TableScan: t2 projection=[x] -04)----SubqueryAlias: __correlated_sq_1 -05)------TableScan: t1 projection=[a] +02)--RightSemi Join: __correlated_sq_1.a = t2.x +03)----SubqueryAlias: __correlated_sq_1 +04)------TableScan: t1 projection=[a] +05)----TableScan: t2 projection=[x] physical_plan -01)HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(x@0, a@0)], fetch=2 +01)HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(a@0, x@0)], fetch=2 02)--DataSourceExec: partitions=1, partition_sizes=[1] 03)--DataSourceExec: partitions=1, partition_sizes=[1] @@ -259,7 +259,7 @@ SELECT t1.a, t2.x FROM t1 INNER JOIN t2 ON t1.a = t2.x LIMIT 100; # Config reset -# The SLT runner sets `target_partitions` to 4 instead of using the default, so +# The SLT runner sets `target_partitions` to 4 instead of using the default, so # reset it explicitly. statement ok set datafusion.execution.target_partitions = 4; diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index e0be63fe71525..b3bb7985fbe27 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -1838,11 +1838,11 @@ from join_t1 where join_t1.t1_id + 12 in (select join_t2.t2_id + 1 from join_t2) ---- logical_plan -01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) -02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] -03)--SubqueryAlias: __correlated_sq_1 -04)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) -05)------TableScan: join_t2 projection=[t2_id] +01)RightSemi Join: __correlated_sq_1.join_t2.t2_id + Int64(1) = CAST(join_t1.t1_id AS Int64) + Int64(12) +02)--SubqueryAlias: __correlated_sq_1 +03)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) +04)------TableScan: join_t2 projection=[t2_id] +05)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] query ITI rowsort select join_t1.t1_id, join_t1.t1_name, join_t1.t1_int @@ -1867,12 +1867,12 @@ where join_t1.t1_id + 12 in ) ---- logical_plan -01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int -02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] -03)--SubqueryAlias: __correlated_sq_1 -04)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int -05)------Filter: join_t2.t2_int > UInt32(0) -06)--------TableScan: join_t2 projection=[t2_id, t2_int] +01)RightSemi Join: __correlated_sq_1.join_t2.t2_id + Int64(1) = CAST(join_t1.t1_id AS Int64) + Int64(12) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int +02)--SubqueryAlias: __correlated_sq_1 +03)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int +04)------Filter: join_t2.t2_int > UInt32(0) +05)--------TableScan: join_t2 projection=[t2_id, t2_int] +06)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] query ITI rowsort select join_t1.t1_id, join_t1.t1_name, join_t1.t1_int @@ -1903,12 +1903,12 @@ where join_t1.t1_id + 12 in ) ---- logical_plan -01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int AND join_t1.t1_name != __correlated_sq_1.t2_name -02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] -03)--SubqueryAlias: __correlated_sq_1 -04)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int, join_t2.t2_name -05)------Filter: join_t2.t2_int > UInt32(0) -06)--------TableScan: join_t2 projection=[t2_id, t2_name, t2_int] +01)RightSemi Join: __correlated_sq_1.join_t2.t2_id + Int64(1) = CAST(join_t1.t1_id AS Int64) + Int64(12) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int AND join_t1.t1_name != __correlated_sq_1.t2_name +02)--SubqueryAlias: __correlated_sq_1 +03)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int, join_t2.t2_name +04)------Filter: join_t2.t2_int > UInt32(0) +05)--------TableScan: join_t2 projection=[t2_id, t2_name, t2_int] +06)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] query ITI rowsort select join_t1.t1_id, join_t1.t1_name, join_t1.t1_int @@ -1935,12 +1935,12 @@ where join_t1.t1_id + 12 in (select join_t2.t2_id + 1 from join_t2 where join_t1.t1_int > 0) ---- logical_plan -01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) -02)--Filter: join_t1.t1_int > UInt32(0) -03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int] -04)--SubqueryAlias: __correlated_sq_1 -05)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) -06)------TableScan: join_t2 projection=[t2_id] +01)RightSemi Join: __correlated_sq_1.join_t2.t2_id + Int64(1) = CAST(join_t1.t1_id AS Int64) + Int64(12) +02)--SubqueryAlias: __correlated_sq_1 +03)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) +04)------TableScan: join_t2 projection=[t2_id] +05)--Filter: join_t1.t1_int > UInt32(0) +06)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int] # Not in subquery to join with correlated outer filter @@ -1974,12 +1974,12 @@ where join_t1.t1_id + 12 in and join_t1.t1_id > 0 ---- logical_plan -01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int AND join_t1.t1_name != __correlated_sq_1.t2_name -02)--Filter: join_t1.t1_id > UInt32(0) -03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int] -04)--SubqueryAlias: __correlated_sq_1 -05)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int, join_t2.t2_name -06)------TableScan: join_t2 projection=[t2_id, t2_name, t2_int] +01)RightSemi Join: __correlated_sq_1.join_t2.t2_id + Int64(1) = CAST(join_t1.t1_id AS Int64) + Int64(12) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int AND join_t1.t1_name != __correlated_sq_1.t2_name +02)--SubqueryAlias: __correlated_sq_1 +03)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int, join_t2.t2_name +04)------TableScan: join_t2 projection=[t2_id, t2_name, t2_int] +05)--Filter: join_t1.t1_id > UInt32(0) +06)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int] query ITI rowsort select join_t1.t1_id, join_t1.t1_name, join_t1.t1_int @@ -2007,16 +2007,16 @@ where join_t1.t1_id + 12 in (select join_t2.t2_id + 1 from join_t2) and join_t1.t1_id > 0 ---- logical_plan -01)LeftSemi Join: CAST(join_t1.t1_int AS Int64) = __correlated_sq_2.join_t2.t2_int + Int64(1) -02)--LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) -03)----Filter: join_t1.t1_id > UInt32(0) -04)------TableScan: join_t1 projection=[t1_id, t1_name, t1_int] -05)----SubqueryAlias: __correlated_sq_1 -06)------Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) -07)--------TableScan: join_t2 projection=[t2_id] -08)--SubqueryAlias: __correlated_sq_2 -09)----Projection: CAST(join_t2.t2_int AS Int64) + Int64(1) -10)------TableScan: join_t2 projection=[t2_int] +01)RightSemi Join: __correlated_sq_2.join_t2.t2_int + Int64(1) = CAST(join_t1.t1_int AS Int64) +02)--SubqueryAlias: __correlated_sq_2 +03)----Projection: CAST(join_t2.t2_int AS Int64) + Int64(1) +04)------TableScan: join_t2 projection=[t2_int] +05)--RightSemi Join: __correlated_sq_1.join_t2.t2_id + Int64(1) = CAST(join_t1.t1_id AS Int64) + Int64(12) +06)----SubqueryAlias: __correlated_sq_1 +07)------Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) +08)--------TableScan: join_t2 projection=[t2_id] +09)----Filter: join_t1.t1_id > UInt32(0) +10)------TableScan: join_t1 projection=[t1_id, t1_name, t1_int] query ITI select join_t1.t1_id, join_t1.t1_name, join_t1.t1_int @@ -2170,10 +2170,10 @@ WHERE EXISTS ( ) ---- logical_plan -01)LeftSemi Join: Filter: CAST(join_t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_1.t2_id AS Int64) * Int64(2) -02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] -03)--SubqueryAlias: __correlated_sq_1 -04)----TableScan: join_t2 projection=[t2_id] +01)RightSemi Join: Filter: CAST(join_t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_1.t2_id AS Int64) * Int64(2) +02)--SubqueryAlias: __correlated_sq_1 +03)----TableScan: join_t2 projection=[t2_id] +04)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] statement ok set datafusion.optimizer.repartition_joins = false; @@ -2222,12 +2222,12 @@ WHERE EXISTS ( ) ---- logical_plan -01)LeftSemi Join: Filter: CAST(join_t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_1.t2_id AS Int64) * Int64(2) -02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] -03)--SubqueryAlias: __correlated_sq_1 -04)----Projection: join_t2.t2_id -05)------Filter: join_t2.t2_int < UInt32(3) -06)--------TableScan: join_t2 projection=[t2_id, t2_int] +01)RightSemi Join: Filter: CAST(join_t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_1.t2_id AS Int64) * Int64(2) +02)--SubqueryAlias: __correlated_sq_1 +03)----Projection: join_t2.t2_id +04)------Filter: join_t2.t2_int < UInt32(3) +05)--------TableScan: join_t2 projection=[t2_id, t2_int] +06)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int] statement ok set datafusion.optimizer.repartition_joins = false; @@ -2270,11 +2270,11 @@ WHERE EXISTS ( ) ---- logical_plan -01)LeftSemi Join: Filter: CAST(join_t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_1.t2_id AS Int64) * Int64(2) -02)--Filter: join_t1.t1_int < UInt32(3) -03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int] -04)--SubqueryAlias: __correlated_sq_1 -05)----TableScan: join_t2 projection=[t2_id] +01)RightSemi Join: Filter: CAST(join_t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_1.t2_id AS Int64) * Int64(2) +02)--SubqueryAlias: __correlated_sq_1 +03)----TableScan: join_t2 projection=[t2_id] +04)--Filter: join_t1.t1_int < UInt32(3) +05)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int] statement ok set datafusion.optimizer.repartition_joins = false; @@ -3049,7 +3049,7 @@ explain SELECT t1_id, t1_name, t1_int FROM right_semi_anti_join_table_t1 t1 WHER ---- physical_plan 01)SortPreservingMergeExec: [t1_id@0 ASC NULLS LAST] -02)--HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(t2_id@0, t1_id@0)], filter=t2_name@1 != t1_name@0 +02)--HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(t2_id@0, t1_id@0)], filter=t2_name@0 != t1_name@1 03)----DataSourceExec: partitions=1, partition_sizes=[1] 04)----SortExec: expr=[t1_id@0 ASC NULLS LAST], preserve_partitioning=[true] 05)------FilterExec: DynamicFilter [ empty ] @@ -3121,7 +3121,7 @@ explain SELECT t1_id, t1_name, t1_int FROM right_semi_anti_join_table_t1 t1 WHER ---- physical_plan 01)SortPreservingMergeExec: [t1_id@0 ASC NULLS LAST] -02)--HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(t2_id@0, t1_id@0)], filter=t2_name@1 != t1_name@0 +02)--HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(t2_id@0, t1_id@0)], filter=t2_name@0 != t1_name@1 03)----DataSourceExec: partitions=1, partition_sizes=[1] 04)----SortExec: expr=[t1_id@0 ASC NULLS LAST], preserve_partitioning=[true] 05)------FilterExec: DynamicFilter [ empty ] @@ -4439,12 +4439,12 @@ explain select * from test where a in (select a from test where b > 3) order by ---- logical_plan 01)Sort: test.c DESC NULLS FIRST -02)--LeftSemi Join: test.a = __correlated_sq_1.a -03)----TableScan: test projection=[a, b, c] -04)----SubqueryAlias: __correlated_sq_1 -05)------Projection: test.a -06)--------Filter: test.b > Int32(3) -07)----------TableScan: test projection=[a, b] +02)--RightSemi Join: __correlated_sq_1.a = test.a +03)----SubqueryAlias: __correlated_sq_1 +04)------Projection: test.a +05)--------Filter: test.b > Int32(3) +06)----------TableScan: test projection=[a, b] +07)----TableScan: test projection=[a, b, c] physical_plan 01)SortPreservingMergeExec: [c@2 DESC] 02)--HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(a@0, a@0)] @@ -4460,12 +4460,12 @@ explain select * from test where a in (select a from test where b > 3) order by ---- logical_plan 01)Sort: test.c DESC NULLS LAST -02)--LeftSemi Join: test.a = __correlated_sq_1.a -03)----TableScan: test projection=[a, b, c] -04)----SubqueryAlias: __correlated_sq_1 -05)------Projection: test.a -06)--------Filter: test.b > Int32(3) -07)----------TableScan: test projection=[a, b] +02)--RightSemi Join: __correlated_sq_1.a = test.a +03)----SubqueryAlias: __correlated_sq_1 +04)------Projection: test.a +05)--------Filter: test.b > Int32(3) +06)----------TableScan: test projection=[a, b] +07)----TableScan: test projection=[a, b, c] physical_plan 01)SortPreservingMergeExec: [c@2 DESC NULLS LAST] 02)--HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(a@0, a@0)] diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index 368d252e25006..4d7cdbe77b698 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -360,11 +360,11 @@ where o_orderstatus in ( ---- logical_plan 01)Projection: orders.o_orderkey -02)--LeftSemi Join: orders.o_orderstatus = __correlated_sq_1.l_linestatus, orders.o_orderkey = __correlated_sq_1.l_orderkey -03)----TableScan: orders projection=[o_orderkey, o_orderstatus] -04)----SubqueryAlias: __correlated_sq_1 -05)------Projection: lineitem.l_linestatus, lineitem.l_orderkey -06)--------TableScan: lineitem projection=[l_orderkey, l_linestatus] +02)--RightSemi Join: __correlated_sq_1.l_linestatus = orders.o_orderstatus, __correlated_sq_1.l_orderkey = orders.o_orderkey +03)----SubqueryAlias: __correlated_sq_1 +04)------Projection: lineitem.l_linestatus, lineitem.l_orderkey +05)--------TableScan: lineitem projection=[l_orderkey, l_linestatus] +06)----TableScan: orders projection=[o_orderkey, o_orderstatus] query I rowsort select o_orderkey from orders @@ -387,12 +387,12 @@ query TT explain SELECT t1_id, t1_name, t1_int FROM t1 WHERE EXISTS(SELECT t1_int FROM t1 WHERE t1.t1_id > t1.t1_int) ---- logical_plan -01)LeftSemi Join: -02)--TableScan: t1 projection=[t1_id, t1_name, t1_int] -03)--SubqueryAlias: __correlated_sq_1 -04)----Projection: -05)------Filter: t1.t1_int < t1.t1_id -06)--------TableScan: t1 projection=[t1_id, t1_int] +01)RightSemi Join: +02)--SubqueryAlias: __correlated_sq_1 +03)----Projection: +04)------Filter: t1.t1_int < t1.t1_id +05)--------TableScan: t1 projection=[t1_id, t1_int] +06)--TableScan: t1 projection=[t1_id, t1_name, t1_int] #in_subquery_with_same_table @@ -402,26 +402,26 @@ query TT explain SELECT t1_id, t1_name, t1_int FROM t1 WHERE t1_id IN(SELECT t1_int FROM t1 WHERE t1.t1_id > t1.t1_int) ---- logical_plan -01)LeftSemi Join: t1.t1_id = __correlated_sq_1.t1_int -02)--TableScan: t1 projection=[t1_id, t1_name, t1_int] -03)--SubqueryAlias: __correlated_sq_1 -04)----Projection: t1.t1_int -05)------Filter: t1.t1_int < t1.t1_id -06)--------TableScan: t1 projection=[t1_id, t1_int] +01)RightSemi Join: __correlated_sq_1.t1_int = t1.t1_id +02)--SubqueryAlias: __correlated_sq_1 +03)----Projection: t1.t1_int +04)------Filter: t1.t1_int < t1.t1_id +05)--------TableScan: t1 projection=[t1_id, t1_int] +06)--TableScan: t1 projection=[t1_id, t1_name, t1_int] #in_subquery_nested_exist_subquery query TT explain SELECT t1_id, t1_name, t1_int FROM t1 WHERE t1_id IN(SELECT t2_id FROM t2 WHERE EXISTS(select * from t1 WHERE t1.t1_int > t2.t2_int)) ---- logical_plan -01)LeftSemi Join: t1.t1_id = __correlated_sq_2.t2_id -02)--TableScan: t1 projection=[t1_id, t1_name, t1_int] -03)--SubqueryAlias: __correlated_sq_2 -04)----Projection: t2.t2_id -05)------LeftSemi Join: Filter: __correlated_sq_1.t1_int > t2.t2_int -06)--------TableScan: t2 projection=[t2_id, t2_int] -07)--------SubqueryAlias: __correlated_sq_1 -08)----------TableScan: t1 projection=[t1_int] +01)RightSemi Join: __correlated_sq_2.t2_id = t1.t1_id +02)--SubqueryAlias: __correlated_sq_2 +03)----Projection: t2.t2_id +04)------RightSemi Join: Filter: __correlated_sq_1.t1_int > t2.t2_int +05)--------SubqueryAlias: __correlated_sq_1 +06)----------TableScan: t1 projection=[t1_int] +07)--------TableScan: t2 projection=[t2_id, t2_int] +08)--TableScan: t1 projection=[t1_id, t1_name, t1_int] #invalid_scalar_subquery statement error DataFusion error: Error during planning: Too many columns! The subquery should only return one column: t2.t2_id, t2.t2_name @@ -551,42 +551,42 @@ query TT explain SELECT t0_id, t0_name FROM t0 WHERE EXISTS (SELECT 1 FROM t1 INNER JOIN t2 ON(t1.t1_id = t2.t2_id and t1.t1_name = t0.t0_name)) ---- logical_plan -01)LeftSemi Join: t0.t0_name = __correlated_sq_2.t1_name -02)--TableScan: t0 projection=[t0_id, t0_name] -03)--SubqueryAlias: __correlated_sq_2 -04)----Projection: t1.t1_name -05)------Inner Join: t1.t1_id = t2.t2_id -06)--------TableScan: t1 projection=[t1_id, t1_name] -07)--------TableScan: t2 projection=[t2_id] +01)RightSemi Join: __correlated_sq_2.t1_name = t0.t0_name +02)--SubqueryAlias: __correlated_sq_2 +03)----Projection: t1.t1_name +04)------Inner Join: t1.t1_id = t2.t2_id +05)--------TableScan: t1 projection=[t1_id, t1_name] +06)--------TableScan: t2 projection=[t2_id] +07)--TableScan: t0 projection=[t0_id, t0_name] #subquery_contains_join_contains_correlated_columns query TT explain SELECT t0_id, t0_name FROM t0 WHERE EXISTS (SELECT 1 FROM t1 INNER JOIN (select * from t2 where t2.t2_name = t0.t0_name) as t2 ON(t1.t1_id = t2.t2_id )) ---- logical_plan -01)LeftSemi Join: t0.t0_name = __correlated_sq_1.t2_name -02)--TableScan: t0 projection=[t0_id, t0_name] -03)--SubqueryAlias: __correlated_sq_1 -04)----Projection: t2.t2_name -05)------Inner Join: t1.t1_id = t2.t2_id -06)--------TableScan: t1 projection=[t1_id] -07)--------SubqueryAlias: t2 -08)----------TableScan: t2 projection=[t2_id, t2_name] +01)RightSemi Join: __correlated_sq_1.t2_name = t0.t0_name +02)--SubqueryAlias: __correlated_sq_1 +03)----Projection: t2.t2_name +04)------Inner Join: t1.t1_id = t2.t2_id +05)--------TableScan: t1 projection=[t1_id] +06)--------SubqueryAlias: t2 +07)----------TableScan: t2 projection=[t2_id, t2_name] +08)--TableScan: t0 projection=[t0_id, t0_name] #subquery_contains_join_contains_sub_query_alias_correlated_columns query TT explain SELECT t0_id, t0_name FROM t0 WHERE EXISTS (select 1 from (SELECT * FROM t1 where t1.t1_id = t0.t0_id) as x INNER JOIN (select * from t2 where t2.t2_name = t0.t0_name) as y ON(x.t1_id = y.t2_id)) ---- logical_plan -01)LeftSemi Join: t0.t0_id = __correlated_sq_1.t1_id, t0.t0_name = __correlated_sq_1.t2_name -02)--TableScan: t0 projection=[t0_id, t0_name] -03)--SubqueryAlias: __correlated_sq_1 -04)----Projection: x.t1_id, y.t2_name -05)------Inner Join: x.t1_id = y.t2_id -06)--------SubqueryAlias: x -07)----------TableScan: t1 projection=[t1_id] -08)--------SubqueryAlias: y -09)----------TableScan: t2 projection=[t2_id, t2_name] +01)RightSemi Join: __correlated_sq_1.t1_id = t0.t0_id, __correlated_sq_1.t2_name = t0.t0_name +02)--SubqueryAlias: __correlated_sq_1 +03)----Projection: x.t1_id, y.t2_name +04)------Inner Join: x.t1_id = y.t2_id +05)--------SubqueryAlias: x +06)----------TableScan: t1 projection=[t1_id] +07)--------SubqueryAlias: y +08)----------TableScan: t2 projection=[t2_id, t2_name] +09)--TableScan: t0 projection=[t0_id, t0_name] #support_order_by_correlated_columns query TT @@ -606,10 +606,10 @@ query TT explain SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT NULL) ---- logical_plan -01)LeftSemi Join: -02)--TableScan: t1 projection=[t1_id, t1_name] -03)--SubqueryAlias: __correlated_sq_1 -04)----EmptyRelation: rows=1 +01)RightSemi Join: +02)--SubqueryAlias: __correlated_sq_1 +03)----EmptyRelation: rows=1 +04)--TableScan: t1 projection=[t1_id, t1_name] #exists_subquery_with_limit #de-correlated, limit is removed @@ -617,10 +617,10 @@ query TT explain SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 1) ---- logical_plan -01)LeftSemi Join: t1.t1_id = __correlated_sq_1.t2_id -02)--TableScan: t1 projection=[t1_id, t1_name] -03)--SubqueryAlias: __correlated_sq_1 -04)----TableScan: t2 projection=[t2_id] +01)RightSemi Join: __correlated_sq_1.t2_id = t1.t1_id +02)--SubqueryAlias: __correlated_sq_1 +03)----TableScan: t2 projection=[t2_id] +04)--TableScan: t1 projection=[t1_id, t1_name] query IT rowsort SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 1) @@ -676,11 +676,11 @@ query TT explain SELECT t1_id, t1_name FROM t1 WHERE t1_id in (SELECT t2_id FROM t2 limit 10) ---- logical_plan -01)LeftSemi Join: t1.t1_id = __correlated_sq_1.t2_id -02)--TableScan: t1 projection=[t1_id, t1_name] -03)--SubqueryAlias: __correlated_sq_1 -04)----Limit: skip=0, fetch=10 -05)------TableScan: t2 projection=[t2_id], fetch=10 +01)RightSemi Join: __correlated_sq_1.t2_id = t1.t1_id +02)--SubqueryAlias: __correlated_sq_1 +03)----Limit: skip=0, fetch=10 +04)------TableScan: t2 projection=[t2_id], fetch=10 +05)--TableScan: t1 projection=[t1_id, t1_name] #uncorrelated_scalar_subquery_with_limit0 @@ -1208,10 +1208,10 @@ logical_plan 01)Projection: t1.t1_id, t1.t1_name, t1.t1_int 02)--Filter: t1.t1_id > Int32(40) OR __correlated_sq_2.mark 03)----LeftMark Join: t1.t1_id = __correlated_sq_2.t2_id Filter: t1.t1_int > Int32(0) -04)------LeftSemi Join: t1.t1_id = __correlated_sq_1.t3_id -05)--------TableScan: t1 projection=[t1_id, t1_name, t1_int] -06)--------SubqueryAlias: __correlated_sq_1 -07)----------TableScan: t3 projection=[t3_id] +04)------RightSemi Join: __correlated_sq_1.t3_id = t1.t1_id +05)--------SubqueryAlias: __correlated_sq_1 +06)----------TableScan: t3 projection=[t3_id] +07)--------TableScan: t1 projection=[t1_id, t1_name, t1_int] 08)------SubqueryAlias: __correlated_sq_2 09)--------TableScan: t2 projection=[t2_id] @@ -1436,10 +1436,10 @@ query TT explain SELECT a FROM t1 WHERE EXISTS (SELECT count(*) FROM t2) ---- logical_plan -01)LeftSemi Join: -02)--TableScan: t1 projection=[a] -03)--SubqueryAlias: __correlated_sq_1 -04)----EmptyRelation: rows=1 +01)RightSemi Join: +02)--SubqueryAlias: __correlated_sq_1 +03)----EmptyRelation: rows=1 +04)--TableScan: t1 projection=[a] statement count 0 drop table t1; @@ -1458,11 +1458,11 @@ explain SELECT id FROM person p WHERE EXISTS ---- logical_plan 01)Projection: p.id -02)--LeftSemi Join: p.last_name = __correlated_sq_1.last_name, p.state = __correlated_sq_1.state -03)----SubqueryAlias: p -04)------TableScan: person projection=[id, last_name, state] -05)----SubqueryAlias: __correlated_sq_1 -06)------TableScan: person projection=[last_name, state] +02)--RightSemi Join: __correlated_sq_1.last_name = p.last_name, __correlated_sq_1.state = p.state +03)----SubqueryAlias: __correlated_sq_1 +04)------TableScan: person projection=[last_name, state] +05)----SubqueryAlias: p +06)------TableScan: person projection=[id, last_name, state] statement count 0 drop table person; @@ -1547,10 +1547,10 @@ logical_plan 06)--------Projection: sum(orders.o_totalprice), orders.o_custkey 07)----------Aggregate: groupBy=[[orders.o_custkey]], aggr=[[sum(orders.o_totalprice)]] 08)------------Projection: orders.o_custkey, orders.o_totalprice -09)--------------LeftSemi Join: orders.o_orderkey = __correlated_sq_1.l_orderkey Filter: __correlated_sq_1.l_extendedprice < customer.c_acctbal -10)----------------TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice] -11)----------------SubqueryAlias: __correlated_sq_1 -12)------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice] +09)--------------RightSemi Join: __correlated_sq_1.l_orderkey = orders.o_orderkey Filter: __correlated_sq_1.l_extendedprice < customer.c_acctbal +10)----------------SubqueryAlias: __correlated_sq_1 +11)------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice] +12)----------------TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice] # correlated_recursive_scalar_subquery_with_level_3_in_subquery_referencing_level1_relation query TT @@ -1573,11 +1573,11 @@ logical_plan 06)--------Projection: sum(orders.o_totalprice), orders.o_custkey 07)----------Aggregate: groupBy=[[orders.o_custkey]], aggr=[[sum(orders.o_totalprice)]] 08)------------Projection: orders.o_custkey, orders.o_totalprice -09)--------------LeftSemi Join: orders.o_totalprice = __correlated_sq_1.price, orders.o_orderkey = __correlated_sq_1.l_orderkey Filter: __correlated_sq_1.l_extendedprice < customer.c_acctbal -10)----------------TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice] -11)----------------SubqueryAlias: __correlated_sq_1 -12)------------------Projection: lineitem.l_extendedprice AS price, lineitem.l_extendedprice, lineitem.l_orderkey -13)--------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice] +09)--------------RightSemi Join: __correlated_sq_1.price = orders.o_totalprice, __correlated_sq_1.l_orderkey = orders.o_orderkey Filter: __correlated_sq_1.l_extendedprice < customer.c_acctbal +10)----------------SubqueryAlias: __correlated_sq_1 +11)------------------Projection: lineitem.l_extendedprice AS price, lineitem.l_extendedprice, lineitem.l_orderkey +12)--------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice] +13)----------------TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice] # Setup tables for recursive correlation tests statement ok @@ -1644,17 +1644,17 @@ WHERE EXISTS ( ); ---- logical_plan -01)LeftSemi Join: -02)--SubqueryAlias: e1 -03)----TableScan: employees projection=[employee_id] -04)--SubqueryAlias: __correlated_sq_2 -05)----Projection: -06)------LeftSemi Join: e2.dept_id = __correlated_sq_1.project_id -07)--------SubqueryAlias: e2 -08)----------TableScan: employees projection=[dept_id] -09)--------SubqueryAlias: __correlated_sq_1 -10)----------SubqueryAlias: p -11)------------TableScan: project_assignments projection=[project_id] +01)RightSemi Join: +02)--SubqueryAlias: __correlated_sq_2 +03)----Projection: +04)------RightSemi Join: __correlated_sq_1.project_id = e2.dept_id +05)--------SubqueryAlias: __correlated_sq_1 +06)----------SubqueryAlias: p +07)------------TableScan: project_assignments projection=[project_id] +08)--------SubqueryAlias: e2 +09)----------TableScan: employees projection=[dept_id] +10)--SubqueryAlias: e1 +11)----TableScan: employees projection=[employee_id] # Config reset statement ok diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q18.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q18.slt.part index 617051d602bd6..af6a3876d3d6a 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q18.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q18.slt.part @@ -53,35 +53,35 @@ order by logical_plan 01)Sort: orders.o_totalprice DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST 02)--Aggregate: groupBy=[[customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice]], aggr=[[sum(lineitem.l_quantity)]] -03)----LeftSemi Join: orders.o_orderkey = __correlated_sq_1.l_orderkey -04)------Projection: customer.c_custkey, customer.c_name, orders.o_orderkey, orders.o_totalprice, orders.o_orderdate, lineitem.l_quantity -05)--------Inner Join: orders.o_orderkey = lineitem.l_orderkey -06)----------Projection: customer.c_custkey, customer.c_name, orders.o_orderkey, orders.o_totalprice, orders.o_orderdate -07)------------Inner Join: customer.c_custkey = orders.o_custkey -08)--------------TableScan: customer projection=[c_custkey, c_name] -09)--------------TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] -10)----------TableScan: lineitem projection=[l_orderkey, l_quantity] -11)------SubqueryAlias: __correlated_sq_1 -12)--------Projection: lineitem.l_orderkey -13)----------Filter: sum(lineitem.l_quantity) > Decimal128(Some(30000),25,2) -14)------------Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[sum(lineitem.l_quantity)]] -15)--------------TableScan: lineitem projection=[l_orderkey, l_quantity] +03)----RightSemi Join: __correlated_sq_1.l_orderkey = orders.o_orderkey +04)------SubqueryAlias: __correlated_sq_1 +05)--------Projection: lineitem.l_orderkey +06)----------Filter: sum(lineitem.l_quantity) > Decimal128(Some(30000),25,2) +07)------------Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[sum(lineitem.l_quantity)]] +08)--------------TableScan: lineitem projection=[l_orderkey, l_quantity] +09)------Projection: customer.c_custkey, customer.c_name, orders.o_orderkey, orders.o_totalprice, orders.o_orderdate, lineitem.l_quantity +10)--------Inner Join: orders.o_orderkey = lineitem.l_orderkey +11)----------Projection: customer.c_custkey, customer.c_name, orders.o_orderkey, orders.o_totalprice, orders.o_orderdate +12)------------Inner Join: customer.c_custkey = orders.o_custkey +13)--------------TableScan: customer projection=[c_custkey, c_name] +14)--------------TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] +15)----------TableScan: lineitem projection=[l_orderkey, l_quantity] physical_plan 01)SortPreservingMergeExec: [o_totalprice@4 DESC, o_orderdate@3 ASC NULLS LAST] 02)--SortExec: expr=[o_totalprice@4 DESC, o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] 03)----AggregateExec: mode=SinglePartitioned, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] -04)------HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@2, l_orderkey@0)] -05)--------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] -06)----------RepartitionExec: partitioning=Hash([o_orderkey@2], 4), input_partitions=4 -07)------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] -08)--------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=1 -09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_name], file_type=csv, has_header=false -10)--------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 -11)----------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate], file_type=csv, has_header=false -12)----------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 -13)------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_quantity], file_type=csv, has_header=false -14)--------FilterExec: sum(lineitem.l_quantity)@1 > Some(30000),25,2, projection=[l_orderkey@0] -15)----------AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] -16)------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 -17)--------------AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] -18)----------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_quantity], file_type=csv, has_header=false +04)------HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] +05)--------FilterExec: sum(lineitem.l_quantity)@1 > Some(30000),25,2, projection=[l_orderkey@0] +06)----------AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] +07)------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 +08)--------------AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] +09)----------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_quantity], file_type=csv, has_header=false +10)--------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] +11)----------RepartitionExec: partitioning=Hash([o_orderkey@2], 4), input_partitions=4 +12)------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] +13)--------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=1 +14)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_name], file_type=csv, has_header=false +15)--------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 +16)----------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate], file_type=csv, has_header=false +17)----------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 +18)------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_quantity], file_type=csv, has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q20.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q20.slt.part index 426a1cbaa4e22..59ff2dad18d44 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q20.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q20.slt.part @@ -58,53 +58,53 @@ order by logical_plan 01)Sort: supplier.s_name ASC NULLS LAST 02)--Projection: supplier.s_name, supplier.s_address -03)----LeftSemi Join: supplier.s_suppkey = __correlated_sq_2.ps_suppkey -04)------Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address -05)--------Inner Join: supplier.s_nationkey = nation.n_nationkey -06)----------TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey] -07)----------Projection: nation.n_nationkey -08)------------Filter: nation.n_name = Utf8View("CANADA") -09)--------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8View("CANADA")] -10)------SubqueryAlias: __correlated_sq_2 -11)--------Projection: partsupp.ps_suppkey -12)----------Inner Join: partsupp.ps_partkey = __scalar_sq_3.l_partkey, partsupp.ps_suppkey = __scalar_sq_3.l_suppkey Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_3.Float64(0.5) * sum(lineitem.l_quantity) -13)------------LeftSemi Join: partsupp.ps_partkey = __correlated_sq_1.p_partkey -14)--------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] -15)--------------SubqueryAlias: __correlated_sq_1 -16)----------------Projection: part.p_partkey -17)------------------Filter: part.p_name LIKE Utf8View("forest%") -18)--------------------TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8View("forest%")] -19)------------SubqueryAlias: __scalar_sq_3 -20)--------------Projection: Float64(0.5) * CAST(sum(lineitem.l_quantity) AS Float64), lineitem.l_partkey, lineitem.l_suppkey -21)----------------Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[sum(lineitem.l_quantity)]] -22)------------------Projection: lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity -23)--------------------Filter: lineitem.l_shipdate >= Date32("1994-01-01") AND lineitem.l_shipdate < Date32("1995-01-01") -24)----------------------TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1994-01-01"), lineitem.l_shipdate < Date32("1995-01-01")] +03)----RightSemi Join: __correlated_sq_2.ps_suppkey = supplier.s_suppkey +04)------SubqueryAlias: __correlated_sq_2 +05)--------Projection: partsupp.ps_suppkey +06)----------Inner Join: partsupp.ps_partkey = __scalar_sq_3.l_partkey, partsupp.ps_suppkey = __scalar_sq_3.l_suppkey Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_3.Float64(0.5) * sum(lineitem.l_quantity) +07)------------RightSemi Join: __correlated_sq_1.p_partkey = partsupp.ps_partkey +08)--------------SubqueryAlias: __correlated_sq_1 +09)----------------Projection: part.p_partkey +10)------------------Filter: part.p_name LIKE Utf8View("forest%") +11)--------------------TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8View("forest%")] +12)--------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] +13)------------SubqueryAlias: __scalar_sq_3 +14)--------------Projection: Float64(0.5) * CAST(sum(lineitem.l_quantity) AS Float64), lineitem.l_partkey, lineitem.l_suppkey +15)----------------Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[sum(lineitem.l_quantity)]] +16)------------------Projection: lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity +17)--------------------Filter: lineitem.l_shipdate >= Date32("1994-01-01") AND lineitem.l_shipdate < Date32("1995-01-01") +18)----------------------TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1994-01-01"), lineitem.l_shipdate < Date32("1995-01-01")] +19)------Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address +20)--------Inner Join: supplier.s_nationkey = nation.n_nationkey +21)----------TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey] +22)----------Projection: nation.n_nationkey +23)------------Filter: nation.n_name = Utf8View("CANADA") +24)--------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8View("CANADA")] physical_plan 01)SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] 02)--SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] -04)------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 -05)--------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@3, n_nationkey@0)], projection=[s_suppkey@0, s_name@1, s_address@2] -06)----------RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=1 -07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_name, s_address, s_nationkey], file_type=csv, has_header=false -08)----------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 -09)------------FilterExec: n_name@1 = CANADA, projection=[n_nationkey@0] -10)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -11)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], file_type=csv, has_header=false -12)------RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 -13)--------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] -14)----------RepartitionExec: partitioning=Hash([ps_partkey@0, ps_suppkey@1], 4), input_partitions=4 -15)------------HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(ps_partkey@0, p_partkey@0)] -16)--------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 -17)----------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_availqty], file_type=csv, has_header=false -18)--------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 -19)----------------FilterExec: p_name@1 LIKE forest%, projection=[p_partkey@0] -20)------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -21)--------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_name], file_type=csv, has_header=false -22)----------ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] -23)------------AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] -24)--------------RepartitionExec: partitioning=Hash([l_partkey@0, l_suppkey@1], 4), input_partitions=4 -25)----------------AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] -26)------------------FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01, projection=[l_partkey@0, l_suppkey@1, l_quantity@2] -27)--------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], file_type=csv, has_header=false +03)----HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(ps_suppkey@0, s_suppkey@0)], projection=[s_name@1, s_address@2] +04)------RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 +05)--------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] +06)----------RepartitionExec: partitioning=Hash([ps_partkey@0, ps_suppkey@1], 4), input_partitions=4 +07)------------HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] +08)--------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 +09)----------------FilterExec: p_name@1 LIKE forest%, projection=[p_partkey@0] +10)------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +11)--------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_name], file_type=csv, has_header=false +12)--------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 +13)----------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_availqty], file_type=csv, has_header=false +14)----------ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] +15)------------AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] +16)--------------RepartitionExec: partitioning=Hash([l_partkey@0, l_suppkey@1], 4), input_partitions=4 +17)----------------AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] +18)------------------FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01, projection=[l_partkey@0, l_suppkey@1, l_quantity@2] +19)--------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], file_type=csv, has_header=false +20)------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 +21)--------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@3, n_nationkey@0)], projection=[s_suppkey@0, s_name@1, s_address@2] +22)----------RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=1 +23)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_name, s_address, s_nationkey], file_type=csv, has_header=false +24)----------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 +25)------------FilterExec: n_name@1 = CANADA, projection=[n_nationkey@0] +26)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +27)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], file_type=csv, has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q21.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q21.slt.part index 5e9192d677532..4374d707e36e1 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q21.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q21.slt.part @@ -63,27 +63,27 @@ logical_plan 03)----Aggregate: groupBy=[[supplier.s_name]], aggr=[[count(Int64(1))]] 04)------Projection: supplier.s_name 05)--------LeftAnti Join: l1.l_orderkey = __correlated_sq_2.l_orderkey Filter: __correlated_sq_2.l_suppkey != l1.l_suppkey -06)----------LeftSemi Join: l1.l_orderkey = __correlated_sq_1.l_orderkey Filter: __correlated_sq_1.l_suppkey != l1.l_suppkey -07)------------Projection: supplier.s_name, l1.l_orderkey, l1.l_suppkey -08)--------------Inner Join: supplier.s_nationkey = nation.n_nationkey -09)----------------Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey -10)------------------Inner Join: l1.l_orderkey = orders.o_orderkey -11)--------------------Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey -12)----------------------Inner Join: supplier.s_suppkey = l1.l_suppkey -13)------------------------TableScan: supplier projection=[s_suppkey, s_name, s_nationkey] -14)------------------------SubqueryAlias: l1 -15)--------------------------Projection: lineitem.l_orderkey, lineitem.l_suppkey -16)----------------------------Filter: lineitem.l_receiptdate > lineitem.l_commitdate -17)------------------------------TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] -18)--------------------Projection: orders.o_orderkey -19)----------------------Filter: orders.o_orderstatus = Utf8View("F") -20)------------------------TableScan: orders projection=[o_orderkey, o_orderstatus], partial_filters=[orders.o_orderstatus = Utf8View("F")] -21)----------------Projection: nation.n_nationkey -22)------------------Filter: nation.n_name = Utf8View("SAUDI ARABIA") -23)--------------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8View("SAUDI ARABIA")] -24)------------SubqueryAlias: __correlated_sq_1 -25)--------------SubqueryAlias: l2 -26)----------------TableScan: lineitem projection=[l_orderkey, l_suppkey] +06)----------RightSemi Join: __correlated_sq_1.l_orderkey = l1.l_orderkey Filter: __correlated_sq_1.l_suppkey != l1.l_suppkey +07)------------SubqueryAlias: __correlated_sq_1 +08)--------------SubqueryAlias: l2 +09)----------------TableScan: lineitem projection=[l_orderkey, l_suppkey] +10)------------Projection: supplier.s_name, l1.l_orderkey, l1.l_suppkey +11)--------------Inner Join: supplier.s_nationkey = nation.n_nationkey +12)----------------Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey +13)------------------Inner Join: l1.l_orderkey = orders.o_orderkey +14)--------------------Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey +15)----------------------Inner Join: supplier.s_suppkey = l1.l_suppkey +16)------------------------TableScan: supplier projection=[s_suppkey, s_name, s_nationkey] +17)------------------------SubqueryAlias: l1 +18)--------------------------Projection: lineitem.l_orderkey, lineitem.l_suppkey +19)----------------------------Filter: lineitem.l_receiptdate > lineitem.l_commitdate +20)------------------------------TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] +21)--------------------Projection: orders.o_orderkey +22)----------------------Filter: orders.o_orderstatus = Utf8View("F") +23)------------------------TableScan: orders projection=[o_orderkey, o_orderstatus], partial_filters=[orders.o_orderstatus = Utf8View("F")] +24)----------------Projection: nation.n_nationkey +25)------------------Filter: nation.n_name = Utf8View("SAUDI ARABIA") +26)--------------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8View("SAUDI ARABIA")] 27)----------SubqueryAlias: __correlated_sq_2 28)------------SubqueryAlias: l3 29)--------------Projection: lineitem.l_orderkey, lineitem.l_suppkey @@ -97,27 +97,27 @@ physical_plan 05)--------RepartitionExec: partitioning=Hash([s_name@0], 4), input_partitions=4 06)----------AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(Int64(1))] 07)------------HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] -08)--------------HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 -09)----------------RepartitionExec: partitioning=Hash([l_orderkey@1], 4), input_partitions=4 -10)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@1, n_nationkey@0)], projection=[s_name@0, l_orderkey@2, l_suppkey@3] -11)--------------------RepartitionExec: partitioning=Hash([s_nationkey@1], 4), input_partitions=4 -12)----------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@2, o_orderkey@0)], projection=[s_name@0, s_nationkey@1, l_orderkey@2, l_suppkey@3] -13)------------------------RepartitionExec: partitioning=Hash([l_orderkey@2], 4), input_partitions=4 -14)--------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] -15)----------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=1 -16)------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_name, s_nationkey], file_type=csv, has_header=false -17)----------------------------RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 -18)------------------------------FilterExec: l_receiptdate@3 > l_commitdate@2, projection=[l_orderkey@0, l_suppkey@1] -19)--------------------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], file_type=csv, has_header=false -20)------------------------RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 -21)--------------------------FilterExec: o_orderstatus@1 = F, projection=[o_orderkey@0] -22)----------------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_orderstatus], file_type=csv, has_header=false -23)--------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 -24)----------------------FilterExec: n_name@1 = SAUDI ARABIA, projection=[n_nationkey@0] -25)------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -26)--------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], file_type=csv, has_header=false -27)----------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 -28)------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_suppkey], file_type=csv, has_header=false +08)--------------HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, l_orderkey@1)], filter=l_suppkey@0 != l_suppkey@1 +09)----------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 +10)------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_suppkey], file_type=csv, has_header=false +11)----------------RepartitionExec: partitioning=Hash([l_orderkey@1], 4), input_partitions=4 +12)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@1, n_nationkey@0)], projection=[s_name@0, l_orderkey@2, l_suppkey@3] +13)--------------------RepartitionExec: partitioning=Hash([s_nationkey@1], 4), input_partitions=4 +14)----------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@2, o_orderkey@0)], projection=[s_name@0, s_nationkey@1, l_orderkey@2, l_suppkey@3] +15)------------------------RepartitionExec: partitioning=Hash([l_orderkey@2], 4), input_partitions=4 +16)--------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] +17)----------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=1 +18)------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_name, s_nationkey], file_type=csv, has_header=false +19)----------------------------RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 +20)------------------------------FilterExec: l_receiptdate@3 > l_commitdate@2, projection=[l_orderkey@0, l_suppkey@1] +21)--------------------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], file_type=csv, has_header=false +22)------------------------RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 +23)--------------------------FilterExec: o_orderstatus@1 = F, projection=[o_orderkey@0] +24)----------------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_orderstatus], file_type=csv, has_header=false +25)--------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 +26)----------------------FilterExec: n_name@1 = SAUDI ARABIA, projection=[n_nationkey@0] +27)------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +28)--------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], file_type=csv, has_header=false 29)--------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 30)----------------FilterExec: l_receiptdate@3 > l_commitdate@2, projection=[l_orderkey@0, l_suppkey@1] 31)------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], file_type=csv, has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q4.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q4.slt.part index 0007666f15365..44c97f3d458fc 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q4.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q4.slt.part @@ -44,14 +44,14 @@ logical_plan 02)--Projection: orders.o_orderpriority, count(Int64(1)) AS count(*) AS order_count 03)----Aggregate: groupBy=[[orders.o_orderpriority]], aggr=[[count(Int64(1))]] 04)------Projection: orders.o_orderpriority -05)--------LeftSemi Join: orders.o_orderkey = __correlated_sq_1.l_orderkey -06)----------Projection: orders.o_orderkey, orders.o_orderpriority -07)------------Filter: orders.o_orderdate >= Date32("1993-07-01") AND orders.o_orderdate < Date32("1993-10-01") -08)--------------TableScan: orders projection=[o_orderkey, o_orderdate, o_orderpriority], partial_filters=[orders.o_orderdate >= Date32("1993-07-01"), orders.o_orderdate < Date32("1993-10-01")] -09)----------SubqueryAlias: __correlated_sq_1 -10)------------Projection: lineitem.l_orderkey -11)--------------Filter: lineitem.l_receiptdate > lineitem.l_commitdate -12)----------------TableScan: lineitem projection=[l_orderkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] +05)--------RightSemi Join: __correlated_sq_1.l_orderkey = orders.o_orderkey +06)----------SubqueryAlias: __correlated_sq_1 +07)------------Projection: lineitem.l_orderkey +08)--------------Filter: lineitem.l_receiptdate > lineitem.l_commitdate +09)----------------TableScan: lineitem projection=[l_orderkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] +10)----------Projection: orders.o_orderkey, orders.o_orderpriority +11)------------Filter: orders.o_orderdate >= Date32("1993-07-01") AND orders.o_orderdate < Date32("1993-10-01") +12)--------------TableScan: orders projection=[o_orderkey, o_orderdate, o_orderpriority], partial_filters=[orders.o_orderdate >= Date32("1993-07-01"), orders.o_orderdate < Date32("1993-10-01")] physical_plan 01)SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] 02)--SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[true] @@ -59,10 +59,10 @@ physical_plan 04)------AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(Int64(1))] 05)--------RepartitionExec: partitioning=Hash([o_orderpriority@0], 4), input_partitions=4 06)----------AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(Int64(1))] -07)------------HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] -08)--------------RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 -09)----------------FilterExec: o_orderdate@1 >= 1993-07-01 AND o_orderdate@1 < 1993-10-01, projection=[o_orderkey@0, o_orderpriority@2] -10)------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], file_type=csv, has_header=false -11)--------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 -12)----------------FilterExec: l_receiptdate@2 > l_commitdate@1, projection=[l_orderkey@0] -13)------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], file_type=csv, has_header=false +07)------------HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@0)], projection=[o_orderpriority@1] +08)--------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 +09)----------------FilterExec: l_receiptdate@2 > l_commitdate@1, projection=[l_orderkey@0] +10)------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], file_type=csv, has_header=false +11)--------------RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 +12)----------------FilterExec: o_orderdate@1 >= 1993-07-01 AND o_orderdate@1 < 1993-10-01, projection=[o_orderkey@0, o_orderpriority@2] +13)------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], file_type=csv, has_header=false