@@ -260,6 +260,66 @@ physical_plan
26026002)--SortExec: expr=[c2@1 ASC NULLS LAST, c3@2 ASC NULLS LAST], preserve_partitioning=[false]
26126103)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], file_type=csv, has_header=true
262262
263+
264+ # eliminate sort expressions made redundant by functional dependence on the GROUP BY key (c2 determines total_sal)
265+ query TT
266+ explain SELECT c2, SUM(c3) AS total_sal FROM aggregate_test_100 GROUP BY c2 ORDER BY c2, total_sal
267+ ----
268+ logical_plan
269+ 01)Sort: aggregate_test_100.c2 ASC NULLS LAST
270+ 02)--Projection: aggregate_test_100.c2, sum(aggregate_test_100.c3) AS total_sal
271+ 03)----Aggregate: groupBy=[[aggregate_test_100.c2]], aggr=[[sum(CAST(aggregate_test_100.c3 AS Int64))]]
272+ 04)------TableScan: aggregate_test_100 projection=[c2, c3]
273+ physical_plan
274+ 01)SortPreservingMergeExec: [c2@0 ASC NULLS LAST]
275+ 02)--SortExec: expr=[c2@0 ASC NULLS LAST], preserve_partitioning=[true]
276+ 03)----ProjectionExec: expr=[c2@0 as c2, sum(aggregate_test_100.c3)@1 as total_sal]
277+ 04)------AggregateExec: mode=FinalPartitioned, gby=[c2@0 as c2], aggr=[sum(aggregate_test_100.c3)]
278+ 05)--------RepartitionExec: partitioning=Hash([c2@0], 4), input_partitions=4
279+ 06)----------AggregateExec: mode=Partial, gby=[c2@0 as c2], aggr=[sum(aggregate_test_100.c3)]
280+ 07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
281+ 08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3], file_type=csv, has_header=true
282+
283+ # keep all sort expressions when the functionally dependent expression (total_sal) precedes its determinant (c2)
284+ query TT
285+ explain SELECT c2, SUM(c3) AS total_sal FROM aggregate_test_100 GROUP BY c2 ORDER BY total_sal, c2
286+ ----
287+ logical_plan
288+ 01)Sort: total_sal ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST
289+ 02)--Projection: aggregate_test_100.c2, sum(aggregate_test_100.c3) AS total_sal
290+ 03)----Aggregate: groupBy=[[aggregate_test_100.c2]], aggr=[[sum(CAST(aggregate_test_100.c3 AS Int64))]]
291+ 04)------TableScan: aggregate_test_100 projection=[c2, c3]
292+ physical_plan
293+ 01)SortPreservingMergeExec: [total_sal@1 ASC NULLS LAST, c2@0 ASC NULLS LAST]
294+ 02)--SortExec: expr=[total_sal@1 ASC NULLS LAST, c2@0 ASC NULLS LAST], preserve_partitioning=[true]
295+ 03)----ProjectionExec: expr=[c2@0 as c2, sum(aggregate_test_100.c3)@1 as total_sal]
296+ 04)------AggregateExec: mode=FinalPartitioned, gby=[c2@0 as c2], aggr=[sum(aggregate_test_100.c3)]
297+ 05)--------RepartitionExec: partitioning=Hash([c2@0], 4), input_partitions=4
298+ 06)----------AggregateExec: mode=Partial, gby=[c2@0 as c2], aggr=[sum(aggregate_test_100.c3)]
299+ 07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
300+ 08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3], file_type=csv, has_header=true
301+
302+ # eliminate the redundant sort expression even when a non-schema expression (abs(c2)) follows it
303+ query TT
304+ explain SELECT c2, SUM(c3) AS total_sal FROM aggregate_test_100 GROUP BY c2 ORDER BY c2, total_sal, abs(c2)
305+ ----
306+ logical_plan
307+ 01)Sort: aggregate_test_100.c2 ASC NULLS LAST, abs(aggregate_test_100.c2) ASC NULLS LAST
308+ 02)--Projection: aggregate_test_100.c2, sum(aggregate_test_100.c3) AS total_sal
309+ 03)----Aggregate: groupBy=[[aggregate_test_100.c2]], aggr=[[sum(CAST(aggregate_test_100.c3 AS Int64))]]
310+ 04)------TableScan: aggregate_test_100 projection=[c2, c3]
311+ physical_plan
312+ 01)SortPreservingMergeExec: [c2@0 ASC NULLS LAST, abs(c2@0) ASC NULLS LAST]
313+ 02)--SortExec: expr=[c2@0 ASC NULLS LAST, abs(c2@0) ASC NULLS LAST], preserve_partitioning=[true]
314+ 03)----ProjectionExec: expr=[c2@0 as c2, sum(aggregate_test_100.c3)@1 as total_sal]
315+ 04)------AggregateExec: mode=FinalPartitioned, gby=[c2@0 as c2], aggr=[sum(aggregate_test_100.c3)]
316+ 05)--------RepartitionExec: partitioning=Hash([c2@0], 4), input_partitions=4
317+ 06)----------AggregateExec: mode=Partial, gby=[c2@0 as c2], aggr=[sum(aggregate_test_100.c3)]
318+ 07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
319+ 08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3], file_type=csv, has_header=true
320+
321+ 
263323query II
264324SELECT c2, c3 FROM aggregate_test_100 ORDER BY c2, c3, c2
265325----
@@ -1637,7 +1697,7 @@ physical_plan
16371697statement ok
16381698reset datafusion.catalog.information_schema;
16391699
1640- # The SLT runner sets `target_partitions` to 4 instead of using the default, so
1700+ # The SLT runner sets `target_partitions` to 4 instead of using the default, so
16411701# reset it explicitly.
16421702statement ok
16431703set datafusion.execution.target_partitions = 4;