apache
diff --git a/Cargo.lock b/Cargo.lock
Lines changed: 3 additions & 0 deletions
diff --git a/benchmarks/README.md b/benchmarks/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh
Lines changed: 30 additions & 0 deletions
diff --git a/benchmarks/compile_profile.py b/benchmarks/compile_profile.py
Lines changed: 33 additions & 10 deletions
diff --git a/benchmarks/queries/sort_pushdown/q1.sql b/benchmarks/queries/sort_pushdown/q1.sql
Lines changed: 6 additions & 0 deletions
diff --git a/benchmarks/queries/sort_pushdown/q2.sql b/benchmarks/queries/sort_pushdown/q2.sql
Lines changed: 7 additions & 0 deletions
diff --git a/benchmarks/queries/sort_pushdown/q3.sql b/benchmarks/queries/sort_pushdown/q3.sql
Lines changed: 5 additions & 0 deletions
diff --git a/benchmarks/queries/sort_pushdown/q4.sql b/benchmarks/queries/sort_pushdown/q4.sql
Lines changed: 5 additions & 0 deletions
diff --git a/benchmarks/src/bin/dfbench.rs b/benchmarks/src/bin/dfbench.rs
Lines changed: 4 additions & 1 deletion
diff --git a/benchmarks/src/lib.rs b/benchmarks/src/lib.rs
Lines changed: 1 addition & 0 deletions
@@ -95,7 +95,7 @@ Generate the data required for the compile profile helper (TPC-H SF=1):
 ./bench.sh data compile_profile
 ```
 
-Run the benchmark across all default Cargo profiles (`dev`, `release`, `ci`, `release-nonlto`):
+Run the benchmark across all default Cargo profiles (`dev`, `release`, `ci`, `ci-optimized`, `release-nonlto`, `profiling`):
 
 ```shell
 ./bench.sh run compile_profile
 
@@ -106,6 +106,10 @@ clickbench_partitioned: ClickBench queries against partitioned (100 files) parqu
 clickbench_pushdown:    ClickBench queries against partitioned (100 files) parquet w/ filter_pushdown enabled
 clickbench_extended:    ClickBench \"inspired\" queries against a single parquet (DataFusion specific)
 
+# Sort Pushdown Benchmarks
+sort_pushdown:          Sort pushdown baseline (no WITH ORDER) on TPC-H data (SF=1)
+sort_pushdown_sorted:   Sort pushdown with WITH ORDER — tests sort elimination on non-overlapping files
+
 # Sorted Data Benchmarks (ORDER BY Optimization)
 clickbench_sorted:     ClickBench queries on pre-sorted data using prefer_existing_sort (tests sort elimination optimization)
 
@@ -309,6 +313,10 @@ main() {
                     # same data as for tpch
                     data_tpch "1" "parquet"
                     ;;
+                sort_pushdown|sort_pushdown_sorted)
+                    # same data as for tpch
+                    data_tpch "1" "parquet"
+                    ;;
                 sort_tpch)
                     # same data as for tpch
                     data_tpch "1" "parquet"
@@ -509,6 +517,12 @@ main() {
                 external_aggr)
                     run_external_aggr
                     ;;
+                sort_pushdown)
+                    run_sort_pushdown
+                    ;;
+                sort_pushdown_sorted)
+                    run_sort_pushdown_sorted
+                    ;;
                 sort_tpch)
                     run_sort_tpch "1"
                     ;;
@@ -1070,6 +1084,22 @@ run_external_aggr() {
     debug_run $CARGO_COMMAND --bin external_aggr -- benchmark --partitions 4 --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}" ${QUERY_ARG}
 }
 
+# Runs the sort pushdown benchmark (without WITH ORDER)
+run_sort_pushdown() {
+    TPCH_DIR="${DATA_DIR}/tpch_sf1"
+    RESULTS_FILE="${RESULTS_DIR}/sort_pushdown.json"
+    echo "Running sort pushdown benchmark (no WITH ORDER)..."
+    debug_run $CARGO_COMMAND --bin dfbench -- sort-pushdown --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
+}
+
+# Runs the sort pushdown benchmark with WITH ORDER (enables sort elimination)
+run_sort_pushdown_sorted() {
+    TPCH_DIR="${DATA_DIR}/tpch_sf1"
+    RESULTS_FILE="${RESULTS_DIR}/sort_pushdown_sorted.json"
+    echo "Running sort pushdown benchmark (with WITH ORDER)..."
+    debug_run $CARGO_COMMAND --bin dfbench -- sort-pushdown --sorted --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
+}
+
 # Runs the sort integration benchmark
 run_sort_tpch() {
     SCALE_FACTOR=$1
 
@@ -19,8 +19,10 @@
 
 """Compile profile benchmark runner for DataFusion.
 
-Builds the `tpch` benchmark binary with several Cargo profiles (e.g. `--release` or `--profile ci`), runs the full TPC-H suite against the Parquet data under `benchmarks/data/tpch_sf1`, and reports compile time, execution time, and resulting 
-binary size.
+Builds the `dfbench` benchmark binary with several Cargo profiles
+(e.g. `--release` or `--profile ci`), runs the full TPC-H suite against
+the Parquet data under `benchmarks/data/tpch_sf1`, and reports compile
+time, execution time, and resulting binary size.
 
 See `benchmarks/README.md` for usages.
 """
@@ -40,12 +42,15 @@
 DEFAULT_ITERATIONS = 1
 DEFAULT_FORMAT = "parquet"
 DEFAULT_PARTITIONS: int | None = None
-TPCH_BINARY = "tpch.exe" if os.name == "nt" else "tpch"
+BENCHMARK_PACKAGE = "datafusion-benchmarks"
+BENCHMARK_BINARY = "dfbench.exe" if os.name == "nt" else "dfbench"
 PROFILE_TARGET_DIR = {
     "dev": "debug",
     "release": "release",
     "ci": "ci",
+    "ci-optimized": "ci-optimized",
     "release-nonlto": "release-nonlto",
+    "profiling": "profiling",
 }
 
 
@@ -62,7 +67,10 @@ def parse_args() -> argparse.Namespace:
         "--profiles",
         nargs="+",
         default=list(PROFILE_TARGET_DIR.keys()),
-        help="Cargo profiles to test (default: dev release ci release-nonlto)",
+        help=(
+            "Cargo profiles to test "
+            "(default: dev release ci ci-optimized release-nonlto profiling)"
+        ),
     )
     parser.add_argument(
         "--data",
@@ -84,9 +92,25 @@ def timed_run(command: Iterable[str]) -> float:
 
 def cargo_build(profile: str) -> float:
     if profile == "dev":
-        command = ["cargo", "build", "--bin", "tpch"]
+        command = [
+            "cargo",
+            "build",
+            "--package",
+            BENCHMARK_PACKAGE,
+            "--bin",
+            "dfbench",
+        ]
     else:
-        command = ["cargo", "build", "--profile", profile, "--bin", "tpch"]
+        command = [
+            "cargo",
+            "build",
+            "--profile",
+            profile,
+            "--package",
+            BENCHMARK_PACKAGE,
+            "--bin",
+            "dfbench",
+        ]
     return timed_run(command)
 
 
@@ -102,14 +126,13 @@ def run_benchmark(profile: str, data_path: Path) -> float:
     binary_dir = PROFILE_TARGET_DIR.get(profile)
     if not binary_dir:
         raise ValueError(f"unknown profile '{profile}'")
-    binary_path = REPO_ROOT / "target" / binary_dir / TPCH_BINARY
+    binary_path = REPO_ROOT / "target" / binary_dir / BENCHMARK_BINARY
     if not binary_path.exists():
         raise FileNotFoundError(f"compiled binary not found at {binary_path}")
 
     command = [
         str(binary_path),
-        "benchmark",
-        "datafusion",
+        "tpch",
         "--iterations",
         str(DEFAULT_ITERATIONS),
         "--path",
@@ -132,7 +155,7 @@ def run_benchmark(profile: str, data_path: Path) -> float:
 
 def binary_size(profile: str) -> int:
     binary_dir = PROFILE_TARGET_DIR[profile]
-    binary_path = REPO_ROOT / "target" / binary_dir / TPCH_BINARY
+    binary_path = REPO_ROOT / "target" / binary_dir / BENCHMARK_BINARY
     return binary_path.stat().st_size
 
 
 
@@ -0,0 +1,6 @@
+-- Sort elimination: ORDER BY sort key ASC (full scan)
+-- With --sorted: SortExec removed, sequential scan in file order
+-- Without --sorted: full SortExec required
+SELECT l_orderkey, l_partkey, l_suppkey
+FROM lineitem
+ORDER BY l_orderkey
@@ -0,0 +1,7 @@
+-- Sort elimination + limit pushdown
+-- With --sorted: SortExec removed + limit pushed to DataSourceExec
+-- Without --sorted: TopK sort over all data
+SELECT l_orderkey, l_partkey, l_suppkey
+FROM lineitem
+ORDER BY l_orderkey
+LIMIT 100
@@ -0,0 +1,5 @@
+-- Sort elimination: wide projection (all columns)
+-- Tests sort elimination benefit with larger row payload
+SELECT *
+FROM lineitem
+ORDER BY l_orderkey
@@ -0,0 +1,5 @@
+-- Sort elimination + limit: wide projection
+SELECT *
+FROM lineitem
+ORDER BY l_orderkey
+LIMIT 100
@@ -34,7 +34,8 @@ static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
 
 use datafusion_benchmarks::{
-    cancellation, clickbench, h2o, hj, imdb, nlj, smj, sort_tpch, tpcds, tpch,
+    cancellation, clickbench, h2o, hj, imdb, nlj, smj, sort_pushdown, sort_tpch, tpcds,
+    tpch,
 };
 
 #[derive(Debug, Parser)]
@@ -53,6 +54,7 @@ enum Options {
     Imdb(imdb::RunOpt),
     Nlj(nlj::RunOpt),
     Smj(smj::RunOpt),
+    SortPushdown(sort_pushdown::RunOpt),
     SortTpch(sort_tpch::RunOpt),
     Tpch(tpch::RunOpt),
     Tpcds(tpcds::RunOpt),
@@ -72,6 +74,7 @@ pub async fn main() -> Result<()> {
         Options::Imdb(opt) => Box::pin(opt.run()).await,
         Options::Nlj(opt) => opt.run().await,
         Options::Smj(opt) => opt.run().await,
+        Options::SortPushdown(opt) => opt.run().await,
         Options::SortTpch(opt) => opt.run().await,
         Options::Tpch(opt) => Box::pin(opt.run()).await,
         Options::Tpcds(opt) => Box::pin(opt.run()).await,
 
@@ -23,6 +23,7 @@ pub mod hj;
 pub mod imdb;
 pub mod nlj;
 pub mod smj;
+pub mod sort_pushdown;
 pub mod sort_tpch;
 pub mod tpcds;
 pub mod tpch;