From 4c3d39bc1a27ce1f74af9eccbf52a2d599c5780b Mon Sep 17 00:00:00 2001
From: Yongting You <2010youy01@gmail.com>
Date: Sun, 29 Mar 2026 10:04:41 +0800
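Subject: [PATCH] Update compile_profile benchmark to build and run dfbench

Build the `dfbench` binary from the `datafusion-benchmarks` package instead
of the `tpch` binary, and run the TPC-H suite through the `dfbench tpch`
subcommand. Add the `ci-optimized` and `profiling` Cargo profiles to the
default profile list, and update the README and `--profiles` help text to
match.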
---
 benchmarks/README.md          |  2 +-
 benchmarks/compile_profile.py | 43 +++++++++++++++++++++++++++--------
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/benchmarks/README.md b/benchmarks/README.md
index 3aa4f4bb8640c..df602ea538102 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -95,7 +95,7 @@ Generate the data required for the compile profile helper (TPC-H SF=1):
 ./bench.sh data compile_profile
 ```
 
-Run the benchmark across all default Cargo profiles (`dev`, `release`, `ci`, `release-nonlto`):
+Run the benchmark across all default Cargo profiles (`dev`, `release`, `ci`, `ci-optimized`, `release-nonlto`, `profiling`):
 
 ```shell
 ./bench.sh run compile_profile
diff --git a/benchmarks/compile_profile.py b/benchmarks/compile_profile.py
index ae51de94937bf..a85e15ddacc04 100644
--- a/benchmarks/compile_profile.py
+++ b/benchmarks/compile_profile.py
@@ -19,8 +19,10 @@
 
 """Compile profile benchmark runner for DataFusion.
 
-Builds the `tpch` benchmark binary with several Cargo profiles (e.g. `--release` or `--profile ci`), runs the full TPC-H suite against the Parquet data under `benchmarks/data/tpch_sf1`, and reports compile time, execution time, and resulting 
-binary size.
+Builds the `dfbench` benchmark binary with several Cargo profiles
+(e.g. `--release` or `--profile ci`), runs the full TPC-H suite against
+the Parquet data under `benchmarks/data/tpch_sf1`, and reports compile
+time, execution time, and resulting binary size.
 
 See `benchmarks/README.md` for usages.
 """
@@ -40,12 +42,15 @@
 DEFAULT_ITERATIONS = 1
 DEFAULT_FORMAT = "parquet"
 DEFAULT_PARTITIONS: int | None = None
-TPCH_BINARY = "tpch.exe" if os.name == "nt" else "tpch"
+BENCHMARK_PACKAGE = "datafusion-benchmarks"
+BENCHMARK_BINARY = "dfbench.exe" if os.name == "nt" else "dfbench"
 PROFILE_TARGET_DIR = {
     "dev": "debug",
     "release": "release",
     "ci": "ci",
+    "ci-optimized": "ci-optimized",
     "release-nonlto": "release-nonlto",
+    "profiling": "profiling",
 }
 
 
@@ -62,7 +67,10 @@ def parse_args() -> argparse.Namespace:
         "--profiles",
         nargs="+",
         default=list(PROFILE_TARGET_DIR.keys()),
-        help="Cargo profiles to test (default: dev release ci release-nonlto)",
+        help=(
+            "Cargo profiles to test "
+            "(default: dev release ci ci-optimized release-nonlto profiling)"
+        ),
     )
     parser.add_argument(
         "--data",
@@ -84,9 +92,25 @@ def timed_run(command: Iterable[str]) -> float:
 
 def cargo_build(profile: str) -> float:
     if profile == "dev":
-        command = ["cargo", "build", "--bin", "tpch"]
+        command = [
+            "cargo",
+            "build",
+            "--package",
+            BENCHMARK_PACKAGE,
+            "--bin",
+            "dfbench",
+        ]
     else:
-        command = ["cargo", "build", "--profile", profile, "--bin", "tpch"]
+        command = [
+            "cargo",
+            "build",
+            "--profile",
+            profile,
+            "--package",
+            BENCHMARK_PACKAGE,
+            "--bin",
+            "dfbench",
+        ]
     return timed_run(command)
 
 
@@ -102,14 +126,13 @@ def run_benchmark(profile: str, data_path: Path) -> float:
     binary_dir = PROFILE_TARGET_DIR.get(profile)
     if not binary_dir:
         raise ValueError(f"unknown profile '{profile}'")
-    binary_path = REPO_ROOT / "target" / binary_dir / TPCH_BINARY
+    binary_path = REPO_ROOT / "target" / binary_dir / BENCHMARK_BINARY
     if not binary_path.exists():
         raise FileNotFoundError(f"compiled binary not found at {binary_path}")
 
     command = [
         str(binary_path),
-        "benchmark",
-        "datafusion",
+        "tpch",
         "--iterations",
         str(DEFAULT_ITERATIONS),
         "--path",
@@ -132,7 +155,7 @@ def run_benchmark(profile: str, data_path: Path) -> float:
 
 def binary_size(profile: str) -> int:
     binary_dir = PROFILE_TARGET_DIR[profile]
-    binary_path = REPO_ROOT / "target" / binary_dir / TPCH_BINARY
+    binary_path = REPO_ROOT / "target" / binary_dir / BENCHMARK_BINARY
     return binary_path.stat().st_size