From 4c3d39bc1a27ce1f74af9eccbf52a2d599c5780b Mon Sep 17 00:00:00 2001 From: Yongting You <2010youy01@gmail.com> Date: Sun, 29 Mar 2026 10:04:41 +0800 Subject: [PATCH] update compile_profile benchmark --- benchmarks/README.md | 2 +- benchmarks/compile_profile.py | 43 +++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 3aa4f4bb8640c..df602ea538102 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -95,7 +95,7 @@ Generate the data required for the compile profile helper (TPC-H SF=1): ./bench.sh data compile_profile ``` -Run the benchmark across all default Cargo profiles (`dev`, `release`, `ci`, `release-nonlto`): +Run the benchmark across all default Cargo profiles (`dev`, `release`, `ci`, `ci-optimized`, `release-nonlto`, `profiling`): ```shell ./bench.sh run compile_profile diff --git a/benchmarks/compile_profile.py b/benchmarks/compile_profile.py index ae51de94937bf..a85e15ddacc04 100644 --- a/benchmarks/compile_profile.py +++ b/benchmarks/compile_profile.py @@ -19,8 +19,10 @@ """Compile profile benchmark runner for DataFusion. -Builds the `tpch` benchmark binary with several Cargo profiles (e.g. `--release` or `--profile ci`), runs the full TPC-H suite against the Parquet data under `benchmarks/data/tpch_sf1`, and reports compile time, execution time, and resulting -binary size. +Builds the `dfbench` benchmark binary with several Cargo profiles +(e.g. `--release` or `--profile ci`), runs the full TPC-H suite against +the Parquet data under `benchmarks/data/tpch_sf1`, and reports compile +time, execution time, and resulting binary size. See `benchmarks/README.md` for usages. """ @@ -40,12 +42,15 @@ DEFAULT_ITERATIONS = 1 DEFAULT_FORMAT = "parquet" DEFAULT_PARTITIONS: int | None = None -TPCH_BINARY = "tpch.exe" if os.name == "nt" else "tpch" +BENCHMARK_PACKAGE = "datafusion-benchmarks" +BENCHMARK_BINARY = "dfbench.exe" if os.name == "nt" else "dfbench" PROFILE_TARGET_DIR = { "dev": "debug", "release": "release", "ci": "ci", + "ci-optimized": "ci-optimized", "release-nonlto": "release-nonlto", + "profiling": "profiling", } @@ -62,7 +67,10 @@ def parse_args() -> argparse.Namespace: "--profiles", nargs="+", default=list(PROFILE_TARGET_DIR.keys()), - help="Cargo profiles to test (default: dev release ci release-nonlto)", + help=( + "Cargo profiles to test " + "(default: dev release ci ci-optimized release-nonlto profiling)" + ), ) parser.add_argument( "--data", @@ -84,9 +92,25 @@ def timed_run(command: Iterable[str]) -> float: def cargo_build(profile: str) -> float: if profile == "dev": - command = ["cargo", "build", "--bin", "tpch"] + command = [ + "cargo", + "build", + "--package", + BENCHMARK_PACKAGE, + "--bin", + "dfbench", + ] else: - command = ["cargo", "build", "--profile", profile, "--bin", "tpch"] + command = [ + "cargo", + "build", + "--profile", + profile, + "--package", + BENCHMARK_PACKAGE, + "--bin", + "dfbench", + ] return timed_run(command) @@ -102,14 +126,13 @@ def run_benchmark(profile: str, data_path: Path) -> float: binary_dir = PROFILE_TARGET_DIR.get(profile) if not binary_dir: raise ValueError(f"unknown profile '{profile}'") - binary_path = REPO_ROOT / "target" / binary_dir / TPCH_BINARY + binary_path = REPO_ROOT / "target" / binary_dir / BENCHMARK_BINARY if not binary_path.exists(): raise FileNotFoundError(f"compiled binary not found at {binary_path}") command = [ str(binary_path), - "benchmark", - "datafusion", + "tpch", "--iterations", str(DEFAULT_ITERATIONS), "--path", @@ -132,7 +155,7 @@ def run_benchmark(profile: str, data_path: Path) -> float: def binary_size(profile: str) -> int: binary_dir = PROFILE_TARGET_DIR[profile] - binary_path = REPO_ROOT / "target" / binary_dir / TPCH_BINARY + binary_path = REPO_ROOT / "target" / binary_dir / BENCHMARK_BINARY return binary_path.stat().st_size