Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ Generate the data required for the compile profile helper (TPC-H SF=1):
./bench.sh data compile_profile
```

Run the benchmark across all default Cargo profiles (`dev`, `release`, `ci`, `release-nonlto`):
Run the benchmark across all default Cargo profiles (`dev`, `release`, `ci`, `ci-optimized`, `release-nonlto`, `profiling`):

```shell
./bench.sh run compile_profile
Expand Down
43 changes: 33 additions & 10 deletions benchmarks/compile_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@

"""Compile profile benchmark runner for DataFusion.

Builds the `tpch` benchmark binary with several Cargo profiles (e.g. `--release` or `--profile ci`), runs the full TPC-H suite against the Parquet data under `benchmarks/data/tpch_sf1`, and reports compile time, execution time, and resulting
binary size.
Builds the `dfbench` benchmark binary with several Cargo profiles
(e.g. `--release` or `--profile ci`), runs the full TPC-H suite against
the Parquet data under `benchmarks/data/tpch_sf1`, and reports compile
time, execution time, and resulting binary size.

See `benchmarks/README.md` for usage.
"""
Expand All @@ -40,12 +42,15 @@
DEFAULT_ITERATIONS = 1
DEFAULT_FORMAT = "parquet"
DEFAULT_PARTITIONS: int | None = None
TPCH_BINARY = "tpch.exe" if os.name == "nt" else "tpch"
BENCHMARK_PACKAGE = "datafusion-benchmarks"
BENCHMARK_BINARY = "dfbench.exe" if os.name == "nt" else "dfbench"
PROFILE_TARGET_DIR = {
"dev": "debug",
"release": "release",
"ci": "ci",
"ci-optimized": "ci-optimized",
"release-nonlto": "release-nonlto",
"profiling": "profiling",
}


Expand All @@ -62,7 +67,10 @@ def parse_args() -> argparse.Namespace:
"--profiles",
nargs="+",
default=list(PROFILE_TARGET_DIR.keys()),
help="Cargo profiles to test (default: dev release ci release-nonlto)",
help=(
"Cargo profiles to test "
"(default: dev release ci ci-optimized release-nonlto profiling)"
),
)
parser.add_argument(
"--data",
Expand All @@ -84,9 +92,25 @@ def timed_run(command: Iterable[str]) -> float:

def cargo_build(profile: str) -> float:
if profile == "dev":
command = ["cargo", "build", "--bin", "tpch"]
command = [
"cargo",
"build",
"--package",
BENCHMARK_PACKAGE,
"--bin",
"dfbench",
]
else:
command = ["cargo", "build", "--profile", profile, "--bin", "tpch"]
command = [
"cargo",
"build",
"--profile",
profile,
"--package",
BENCHMARK_PACKAGE,
"--bin",
"dfbench",
]
return timed_run(command)


Expand All @@ -102,14 +126,13 @@ def run_benchmark(profile: str, data_path: Path) -> float:
binary_dir = PROFILE_TARGET_DIR.get(profile)
if not binary_dir:
raise ValueError(f"unknown profile '{profile}'")
binary_path = REPO_ROOT / "target" / binary_dir / TPCH_BINARY
binary_path = REPO_ROOT / "target" / binary_dir / BENCHMARK_BINARY
if not binary_path.exists():
raise FileNotFoundError(f"compiled binary not found at {binary_path}")

command = [
str(binary_path),
"benchmark",
"datafusion",
"tpch",
"--iterations",
str(DEFAULT_ITERATIONS),
"--path",
Expand All @@ -132,7 +155,7 @@ def run_benchmark(profile: str, data_path: Path) -> float:

def binary_size(profile: str) -> int:
binary_dir = PROFILE_TARGET_DIR[profile]
binary_path = REPO_ROOT / "target" / binary_dir / TPCH_BINARY
binary_path = REPO_ROOT / "target" / binary_dir / BENCHMARK_BINARY
return binary_path.stat().st_size


Expand Down
Loading