Skip to content

Commit a022a91

Browse files
authored
Merge branch 'main' into feat_migrate_ffi_to_stabby
2 parents 155ccea + a0869e9 commit a022a91

27 files changed

Lines changed: 1404 additions & 164 deletions

File tree

.github/workflows/extended.yml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,7 @@ jobs:
9393
linux-test-extended:
9494
name: cargo test 'extended_tests' (amd64)
9595
needs: [linux-build-lib]
96-
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=32,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion,spot=false', github.run_id) || 'ubuntu-latest' }}
97-
# spot=false because the tests are long, https://runs-on.com/configuration/spot-instances/#disable-spot-pricing
96+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=32,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
9897
# note: do not use amd/rust container to preserve disk space
9998
steps:
10099
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
@@ -162,8 +161,7 @@ jobs:
162161
163162
sqllogictest-sqlite:
164163
name: "Run sqllogictests with the sqlite test suite"
165-
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=48,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion,spot=false', github.run_id) || 'ubuntu-latest' }}
166-
# spot=false because the tests are long, https://runs-on.com/configuration/spot-instances/#disable-spot-pricing
164+
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=32,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
167165
container:
168166
image: amd64/rust
169167
steps:

Cargo.toml

Lines changed: 23 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -224,62 +224,57 @@ unused_qualifications = "deny"
224224
# --------------------
225225
# Compilation Profiles
226226
# --------------------
227-
# A Cargo profile is a preset for the compiler/linker knobs that trade off:
227+
# A Cargo profile is a preset for the compiler/linker knobs that trades off:
228228
# - Build time: how quickly code compiles and links
229229
# - Runtime performance: how fast the resulting binaries execute
230230
# - Binary size: how large the executables end up
231231
# - Debuggability: how much debug information is preserved for debugging and profiling
232232
#
233+
# To use a profile: `cargo [ build | run | ... ] --profile <profile-name>`
234+
#
233235
# Profiles available:
234-
# - dev: default debug build; fastest to compile, slowest to run, full debug info
235-
# for everyday development.
236-
# Run: cargo run
237-
# - release: optimized build; slowest to compile, fastest to run, smallest
238-
# binaries for public releases.
239-
# Run: cargo run --release
240-
# - release-nonlto: skips LTO, so it builds quicker while staying close to
241-
# release performance. It is useful when developing performance optimizations.
242-
# Run: cargo run --profile release-nonlto
236+
# - dev: default debug build; fastest to compile, slowest to run, full debug info.
237+
# For everyday development; default for "cargo [ build | test | run ]".
238+
# - release: fully optimized build; slowest to compile, fastest to run, smallest
239+
# binaries. For public releases; default for "cargo [ bench | install ]".
240+
# - release-nonlto: skips LTO, so it builds much faster while staying close to
241+
# release performance. Useful when developing performance optimizations.
243242
# - profiling: inherits release optimizations but retains debug info to support
244243
# profiling tools and flamegraphs.
245-
# Run: cargo run --profile profiling
246-
# - ci: derived from `dev` but disables incremental builds and strips dependency
247-
# symbols to keep CI artifacts small and reproducible.
248-
# Run: cargo run --profile ci
249-
# - ci-optimized: derived from `release` but enables debug assertions, and uses
250-
# lighter optimizations. Used for long-running CI tasks.
251-
# Run: cargo run --profile ci-release
244+
# - ci: derived from `dev` but disables debug info and incremental builds to keep
245+
# CI artifacts small and reproducible.
246+
# - ci-optimized: derived from `release` but enables debug assertions and uses
247+
# less aggressive optimizations for faster builds. Used for long-running CI
248+
# tasks.
252249
#
253250
# If you want to optimize compilation, the `compile_profile` benchmark can be useful.
254251
# See `benchmarks/README.md` for more details.
255252
[profile.release]
256253
codegen-units = 1
257254
lto = true
258-
strip = true # Eliminate debug information to minimize binary size
255+
strip = true # Eliminate debug info to minimize binary size
259256

260257
[profile.release-nonlto]
261-
codegen-units = 16
262-
debug-assertions = false
263-
incremental = false
264258
inherits = "release"
259+
codegen-units = 16
265260
lto = false
266-
opt-level = 3
267-
overflow-checks = false
268-
rpath = false
269-
strip = false # Retain debug info for flamegraphs
261+
strip = false # Retain debug info for flamegraphs
262+
263+
[profile.profiling]
264+
inherits = "release"
265+
debug = true
266+
strip = false
270267

271268
[profile.ci-optimized]
272269
inherits = "release"
273270
debug-assertions = true
274271
codegen-units = 16
275272
lto = "thin"
276-
strip = true
277273

278274
[profile.ci]
279-
debug = false
280275
inherits = "dev"
276+
debug = false
281277
incremental = false
282-
debug-assertions = true
283278

284279
# This rule applies to every package except workspace members (dependencies
285280
# such as `arrow` and `tokio`). It disables debug info and related features on
@@ -289,8 +284,3 @@ debug = false
289284
debug-assertions = false
290285
strip = "debuginfo"
291286
incremental = false
292-
293-
[profile.profiling]
294-
inherits = "release"
295-
debug = true
296-
strip = false

benchmarks/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ Generate the data required for the compile profile helper (TPC-H SF=1):
9595
./bench.sh data compile_profile
9696
```
9797

98-
Run the benchmark across all default Cargo profiles (`dev`, `release`, `ci`, `release-nonlto`):
98+
Run the benchmark across all default Cargo profiles (`dev`, `release`, `ci`, `ci-optimized`, `release-nonlto`, `profiling`):
9999

100100
```shell
101101
./bench.sh run compile_profile

benchmarks/bench.sh

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,10 @@ clickbench_partitioned: ClickBench queries against partitioned (100 files) parqu
106106
clickbench_pushdown: ClickBench queries against partitioned (100 files) parquet w/ filter_pushdown enabled
107107
clickbench_extended: ClickBench \"inspired\" queries against a single parquet (DataFusion specific)
108108
109+
# Sort Pushdown Benchmarks
110+
sort_pushdown: Sort pushdown baseline (no WITH ORDER) on TPC-H data (SF=1)
111+
sort_pushdown_sorted: Sort pushdown with WITH ORDER — tests sort elimination on non-overlapping files
112+
109113
# Sorted Data Benchmarks (ORDER BY Optimization)
110114
clickbench_sorted: ClickBench queries on pre-sorted data using prefer_existing_sort (tests sort elimination optimization)
111115
@@ -309,6 +313,10 @@ main() {
309313
# same data as for tpch
310314
data_tpch "1" "parquet"
311315
;;
316+
sort_pushdown|sort_pushdown_sorted)
317+
# same data as for tpch
318+
data_tpch "1" "parquet"
319+
;;
312320
sort_tpch)
313321
# same data as for tpch
314322
data_tpch "1" "parquet"
@@ -509,6 +517,12 @@ main() {
509517
external_aggr)
510518
run_external_aggr
511519
;;
520+
sort_pushdown)
521+
run_sort_pushdown
522+
;;
523+
sort_pushdown_sorted)
524+
run_sort_pushdown_sorted
525+
;;
512526
sort_tpch)
513527
run_sort_tpch "1"
514528
;;
@@ -1070,6 +1084,22 @@ run_external_aggr() {
10701084
debug_run $CARGO_COMMAND --bin external_aggr -- benchmark --partitions 4 --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}" ${QUERY_ARG}
10711085
}
10721086

1087+
# Runs the sort pushdown benchmark (without WITH ORDER)
1088+
run_sort_pushdown() {
1089+
TPCH_DIR="${DATA_DIR}/tpch_sf1"
1090+
RESULTS_FILE="${RESULTS_DIR}/sort_pushdown.json"
1091+
echo "Running sort pushdown benchmark (no WITH ORDER)..."
1092+
debug_run $CARGO_COMMAND --bin dfbench -- sort-pushdown --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
1093+
}
1094+
1095+
# Runs the sort pushdown benchmark with WITH ORDER (enables sort elimination)
1096+
run_sort_pushdown_sorted() {
1097+
TPCH_DIR="${DATA_DIR}/tpch_sf1"
1098+
RESULTS_FILE="${RESULTS_DIR}/sort_pushdown_sorted.json"
1099+
echo "Running sort pushdown benchmark (with WITH ORDER)..."
1100+
debug_run $CARGO_COMMAND --bin dfbench -- sort-pushdown --sorted --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
1101+
}
1102+
10731103
# Runs the sort integration benchmark
10741104
run_sort_tpch() {
10751105
SCALE_FACTOR=$1

benchmarks/compile_profile.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@
1919

2020
"""Compile profile benchmark runner for DataFusion.
2121
22-
Builds the `tpch` benchmark binary with several Cargo profiles (e.g. `--release` or `--profile ci`), runs the full TPC-H suite against the Parquet data under `benchmarks/data/tpch_sf1`, and reports compile time, execution time, and resulting
23-
binary size.
22+
Builds the `dfbench` benchmark binary with several Cargo profiles
23+
(e.g. `--release` or `--profile ci`), runs the full TPC-H suite against
24+
the Parquet data under `benchmarks/data/tpch_sf1`, and reports compile
25+
time, execution time, and resulting binary size.
2426
2527
See `benchmarks/README.md` for usage.
2628
"""
@@ -40,12 +42,15 @@
4042
DEFAULT_ITERATIONS = 1
4143
DEFAULT_FORMAT = "parquet"
4244
DEFAULT_PARTITIONS: int | None = None
43-
TPCH_BINARY = "tpch.exe" if os.name == "nt" else "tpch"
45+
BENCHMARK_PACKAGE = "datafusion-benchmarks"
46+
BENCHMARK_BINARY = "dfbench.exe" if os.name == "nt" else "dfbench"
4447
PROFILE_TARGET_DIR = {
4548
"dev": "debug",
4649
"release": "release",
4750
"ci": "ci",
51+
"ci-optimized": "ci-optimized",
4852
"release-nonlto": "release-nonlto",
53+
"profiling": "profiling",
4954
}
5055

5156

@@ -62,7 +67,10 @@ def parse_args() -> argparse.Namespace:
6267
"--profiles",
6368
nargs="+",
6469
default=list(PROFILE_TARGET_DIR.keys()),
65-
help="Cargo profiles to test (default: dev release ci release-nonlto)",
70+
help=(
71+
"Cargo profiles to test "
72+
"(default: dev release ci ci-optimized release-nonlto profiling)"
73+
),
6674
)
6775
parser.add_argument(
6876
"--data",
@@ -84,9 +92,25 @@ def timed_run(command: Iterable[str]) -> float:
8492

8593
def cargo_build(profile: str) -> float:
8694
if profile == "dev":
87-
command = ["cargo", "build", "--bin", "tpch"]
95+
command = [
96+
"cargo",
97+
"build",
98+
"--package",
99+
BENCHMARK_PACKAGE,
100+
"--bin",
101+
"dfbench",
102+
]
88103
else:
89-
command = ["cargo", "build", "--profile", profile, "--bin", "tpch"]
104+
command = [
105+
"cargo",
106+
"build",
107+
"--profile",
108+
profile,
109+
"--package",
110+
BENCHMARK_PACKAGE,
111+
"--bin",
112+
"dfbench",
113+
]
90114
return timed_run(command)
91115

92116

@@ -102,14 +126,13 @@ def run_benchmark(profile: str, data_path: Path) -> float:
102126
binary_dir = PROFILE_TARGET_DIR.get(profile)
103127
if not binary_dir:
104128
raise ValueError(f"unknown profile '{profile}'")
105-
binary_path = REPO_ROOT / "target" / binary_dir / TPCH_BINARY
129+
binary_path = REPO_ROOT / "target" / binary_dir / BENCHMARK_BINARY
106130
if not binary_path.exists():
107131
raise FileNotFoundError(f"compiled binary not found at {binary_path}")
108132

109133
command = [
110134
str(binary_path),
111-
"benchmark",
112-
"datafusion",
135+
"tpch",
113136
"--iterations",
114137
str(DEFAULT_ITERATIONS),
115138
"--path",
@@ -132,7 +155,7 @@ def run_benchmark(profile: str, data_path: Path) -> float:
132155

133156
def binary_size(profile: str) -> int:
134157
binary_dir = PROFILE_TARGET_DIR[profile]
135-
binary_path = REPO_ROOT / "target" / binary_dir / TPCH_BINARY
158+
binary_path = REPO_ROOT / "target" / binary_dir / BENCHMARK_BINARY
136159
return binary_path.stat().st_size
137160

138161

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
-- Sort elimination: ORDER BY sort key ASC (full scan)
2+
-- With --sorted: SortExec removed, sequential scan in file order
3+
-- Without --sorted: full SortExec required
4+
SELECT l_orderkey, l_partkey, l_suppkey
5+
FROM lineitem
6+
ORDER BY l_orderkey
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- Sort elimination + limit pushdown
2+
-- With --sorted: SortExec removed + limit pushed to DataSourceExec
3+
-- Without --sorted: TopK sort over all data
4+
SELECT l_orderkey, l_partkey, l_suppkey
5+
FROM lineitem
6+
ORDER BY l_orderkey
7+
LIMIT 100
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- Sort elimination: wide projection (all columns)
2+
-- Tests sort elimination benefit with larger row payload
3+
SELECT *
4+
FROM lineitem
5+
ORDER BY l_orderkey
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- Sort elimination + limit: wide projection
2+
SELECT *
3+
FROM lineitem
4+
ORDER BY l_orderkey
5+
LIMIT 100

benchmarks/src/bin/dfbench.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;
3434
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
3535

3636
use datafusion_benchmarks::{
37-
cancellation, clickbench, h2o, hj, imdb, nlj, smj, sort_tpch, tpcds, tpch,
37+
cancellation, clickbench, h2o, hj, imdb, nlj, smj, sort_pushdown, sort_tpch, tpcds,
38+
tpch,
3839
};
3940

4041
#[derive(Debug, Parser)]
@@ -53,6 +54,7 @@ enum Options {
5354
Imdb(imdb::RunOpt),
5455
Nlj(nlj::RunOpt),
5556
Smj(smj::RunOpt),
57+
SortPushdown(sort_pushdown::RunOpt),
5658
SortTpch(sort_tpch::RunOpt),
5759
Tpch(tpch::RunOpt),
5860
Tpcds(tpcds::RunOpt),
@@ -72,6 +74,7 @@ pub async fn main() -> Result<()> {
7274
Options::Imdb(opt) => Box::pin(opt.run()).await,
7375
Options::Nlj(opt) => opt.run().await,
7476
Options::Smj(opt) => opt.run().await,
77+
Options::SortPushdown(opt) => opt.run().await,
7578
Options::SortTpch(opt) => opt.run().await,
7679
Options::Tpch(opt) => Box::pin(opt.run()).await,
7780
Options::Tpcds(opt) => Box::pin(opt.run()).await,

0 commit comments

Comments
 (0)