Skip to content

Commit 92922ae

Browse files
authored
Merge branch 'main' into bugfux/pushdown-filter-order
2 parents fe40d77 + 42cd2fa commit 92922ae

40 files changed

Lines changed: 5443 additions & 156 deletions

.asf.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,8 @@ github:
5858
- "Check Markdown Links"
5959
- "Validate required_status_checks in .asf.yaml"
6060
- "Spell Check with Typos"
61+
- "Circular Dependency Check"
62+
- "Detect Unused Dependencies"
6163
# needs to be updated as part of the release process
6264
# .asf.yaml doesn't support wildcard branch protection rules, only exact branch names
6365
# https://github.com/apache/infrastructure-asfyaml?tab=readme-ov-file#branch-protection

.github/workflows/dependencies.yml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,7 @@ on:
2525
push:
2626
branches-ignore:
2727
- 'gh-readonly-queue/**'
28-
paths:
29-
- "**/Cargo.toml"
30-
- "**/Cargo.lock"
3128
pull_request:
32-
paths:
33-
- "**/Cargo.toml"
34-
- "**/Cargo.lock"
3529
merge_group:
3630
# manual trigger
3731
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
@@ -42,7 +36,7 @@ permissions:
4236

4337
jobs:
4438
depcheck:
45-
name: circular dependency check
39+
name: Circular Dependency Check
4640
runs-on: ubuntu-latest
4741
container:
4842
image: amd64/rust
@@ -61,6 +55,7 @@ jobs:
6155
cargo run
6256
6357
detect-unused-dependencies:
58+
name: Detect Unused Dependencies
6459
runs-on: ubuntu-latest
6560
container:
6661
image: amd64/rust

.github/workflows/docs.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ on:
2727
name: Deploy DataFusion site
2828

2929
jobs:
30-
permissions:
31-
contents: write
3230
build-docs:
31+
permissions:
32+
contents: write
3333
name: Build docs
3434
runs-on: ubuntu-latest
3535
steps:

Cargo.lock

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

benchmarks/.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
data
2-
results
2+
data_csv
3+
./results/
34
venv
5+
!sql_benchmarks/**/results/

benchmarks/Cargo.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ mimalloc_extended = ["libmimalloc-sys/extended"]
4343
arrow = { workspace = true }
4444
async-trait = "0.1"
4545
bytes = { workspace = true }
46-
clap = { version = "4.5.60", features = ["derive"] }
46+
clap = { version = "4.6.0", features = ["derive", "env"] }
47+
criterion = { workspace = true, features = ["html_reports"] }
4748
datafusion = { workspace = true, default-features = true }
4849
datafusion-common = { workspace = true, default-features = true }
4950
env_logger = { workspace = true }
@@ -63,3 +64,8 @@ tokio-util = { version = "0.7.17" }
6364

6465
[dev-dependencies]
6566
datafusion-proto = { workspace = true }
67+
tempfile = { workspace = true }
68+
69+
[[bench]]
70+
harness = false
71+
name = "sql"

benchmarks/bench.sh

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ BENCHMARK=all
4141
DATAFUSION_DIR=${DATAFUSION_DIR:-$SCRIPT_DIR/..}
4242
DATA_DIR=${DATA_DIR:-$SCRIPT_DIR/data}
4343
CARGO_COMMAND=${CARGO_COMMAND:-"cargo run --release"}
44+
SQL_CARGO_COMMAND=${SQL_CARGO_COMMAND:-"cargo bench --bench sql"}
4445
PREFER_HASH_JOIN=${PREFER_HASH_JOIN:-true}
4546
SIMULATE_LATENCY=${SIMULATE_LATENCY:-false}
4647

@@ -685,14 +686,16 @@ run_tpch() {
685686
echo "Internal error: Scale factor not specified"
686687
exit 1
687688
fi
688-
TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}"
689-
690-
RESULTS_FILE="${RESULTS_DIR}/tpch_sf${SCALE_FACTOR}.json"
691-
echo "RESULTS_FILE: ${RESULTS_FILE}"
689+
FORMAT=$2
692690
echo "Running tpch benchmark..."
693691

694-
FORMAT=$2
695-
debug_run $CARGO_COMMAND --bin dfbench -- tpch --iterations 5 --path "${TPCH_DIR}" --scale-factor "${SCALE_FACTOR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format ${FORMAT} -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
692+
debug_run env BENCH_NAME=tpch \
693+
BENCH_SIZE="${SCALE_FACTOR}" \
694+
PREFER_HASH_JOIN="${PREFER_HASH_JOIN}" \
695+
TPCH_FILE_TYPE="${FORMAT}" \
696+
SIMULATE_LATENCY="${SIMULATE_LATENCY}" \
697+
${QUERY:+BENCH_QUERY="${QUERY}"} \
698+
bash -c "$SQL_CARGO_COMMAND"
696699
}
697700

698701
# Runs the tpch in memory (needs tpch parquet data)
@@ -702,13 +705,15 @@ run_tpch_mem() {
702705
echo "Internal error: Scale factor not specified"
703706
exit 1
704707
fi
705-
TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}"
706-
707-
RESULTS_FILE="${RESULTS_DIR}/tpch_mem_sf${SCALE_FACTOR}.json"
708-
echo "RESULTS_FILE: ${RESULTS_FILE}"
709708
echo "Running tpch_mem benchmark..."
710-
# -m means in memory
711-
debug_run $CARGO_COMMAND --bin dfbench -- tpch --iterations 5 --path "${TPCH_DIR}" --scale-factor "${SCALE_FACTOR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
709+
710+
debug_run env BENCH_NAME=tpch \
711+
BENCH_SIZE="${SCALE_FACTOR}" \
712+
TPCH_FILE_TYPE="mem" \
713+
PREFER_HASH_JOIN="${PREFER_HASH_JOIN}" \
714+
SIMULATE_LATENCY="${SIMULATE_LATENCY}" \
715+
${QUERY:+BENCH_QUERY="${QUERY}"} \
716+
bash -c "$SQL_CARGO_COMMAND"
712717
}
713718

714719
# Runs the tpcds benchmark

0 commit comments

Comments
 (0)