Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 43 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ snmalloc = ["snmalloc-rs"]
mimalloc_extended = ["libmimalloc-sys/extended"]

[dependencies]
anstream = "1.0"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I double checked and it seems like anstream is already a dependency of clap_builder, so this is not a (net) new dependency

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coloured output ftw :) At least locally, depends on your terminal I think.

arrow = { workspace = true }
async-trait = "0.1"
bytes = { workspace = true }
clap = { version = "4.6.0", features = ["derive", "env"] }
clap = { version = "4.6.1", features = ["derive", "env", "color"] }
criterion = { workspace = true, features = ["html_reports"] }
datafusion = { workspace = true, default-features = true }
datafusion-common = { workspace = true, default-features = true }
Expand All @@ -61,6 +62,7 @@ serde_json = { workspace = true }
snmalloc-rs = { version = "0.7", optional = true }
tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }
tokio-util = { version = "0.7.17" }
toml = "1.1"

[dev-dependencies]
datafusion-proto = { workspace = true }
Expand Down
16 changes: 16 additions & 0 deletions benchmarks/sql_benchmarks/tpch/tpch.suite
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name = "tpch"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed? It seems somewhat redundant with SqlBenchmark, yes?

/// State for one SQL benchmark, identified by its benchmark file path.
pub struct SqlBenchmark {
/// Human-readable name of the benchmark.
name: String,
/// Top-level group name (derived from the file path or defined in a benchmark).
group: String,
/// Subgroup name, often a logical grouping.
subgroup: String,
/// Full path to the benchmark file.
benchmark_path: PathBuf,
/// Mapping of placeholder keys to concrete values (e.g. `"BENCHMARK_DIR"`).
replacement_mapping: HashMap<String, String>,
/// Expected strings that must appear in the physical plan of the queries.
expect: Vec<String>,
/// All SQL queries grouped by directive (`load`, `run`, etc.).
queries: HashMap<QueryDirective, Vec<String>>,
/// Queries whose results are persisted to disk for later comparison.
result_queries: Vec<BenchmarkQuery>,
/// Queries whose results are asserted against an expected table.
assert_queries: Vec<BenchmarkQuery>,
/// Flag indicating whether the benchmark has been fully loaded.
is_loaded: bool,
/// Stores the last run results if needed so they can be compared or persisted.
last_results: Option<Vec<RecordBatch>>,
/// Echo statements (presumably the text of `echo` directives in the
/// benchmark file -- TODO confirm against the parser).
echo: Vec<String>,
}

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name doesn't really need to be there. The suite file, though, is needed for later PRs. The SqlBenchmark is per file, the suite is ... per suite.

description = "TPC-H SQL benchmarks"

[[options]]
name = "format"
short = "f"
default = "parquet"
values = ["parquet", "csv", "mem"]
help = "Selects the TPC-H data format."

[[options]]
name = "scale-factor"
short = "sf"
default = "1"
values = ["1", "10"]
help = "Selects the TPC-H scale factor."
81 changes: 81 additions & 0 deletions benchmarks/src/benchmark_runner/cli.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! CLI construction and argument conversion for `benchmark_runner`.
//!
//! This module owns the clap command tree for the initial runner surface:
//! top-level help and suite listing.

use clap::builder::styling::{AnsiColor, Styles};
use clap::{ArgMatches, Command};
use datafusion_common::{Result, exec_datafusion_err};

/// Color scheme for clap-generated help text: bold green section headers and
/// usage lines, bold cyan literals, and plain cyan placeholders.
const HELP_STYLES: Styles = {
    let heading = AnsiColor::Green.on_default().bold();
    let literal = AnsiColor::Cyan.on_default().bold();
    let placeholder = AnsiColor::Cyan.on_default();

    Styles::styled()
        .header(heading)
        .usage(heading)
        .literal(literal)
        .placeholder(placeholder)
};

/// Command selected on the `benchmark_runner` command line.
///
/// The enum is a plain, fieldless selector, so it is cheap to copy and can be
/// compared directly (useful in tests and dispatch code).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RunnerCommand {
    /// Print the top-level help text.
    Help,
    /// List the SQL benchmark suites.
    List,
}

/// Assembles the clap [`Command`] for `benchmark_runner`.
///
/// The root command accepts no subcommand at all, or one of the explicitly
/// registered `help` and `list` subcommands. clap's auto-generated `help`
/// subcommand is disabled so that the explicit one can take its place.
pub fn build_cli() -> Command {
    let help_command = Command::new("help").about("Print help");
    let list_command = Command::new("list").about("List SQL benchmark suites");

    let root = Command::new("benchmark_runner")
        .about("Inspect DataFusion SQL benchmark suites.")
        .styles(HELP_STYLES)
        .subcommand_required(false)
        .arg_required_else_help(false)
        .disable_help_subcommand(true);

    root.subcommand(help_command).subcommand(list_command)
}

/// Maps parsed clap matches onto a [`RunnerCommand`].
///
/// A missing subcommand is treated the same as an explicit `help`. Any
/// subcommand name other than `help` or `list` yields an execution error.
pub(crate) fn command_from_matches(matches: &ArgMatches) -> Result<RunnerCommand> {
    // No subcommand at all falls back to printing help.
    let Some((name, _)) = matches.subcommand() else {
        return Ok(RunnerCommand::Help);
    };

    match name {
        "help" => Ok(RunnerCommand::Help),
        "list" => Ok(RunnerCommand::List),
        _ => Err(exec_datafusion_err!("Unknown command '{name}'")),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `list` declares no options, so an unknown flag must be a parse error.
    #[test]
    fn list_rejects_unrecognized_options() {
        let argv = ["benchmark_runner", "list", "--format"];
        let matches = build_cli().try_get_matches_from(argv);

        assert!(matches.is_err(), "{matches:?}");
    }

    /// `--help` surfaces as a clap error whose rendered text is the help
    /// output; that text must mention the `list` subcommand.
    #[test]
    fn help_mentions_list_command() {
        let argv = ["benchmark_runner", "--help"];
        let help = build_cli()
            .try_get_matches_from(argv)
            .unwrap_err()
            .to_string();

        assert!(help.contains("list"));
    }
}
104 changes: 104 additions & 0 deletions benchmarks/src/benchmark_runner/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Command-line inspection for SQL benchmark suites.
//!
//! This module backs the `benchmark_runner` binary. The initial command
//! surface lists discovered SQL benchmark suites from `.suite` files and
//! prints the top-level help.
//!
//! Common invocations:
//!
//! ```text
//! cargo run --bin benchmark_runner -- --help
//! cargo run --release --bin benchmark_runner -- list
//! ```
//!
//! The public entry point is [`run_cli`]. The submodules are kept private so
//! the command-line flow remains the single supported API:
//!
//! - `cli` builds the clap command tree and parses the selected command.
//! - `suite` loads `.suite` metadata and discovers benchmark query files.
//! - `output` formats colored `list` command output.

mod cli;
mod output;
mod suite;

use crate::benchmark_runner::cli::{RunnerCommand, build_cli, command_from_matches};
use crate::benchmark_runner::output::format_suite_list_styled;
use crate::benchmark_runner::suite::SuiteRegistry;
use datafusion::error::Result;
use datafusion_common::DataFusionError;
use std::io::Write;
use std::path::PathBuf;

/// Runs the benchmark runner command-line flow for the provided argument list.
///
/// Arguments are parsed first. Suite metadata is only discovered for the
/// command that needs it (`list`), so `--help`, the `help` subcommand, and
/// argument errors are reported even when the benchmark directory is missing
/// or contains a broken `.suite` file.
///
/// # Errors
///
/// Returns an error when the arguments cannot be parsed (the clap error is
/// wrapped in [`DataFusionError::External`]), when suite discovery or list
/// formatting fails, or when writing to the terminal fails.
pub fn run_cli<I, T>(args: I) -> Result<()>
where
    I: IntoIterator<Item = T>,
    T: Clone + Into<std::ffi::OsString>,
{
    let mut cli = build_cli();
    let matches = match cli.try_get_matches_from_mut(args) {
        Ok(matches) => matches,
        // `--help` is surfaced by clap as an "error"; print it and succeed.
        Err(e) if e.kind() == clap::error::ErrorKind::DisplayHelp => {
            e.print()?;
            return Ok(());
        }
        Err(e) => return Err(DataFusionError::External(Box::new(e))),
    };
    let command = command_from_matches(&matches)?;

    match command {
        RunnerCommand::Help => {
            cli.print_long_help()?;
            println!();
        }
        RunnerCommand::List => {
            // Discovery is deferred to here so that help output never depends
            // on the state of the suite files on disk.
            let benchmark_dir = default_benchmark_dir();
            let registry = SuiteRegistry::discover(&benchmark_dir)?;
            print_styled(&format_suite_list_styled(&registry)?)?;
        }
    }

    Ok(())
}

/// Emits pre-styled text on standard output via `anstream`, so ANSI color
/// handling matches clap help output on supported terminals.
///
/// A failed write is reported as [`DataFusionError::External`].
fn print_styled(output: &str) -> Result<()> {
    write!(anstream::stdout(), "{output}")
        .map_err(|io_err| DataFusionError::External(Box::new(io_err)))
}

/// Locates the SQL benchmark root directory.
///
/// `benchmarks/sql_benchmarks`, resolved against the current working
/// directory, is used when it exists (the case when running from the
/// repository root). Otherwise the result is `sql_benchmarks` under the
/// crate's compile-time `CARGO_MANIFEST_DIR`.
fn default_benchmark_dir() -> PathBuf {
    let from_repo_root = PathBuf::from("benchmarks/sql_benchmarks");
    if !from_repo_root.exists() {
        return PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("sql_benchmarks");
    }
    from_repo_root
}
Loading
Loading