Skip to content

Commit 46af363

Browse files
adriangb and claude committed
feat(sql): Postgres-style EXPLAIN (...) option list
Extends DataFusion's `EXPLAIN` to accept a Postgres-style parenthesized option list alongside the existing keyword form, on dialects that enable it (the default `GenericDialect`, `PostgreSqlDialect`, `DuckDbDialect`, etc.). This surfaces the metric-category and verbosity knobs introduced in PR #21160 (currently only reachable via `SET`) directly in the statement, matching Postgres's one-liner ergonomics:

    EXPLAIN (ANALYZE, VERBOSE, METRICS 'rows,bytes', LEVEL dev) SELECT ...

Options recognized: `ANALYZE`, `VERBOSE`, `FORMAT`, `METRICS`, `LEVEL`, `TIMING`, `SUMMARY`, `COSTS`. Statement-level values override the corresponding session config. Postgres-only options that DataFusion does not model (`BUFFERS`, `WAL`, `SETTINGS`, `GENERIC_PLAN`, `MEMORY`) return a clear unsupported-option error rather than being silently accepted. The legacy keyword form (`EXPLAIN ANALYZE VERBOSE FORMAT tree ...`) is unchanged.

The parser delegates to sqlparser's `parse_utility_options()` under the dialect gate; a new `ExplainStatementOptions` struct in `datafusion-common` normalizes both forms into a single representation that flows through `explain_to_plan` into the `Analyze` / `Explain` logical plan nodes. `handle_analyze` / `handle_explain` in the physical planner prefer statement-level overrides over session config before constructing `AnalyzeExec` / `ExplainExec`.

Proto serialization of the new fields is left as a follow-up (TODO comments in `datafusion/proto/src/logical_plan/mod.rs`); fields default to `None` on the other side, matching prior behavior.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent cfafce4 commit 46af363

12 files changed

Lines changed: 1032 additions & 52 deletions

File tree

datafusion/common/src/format.rs

Lines changed: 469 additions & 0 deletions
Large diffs are not rendered by default.

datafusion/core/src/execution/session_state.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,7 @@ impl SessionState {
682682
stringified_plans,
683683
schema: Arc::clone(&e.schema),
684684
logical_optimization_succeeded: false,
685+
show_statistics: e.show_statistics,
685686
}));
686687
}
687688
Err(e) => return Err(e),
@@ -719,6 +720,7 @@ impl SessionState {
719720
stringified_plans,
720721
schema: Arc::clone(&e.schema),
721722
logical_optimization_succeeded,
723+
show_statistics: e.show_statistics,
722724
}))
723725
} else {
724726
let analyzed_plan = self.analyzer.execute_and_check(

datafusion/core/src/physical_planner.rs

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2528,6 +2528,8 @@ impl DefaultPhysicalPlanner {
25282528

25292529
let config = &session_state.config_options().explain;
25302530
let explain_format = &e.explain_format;
2531+
// Statement-level override wins over session config for show_statistics.
2532+
let show_statistics = e.show_statistics.unwrap_or(config.show_statistics);
25312533

25322534
if !e.logical_optimization_succeeded {
25332535
return Ok(Arc::new(ExplainExec::new(
@@ -2600,7 +2602,7 @@ impl DefaultPhysicalPlanner {
26002602
stringified_plans.push(StringifiedPlan::new(
26012603
InitialPhysicalPlan,
26022604
displayable(input.as_ref())
2603-
.set_show_statistics(config.show_statistics)
2605+
.set_show_statistics(show_statistics)
26042606
.set_show_schema(config.show_schema)
26052607
.indent(e.verbose)
26062608
.to_string(),
@@ -2609,7 +2611,7 @@ impl DefaultPhysicalPlanner {
26092611
// Show statistics + schema in verbose output even if not
26102612
// explicitly requested
26112613
if e.verbose {
2612-
if !config.show_statistics {
2614+
if !show_statistics {
26132615
stringified_plans.push(StringifiedPlan::new(
26142616
InitialPhysicalPlanWithStats,
26152617
displayable(input.as_ref())
@@ -2638,7 +2640,7 @@ impl DefaultPhysicalPlanner {
26382640
stringified_plans.push(StringifiedPlan::new(
26392641
plan_type,
26402642
displayable(plan)
2641-
.set_show_statistics(config.show_statistics)
2643+
.set_show_statistics(show_statistics)
26422644
.set_show_schema(config.show_schema)
26432645
.indent(e.verbose)
26442646
.to_string(),
@@ -2651,7 +2653,7 @@ impl DefaultPhysicalPlanner {
26512653
stringified_plans.push(StringifiedPlan::new(
26522654
FinalPhysicalPlan,
26532655
displayable(input.as_ref())
2654-
.set_show_statistics(config.show_statistics)
2656+
.set_show_statistics(show_statistics)
26552657
.set_show_schema(config.show_schema)
26562658
.indent(e.verbose)
26572659
.to_string(),
@@ -2660,7 +2662,7 @@ impl DefaultPhysicalPlanner {
26602662
// Show statistics + schema in verbose output even if not
26612663
// explicitly requested
26622664
if e.verbose {
2663-
if !config.show_statistics {
2665+
if !show_statistics {
26642666
stringified_plans.push(StringifiedPlan::new(
26652667
FinalPhysicalPlanWithStats,
26662668
displayable(input.as_ref())
@@ -2714,13 +2716,18 @@ impl DefaultPhysicalPlanner {
27142716
let input = self.create_physical_plan(&a.input, session_state).await?;
27152717
let schema = Arc::clone(a.schema.inner());
27162718
let show_statistics = session_state.config_options().explain.show_statistics;
2717-
let analyze_level = session_state.config_options().explain.analyze_level;
2719+
// Statement-level overrides take precedence over the session config.
2720+
let analyze_level = a
2721+
.analyze_level
2722+
.unwrap_or(session_state.config_options().explain.analyze_level);
27182723
let metric_types = analyze_level.included_types();
2719-
let analyze_categories = session_state
2720-
.config_options()
2721-
.explain
2722-
.analyze_categories
2723-
.clone();
2724+
let analyze_categories = a.analyze_categories.clone().unwrap_or_else(|| {
2725+
session_state
2726+
.config_options()
2727+
.explain
2728+
.analyze_categories
2729+
.clone()
2730+
});
27242731
let metric_categories = match analyze_categories {
27252732
ExplainAnalyzeCategories::All => None,
27262733
ExplainAnalyzeCategories::Only(cats) => Some(cats),
@@ -3844,6 +3851,7 @@ mod tests {
38443851
stringified_plans,
38453852
schema: schema.to_dfschema_ref().unwrap(),
38463853
logical_optimization_succeeded: false,
3854+
show_statistics: None,
38473855
};
38483856
let plan = planner
38493857
.handle_explain(&explain, &ctx.state())

datafusion/core/tests/sql/explain_analyze.rs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,3 +1267,115 @@ async fn explain_analyze_categories() {
12671267
);
12681268
}
12691269
}
1270+
1271+
/// Returns a [`SessionContext`] configured with the PostgreSQL dialect so
1272+
/// that `EXPLAIN (option, ...)` utility-option syntax is accepted.
1273+
fn session_ctx_with_pg_dialect() -> SessionContext {
1274+
use std::str::FromStr;
1275+
let mut config = SessionConfig::new();
1276+
let options = config.options_mut();
1277+
options.sql_parser.dialect =
1278+
datafusion::config::Dialect::from_str("PostgreSQL").unwrap();
1279+
SessionContext::new_with_config(config)
1280+
}
1281+
1282+
async fn collect_explain(ctx: &SessionContext, sql: &str) -> String {
1283+
let dataframe = ctx.sql(sql).await.unwrap();
1284+
let batches = dataframe.collect().await.unwrap();
1285+
arrow::util::pretty::pretty_format_batches(&batches)
1286+
.unwrap()
1287+
.to_string()
1288+
}
1289+
1290+
/// Verifies that the Postgres-style `EXPLAIN (METRICS '...')` form produces
1291+
/// the same category filtering as `SET datafusion.explain.analyze_categories`.
1292+
#[tokio::test]
1293+
async fn explain_analyze_paren_metrics_filtering() {
1294+
let ctx = session_ctx_with_pg_dialect();
1295+
let sql = "EXPLAIN (ANALYZE, METRICS 'rows') \
1296+
SELECT * FROM generate_series(10) as t1(v1) ORDER BY v1 DESC";
1297+
let plan = collect_explain(&ctx, sql).await;
1298+
assert!(
1299+
plan.contains("output_rows"),
1300+
"rows category should include output_rows:\n{plan}"
1301+
);
1302+
assert!(
1303+
!plan.contains("elapsed_compute"),
1304+
"rows-only METRICS should exclude elapsed_compute:\n{plan}"
1305+
);
1306+
assert!(
1307+
!plan.contains("output_bytes"),
1308+
"rows-only METRICS should exclude output_bytes:\n{plan}"
1309+
);
1310+
}
1311+
1312+
/// Verifies that a statement-level METRICS overrides the session config.
1313+
#[tokio::test]
1314+
async fn explain_analyze_paren_metrics_overrides_session_config() {
1315+
let ctx = session_ctx_with_pg_dialect();
1316+
// Session default: show only `rows` via config.
1317+
{
1318+
let state = ctx.state_ref();
1319+
let mut state = state.write();
1320+
state.config_mut().options_mut().explain.analyze_categories =
1321+
ExplainAnalyzeCategories::Only(vec![MetricCategory::Rows]);
1322+
}
1323+
// Statement overrides with 'bytes' — we should see output_bytes but not
1324+
// output_rows (except row-count metrics with the `output_bytes` substring
1325+
// are avoided because the metric names are distinct).
1326+
let sql = "EXPLAIN (ANALYZE, METRICS 'bytes') \
1327+
SELECT * FROM generate_series(10) as t1(v1) ORDER BY v1 DESC";
1328+
let plan = collect_explain(&ctx, sql).await;
1329+
assert!(
1330+
plan.contains("output_bytes"),
1331+
"statement-level METRICS='bytes' should show output_bytes:\n{plan}"
1332+
);
1333+
assert!(
1334+
!plan.contains("output_rows"),
1335+
"statement-level METRICS='bytes' should hide output_rows:\n{plan}"
1336+
);
1337+
}
1338+
1339+
/// Verifies that `EXPLAIN (ANALYZE, LEVEL summary)` only shows summary metrics,
1340+
/// overriding the session default of `dev`.
1341+
#[tokio::test]
1342+
async fn explain_analyze_paren_level_overrides_session_config() {
1343+
let ctx = session_ctx_with_pg_dialect();
1344+
// Session default: Dev
1345+
{
1346+
let state = ctx.state_ref();
1347+
let mut state = state.write();
1348+
state.config_mut().options_mut().explain.analyze_level = MetricType::Dev;
1349+
}
1350+
let sql = "EXPLAIN (ANALYZE, LEVEL summary) \
1351+
SELECT * FROM generate_series(10) as t1(v1) ORDER BY v1 DESC";
1352+
let plan = collect_explain(&ctx, sql).await;
1353+
// `spill_count` is Dev-only; `output_rows` is Summary.
1354+
assert!(
1355+
plan.contains("output_rows"),
1356+
"summary should still show output_rows:\n{plan}"
1357+
);
1358+
assert!(
1359+
!plan.contains("spill_count"),
1360+
"summary should hide Dev-only spill_count:\n{plan}"
1361+
);
1362+
}
1363+
1364+
/// Verifies that `EXPLAIN (ANALYZE, BUFFERS)` returns a helpful error.
1365+
#[tokio::test]
1366+
async fn explain_paren_buffers_rejected() {
1367+
let ctx = session_ctx_with_pg_dialect();
1368+
let err = ctx
1369+
.sql("EXPLAIN (ANALYZE, BUFFERS) SELECT 1")
1370+
.await
1371+
.unwrap_err();
1372+
let msg = err.to_string();
1373+
assert!(
1374+
msg.contains("BUFFERS"),
1375+
"error should mention BUFFERS: {msg}"
1376+
);
1377+
assert!(
1378+
msg.contains("not supported"),
1379+
"error should say not supported: {msg}"
1380+
);
1381+
}

datafusion/expr/src/logical_plan/builder.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,6 +1329,8 @@ impl LogicalPlanBuilder {
13291329
verbose: explain_option.verbose,
13301330
input: self.plan,
13311331
schema,
1332+
analyze_level: None,
1333+
analyze_categories: None,
13321334
})))
13331335
} else {
13341336
let stringified_plans =
@@ -1341,6 +1343,7 @@ impl LogicalPlanBuilder {
13411343
stringified_plans,
13421344
schema,
13431345
logical_optimization_succeeded: false,
1346+
show_statistics: explain_option.show_statistics,
13441347
})))
13451348
}
13461349
}

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ use crate::{
5252

5353
use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef};
5454
use datafusion_common::cse::{NormalizeEq, Normalizeable};
55-
use datafusion_common::format::ExplainFormat;
55+
use datafusion_common::format::{ExplainAnalyzeCategories, ExplainFormat, MetricType};
5656
use datafusion_common::metadata::check_metadata_with_storage_equal;
5757
use datafusion_common::tree_node::{
5858
Transformed, TreeNode, TreeNodeContainer, TreeNodeRecursion,
@@ -1094,6 +1094,8 @@ impl LogicalPlan {
10941094
verbose: a.verbose,
10951095
schema: Arc::clone(&a.schema),
10961096
input: Arc::new(input),
1097+
analyze_level: a.analyze_level,
1098+
analyze_categories: a.analyze_categories.clone(),
10971099
}))
10981100
}
10991101
LogicalPlan::Explain(e) => {
@@ -1106,6 +1108,7 @@ impl LogicalPlan {
11061108
stringified_plans: e.stringified_plans.clone(),
11071109
schema: Arc::clone(&e.schema),
11081110
logical_optimization_succeeded: e.logical_optimization_succeeded,
1111+
show_statistics: e.show_statistics,
11091112
}))
11101113
}
11111114
LogicalPlan::Statement(Statement::Prepare(Prepare {
@@ -3205,6 +3208,9 @@ pub struct ExplainOption {
32053208
pub analyze: bool,
32063209
/// Output syntax/format
32073210
pub format: ExplainFormat,
3211+
/// Statement-level override for `datafusion.explain.show_statistics`.
3212+
/// `None` means "fall back to session config".
3213+
pub show_statistics: Option<bool>,
32083214
}
32093215

32103216
impl Default for ExplainOption {
@@ -3213,6 +3219,7 @@ impl Default for ExplainOption {
32133219
verbose: false,
32143220
analyze: false,
32153221
format: ExplainFormat::Indent,
3222+
show_statistics: None,
32163223
}
32173224
}
32183225
}
@@ -3235,6 +3242,13 @@ impl ExplainOption {
32353242
self.format = format;
32363243
self
32373244
}
3245+
3246+
/// Builder-style setter for a statement-level override of
3247+
/// `datafusion.explain.show_statistics`.
3248+
pub fn with_show_statistics(mut self, show_statistics: Option<bool>) -> Self {
3249+
self.show_statistics = show_statistics;
3250+
self
3251+
}
32383252
}
32393253

32403254
/// Produces a relation with string representations of
@@ -3258,6 +3272,9 @@ pub struct Explain {
32583272
pub schema: DFSchemaRef,
32593273
/// Used by physical planner to check if should proceed with planning
32603274
pub logical_optimization_succeeded: bool,
3275+
/// Statement-level override for `datafusion.explain.show_statistics`.
3276+
/// When `None`, the session-config value is used.
3277+
pub show_statistics: Option<bool>,
32613278
}
32623279

32633280
// Manual implementation needed because of `schema` field. Comparison excludes this field.
@@ -3273,18 +3290,22 @@ impl PartialOrd for Explain {
32733290
pub stringified_plans: &'a Vec<StringifiedPlan>,
32743291
/// Used by physical planner to check if should proceed with planning
32753292
pub logical_optimization_succeeded: &'a bool,
3293+
/// Statement-level override for show_statistics
3294+
pub show_statistics: &'a Option<bool>,
32763295
}
32773296
let comparable_self = ComparableExplain {
32783297
verbose: &self.verbose,
32793298
plan: &self.plan,
32803299
stringified_plans: &self.stringified_plans,
32813300
logical_optimization_succeeded: &self.logical_optimization_succeeded,
3301+
show_statistics: &self.show_statistics,
32823302
};
32833303
let comparable_other = ComparableExplain {
32843304
verbose: &other.verbose,
32853305
plan: &other.plan,
32863306
stringified_plans: &other.stringified_plans,
32873307
logical_optimization_succeeded: &other.logical_optimization_succeeded,
3308+
show_statistics: &other.show_statistics,
32883309
};
32893310
comparable_self
32903311
.partial_cmp(&comparable_other)
@@ -3303,9 +3324,18 @@ pub struct Analyze {
33033324
pub input: Arc<LogicalPlan>,
33043325
/// The output schema of the explain (2 columns of text)
33053326
pub schema: DFSchemaRef,
3327+
/// Statement-level override for `datafusion.explain.analyze_level`.
3328+
/// When `None`, the session-config value is used.
3329+
pub analyze_level: Option<MetricType>,
3330+
/// Statement-level override for `datafusion.explain.analyze_categories`.
3331+
/// When `None`, the session-config value is used.
3332+
pub analyze_categories: Option<ExplainAnalyzeCategories>,
33063333
}
33073334

3308-
// Manual implementation needed because of `schema` field. Comparison excludes this field.
3335+
// Manual implementation needed because of `schema` field and the lack of
3336+
// `PartialOrd` on `MetricType` / `ExplainAnalyzeCategories`. Ordering is
3337+
// defined over `(verbose, input)` and then falls back to `==` for the
3338+
// remaining statement-level override fields.
33093339
impl PartialOrd for Analyze {
33103340
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
33113341
match self.verbose.partial_cmp(&other.verbose) {

datafusion/expr/src/logical_plan/tree_node.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ impl TreeNode for LogicalPlan {
203203
stringified_plans,
204204
schema,
205205
logical_optimization_succeeded,
206+
show_statistics,
206207
}) => plan.map_elements(f)?.update_data(|plan| {
207208
LogicalPlan::Explain(Explain {
208209
verbose,
@@ -211,17 +212,22 @@ impl TreeNode for LogicalPlan {
211212
stringified_plans,
212213
schema,
213214
logical_optimization_succeeded,
215+
show_statistics,
214216
})
215217
}),
216218
LogicalPlan::Analyze(Analyze {
217219
verbose,
218220
input,
219221
schema,
222+
analyze_level,
223+
analyze_categories,
220224
}) => input.map_elements(f)?.update_data(|input| {
221225
LogicalPlan::Analyze(Analyze {
222226
verbose,
223227
input,
224228
schema,
229+
analyze_level,
230+
analyze_categories,
225231
})
226232
}),
227233
LogicalPlan::Dml(DmlStatement {

0 commit comments

Comments
 (0)