Skip to content

Commit 54231ee

Browse files
adriangb and claude
committed
feat(sql): Postgres-style EXPLAIN (...) option list
Extends DataFusion's `EXPLAIN` to accept a Postgres-style parenthesized option list alongside the existing keyword form, on dialects that enable it (the default `GenericDialect`, `PostgreSqlDialect`, `DuckDbDialect`, etc.). This surfaces the metric-category and verbosity knobs introduced in PR #21160 (currently only reachable via `SET`) directly in the statement, matching Postgres's one-liner ergonomics:

    EXPLAIN (ANALYZE, VERBOSE, METRICS 'rows,bytes', LEVEL dev) SELECT ...

Options recognized: `ANALYZE`, `VERBOSE`, `FORMAT`, `METRICS`, `LEVEL`, `TIMING`, `SUMMARY`, `COSTS`. Statement-level values override the corresponding session config. Postgres-only options that DataFusion does not model (`BUFFERS`, `WAL`, `SETTINGS`, `GENERIC_PLAN`, `MEMORY`) return a clear unsupported-option error rather than being silently accepted. The legacy keyword form (`EXPLAIN ANALYZE VERBOSE FORMAT tree ...`) is unchanged.

The parser delegates to sqlparser's `parse_utility_options()` under the dialect gate; a new `ExplainStatementOptions` struct in `datafusion-common` normalizes both forms into a single representation that flows through `explain_to_plan` into the `Analyze` / `Explain` logical plan nodes. `handle_analyze` / `handle_explain` in the physical planner prefer statement-level overrides over session config before constructing `AnalyzeExec` / `ExplainExec`.

Proto serialization of the new fields is left as a follow-up (TODO comments in `datafusion/proto/src/logical_plan/mod.rs`); fields default to `None` on the other side, matching prior behavior.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 5c653be commit 54231ee

12 files changed

Lines changed: 1032 additions & 52 deletions

File tree

datafusion/common/src/format.rs

Lines changed: 469 additions & 0 deletions
Large diffs are not rendered by default.

datafusion/core/src/execution/session_state.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,7 @@ impl SessionState {
682682
stringified_plans,
683683
schema: Arc::clone(&e.schema),
684684
logical_optimization_succeeded: false,
685+
show_statistics: e.show_statistics,
685686
}));
686687
}
687688
Err(e) => return Err(e),
@@ -719,6 +720,7 @@ impl SessionState {
719720
stringified_plans,
720721
schema: Arc::clone(&e.schema),
721722
logical_optimization_succeeded,
723+
show_statistics: e.show_statistics,
722724
}))
723725
} else {
724726
let analyzed_plan = self.analyzer.execute_and_check(

datafusion/core/src/physical_planner.rs

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2536,6 +2536,8 @@ impl DefaultPhysicalPlanner {
25362536

25372537
let config = &session_state.config_options().explain;
25382538
let explain_format = &e.explain_format;
2539+
// Statement-level override wins over session config for show_statistics.
2540+
let show_statistics = e.show_statistics.unwrap_or(config.show_statistics);
25392541

25402542
if !e.logical_optimization_succeeded {
25412543
return Ok(Arc::new(ExplainExec::new(
@@ -2608,7 +2610,7 @@ impl DefaultPhysicalPlanner {
26082610
stringified_plans.push(StringifiedPlan::new(
26092611
InitialPhysicalPlan,
26102612
displayable(input.as_ref())
2611-
.set_show_statistics(config.show_statistics)
2613+
.set_show_statistics(show_statistics)
26122614
.set_show_schema(config.show_schema)
26132615
.indent(e.verbose)
26142616
.to_string(),
@@ -2617,7 +2619,7 @@ impl DefaultPhysicalPlanner {
26172619
// Show statistics + schema in verbose output even if not
26182620
// explicitly requested
26192621
if e.verbose {
2620-
if !config.show_statistics {
2622+
if !show_statistics {
26212623
stringified_plans.push(StringifiedPlan::new(
26222624
InitialPhysicalPlanWithStats,
26232625
displayable(input.as_ref())
@@ -2646,7 +2648,7 @@ impl DefaultPhysicalPlanner {
26462648
stringified_plans.push(StringifiedPlan::new(
26472649
plan_type,
26482650
displayable(plan)
2649-
.set_show_statistics(config.show_statistics)
2651+
.set_show_statistics(show_statistics)
26502652
.set_show_schema(config.show_schema)
26512653
.indent(e.verbose)
26522654
.to_string(),
@@ -2659,7 +2661,7 @@ impl DefaultPhysicalPlanner {
26592661
stringified_plans.push(StringifiedPlan::new(
26602662
FinalPhysicalPlan,
26612663
displayable(input.as_ref())
2662-
.set_show_statistics(config.show_statistics)
2664+
.set_show_statistics(show_statistics)
26632665
.set_show_schema(config.show_schema)
26642666
.indent(e.verbose)
26652667
.to_string(),
@@ -2668,7 +2670,7 @@ impl DefaultPhysicalPlanner {
26682670
// Show statistics + schema in verbose output even if not
26692671
// explicitly requested
26702672
if e.verbose {
2671-
if !config.show_statistics {
2673+
if !show_statistics {
26722674
stringified_plans.push(StringifiedPlan::new(
26732675
FinalPhysicalPlanWithStats,
26742676
displayable(input.as_ref())
@@ -2722,13 +2724,18 @@ impl DefaultPhysicalPlanner {
27222724
let input = self.create_physical_plan(&a.input, session_state).await?;
27232725
let schema = Arc::clone(a.schema.inner());
27242726
let show_statistics = session_state.config_options().explain.show_statistics;
2725-
let analyze_level = session_state.config_options().explain.analyze_level;
2727+
// Statement-level overrides take precedence over the session config.
2728+
let analyze_level = a
2729+
.analyze_level
2730+
.unwrap_or(session_state.config_options().explain.analyze_level);
27262731
let metric_types = analyze_level.included_types();
2727-
let analyze_categories = session_state
2728-
.config_options()
2729-
.explain
2730-
.analyze_categories
2731-
.clone();
2732+
let analyze_categories = a.analyze_categories.clone().unwrap_or_else(|| {
2733+
session_state
2734+
.config_options()
2735+
.explain
2736+
.analyze_categories
2737+
.clone()
2738+
});
27322739
let metric_categories = match analyze_categories {
27332740
ExplainAnalyzeCategories::All => None,
27342741
ExplainAnalyzeCategories::Only(cats) => Some(cats),
@@ -3853,6 +3860,7 @@ mod tests {
38533860
stringified_plans,
38543861
schema: schema.to_dfschema_ref().unwrap(),
38553862
logical_optimization_succeeded: false,
3863+
show_statistics: None,
38563864
};
38573865
let plan = planner
38583866
.handle_explain(&explain, &ctx.state())

datafusion/core/tests/sql/explain_analyze.rs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,3 +1267,115 @@ async fn explain_analyze_categories() {
12671267
);
12681268
}
12691269
}
1270+
1271+
/// Returns a [`SessionContext`] configured with the PostgreSQL dialect so
1272+
/// that `EXPLAIN (option, ...)` utility-option syntax is accepted.
1273+
fn session_ctx_with_pg_dialect() -> SessionContext {
1274+
use std::str::FromStr;
1275+
let mut config = SessionConfig::new();
1276+
let options = config.options_mut();
1277+
options.sql_parser.dialect =
1278+
datafusion::config::Dialect::from_str("PostgreSQL").unwrap();
1279+
SessionContext::new_with_config(config)
1280+
}
1281+
1282+
async fn collect_explain(ctx: &SessionContext, sql: &str) -> String {
1283+
let dataframe = ctx.sql(sql).await.unwrap();
1284+
let batches = dataframe.collect().await.unwrap();
1285+
arrow::util::pretty::pretty_format_batches(&batches)
1286+
.unwrap()
1287+
.to_string()
1288+
}
1289+
1290+
/// Verifies that the Postgres-style `EXPLAIN (METRICS '...')` form produces
1291+
/// the same category filtering as `SET datafusion.explain.analyze_categories`.
1292+
#[tokio::test]
1293+
async fn explain_analyze_paren_metrics_filtering() {
1294+
let ctx = session_ctx_with_pg_dialect();
1295+
let sql = "EXPLAIN (ANALYZE, METRICS 'rows') \
1296+
SELECT * FROM generate_series(10) as t1(v1) ORDER BY v1 DESC";
1297+
let plan = collect_explain(&ctx, sql).await;
1298+
assert!(
1299+
plan.contains("output_rows"),
1300+
"rows category should include output_rows:\n{plan}"
1301+
);
1302+
assert!(
1303+
!plan.contains("elapsed_compute"),
1304+
"rows-only METRICS should exclude elapsed_compute:\n{plan}"
1305+
);
1306+
assert!(
1307+
!plan.contains("output_bytes"),
1308+
"rows-only METRICS should exclude output_bytes:\n{plan}"
1309+
);
1310+
}
1311+
1312+
/// Verifies that a statement-level METRICS overrides the session config.
1313+
#[tokio::test]
1314+
async fn explain_analyze_paren_metrics_overrides_session_config() {
1315+
let ctx = session_ctx_with_pg_dialect();
1316+
// Session default: show only `rows` via config.
1317+
{
1318+
let state = ctx.state_ref();
1319+
let mut state = state.write();
1320+
state.config_mut().options_mut().explain.analyze_categories =
1321+
ExplainAnalyzeCategories::Only(vec![MetricCategory::Rows]);
1322+
}
1323+
// Statement overrides with 'bytes' — we should see output_bytes but not
1324+
// output_rows. The substring checks below are unambiguous because the two
1325+
// metric names are distinct: neither one contains the other.
1326+
let sql = "EXPLAIN (ANALYZE, METRICS 'bytes') \
1327+
SELECT * FROM generate_series(10) as t1(v1) ORDER BY v1 DESC";
1328+
let plan = collect_explain(&ctx, sql).await;
1329+
assert!(
1330+
plan.contains("output_bytes"),
1331+
"statement-level METRICS='bytes' should show output_bytes:\n{plan}"
1332+
);
1333+
assert!(
1334+
!plan.contains("output_rows"),
1335+
"statement-level METRICS='bytes' should hide output_rows:\n{plan}"
1336+
);
1337+
}
1338+
1339+
/// Verifies that `EXPLAIN (ANALYZE, LEVEL summary)` only shows summary metrics,
1340+
/// overriding the session default of `dev`.
1341+
#[tokio::test]
1342+
async fn explain_analyze_paren_level_overrides_session_config() {
1343+
let ctx = session_ctx_with_pg_dialect();
1344+
// Session default: Dev
1345+
{
1346+
let state = ctx.state_ref();
1347+
let mut state = state.write();
1348+
state.config_mut().options_mut().explain.analyze_level = MetricType::Dev;
1349+
}
1350+
let sql = "EXPLAIN (ANALYZE, LEVEL summary) \
1351+
SELECT * FROM generate_series(10) as t1(v1) ORDER BY v1 DESC";
1352+
let plan = collect_explain(&ctx, sql).await;
1353+
// `spill_count` is Dev-only; `output_rows` is Summary.
1354+
assert!(
1355+
plan.contains("output_rows"),
1356+
"summary should still show output_rows:\n{plan}"
1357+
);
1358+
assert!(
1359+
!plan.contains("spill_count"),
1360+
"summary should hide Dev-only spill_count:\n{plan}"
1361+
);
1362+
}
1363+
1364+
/// Verifies that `EXPLAIN (ANALYZE, BUFFERS)` returns a helpful error.
1365+
#[tokio::test]
1366+
async fn explain_paren_buffers_rejected() {
1367+
let ctx = session_ctx_with_pg_dialect();
1368+
let err = ctx
1369+
.sql("EXPLAIN (ANALYZE, BUFFERS) SELECT 1")
1370+
.await
1371+
.unwrap_err();
1372+
let msg = err.to_string();
1373+
assert!(
1374+
msg.contains("BUFFERS"),
1375+
"error should mention BUFFERS: {msg}"
1376+
);
1377+
assert!(
1378+
msg.contains("not supported"),
1379+
"error should say not supported: {msg}"
1380+
);
1381+
}

datafusion/expr/src/logical_plan/builder.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,6 +1330,8 @@ impl LogicalPlanBuilder {
13301330
verbose: explain_option.verbose,
13311331
input: self.plan,
13321332
schema,
1333+
analyze_level: None,
1334+
analyze_categories: None,
13331335
})))
13341336
} else {
13351337
let stringified_plans =
@@ -1342,6 +1344,7 @@ impl LogicalPlanBuilder {
13421344
stringified_plans,
13431345
schema,
13441346
logical_optimization_succeeded: false,
1347+
show_statistics: explain_option.show_statistics,
13451348
})))
13461349
}
13471350
}

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ use crate::{
5252

5353
use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef};
5454
use datafusion_common::cse::{NormalizeEq, Normalizeable};
55-
use datafusion_common::format::ExplainFormat;
55+
use datafusion_common::format::{ExplainAnalyzeCategories, ExplainFormat, MetricType};
5656
use datafusion_common::metadata::check_metadata_with_storage_equal;
5757
use datafusion_common::tree_node::{
5858
Transformed, TreeNode, TreeNodeContainer, TreeNodeRecursion,
@@ -1091,6 +1091,8 @@ impl LogicalPlan {
10911091
verbose: a.verbose,
10921092
schema: Arc::clone(&a.schema),
10931093
input: Arc::new(input),
1094+
analyze_level: a.analyze_level,
1095+
analyze_categories: a.analyze_categories.clone(),
10941096
}))
10951097
}
10961098
LogicalPlan::Explain(e) => {
@@ -1103,6 +1105,7 @@ impl LogicalPlan {
11031105
stringified_plans: e.stringified_plans.clone(),
11041106
schema: Arc::clone(&e.schema),
11051107
logical_optimization_succeeded: e.logical_optimization_succeeded,
1108+
show_statistics: e.show_statistics,
11061109
}))
11071110
}
11081111
LogicalPlan::Statement(Statement::Prepare(Prepare {
@@ -3202,6 +3205,9 @@ pub struct ExplainOption {
32023205
pub analyze: bool,
32033206
/// Output syntax/format
32043207
pub format: ExplainFormat,
3208+
/// Statement-level override for `datafusion.explain.show_statistics`.
3209+
/// `None` means "fall back to session config".
3210+
pub show_statistics: Option<bool>,
32053211
}
32063212

32073213
impl Default for ExplainOption {
@@ -3210,6 +3216,7 @@ impl Default for ExplainOption {
32103216
verbose: false,
32113217
analyze: false,
32123218
format: ExplainFormat::Indent,
3219+
show_statistics: None,
32133220
}
32143221
}
32153222
}
@@ -3232,6 +3239,13 @@ impl ExplainOption {
32323239
self.format = format;
32333240
self
32343241
}
3242+
3243+
/// Builder-style setter for a statement-level override of
3244+
/// `datafusion.explain.show_statistics`.
3245+
pub fn with_show_statistics(mut self, show_statistics: Option<bool>) -> Self {
3246+
self.show_statistics = show_statistics;
3247+
self
3248+
}
32353249
}
32363250

32373251
/// Produces a relation with string representations of
@@ -3255,6 +3269,9 @@ pub struct Explain {
32553269
pub schema: DFSchemaRef,
32563270
/// Used by physical planner to check if should proceed with planning
32573271
pub logical_optimization_succeeded: bool,
3272+
/// Statement-level override for `datafusion.explain.show_statistics`.
3273+
/// When `None`, the session-config value is used.
3274+
pub show_statistics: Option<bool>,
32583275
}
32593276

32603277
// Manual implementation needed because of `schema` field. Comparison excludes this field.
@@ -3270,18 +3287,22 @@ impl PartialOrd for Explain {
32703287
pub stringified_plans: &'a Vec<StringifiedPlan>,
32713288
/// Used by physical planner to check if should proceed with planning
32723289
pub logical_optimization_succeeded: &'a bool,
3290+
/// Statement-level override for show_statistics
3291+
pub show_statistics: &'a Option<bool>,
32733292
}
32743293
let comparable_self = ComparableExplain {
32753294
verbose: &self.verbose,
32763295
plan: &self.plan,
32773296
stringified_plans: &self.stringified_plans,
32783297
logical_optimization_succeeded: &self.logical_optimization_succeeded,
3298+
show_statistics: &self.show_statistics,
32793299
};
32803300
let comparable_other = ComparableExplain {
32813301
verbose: &other.verbose,
32823302
plan: &other.plan,
32833303
stringified_plans: &other.stringified_plans,
32843304
logical_optimization_succeeded: &other.logical_optimization_succeeded,
3305+
show_statistics: &other.show_statistics,
32853306
};
32863307
comparable_self
32873308
.partial_cmp(&comparable_other)
@@ -3300,9 +3321,18 @@ pub struct Analyze {
33003321
pub input: Arc<LogicalPlan>,
33013322
/// The output schema of the explain (2 columns of text)
33023323
pub schema: DFSchemaRef,
3324+
/// Statement-level override for `datafusion.explain.analyze_level`.
3325+
/// When `None`, the session-config value is used.
3326+
pub analyze_level: Option<MetricType>,
3327+
/// Statement-level override for `datafusion.explain.analyze_categories`.
3328+
/// When `None`, the session-config value is used.
3329+
pub analyze_categories: Option<ExplainAnalyzeCategories>,
33033330
}
33043331

3305-
// Manual implementation needed because of `schema` field. Comparison excludes this field.
3332+
// Manual implementation needed because of `schema` field and the lack of
3333+
// `PartialOrd` on `MetricType` / `ExplainAnalyzeCategories`. Ordering is
3334+
// defined over `(verbose, input)` and then falls back to `==` for the
3335+
// remaining statement-level override fields.
33063336
impl PartialOrd for Analyze {
33073337
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
33083338
match self.verbose.partial_cmp(&other.verbose) {

datafusion/expr/src/logical_plan/tree_node.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ impl TreeNode for LogicalPlan {
203203
stringified_plans,
204204
schema,
205205
logical_optimization_succeeded,
206+
show_statistics,
206207
}) => plan.map_elements(f)?.update_data(|plan| {
207208
LogicalPlan::Explain(Explain {
208209
verbose,
@@ -211,17 +212,22 @@ impl TreeNode for LogicalPlan {
211212
stringified_plans,
212213
schema,
213214
logical_optimization_succeeded,
215+
show_statistics,
214216
})
215217
}),
216218
LogicalPlan::Analyze(Analyze {
217219
verbose,
218220
input,
219221
schema,
222+
analyze_level,
223+
analyze_categories,
220224
}) => input.map_elements(f)?.update_data(|input| {
221225
LogicalPlan::Analyze(Analyze {
222226
verbose,
223227
input,
224228
schema,
229+
analyze_level,
230+
analyze_categories,
225231
})
226232
}),
227233
LogicalPlan::Dml(DmlStatement {

0 commit comments

Comments
 (0)