@@ -70,61 +70,115 @@ reset datafusion.explain.analyze_level;
7070# ------------------------------------------------
7171# Test analyze_categories: filter metrics by kind
7272# ------------------------------------------------
73+ # Categories classify metrics by determinism:
74+ # rows, bytes — depend on plan + data, deterministic across runs
75+ # timing — varies run-to-run even on same hardware
76+
77+ # --- Setup: write 12 rows to a parquet file with max_row_group_size=3 (the plans below report 4 row groups) and expose it as cat_tracking ---
7378
74- # "rows" — only row-count metrics (deterministic), no timing or bytes
7579statement ok
76- set datafusion.explain.analyze_categories = 'rows';
80+ set datafusion.execution.parquet.pushdown_filters = true;
81+
82+ statement ok
83+ CREATE TABLE _cat_data AS VALUES
84+ ('Anow Vole', 7),
85+ ('Brown Bear', 133),
86+ ('Gray Wolf', 82),
87+ ('Lynx', 71),
88+ ('Red Fox', 40),
89+ ('Alpine Bat', 6),
90+ ('Nlpine Ibex', 101),
91+ ('Nlpine Goat', 76),
92+ ('Nlpine Sheep', 83),
93+ ('Europ. Mole', 4),
94+ ('Polecat', 16),
95+ ('Alpine Ibex', 97);
96+
97+ statement ok
98+ COPY (SELECT column1 as species, column2 as s FROM _cat_data)
99+ TO 'test_files/scratch/explain_analyze/data.parquet'
100+ STORED AS PARQUET
101+ OPTIONS ('format.max_row_group_size' '3');
102+
103+ statement ok
104+ drop table _cat_data;
105+
106+ statement ok
107+ CREATE EXTERNAL TABLE cat_tracking
108+ STORED AS PARQUET
109+ LOCATION 'test_files/scratch/explain_analyze/data.parquet';
110+
111+ # ---- categories = 'none': plan only, every operator is expected to show metrics=[] ----
112+ # analyze_level = summary is set here and stays in effect for all category tests until it is reset after the 'timing' test
113+ statement ok
114+ set datafusion.explain.analyze_level = summary;
115+
116+ statement ok
117+ set datafusion.explain.analyze_categories = 'none';
77118
78119query TT
79- EXPLAIN ANALYZE SELECT * FROM generate_series(100) ;
120+ explain analyze select * from cat_tracking where species > 'M' AND s >= 50 order by species limit 3 ;
80121----
81- Plan with Metrics LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=0, end=100, batch_size=8192], metrics=[output_rows=101, output_batches=1]
122+ Plan with Metrics
123+ 01)SortExec: TopK(fetch=3), expr=[species@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[species@0 < Nlpine Sheep], metrics=[]
124+ 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/explain_analyze/data.parquet]]}, projection=[species, s], file_type=parquet, predicate=species@0 > M AND s@1 >= 50 AND DynamicFilter [ species@0 < Nlpine Sheep ], pruning_predicate=species_null_count@1 != row_count@2 AND species_max@0 > M AND s_null_count@4 != row_count@2 AND s_max@3 >= 50 AND species_null_count@1 != row_count@2 AND species_min@5 < Nlpine Sheep, required_guarantees=[], metrics=[]
82125
83126statement ok
84127reset datafusion.explain.analyze_categories;
85128
86- # "none" — plan only, all metrics suppressed (empty brackets)
129+ # ---- categories = 'rows': deterministic row-count metrics only ----
130+ # Note: no elapsed_compute, no output_bytes, no bytes_scanned, no scan_efficiency_ratio, no metadata_load_time
131+
87132statement ok
88- set datafusion.explain.analyze_categories = 'none ';
133+ set datafusion.explain.analyze_categories = 'rows';
89134
90135query TT
91- EXPLAIN ANALYZE SELECT * FROM generate_series(100) ;
136+ explain analyze select * from cat_tracking where species > 'M' AND s >= 50 order by species limit 3 ;
92137----
93- Plan with Metrics LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=0, end=100, batch_size=8192], metrics=[]
138+ Plan with Metrics
139+ 01)SortExec: TopK(fetch=3), expr=[species@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[species@0 < Nlpine Sheep], metrics=[output_rows=3]
140+ 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/explain_analyze/data.parquet]]}, projection=[species, s], file_type=parquet, predicate=species@0 > M AND s@1 >= 50 AND DynamicFilter [ species@0 < Nlpine Sheep ], pruning_predicate=species_null_count@1 != row_count@2 AND species_max@0 > M AND s_null_count@4 != row_count@2 AND s_max@3 >= 50 AND species_null_count@1 != row_count@2 AND species_min@5 < Nlpine Sheep, required_guarantees=[], metrics=[output_rows=3, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=4 total → 3 matched -> 1 fully matched, row_groups_pruned_bloom_filter=3 total → 3 matched, page_index_pages_pruned=6 total → 6 matched, limit_pruned_row_groups=0 total → 0 matched]
94141
95142statement ok
96143reset datafusion.explain.analyze_categories;
97144
98- # "rows,bytes" — row + byte metrics, no timing
145+ # ---- categories = 'rows,bytes': add byte metrics, still no timing ----
146+ # Byte-valued metrics (output_bytes, bytes_scanned, scan_efficiency_ratio) are masked with <slt:ignore> rather than compared to exact values
99147statement ok
100148set datafusion.explain.analyze_categories = 'rows,bytes';
101149
102150query TT
103- EXPLAIN ANALYZE SELECT * FROM generate_series(100) ;
151+ explain analyze select * from cat_tracking where species > 'M' AND s >= 50 order by species limit 3 ;
104152----
105- Plan with Metrics LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=0, end=100, batch_size=8192], metrics=[output_rows=101, output_bytes=<slt:ignore>]
153+ Plan with Metrics
154+ 01)SortExec: TopK(fetch=3), expr=[species@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[species@0 < Nlpine Sheep], metrics=[output_rows=3, output_bytes=<slt:ignore>]
155+ 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/explain_analyze/data.parquet]]}, projection=[species, s], file_type=parquet, predicate=species@0 > M AND s@1 >= 50 AND DynamicFilter [ species@0 < Nlpine Sheep ], pruning_predicate=species_null_count@1 != row_count@2 AND species_max@0 > M AND s_null_count@4 != row_count@2 AND s_max@3 >= 50 AND species_null_count@1 != row_count@2 AND species_min@5 < Nlpine Sheep, required_guarantees=[], metrics=[output_rows=3, output_bytes=<slt:ignore>, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=4 total → 3 matched -> 1 fully matched, row_groups_pruned_bloom_filter=3 total → 3 matched, page_index_pages_pruned=6 total → 6 matched, limit_pruned_row_groups=0 total → 0 matched, bytes_scanned=<slt:ignore>, scan_efficiency_ratio=<slt:ignore>]
106156
107157statement ok
108158reset datafusion.explain.analyze_categories;
109159
110- # "rows" with dev level — per-expression timing excluded, batches included
111- statement ok
112- set datafusion.explain.analyze_level = dev;
160+ # ---- categories = 'timing': only timing metrics (non-deterministic, so every value is masked with <slt:ignore>) ----
113161
114162statement ok
115- set datafusion.explain.analyze_categories = 'rows ';
163+ set datafusion.explain.analyze_categories = 'timing';
116164
117165query TT
118- EXPLAIN ANALYZE
119- SELECT a
120- FROM generate_series(1, 100) as t1(a);
166+ explain analyze select * from cat_tracking where species > 'M' AND s >= 50 order by species limit 3;
121167----
122168Plan with Metrics
123- 01)ProjectionExec: expr=[value@0 as a], metrics=[output_rows=100, output_batches=1]
124- 02)--LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100, batch_size=8192], metrics=[output_rows=100, output_batches=1]
169+ 01)SortExec: TopK(fetch=3), expr=[species@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[species@0 < Nlpine Sheep], metrics=[elapsed_compute=<slt:ignore>]
170+ 02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/explain_analyze/data.parquet]]}, projection=[species, s], file_type=parquet, predicate=species@0 > M AND s@1 >= 50 AND DynamicFilter [ species@0 < Nlpine Sheep ], pruning_predicate=species_null_count@1 != row_count@2 AND species_max@0 > M AND s_null_count@4 != row_count@2 AND s_max@3 >= 50 AND species_null_count@1 != row_count@2 AND species_min@5 < Nlpine Sheep, required_guarantees=[], metrics=[elapsed_compute=<slt:ignore>, metadata_load_time=<slt:ignore>]
171+
172+ statement ok
173+ reset datafusion.explain.analyze_categories;
125174
126175statement ok
127176reset datafusion.explain.analyze_level;
128177
178+ # --- Teardown: drop the external table and restore parquet pushdown_filters (enabled during setup) ---
179+
129180statement ok
130- reset datafusion.explain.analyze_categories;
181+ drop table cat_tracking;
182+
183+ statement ok
184+ reset datafusion.execution.parquet.pushdown_filters;
0 commit comments