Skip to content

Commit 98defe6

Browse files
authored
chore: Fix all sqllogictest dangling configs (#21108)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> Follow up to #20838 ## Rationale for this change <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> in #20838 it adds a way to check and warn about dangling config settings, it has detected several existing violations but has not fixed them: ```sh yongting@Yongtings-MacBook-Pro-2 ~/C/datafusion (cleanup-slt)> git checkout main Switched to branch 'main' Your branch is up to date with 'upstream/main'. yongting@Yongtings-MacBook-Pro-2 ~/C/datafusion (main=)> cargo test --profile=ci --test sqllogictests Compiling datafusion-sqllogictest v52.3.0 (/Users/yongting/Code/datafusion/datafusion/sqllogictest) Finished `ci` profile [unoptimized] target(s) in 2.09s Running bin/sqllogictests.rs (target/ci/deps/sqllogictests-7e2a8cc6115b158a) Running with 14 test threads (available parallelism: 14) SLT file aggregate_repartition.slt left modified configuration datafusion.execution.target_partitions: 4 -> 1 SLT file arrow_files.slt left modified configuration datafusion.sql_parser.map_string_types_to_utf8view: true -> false SLT file datetime/current_date_timezone.slt left modified configuration datafusion.execution.time_zone: NULL -> +00:00 ... ``` This PR does 1. Fix all the old violations 2. For future slt dangling configurations, it fails the test run instead of only warn about it. I tested to add a config statement at the end of one `slt` file, and run the test, it runs and failed as expected ```sh yongting@Yongtings-MacBook-Pro-2 ~/C/datafusion (cleanup-slt)> cargo test --profile=ci --test sqllogictests Compiling datafusion-sqllogictest v52.3.0 (/Users/yongting/Code/datafusion/datafusion/sqllogictest) Finished `ci` profile [unoptimized] target(s) in 2.28s Running bin/sqllogictests.rs (target/ci/deps/sqllogictests-7e2a8cc6115b158a) Running with 14 test threads (available parallelism: 14) Completed 416 test files in 5 seconds External error: Other Error: SLT file sort_pushdown.slt left modified configuration datafusion.optimizer.max_passes: 3 -> 10 Error: Execution("1 failures") error: test failed, to rerun pass `-p datafusion-sqllogictest --test sqllogictests` ``` ## What changes are included in this PR? <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> ## Are these changes tested? <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 4. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> UT ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. --> no
1 parent 7f29cb0 commit 98defe6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+645
-65
lines changed

datafusion/sqllogictest/bin/sqllogictests.rs

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ use std::fs;
4747
use std::io::{IsTerminal, stderr, stdout};
4848
use std::path::{Path, PathBuf};
4949
use std::str::FromStr;
50-
use std::sync::Arc;
5150
use std::sync::atomic::{AtomicUsize, Ordering};
51+
use std::sync::{Arc, Mutex};
5252
use std::time::Duration;
5353

5454
#[cfg(feature = "postgres")]
@@ -76,6 +76,19 @@ struct FileTiming {
7676
elapsed: Duration,
7777
}
7878

79+
type DataFusionConfigChangeErrors = Arc<Mutex<Vec<String>>>;
80+
81+
fn config_change_result(
82+
config_change_errors: &DataFusionConfigChangeErrors,
83+
) -> Result<()> {
84+
let errors = config_change_errors.lock().unwrap();
85+
if errors.is_empty() {
86+
Ok(())
87+
} else {
88+
Err(DataFusionError::External(errors.join("\n\n").into()))
89+
}
90+
}
91+
7992
pub fn main() -> Result<()> {
8093
tokio::runtime::Builder::new_multi_thread()
8194
.enable_all()
@@ -482,7 +495,7 @@ async fn run_test_file_substrait_round_trip(
482495
runner.with_column_validator(strict_column_validator);
483496
runner.with_normalizer(value_normalizer);
484497
runner.with_validator(validator);
485-
let res = run_file_in_runner(path, runner, filters, colored_output).await;
498+
let res = run_file_in_runner(path, &mut runner, filters, colored_output).await;
486499
pb.finish_and_clear();
487500
res
488501
}
@@ -512,26 +525,37 @@ async fn run_test_file(
512525
pb.set_style(mp_style);
513526
pb.set_message(format!("{:?}", &relative_path));
514527

528+
// If DataFusion configuration has changed during test file runs, errors will be
529+
// pushed to this vec.
530+
// HACK: managed externally because `sqllogictest` is an external dependency, and
531+
// it doesn't have an API to directly access the inner runner.
532+
let config_change_errors = Arc::new(Mutex::new(Vec::new()));
515533
let mut runner = sqllogictest::Runner::new(|| async {
516534
Ok(DataFusion::new(
517535
test_ctx.session_ctx().clone(),
518536
relative_path.clone(),
519537
pb.clone(),
520538
)
521-
.with_currently_executing_sql_tracker(currently_executing_sql_tracker.clone()))
539+
.with_currently_executing_sql_tracker(currently_executing_sql_tracker.clone())
540+
.with_config_change_errors(Arc::clone(&config_change_errors)))
522541
});
523542
runner.add_label("Datafusion");
524543
runner.with_column_validator(strict_column_validator);
525544
runner.with_normalizer(value_normalizer);
526545
runner.with_validator(validator);
527-
let result = run_file_in_runner(path, runner, filters, colored_output).await;
546+
let result = run_file_in_runner(path, &mut runner, filters, colored_output).await;
528547
pb.finish_and_clear();
529-
result
548+
549+
result?;
550+
551+
// If there was no correctness error, check that the config is unchanged.
552+
runner.shutdown_async().await;
553+
config_change_result(&config_change_errors)
530554
}
531555

532556
async fn run_file_in_runner<D: AsyncDB, M: MakeConnection<Conn = D>>(
533557
path: PathBuf,
534-
mut runner: sqllogictest::Runner<D, M>,
558+
runner: &mut sqllogictest::Runner<D, M>,
535559
filters: &[Filter],
536560
colored_output: bool,
537561
) -> Result<()> {
@@ -644,7 +668,7 @@ async fn run_test_file_with_postgres(
644668
runner.with_column_validator(strict_column_validator);
645669
runner.with_normalizer(value_normalizer);
646670
runner.with_validator(validator);
647-
let result = run_file_in_runner(path, runner, filters, false).await;
671+
let result = run_file_in_runner(path, &mut runner, filters, false).await;
648672
pb.finish_and_clear();
649673
result
650674
}
@@ -688,13 +712,15 @@ async fn run_complete_file(
688712
pb.set_style(mp_style);
689713
pb.set_message(format!("{:?}", &relative_path));
690714

715+
let config_change_errors = Arc::new(Mutex::new(Vec::new()));
691716
let mut runner = sqllogictest::Runner::new(|| async {
692717
Ok(DataFusion::new(
693718
test_ctx.session_ctx().clone(),
694719
relative_path.clone(),
695720
pb.clone(),
696721
)
697-
.with_currently_executing_sql_tracker(currently_executing_sql_tracker.clone()))
722+
.with_currently_executing_sql_tracker(currently_executing_sql_tracker.clone())
723+
.with_config_change_errors(Arc::clone(&config_change_errors)))
698724
});
699725

700726
let col_separator = " ";
@@ -712,7 +738,9 @@ async fn run_complete_file(
712738

713739
pb.finish_and_clear();
714740

715-
res
741+
res?;
742+
runner.shutdown_async().await;
743+
config_change_result(&config_change_errors)
716744
}
717745

718746
#[cfg(feature = "postgres")]

datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs

Lines changed: 82 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use std::collections::HashMap;
19-
use std::sync::Arc;
19+
use std::sync::{Arc, Mutex};
2020
use std::{path::PathBuf, time::Duration};
2121

2222
use super::{DFSqlLogicTestError, error::Result, normalize};
@@ -40,6 +40,7 @@ pub struct DataFusion {
4040
pb: ProgressBar,
4141
currently_executing_sql_tracker: CurrentlyExecutingSqlTracker,
4242
default_config: HashMap<String, Option<String>>,
43+
config_change_errors: Option<Arc<Mutex<Vec<String>>>>,
4344
}
4445

4546
impl DataFusion {
@@ -59,6 +60,7 @@ impl DataFusion {
5960
pb,
6061
currently_executing_sql_tracker: CurrentlyExecutingSqlTracker::default(),
6162
default_config,
63+
config_change_errors: None,
6264
}
6365
}
6466

@@ -73,6 +75,14 @@ impl DataFusion {
7375
self
7476
}
7577

78+
pub fn with_config_change_errors(
79+
mut self,
80+
config_change_errors: Arc<Mutex<Vec<String>>>,
81+
) -> Self {
82+
self.config_change_errors = Some(config_change_errors);
83+
self
84+
}
85+
7686
fn update_slow_count(&self) {
7787
let msg = self.pb.message();
7888
let split: Vec<&str> = msg.split(" ").collect();
@@ -88,6 +98,43 @@ impl DataFusion {
8898
self.pb
8999
.set_message(format!("{} - {} took > 500 ms", split[0], current_count));
90100
}
101+
102+
pub fn validate_config_unchanged(&mut self) -> Result<()> {
103+
let mut changed = false;
104+
let mut message = format!(
105+
"SLT file {} left modified configuration",
106+
self.relative_path.display()
107+
);
108+
109+
for entry in self.ctx.state().config().options().entries() {
110+
let default_entry = self.default_config.remove(&entry.key);
111+
112+
if let Some(default_entry) = default_entry
113+
&& default_entry.as_ref() != entry.value.as_ref()
114+
{
115+
changed = true;
116+
117+
let default = default_entry.as_deref().unwrap_or("NULL");
118+
let current = entry.value.as_deref().unwrap_or("NULL");
119+
120+
message
121+
.push_str(&format!("\n {}: {} -> {}", entry.key, default, current));
122+
}
123+
}
124+
125+
for (key, value) in &self.default_config {
126+
changed = true;
127+
128+
let default = value.as_deref().unwrap_or("NULL");
129+
message.push_str(&format!("\n {key}: {default} -> NULL"));
130+
}
131+
132+
if changed {
133+
Err(DFSqlLogicTestError::Other(message))
134+
} else {
135+
Ok(())
136+
}
137+
}
91138
}
92139

93140
#[async_trait]
@@ -142,48 +189,12 @@ impl sqllogictest::AsyncDB for DataFusion {
142189
tokio::time::sleep(dur).await;
143190
}
144191

145-
async fn shutdown(&mut self) {}
146-
}
147-
148-
impl Drop for DataFusion {
149-
fn drop(&mut self) {
150-
let mut changed = false;
151-
152-
for e in self.ctx.state().config().options().entries() {
153-
let default_entry = self.default_config.remove(&e.key);
154-
155-
if let Some(default_entry) = default_entry
156-
&& default_entry.as_ref() != e.value.as_ref()
157-
{
158-
if !changed {
159-
changed = true;
160-
self.pb.println(format!(
161-
"SLT file {} left modified configuration",
162-
self.relative_path.display()
163-
));
164-
}
165-
166-
let default = default_entry.as_deref().unwrap_or("NULL");
167-
let current = e.value.as_deref().unwrap_or("NULL");
168-
169-
self.pb
170-
.println(format!(" {}: {} -> {}", e.key, default, current));
171-
}
172-
}
173-
174-
// Any remaining entries were present initially but removed during execution
175-
for (key, value) in &self.default_config {
176-
if !changed {
177-
changed = true;
178-
self.pb.println(format!(
179-
"SLT file {} left modified configuration",
180-
self.relative_path.display()
181-
));
182-
}
183-
184-
let default = value.as_deref().unwrap_or("NULL");
185-
186-
self.pb.println(format!(" {key}: {default} -> NULL"));
192+
/// Shutdown and check no DataFusion configuration has changed during test
193+
async fn shutdown(&mut self) {
194+
if let Some(config_change_errors) = self.config_change_errors.clone()
195+
&& let Err(error) = self.validate_config_unchanged()
196+
{
197+
config_change_errors.lock().unwrap().push(error.to_string());
187198
}
188199
}
189200
}
@@ -209,3 +220,31 @@ async fn run_query(
209220
Ok(DBOutput::Rows { types, rows })
210221
}
211222
}
223+
224+
#[cfg(test)]
225+
mod tests {
226+
use super::*;
227+
use sqllogictest::AsyncDB;
228+
229+
#[tokio::test]
230+
async fn validate_config_unchanged_detects_modified_config() {
231+
let ctx = SessionContext::new();
232+
let default_batch_size = ctx.state().config().options().execution.batch_size;
233+
let mut runner =
234+
DataFusion::new(ctx, PathBuf::from("test.slt"), ProgressBar::hidden());
235+
236+
<DataFusion as AsyncDB>::run(
237+
&mut runner,
238+
"SET datafusion.execution.batch_size = 2048",
239+
)
240+
.await
241+
.unwrap();
242+
243+
let error = runner.validate_config_unchanged().unwrap_err();
244+
let message = error.to_string();
245+
246+
assert!(message.contains("test.slt left modified configuration"));
247+
assert!(message.contains("datafusion.execution.batch_size"));
248+
assert!(message.contains(&format!("{default_batch_size} -> 2048")));
249+
}
250+
}

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8778,9 +8778,12 @@ ORDER BY g;
87788778
2 2 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5
87798779
3 2 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5
87808780

8781-
statement ok
8782-
DROP TABLE stream_test;
8781+
# Config reset
87838782

8784-
# Restore default target partitions
8783+
# The SLT runner sets `target_partitions` to 4 instead of using the default, so
8784+
# reset it explicitly.
87858785
statement ok
87868786
set datafusion.execution.target_partitions = 4;
8787+
8788+
statement ok
8789+
DROP TABLE stream_test;

datafusion/sqllogictest/test_files/aggregate_repartition.slt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,10 @@ physical_plan
128128
01)ProjectionExec: expr=[env@0 as env, count(Int64(1))@1 as count(*)]
129129
02)--AggregateExec: mode=Single, gby=[env@0 as env], aggr=[count(Int64(1))]
130130
03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/aggregate_repartition/dim.parquet]]}, projection=[env], file_type=parquet
131+
132+
# Config reset
133+
134+
# The SLT runner sets `target_partitions` to 4 instead of using the default, so
135+
# reset it explicitly.
136+
statement ok
137+
SET datafusion.execution.target_partitions = 4;

datafusion/sqllogictest/test_files/aggregate_skip_partial.slt

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,21 @@ c true false NULL
216216
d NULL false NULL
217217
e true false NULL
218218

219+
# Config reset
220+
statement ok
221+
reset datafusion.execution.batch_size;
222+
223+
# The SLT runner sets `target_partitions` to 4 instead of using the default, so
224+
# reset it explicitly.
225+
statement ok
226+
set datafusion.execution.target_partitions = 4;
227+
228+
statement ok
229+
reset datafusion.execution.skip_partial_aggregation_probe_ratio_threshold;
230+
231+
statement ok
232+
reset datafusion.execution.skip_partial_aggregation_probe_rows_threshold;
233+
219234
# Prepare settings to always skip aggregation after couple of batches
220235
statement ok
221236
set datafusion.execution.skip_partial_aggregation_probe_rows_threshold = 10;
@@ -693,6 +708,21 @@ ORDER BY i;
693708
2 66
694709
3 33
695710

711+
# Config reset
712+
statement ok
713+
reset datafusion.execution.batch_size;
714+
715+
# The SLT runner sets `target_partitions` to 4 instead of using the default, so
716+
# reset it explicitly.
717+
statement ok
718+
set datafusion.execution.target_partitions = 4;
719+
720+
statement ok
721+
reset datafusion.execution.skip_partial_aggregation_probe_ratio_threshold;
722+
723+
statement ok
724+
reset datafusion.execution.skip_partial_aggregation_probe_rows_threshold;
725+
696726
statement ok
697727
DROP TABLE decimal_table;
698728

@@ -738,5 +768,20 @@ SELECT bool_and(c1), bool_and(c2), bool_and(c3), bool_and(c4), bool_and(c5), boo
738768
----
739769
true false false false false true false NULL
740770

771+
# Config reset
772+
statement ok
773+
reset datafusion.execution.batch_size;
774+
775+
# The SLT runner sets `target_partitions` to 4 instead of using the default, so
776+
# reset it explicitly.
777+
statement ok
778+
set datafusion.execution.target_partitions = 4;
779+
780+
statement ok
781+
reset datafusion.execution.skip_partial_aggregation_probe_ratio_threshold;
782+
783+
statement ok
784+
reset datafusion.execution.skip_partial_aggregation_probe_rows_threshold;
785+
741786
statement ok
742787
DROP TABLE aggregate_test_100_bool

datafusion/sqllogictest/test_files/arrow_files.slt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,3 +388,7 @@ physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/
388388
# querying corrupted stream format should result in error
389389
query error DataFusion error: Arrow error: Parser error: Unsupported message header type in IPC stream: 'NONE'
390390
SELECT * FROM arrow_stream_corrupted_metadata_length
391+
392+
# Config reset
393+
statement ok
394+
RESET datafusion.sql_parser.map_string_types_to_utf8view;

datafusion/sqllogictest/test_files/avro.slt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,7 @@ SELECT id, string_col, int_col, bigint_col FROM alltypes_plain ORDER BY id LIMIT
267267
2 0 0 0
268268
3 1 1 10
269269
4 0 0 0
270+
271+
# Config reset
272+
statement ok
273+
reset datafusion.sql_parser.map_string_types_to_utf8view;

0 commit comments

Comments
 (0)