1818use clap:: { ColorChoice , Parser , ValueEnum } ;
1919use datafusion:: common:: instant:: Instant ;
2020use datafusion:: common:: utils:: get_available_parallelism;
21- use datafusion:: common:: {
22- DataFusionError , HashMap , Result , exec_datafusion_err, exec_err,
23- } ;
21+ use datafusion:: common:: { DataFusionError , Result , exec_datafusion_err, exec_err} ;
22+ use datafusion_sqllogictest:: TestFile ;
2423use datafusion_sqllogictest:: {
2524 CurrentlyExecutingSqlTracker , DataFusion , DataFusionSubstraitRoundTrip , Filter ,
2625 TestContext , df_value_validator, read_dir_recursive, setup_scratch_dir,
@@ -44,13 +43,12 @@ use crate::postgres_container::{
4443} ;
4544use datafusion:: common:: runtime:: SpawnedTask ;
4645use futures:: FutureExt ;
47- use std:: ffi:: OsStr ;
4846use std:: fs;
4947use std:: io:: { IsTerminal , stderr, stdout} ;
5048use std:: path:: { Path , PathBuf } ;
5149use std:: str:: FromStr ;
50+ use std:: sync:: Arc ;
5251use std:: sync:: atomic:: { AtomicUsize , Ordering } ;
53- use std:: sync:: { Arc , LazyLock } ;
5452use std:: time:: Duration ;
5553
5654#[ cfg( feature = "postgres" ) ]
@@ -59,6 +57,7 @@ mod postgres_container;
5957const TEST_DIRECTORY : & str = "test_files/" ;
6058const DATAFUSION_TESTING_TEST_DIRECTORY : & str = "../../datafusion-testing/data/" ;
6159const PG_COMPAT_FILE_PREFIX : & str = "pg_compat_" ;
60+ const TPCH_PREFIX : & str = "tpch" ;
6261const SQLITE_PREFIX : & str = "sqlite" ;
6362const ERRS_PER_FILE_LIMIT : usize = 10 ;
6463const TIMING_DEBUG_SLOW_FILES_ENV : & str = "SLT_TIMING_DEBUG_SLOW_FILES" ;
@@ -77,55 +76,6 @@ struct FileTiming {
7776 elapsed : Duration ,
7877}
7978
80- /// TEST PRIORITY
81- ///
82- /// Heuristically prioritize some test to run earlier.
83- ///
84- /// Prioritizes test to run earlier if they are known to be long running (as
85- /// each test file itself is run sequentially, but multiple test files are run
86- /// in parallel.
87- ///
88- /// Tests not listed here will run after the listed tests in an arbitrary order.
89- ///
90- /// You can find the top longest running tests by running `--timing-summary` mode.
91- /// For example
92- ///
93- /// ```shell
94- /// $ cargo test --profile=ci --test sqllogictests -- --timing-summary top
95- /// ...
96- /// Per-file elapsed summary (deterministic):
97- /// 1. 5.375s push_down_filter_regression.slt
98- /// 2. 3.174s aggregate.slt
99- /// 3. 3.158s imdb.slt
100- /// 4. 2.793s joins.slt
101- /// 5. 2.505s array.slt
102- /// 6. 2.265s aggregate_skip_partial.slt
103- /// 7. 2.260s window.slt
104- /// 8. 1.677s group_by.slt
105- /// 9. 0.973s datetime/timestamps.slt
106- /// 10. 0.822s cte.slt
107- /// ```
108- static TEST_PRIORITY : LazyLock < HashMap < PathBuf , usize > > = LazyLock :: new ( || {
109- [
110- ( PathBuf :: from ( "push_down_filter_regression.slt" ) , 0 ) , // longest running, so run first.
111- ( PathBuf :: from ( "aggregate.slt" ) , 1 ) ,
112- ( PathBuf :: from ( "joins.slt" ) , 2 ) ,
113- ( PathBuf :: from ( "imdb.slt" ) , 3 ) ,
114- ( PathBuf :: from ( "array.slt" ) , 4 ) ,
115- ( PathBuf :: from ( "aggregate_skip_partial.slt" ) , 5 ) ,
116- ( PathBuf :: from ( "window.slt" ) , 6 ) ,
117- ( PathBuf :: from ( "group_by.slt" ) , 7 ) ,
118- ( PathBuf :: from ( "datetime/timestamps.slt" ) , 8 ) ,
119- ( PathBuf :: from ( "cte.slt" ) , 9 ) ,
120- ]
121- . into_iter ( )
122- . collect ( )
123- } ) ;
124-
125- /// Default priority for tests not in the TEST_PRIORITY map. Tests with lower
126- /// priority values run first.
127- static DEFAULT_PRIORITY : usize = 100 ;
128-
12979pub fn main ( ) -> Result < ( ) > {
13080 tokio:: runtime:: Builder :: new_multi_thread ( )
13181 . enable_all ( )
@@ -832,91 +782,35 @@ async fn run_complete_file_with_postgres(
832782 plan_err ! ( "Can not run with postgres as postgres feature is not enabled" )
833783}
834784
835- /// Represents a parsed test file
836- #[ derive( Debug ) ]
837- struct TestFile {
838- /// The absolute path to the file
839- pub path : PathBuf ,
840- /// The relative path of the file (used for display)
841- pub relative_path : PathBuf ,
842- }
843-
844- impl TestFile {
845- fn new ( path : PathBuf ) -> Self {
846- let p = path. to_string_lossy ( ) ;
847- let relative_path = PathBuf :: from ( if p. starts_with ( TEST_DIRECTORY ) {
848- p. strip_prefix ( TEST_DIRECTORY ) . unwrap ( )
849- } else if p. starts_with ( DATAFUSION_TESTING_TEST_DIRECTORY ) {
850- p. strip_prefix ( DATAFUSION_TESTING_TEST_DIRECTORY ) . unwrap ( )
851- } else {
852- ""
853- } ) ;
854-
855- Self {
856- path,
857- relative_path,
858- }
859- }
860-
861- fn is_slt_file ( & self ) -> bool {
862- self . path . extension ( ) == Some ( OsStr :: new ( "slt" ) )
863- }
864-
865- fn check_sqlite ( & self , options : & Options ) -> bool {
866- if !self . relative_path . starts_with ( SQLITE_PREFIX ) {
867- return true ;
868- }
869-
870- options. include_sqlite
871- }
872-
873- fn check_tpch ( & self , options : & Options ) -> bool {
874- if !self . relative_path . starts_with ( "tpch" ) {
875- return true ;
876- }
785+ fn read_test_files ( options : & Options ) -> Result < Vec < TestFile > > {
786+ let prefixes: & [ & str ] = if options. include_sqlite {
787+ & [ TEST_DIRECTORY , DATAFUSION_TESTING_TEST_DIRECTORY ]
788+ } else {
789+ & [ TEST_DIRECTORY ]
790+ } ;
877791
878- options. include_tpch
879- }
880- }
792+ let directories = prefixes
793+ . iter ( )
794+ . map ( |prefix| {
795+ read_dir_recursive ( prefix) . map_err ( |e| {
796+ exec_datafusion_err ! ( "Error reading test directory {prefix}: {e}" )
797+ } )
798+ } )
799+ . collect :: < Result < Vec < _ > > > ( ) ?;
881800
882- fn read_test_files ( options : & Options ) -> Result < Vec < TestFile > > {
883- let mut paths = read_dir_recursive ( TEST_DIRECTORY ) ?
801+ let mut paths = directories
884802 . into_iter ( )
885- . map ( TestFile :: new)
803+ . flatten ( )
804+ . map ( |p| TestFile :: new ( p, prefixes) )
886805 . filter ( |f| options. check_test_file ( & f. path ) )
887806 . filter ( |f| f. is_slt_file ( ) )
888- . filter ( |f| f . check_tpch ( options ) )
889- . filter ( |f| f . check_sqlite ( options ) )
807+ . filter ( |f| !f . relative_path_starts_with ( TPCH_PREFIX ) || options . include_tpch )
808+ . filter ( |f| !f . relative_path_starts_with ( SQLITE_PREFIX ) || options . include_sqlite )
890809 . filter ( |f| options. check_pg_compat_file ( f. path . as_path ( ) ) )
891810 . collect :: < Vec < _ > > ( ) ;
892- if options. include_sqlite {
893- let mut sqlite_paths = read_dir_recursive ( DATAFUSION_TESTING_TEST_DIRECTORY ) ?
894- . into_iter ( )
895- . map ( TestFile :: new)
896- . filter ( |f| options. check_test_file ( & f. path ) )
897- . filter ( |f| f. is_slt_file ( ) )
898- . filter ( |f| f. check_sqlite ( options) )
899- . filter ( |f| options. check_pg_compat_file ( f. path . as_path ( ) ) )
900- . collect :: < Vec < _ > > ( ) ;
901-
902- paths. append ( & mut sqlite_paths)
903- }
904811
905- Ok ( sort_tests ( paths) )
906- }
907-
908- /// Sort the tests heuristically by order of "priority"
909- ///
910- /// Prioritizes test to run earlier if they are known to be long running (as
911- /// each test file itself is run sequentially, but multiple test files are run
912- /// in parallel.
913- fn sort_tests ( mut tests : Vec < TestFile > ) -> Vec < TestFile > {
914- tests. sort_by_key ( |f| {
915- TEST_PRIORITY
916- . get ( & f. relative_path )
917- . unwrap_or ( & DEFAULT_PRIORITY )
918- } ) ;
919- tests
812+ paths. sort_unstable ( ) ;
813+ Ok ( paths)
920814}
921815
922816/// Parsed command line options
0 commit comments