@@ -942,16 +942,69 @@ impl FileScanConfig {
942942 /// Returns only the output orderings that are validated against actual
943943 /// file group statistics.
944944 ///
945+ /// The various listing tables do not attempt to read all files
946+ /// concurrently; instead, they read files in sequence within a
947+ /// partition. This is an important property as it allows plans to
948+ /// run against 1000s of files and not try to open them all
949+ /// concurrently.
950+ ///
951+ /// However, it means that if we assign more than one file to a
952+ /// partition, the output sort order will not be preserved unless the
953+ /// files' min/max statistics prove the combined stream is still ordered.
954+ ///
955+ /// When only 1 file is assigned to each partition, each partition is
956+ /// correctly sorted on `(A, B, C)`:
957+ ///
958+ /// ```text
959+ /// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┓
960+ /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ┐
961+ /// ┃ ┌───────────────┐ ┌──────────────┐ │ ┌──────────────┐ │ ┌─────────────┐ ┃
962+ /// │ │ 1.parquet │ │ │ │ 2.parquet │ │ │ 3.parquet │ │ │ 4.parquet │ │
963+ /// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ │Sort: A, B, C │ │ │Sort: A, B, C│ ┃
964+ /// │ └───────────────┘ │ │ └──────────────┘ │ └──────────────┘ │ └─────────────┘ │
965+ /// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃
966+ /// Partition 1 Partition 2 Partition 3 Partition 4
967+ /// ┃ ┃
968+ /// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━
969+ /// DataSourceExec
970+ /// ```
971+ ///
972+ /// However, when more than 1 file is assigned to each partition, each
973+ /// partition is NOT necessarily sorted on `(A, B, C)`. Once the second
974+ /// file is scanned, the same values for A, B and C can appear again in
975+ /// the same stream, so the stream as a whole is no longer sorted:
976+ ///
977+ /// ```text
978+ /// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━
979+ /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃
980+ /// ┃ ┌───────────────┐ ┌──────────────┐ │
981+ /// │ │ 1.parquet │ │ │ │ 2.parquet │ ┃
982+ /// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │
983+ /// │ └───────────────┘ │ │ └──────────────┘ ┃
984+ /// ┃ ┌───────────────┐ ┌──────────────┐ │
985+ /// │ │ 3.parquet │ │ │ │ 4.parquet │ ┃
986+ /// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │
987+ /// │ └───────────────┘ │ │ └──────────────┘ ┃
988+ /// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
989+ /// Partition 1 Partition 2 ┃
990+ /// ┃
991+ /// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┛
992+ /// DataSourceExec
993+ /// ```
994+ ///
945995 /// For example, individual files may be ordered by `col1 ASC`,
946- /// but if we have files with these min/max statistics in a single partition / file group:
996+ /// but if we have files with these min/max statistics in a single
997+ /// partition / file group:
947998 ///
948999 /// - file1: min(col1) = 10, max(col1) = 20
9491000 /// - file2: min(col1) = 5, max(col1) = 15
9501001 ///
951- /// Because reading file1 followed by file2 would produce out-of-order output (there is overlap
952- /// in the ranges), we cannot retain `col1 ASC` as a valid output ordering.
1002+ /// Because reading file1 followed by file2 would produce out-of-order
1003+ /// output (there is overlap in the ranges), we cannot retain `col1 ASC`
1004+ /// as a valid output ordering.
9531005 ///
954- /// Similarly this would not be a valid order (non-overlapping ranges but not ordered):
1006+ /// Similarly this would not be a valid order (non-overlapping ranges
1007+ /// but not ordered):
9551008 ///
9561009 /// - file1: min(col1) = 20, max(col1) = 30
9571010 /// - file2: min(col1) = 10, max(col1) = 15
@@ -961,13 +1014,14 @@ impl FileScanConfig {
9611014 /// - file1: min(col1) = 5, max(col1) = 15
9621015 /// - file2: min(col1) = 16, max(col1) = 25
9631016 ///
964- /// Then we know that reading file1 followed by file2 will produce ordered output,
965- /// so `col1 ASC` would be retained.
1017+ /// Then we know that reading file1 followed by file2 will produce
1018+ /// ordered output, so `col1 ASC` would be retained.
9661019 ///
967- /// Note that we are checking for ordering *within* *each* file group / partition,
968- /// files in different partitions are read independently and do not affect each other's ordering.
969- /// Merging of the multiple partition streams into a single ordered stream is handled
970- /// upstream e.g. by `SortPreservingMergeExec`.
1020+ /// Note that we are checking for ordering *within* *each* file group /
1021+ /// partition — files in different partitions are read independently and
1022+ /// do not affect each other's ordering. Merging of the multiple
1023+ /// partition streams into a single ordered stream is handled upstream
1024+ /// e.g. by `SortPreservingMergeExec`.
9711025 fn validated_output_ordering ( & self ) -> Vec < LexOrdering > {
9721026 let schema = self . file_source . table_schema ( ) . table_schema ( ) ;
9731027 validate_orderings ( & self . output_ordering , schema, & self . file_groups , None )
0 commit comments