@@ -952,16 +952,69 @@ impl FileScanConfig {
952952 /// Returns only the output orderings that are validated against actual
953953 /// file group statistics.
954954 ///
955+ /// The various listing tables do not attempt to read all files
956+ /// concurrently; instead, they read files in sequence within a
957+ /// partition. This is an important property as it allows plans to
958+ /// run against 1000s of files and not try to open them all
959+ /// concurrently.
960+ ///
961+ /// However, this means that if we assign more than one file to a partition,
962+ /// the output sort order will not be preserved unless the files'
963+ /// min/max statistics prove the combined stream is still ordered.
964+ ///
965+ /// When only 1 file is assigned to each partition, each partition is
966+ /// correctly sorted on `(A, B, C)`:
967+ ///
968+ /// ```text
969+ /// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┓
970+ /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ┐
971+ /// ┃ ┌───────────────┐ ┌──────────────┐ │ ┌──────────────┐ │ ┌─────────────┐ ┃
972+ /// │ │ 1.parquet │ │ │ │ 2.parquet │ │ │ 3.parquet │ │ │ 4.parquet │ │
973+ /// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ │Sort: A, B, C │ │ │Sort: A, B, C│ ┃
974+ /// │ └───────────────┘ │ │ └──────────────┘ │ └──────────────┘ │ └─────────────┘ │
975+ /// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃
976+ /// Partition 1 Partition 2 Partition 3 Partition 4
977+ /// ┃ ┃
978+ /// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━
979+ /// DataSourceExec
980+ /// ```
981+ ///
982+ /// However, when more than 1 file is assigned to each partition, each
983+ /// partition is NOT necessarily sorted on `(A, B, C)`. Once the second
984+ /// file is scanned, the same values for A, B and C can be repeated in
985+ /// the same stream, so the stream as a whole is no longer sorted:
986+ ///
987+ /// ```text
988+ /// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━
989+ /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃
990+ /// ┃ ┌───────────────┐ ┌──────────────┐ │
991+ /// │ │ 1.parquet │ │ │ │ 2.parquet │ ┃
992+ /// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │
993+ /// │ └───────────────┘ │ │ └──────────────┘ ┃
994+ /// ┃ ┌───────────────┐ ┌──────────────┐ │
995+ /// │ │ 3.parquet │ │ │ │ 4.parquet │ ┃
996+ /// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │
997+ /// │ └───────────────┘ │ │ └──────────────┘ ┃
998+ /// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
999+ /// Partition 1 Partition 2 ┃
1000+ /// ┃
1001+ /// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┛
1002+ /// DataSourceExec
1003+ /// ```
1004+ ///
9551005 /// For example, individual files may be ordered by `col1 ASC`,
956- /// but if we have files with these min/max statistics in a single partition / file group:
1006+ /// but if we have files with these min/max statistics in a single
1007+ /// partition / file group:
9571008 ///
9581009 /// - file1: min(col1) = 10, max(col1) = 20
9591010 /// - file2: min(col1) = 5, max(col1) = 15
9601011 ///
961- /// Because reading file1 followed by file2 would produce out-of-order output (there is overlap
962- /// in the ranges), we cannot retain `col1 ASC` as a valid output ordering.
1012+ /// Because reading file1 followed by file2 would produce out-of-order
1013+ /// output (there is overlap in the ranges), we cannot retain `col1 ASC`
1014+ /// as a valid output ordering.
9631015 ///
964- /// Similarly this would not be a valid order (non-overlapping ranges but not ordered):
1016+ /// Similarly this would not be a valid order (non-overlapping ranges
1017+ /// but not ordered):
9651018 ///
9661019 /// - file1: min(col1) = 20, max(col1) = 30
9671020 /// - file2: min(col1) = 10, max(col1) = 15
@@ -971,13 +1024,14 @@ impl FileScanConfig {
9711024 /// - file1: min(col1) = 5, max(col1) = 15
9721025 /// - file2: min(col1) = 16, max(col1) = 25
9731026 ///
974- /// Then we know that reading file1 followed by file2 will produce ordered output,
975- /// so `col1 ASC` would be retained.
1027+ /// Then we know that reading file1 followed by file2 will produce
1028+ /// ordered output, so `col1 ASC` would be retained.
9761029 ///
977- /// Note that we are checking for ordering *within* *each* file group / partition,
978- /// files in different partitions are read independently and do not affect each other's ordering.
979- /// Merging of the multiple partition streams into a single ordered stream is handled
980- /// upstream e.g. by `SortPreservingMergeExec`.
1030+ /// Note that we are checking for ordering *within* *each* file group /
1031+ /// partition — files in different partitions are read independently and
1032+ /// do not affect each other's ordering. Merging of the multiple
1033+ /// partition streams into a single ordered stream is handled upstream
1034+ /// e.g. by `SortPreservingMergeExec`.
9811035 fn validated_output_ordering ( & self ) -> Vec < LexOrdering > {
9821036 let schema = self . file_source . table_schema ( ) . table_schema ( ) ;
9831037 validate_orderings ( & self . output_ordering , schema, & self . file_groups , None )
0 commit comments