@@ -930,16 +930,69 @@ impl FileScanConfig {
930930 /// Returns only the output orderings that are validated against actual
931931 /// file group statistics.
932932 ///
933+ /// The various listing tables do not attempt to read all files
934+ /// concurrently; instead, they read files in sequence within a
935+ /// partition. This is an important property as it allows plans to
936+ /// run against 1000s of files without trying to open them all
937+ /// concurrently.
938+ ///
939+ /// However, it means if we assign more than one file to a partition
940+ /// the output sort order will not be preserved unless the files'
941+ /// min/max statistics prove the combined stream is still ordered.
942+ ///
943+ /// When only 1 file is assigned to each partition, each partition is
944+ /// correctly sorted on `(A, B, C)`:
945+ ///
946+ /// ```text
947+ /// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┓
948+ /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ┐
949+ /// ┃ ┌───────────────┐ ┌──────────────┐ │ ┌──────────────┐ │ ┌─────────────┐ ┃
950+ /// │ │ 1.parquet │ │ │ │ 2.parquet │ │ │ 3.parquet │ │ │ 4.parquet │ │
951+ /// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ │Sort: A, B, C │ │ │Sort: A, B, C│ ┃
952+ /// │ └───────────────┘ │ │ └──────────────┘ │ └──────────────┘ │ └─────────────┘ │
953+ /// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃
954+ /// Partition 1 Partition 2 Partition 3 Partition 4
955+ /// ┃ ┃
956+ /// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━
957+ /// DataSourceExec
958+ /// ```
959+ ///
960+ /// However, when more than 1 file is assigned to each partition, each
961+ /// partition is NOT necessarily sorted on `(A, B, C)`. Once the second
962+ /// file is scanned, values of A, B and C lower than those already
963+ /// emitted can appear in the same stream, breaking the sort order:
964+ ///
965+ /// ```text
966+ /// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━
967+ /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃
968+ /// ┃ ┌───────────────┐ ┌──────────────┐ │
969+ /// │ │ 1.parquet │ │ │ │ 2.parquet │ ┃
970+ /// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │
971+ /// │ └───────────────┘ │ │ └──────────────┘ ┃
972+ /// ┃ ┌───────────────┐ ┌──────────────┐ │
973+ /// │ │ 3.parquet │ │ │ │ 4.parquet │ ┃
974+ /// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │
975+ /// │ └───────────────┘ │ │ └──────────────┘ ┃
976+ /// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
977+ /// Partition 1 Partition 2 ┃
978+ /// ┃
979+ /// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┛
980+ /// DataSourceExec
981+ /// ```
982+ ///
933983 /// For example, individual files may be ordered by `col1 ASC`,
934- /// but if we have files with these min/max statistics in a single partition / file group:
984+ /// but if we have files with these min/max statistics in a single
985+ /// partition / file group:
935986 ///
936987 /// - file1: min(col1) = 10, max(col1) = 20
937988 /// - file2: min(col1) = 5, max(col1) = 15
938989 ///
939- /// Because reading file1 followed by file2 would produce out-of-order output (there is overlap
940- /// in the ranges), we cannot retain `col1 ASC` as a valid output ordering.
990+ /// Because reading file1 followed by file2 would produce out-of-order
991+ /// output (there is overlap in the ranges), we cannot retain `col1 ASC`
992+ /// as a valid output ordering.
941993 ///
942- /// Similarly this would not be a valid order (non-overlapping ranges but not ordered):
994+ /// Similarly this would not be a valid order (non-overlapping ranges
995+ /// but not ordered):
943996 ///
944997 /// - file1: min(col1) = 20, max(col1) = 30
945998 /// - file2: min(col1) = 10, max(col1) = 15
@@ -949,13 +1002,14 @@ impl FileScanConfig {
9491002 /// - file1: min(col1) = 5, max(col1) = 15
9501003 /// - file2: min(col1) = 16, max(col1) = 25
9511004 ///
952- /// Then we know that reading file1 followed by file2 will produce ordered output,
953- /// so `col1 ASC` would be retained.
1005+ /// Then we know that reading file1 followed by file2 will produce
1006+ /// ordered output, so `col1 ASC` would be retained.
9541007 ///
955- /// Note that we are checking for ordering *within* *each* file group / partition,
956- /// files in different partitions are read independently and do not affect each other's ordering.
957- /// Merging of the multiple partition streams into a single ordered stream is handled
958- /// upstream e.g. by `SortPreservingMergeExec`.
1008+ /// Note that we are checking for ordering *within* *each* file group /
1009+ /// partition — files in different partitions are read independently and
1010+ /// do not affect each other's ordering. Merging of the multiple
1011+ /// partition streams into a single ordered stream is handled upstream
1012+ /// e.g. by `SortPreservingMergeExec`.
9591013 fn validated_output_ordering ( & self ) -> Vec < LexOrdering > {
9601014 let schema = self . file_source . table_schema ( ) . table_schema ( ) ;
9611015 validate_orderings ( & self . output_ordering , schema, & self . file_groups , None )
0 commit comments