@@ -22,7 +22,8 @@ use crate::file_groups::FileGroup;
2222use crate :: {
2323 PartitionedFile , display:: FileGroupsDisplay , file:: FileSource ,
2424 file_compression_type:: FileCompressionType , file_stream:: FileStreamBuilder ,
25- source:: DataSource , statistics:: MinMaxStatistics ,
25+ file_stream:: work_source:: SharedWorkSource , source:: DataSource ,
26+ statistics:: MinMaxStatistics ,
2627} ;
2728use arrow:: datatypes:: FieldRef ;
2829use arrow:: datatypes:: { DataType , Schema , SchemaRef } ;
@@ -55,7 +56,13 @@ use datafusion_physical_plan::{
5556 metrics:: ExecutionPlanMetricsSet ,
5657} ;
5758use log:: { debug, warn} ;
58- use std:: { any:: Any , fmt:: Debug , fmt:: Formatter , fmt:: Result as FmtResult , sync:: Arc } ;
59+ use std:: {
60+ any:: Any ,
61+ fmt:: Debug ,
62+ fmt:: Formatter ,
63+ fmt:: Result as FmtResult ,
64+ sync:: { Arc , OnceLock } ,
65+ } ;
5966
6067/// [`FileScanConfig`] represents scanning data from a group of files
6168///
@@ -209,6 +216,11 @@ pub struct FileScanConfig {
209216 /// If the number of file partitions > target_partitions, the file partitions will be grouped
210217 /// in a round-robin fashion such that number of file partitions = target_partitions.
211218 pub partitioned_by_file_group : bool ,
219+ /// Shared queue of unopened files for sibling streams in this scan.
220+ ///
221+ /// This is initialized once per `FileScanConfig` and reused by reorderable
222+ /// `FileStream`s created from that config.
223+ pub ( crate ) shared_work_source : Arc < OnceLock < SharedWorkSource > > ,
212224}
213225
214226/// A builder for [`FileScanConfig`]'s.
@@ -551,10 +563,34 @@ impl FileScanConfigBuilder {
551563 expr_adapter_factory : expr_adapter,
552564 statistics,
553565 partitioned_by_file_group,
566+ shared_work_source : Arc :: new ( OnceLock :: new ( ) ) ,
554567 }
555568 }
556569}
557570
571+ impl FileScanConfig {
572+ /// Returns the shared unopened-file queue for reorderable streams in this scan.
573+ ///
574+ /// The queue is initialized once from all file groups so sibling streams
575+ /// can begin stealing work immediately, even if they are built or polled
576+ /// before every sibling `FileStream` has been constructed.
577+ pub ( crate ) fn shared_work_source ( & self ) -> Option < SharedWorkSource > {
578+ if self . preserve_order || self . partitioned_by_file_group {
579+ return None ;
580+ }
581+
582+ Some (
583+ self . shared_work_source
584+ . get_or_init ( || {
585+ SharedWorkSource :: new (
586+ self . file_groups . iter ( ) . flat_map ( FileGroup :: iter) . cloned ( ) ,
587+ )
588+ } )
589+ . clone ( ) ,
590+ )
591+ }
592+ }
593+
558594impl From < FileScanConfig > for FileScanConfigBuilder {
559595 fn from ( config : FileScanConfig ) -> Self {
560596 Self {
0 commit comments