@@ -23,6 +23,8 @@ use std::sync::Arc;
2323
2424use crate :: DefaultParquetFileReaderFactory ;
2525use crate :: ParquetFileReaderFactory ;
26+ use crate :: ParquetMorselizer ;
27+ use crate :: morsel:: { EncryptionContext , ParquetMorselizerState } ;
2628use crate :: opener:: ParquetOpener ;
2729use crate :: opener:: build_pruning_predicates;
2830use crate :: row_filter:: can_expr_be_pushed_down_with_schemas;
@@ -31,6 +33,7 @@ use datafusion_common::config::ConfigOptions;
3133use datafusion_common:: config:: EncryptionFactoryOptions ;
3234use datafusion_datasource:: as_file_source;
3335use datafusion_datasource:: file_stream:: FileOpener ;
36+ use datafusion_datasource:: morsel:: Morselizer ;
3437
3538use arrow:: datatypes:: TimeUnit ;
3639use datafusion_common:: DataFusionError ;
@@ -573,6 +576,76 @@ impl FileSource for ParquetSource {
573576 Ok ( opener)
574577 }
575578
579+ fn create_morselizer (
580+ & self ,
581+ object_store : Arc < dyn ObjectStore > ,
582+ base_config : & FileScanConfig ,
583+ partition : usize ,
584+ ) -> datafusion_common:: Result < Box < dyn Morselizer > > {
585+ let expr_adapter_factory = base_config
586+ . expr_adapter_factory
587+ . clone ( )
588+ . unwrap_or_else ( || Arc :: new ( DefaultPhysicalExprAdapterFactory ) as _ ) ;
589+
590+ let parquet_file_reader_factory =
591+ self . parquet_file_reader_factory . clone ( ) . unwrap_or_else ( || {
592+ Arc :: new ( DefaultParquetFileReaderFactory :: new ( object_store) ) as _
593+ } ) ;
594+
595+ #[ cfg( not( feature = "parquet_encryption" ) ) ]
596+ let encryption_context = EncryptionContext :: default ( ) ;
597+
598+ #[ cfg( feature = "parquet_encryption" ) ]
599+ let encryption_context = {
600+ let file_decryption_properties = self
601+ . table_parquet_options ( )
602+ . crypto
603+ . file_decryption
604+ . clone ( )
605+ . map ( FileDecryptionProperties :: from)
606+ . map ( Arc :: new) ;
607+
608+ EncryptionContext :: new (
609+ file_decryption_properties,
610+ self . get_encryption_factory_with_config ( ) ,
611+ )
612+ } ;
613+
614+ let coerce_int96 = self
615+ . table_parquet_options
616+ . global
617+ . coerce_int96
618+ . as_ref ( )
619+ . map ( |time_unit| parse_coerce_int96_string ( time_unit. as_str ( ) ) . unwrap ( ) ) ;
620+
621+ let morselizer = ParquetMorselizer :: new ( ParquetMorselizerState {
622+ partition_index : partition,
623+ projection : self . projection . clone ( ) ,
624+ batch_size : self
625+ . batch_size
626+ . expect ( "Batch size must set before creating ParquetMorselizer" ) ,
627+ limit : base_config. limit ,
628+ preserve_order : base_config. preserve_order ,
629+ predicate : self . predicate . clone ( ) ,
630+ table_schema : self . table_schema . clone ( ) ,
631+ metadata_size_hint : self . metadata_size_hint ,
632+ metrics : self . metrics ( ) . clone ( ) ,
633+ parquet_file_reader_factory,
634+ pushdown_filters : self . pushdown_filters ( ) ,
635+ reorder_filters : self . reorder_filters ( ) ,
636+ force_filter_selections : self . force_filter_selections ( ) ,
637+ enable_page_index : self . enable_page_index ( ) ,
638+ enable_bloom_filter : self . bloom_filter_on_read ( ) ,
639+ enable_row_group_stats_pruning : self . table_parquet_options . global . pruning ,
640+ coerce_int96,
641+ expr_adapter_factory,
642+ encryption_context,
643+ max_predicate_cache_size : self . max_predicate_cache_size ( ) ,
644+ reverse_row_groups : self . reverse_row_groups ,
645+ } ) ;
646+ Ok ( Box :: new ( morselizer) )
647+ }
648+
576649 fn as_any ( & self ) -> & dyn Any {
577650 self
578651 }
0 commit comments