@@ -845,6 +845,7 @@ config_namespace! {
845845 /// default parquet writer setting
846846 pub bloom_filter_ndv: Option <u64 >, default = None
847847
848+
848849 /// (writing) Controls whether DataFusion will attempt to speed up writing
849850 /// parquet files by serializing them in parallel. Each column
850851 /// in each row group in each output file is serialized in parallel
@@ -872,6 +873,27 @@ config_namespace! {
872873 /// writing out already in-memory data, such as from a cached
873874 /// data frame.
874875 pub maximum_buffered_record_batches_per_stream: usize , default = 2
876+
877+ /// (writing) EXPERIMENTAL: Enable content-defined chunking (CDC) when writing
878+ /// parquet files. When true, the other `cdc_*` options control the chunking
879+ /// behavior. When CDC is enabled, parallel writing is automatically disabled
880+ /// since the chunker state must persist across row groups.
881+ pub enable_content_defined_chunking: bool , default = false
882+
883+ /// (writing) Minimum chunk size in bytes for content-defined chunking.
884+ /// The rolling hash will not be updated until this size is reached for each chunk.
885+ /// Default is 256 KiB. Only used when `enable_content_defined_chunking` is true.
886+ pub cdc_min_chunk_size: usize , default = 256 * 1024
887+
888+ /// (writing) Maximum chunk size in bytes for content-defined chunking.
889+ /// The chunker will create a new chunk whenever the chunk size exceeds this value.
890+ /// Default is 1 MiB. Only used when `enable_content_defined_chunking` is true.
891+ pub cdc_max_chunk_size: usize , default = 1024 * 1024
892+
893+ /// (writing) Normalization level for content-defined chunking.
894+ /// Increasing this improves deduplication ratio but increases fragmentation.
895+ /// Recommended range is [-3, 3], default is 0. Only used when `enable_content_defined_chunking` is true.
896+ pub cdc_norm_level: i64 , default = 0
875897 }
876898}
877899
@@ -1820,6 +1842,7 @@ config_field!(usize);
18201842config_field ! ( f64 ) ;
18211843config_field ! ( u64 ) ;
18221844config_field ! ( u32 ) ;
1845+ config_field ! ( i64 ) ;
18231846
18241847impl ConfigField for u8 {
18251848 fn visit < V : Visit > ( & self , v : & mut V , key : & str , description : & ' static str ) {
0 commit comments