@@ -129,6 +129,7 @@ pub trait PruningStatistics {
129129}
130130
131131/// Prune files based on their partition values.
132+ ///
132133/// This is used both at planning time and execution time to prune
133134/// files based on their partition values.
134135/// This feeds into [`CompositePruningStatistics`] to allow pruning
@@ -137,19 +138,21 @@ pub trait PruningStatistics {
137138#[ derive( Clone ) ]
138139pub struct PartitionPruningStatistics {
139140 /// Values for each column for each container.
140- /// The outer vectors represent the columns while the inner
141- /// vectors represent the containers.
142- /// The order must match the order of the partition columns in
143- /// [`PartitionPruningStatistics::partition_schema`].
141+ ///
142+ /// The outer vectors represent the columns while the inner vectors
143+ /// represent the containers. The order must match the order of the
144+ /// partition columns in [`PartitionPruningStatistics::partition_schema`].
144145 partition_values : Vec < ArrayRef > ,
145146 /// The number of containers.
147+ ///
146148 /// Stored explicitly because the partition values are column-major: if
147149 /// there are no columns we could not otherwise know the number of containers.
148150 num_containers : usize ,
149151 /// The schema of the partition columns.
150- /// This must **not** be the schema of the entire file or table:
151- /// it must only be the schema of the partition columns,
152- /// in the same order as the values in [`PartitionPruningStatistics::partition_values`].
152+ ///
153+ /// This must **not** be the schema of the entire file or table: it must
154+ /// only be the schema of the partition columns, in the same order as the
155+ /// values in [`PartitionPruningStatistics::partition_values`].
153156 partition_schema : SchemaRef ,
154157}
155158
@@ -258,7 +261,16 @@ impl PruningStatistics for PartitionPruningStatistics {
258261}
259262
260263/// Prune a set of containers represented by their statistics.
261- /// Each [`Statistics`] represents a container (e.g. a file or a partition of files).
264+ ///
265+ /// Each [`Statistics`] represents a "container" -- some collection of data
266+ /// that has statistics of its columns.
267+ ///
268+ /// It is up to the caller to decide what each container represents. For
269+ /// example, they can come from a file (e.g. [`PartitionedFile`]) or a set of
270+ /// files (e.g. [`FileGroup`]).
271+ ///
272+ /// [`PartitionedFile`]: https://docs.rs/datafusion/latest/datafusion/datasource/listing/struct.PartitionedFile.html
273+ /// [`FileGroup`]: https://docs.rs/datafusion/latest/datafusion/datasource/physical_plan/struct.FileGroup.html
262274#[ derive( Clone ) ]
263275pub struct PrunableStatistics {
264276 /// Statistics for each container.
0 commit comments