Skip to content

Commit 376eb03

Browse files
authored
Minor: update documentation for PrunableStatistics (#16213)
1 parent 900279c commit 376eb03

1 file changed

Lines changed: 20 additions & 8 deletions

File tree

datafusion/common/src/pruning.rs

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ pub trait PruningStatistics {
129129
}
130130

131131
/// Prune files based on their partition values.
132+
///
132133
/// This is used both at planning time and execution time to prune
133134
/// files based on their partition values.
134135
/// This feeds into [`CompositePruningStatistics`] to allow pruning
@@ -137,19 +138,21 @@ pub trait PruningStatistics {
137138
#[derive(Clone)]
138139
pub struct PartitionPruningStatistics {
139140
/// Values for each column for each container.
140-
/// The outer vectors represent the columns while the inner
141-
/// vectors represent the containers.
142-
/// The order must match the order of the partition columns in
143-
/// [`PartitionPruningStatistics::partition_schema`].
141+
///
142+
/// The outer vectors represent the columns while the inner vectors
143+
/// represent the containers. The order must match the order of the
144+
/// partition columns in [`PartitionPruningStatistics::partition_schema`].
144145
partition_values: Vec<ArrayRef>,
145146
/// The number of containers.
147+
///
146148
/// Stored since the partition values are column-major and if
147149
/// there are no columns we wouldn't know the number of containers.
148150
num_containers: usize,
149151
/// The schema of the partition columns.
150-
/// This must **not** be the schema of the entire file or table:
151-
/// it must only be the schema of the partition columns,
152-
/// in the same order as the values in [`PartitionPruningStatistics::partition_values`].
152+
///
153+
/// This must **not** be the schema of the entire file or table: it must
154+
/// only be the schema of the partition columns, in the same order as the
155+
/// values in [`PartitionPruningStatistics::partition_values`].
153156
partition_schema: SchemaRef,
154157
}
155158

@@ -258,7 +261,16 @@ impl PruningStatistics for PartitionPruningStatistics {
258261
}
259262

260263
/// Prune a set of containers represented by their statistics.
261-
/// Each [`Statistics`] represents a container (e.g. a file or a partition of files).
264+
///
265+
/// Each [`Statistics`] represents a "container" -- some collection of data
266+
/// that has statistics of its columns.
267+
///
268+
/// It is up to the caller to decide what each container represents. For
269+
/// example, they can come from a file (e.g. [`PartitionedFile`]) or a set of
270+
/// files (e.g. [`FileGroup`]).
271+
///
272+
/// [`PartitionedFile`]: https://docs.rs/datafusion/latest/datafusion/datasource/listing/struct.PartitionedFile.html
273+
/// [`FileGroup`]: https://docs.rs/datafusion/latest/datafusion/datasource/physical_plan/struct.FileGroup.html
262274
#[derive(Clone)]
263275
pub struct PrunableStatistics {
264276
/// Statistics for each container.

0 commit comments

Comments
 (0)