@@ -28,11 +28,33 @@ use arrow::datatypes::{DataType, Schema};
2828/// propagate information about the precision of statistical values.
2929#[ derive( Clone , PartialEq , Eq , Default , Copy ) ]
3030pub enum Precision < T : Debug + Clone + PartialEq + Eq + PartialOrd > {
31- /// The exact value is known
31+ /// The exact value is known. Used for guaranteeing correctness.
32+ ///
33+ /// Comes from definitive sources such as:
34+ /// - Parquet file metadata (row counts, byte sizes)
35+ /// - In-memory RecordBatch data (actual row counts, byte sizes, null counts)
36+ /// - and more...
3237 Exact ( T ) ,
33- /// The value is not known exactly, but is likely close to this value
38+ /// The value is not known exactly, but is likely close to this value.
39+ /// Used for cost-based optimizations.
40+ ///
41+ /// Operations that result in `Inexact(T)` include:
42+ /// - Applying a filter (selectivity is unknown)
43+ /// - Mixing exact and inexact values in arithmetic
44+ /// - and more...
3445 Inexact ( T ) ,
35- /// Nothing is known about the value
46+ /// Nothing is known about the value. This is the default state.
47+ ///
48+ /// Acts as an absorbing element in arithmetic: any operation
49+ /// involving `Absent` yields `Absent`. [`Precision::to_inexact`]
50+ /// on `Absent` returns `Absent`, not `Inexact`, because `Absent`
51+ /// represents a fundamentally different state.
52+ ///
53+ /// Common sources include:
54+ /// - Data sources without statistics
55+ /// - Parquet columns missing from file metadata
56+ /// - Statistics that cannot be derived for an operation (e.g.,
57+ /// `distinct_count` after a union, `total_byte_size` for joins)
3658 #[ default]
3759 Absent ,
3860}
@@ -286,6 +308,7 @@ pub struct Statistics {
286308 /// The number of rows estimated to be scanned.
287309 pub num_rows : Precision < usize > ,
288310 /// The total bytes of the output data.
311+ ///
289312 /// Note that this is not the same as the total bytes that may be scanned,
290313 /// processed, etc.
291314 /// E.g. we may read 1GB of data from a Parquet file but the Arrow data
0 commit comments