fix: address CI lint warnings (clippy, rustdoc, deprecated Wildcard)

adriangb · claude · adriangb · commit 2c272f2d436e · 2026-03-26T15:32:57.000-05:00
- Fix broken intra-doc links for Expr, ResolvedStatistics, PruningPredicate
- Replace deprecated Expr::Wildcard with Expr::Literal in count expressions
- Fix clippy: collapsible_if, bool_assert_comparison, uninlined_format_args,
  cloned_ref_to_slice_refs
- Fix unused variable warning in test

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/datafusion/pruning/src/pruning_predicate.rs b/datafusion/pruning/src/pruning_predicate.rs
@@ -578,8 +578,10 @@ impl PruningPredicate {
         }
 
         // Phase 2: Min/max/null_count/row_count predicate
-        let statistics_batch =
-            build_statistics_record_batch_from_resolved(resolved, &self.required_columns)?;
+        let statistics_batch = build_statistics_record_batch_from_resolved(
+            resolved,
+            &self.required_columns,
+        )?;
         builder.combine_value(self.predicate_expr.evaluate(&statistics_batch)?);
 
         Ok(builder.build())
@@ -1989,7 +1991,7 @@ fn wrap_null_count_check_expr(
     )))
 }
 
-/// Convert a [`StatisticsType`] + column into the corresponding logical [`Expr`].
+/// Convert a [`StatisticsType`] + column into the corresponding logical expression.
 fn stat_type_to_expr(
     column: &phys_expr::Column,
     stat_type: StatisticsType,
@@ -1998,12 +2000,10 @@ fn stat_type_to_expr(
     let col_expr = LExpr::Column(Column::new_unqualified(column.name()));
     match stat_type {
         StatisticsType::Min => {
-            datafusion_functions_aggregate::min_max::min_udaf()
-                .call(vec![col_expr])
+            datafusion_functions_aggregate::min_max::min_udaf().call(vec![col_expr])
         }
         StatisticsType::Max => {
-            datafusion_functions_aggregate::min_max::max_udaf()
-                .call(vec![col_expr])
+            datafusion_functions_aggregate::min_max::max_udaf().call(vec![col_expr])
         }
         StatisticsType::NullCount => {
             let count_expr = datafusion_functions_aggregate::count::count_udaf()
@@ -2040,7 +2040,7 @@ fn literal_guarantee_to_in_list(
     ))
 }
 
-/// Build a statistics [`RecordBatch`] from a [`ResolvedStatistics`] cache,
+/// Build a statistics [`RecordBatch`] from a [`crate::ResolvedStatistics`] cache,
 /// looking up each required column's expression and falling back to null
 /// arrays for missing entries.
 fn build_statistics_record_batch_from_resolved(
@@ -2072,7 +2072,9 @@ fn build_statistics_record_batch_from_resolved(
     let mut options = RecordBatchOptions::default();
     options.row_count = Some(num_containers);
 
-    trace!("Creating statistics batch from resolved for {required_columns:#?} with {arrays:#?}");
+    trace!(
+        "Creating statistics batch from resolved for {required_columns:#?} with {arrays:#?}"
+    );
 
     RecordBatch::try_new_with_options(schema, arrays, &options).map_err(|err| {
         plan_datafusion_err!("Can not create statistics record batch: {err}")
@@ -5587,12 +5589,14 @@ mod tests {
             "i",
             ContainerStats::new_i32(
                 vec![Some(1), Some(6), Some(3)],  // min
-                vec![Some(4), Some(10), Some(8)],  // max
+                vec![Some(4), Some(10), Some(8)], // max
             ),
         );
 
         let expr = col("i").gt(lit(5i32));
-        let p = PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema)).unwrap();
+        let p =
+            PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema))
+                .unwrap();
 
         let prune_result = p.prune(&statistics).unwrap();
         let resolved = crate::statistics::resolve_all_sync(
@@ -5614,17 +5618,16 @@ mod tests {
         let schema = Schema::new(vec![Field::new("i", DataType::Int32, true)]);
         let statistics = TestStatistics::new().with(
             "i",
-            ContainerStats::new_i32(
-                vec![Some(0), Some(0)],
-                vec![Some(0), Some(0)],
-            )
-            .with_null_counts(vec![Some(10), Some(0)])
-            .with_row_counts(vec![Some(10), Some(10)]),
+            ContainerStats::new_i32(vec![Some(0), Some(0)], vec![Some(0), Some(0)])
+                .with_null_counts(vec![Some(10), Some(0)])
+                .with_row_counts(vec![Some(10), Some(10)]),
         );
 
         // i = 0: first container is all nulls, should be pruned
         let expr = col("i").eq(lit(0i32));
-        let p = PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema)).unwrap();
+        let p =
+            PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema))
+                .unwrap();
 
         let prune_result = p.prune(&statistics).unwrap();
         let resolved = crate::statistics::resolve_all_sync(
@@ -5640,16 +5643,15 @@ mod tests {
     #[test]
     fn test_evaluate_missing_cache_entries() {
         let schema = Schema::new(vec![Field::new("i", DataType::Int32, true)]);
-        let statistics = TestStatistics::new().with(
+        let _statistics = TestStatistics::new().with(
             "i",
-            ContainerStats::new_i32(
-                vec![Some(1), Some(6)],
-                vec![Some(4), Some(10)],
-            ),
+            ContainerStats::new_i32(vec![Some(1), Some(6)], vec![Some(4), Some(10)]),
         );
 
         let expr = col("i").gt(lit(5i32));
-        let p = PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema)).unwrap();
+        let p =
+            PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema))
+                .unwrap();
 
         // Empty resolved stats — everything should be kept (conservative)
         let resolved = crate::statistics::ResolvedStatistics::new_empty(2);
@@ -5662,16 +5664,28 @@ mod tests {
     fn test_all_required_expressions() {
         let schema = Schema::new(vec![Field::new("i", DataType::Int32, true)]);
         let expr = col("i").eq(lit(5i32));
-        let p = PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema)).unwrap();
+        let p =
+            PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema))
+                .unwrap();
 
         let exprs = p.all_required_expressions();
         // i = 5 requires: min(i), max(i), count(*) filter (where i is null),
         // count(*) filter (where i is not null)
-        assert!(exprs.len() >= 2, "Expected at least min and max, got {}", exprs.len());
+        assert!(
+            exprs.len() >= 2,
+            "Expected at least min and max, got {}",
+            exprs.len()
+        );
 
         // Check that we have min and max expressions
         let expr_strings: Vec<String> = exprs.iter().map(|e| e.to_string()).collect();
-        assert!(expr_strings.iter().any(|s| s.contains("min")), "Expected min expr in {:?}", expr_strings);
-        assert!(expr_strings.iter().any(|s| s.contains("max")), "Expected max expr in {:?}", expr_strings);
+        assert!(
+            expr_strings.iter().any(|s| s.contains("min")),
+            "Expected min expr in {expr_strings:?}"
+        );
+        assert!(
+            expr_strings.iter().any(|s| s.contains("max")),
+            "Expected max expr in {expr_strings:?}"
+        );
     }
 }
diff --git a/datafusion/pruning/src/statistics.rs b/datafusion/pruning/src/statistics.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow::array::{new_null_array, ArrayRef};
+use arrow::array::{ArrayRef, new_null_array};
 use datafusion_common::pruning::PruningStatistics;
 use datafusion_expr::Expr;
 use std::collections::{HashMap, HashSet};
@@ -127,7 +127,7 @@ impl<T: PruningStatistics + Send + Sync> StatisticsSource for T {
 /// [`PruningPredicate::evaluate`].
 ///
 /// Keyed by [`Expr`] so that a single cache can serve multiple
-/// [`PruningPredicate`] instances (e.g., after dynamic filter changes
+/// [`PruningPredicate`](crate::PruningPredicate) instances (e.g., after dynamic filter changes
 /// rebuild the predicate but reuse the same resolved stats).
 /// Missing entries are treated as unknown — safe for pruning
 /// (the predicate will conservatively keep the container).
@@ -377,8 +377,8 @@ mod tests {
     #[test]
     fn test_resolve_min() {
         let stats = MockStats::new();
-        let expr = datafusion_functions_aggregate::min_max::min_udaf()
-            .call(vec![col_expr("a")]);
+        let expr =
+            datafusion_functions_aggregate::min_max::min_udaf().call(vec![col_expr("a")]);
         let result = resolve_expression_sync(&stats, &expr);
         assert!(result.is_some());
         let arr = result.unwrap();
@@ -388,8 +388,8 @@ mod tests {
     #[test]
     fn test_resolve_max() {
         let stats = MockStats::new();
-        let expr = datafusion_functions_aggregate::min_max::max_udaf()
-            .call(vec![col_expr("a")]);
+        let expr =
+            datafusion_functions_aggregate::min_max::max_udaf().call(vec![col_expr("a")]);
         let result = resolve_expression_sync(&stats, &expr);
         assert!(result.is_some());
         let arr = result.unwrap();
@@ -400,10 +400,7 @@ mod tests {
     fn test_resolve_count_null() {
         let stats = MockStats::new();
         let expr = datafusion_functions_aggregate::count::count_udaf()
-            .call(vec![Expr::Wildcard {
-                qualifier: None,
-                options: Box::default(),
-            }])
+            .call(vec![Expr::Literal(ScalarValue::Boolean(Some(true)), None)])
             .filter(Expr::IsNull(Box::new(col_expr("a"))))
             .build()
             .unwrap();
@@ -415,10 +412,7 @@ mod tests {
     fn test_resolve_count_not_null() {
         let stats = MockStats::new();
         let expr = datafusion_functions_aggregate::count::count_udaf()
-            .call(vec![Expr::Wildcard {
-                qualifier: None,
-                options: Box::default(),
-            }])
+            .call(vec![Expr::Literal(ScalarValue::Boolean(Some(true)), None)])
             .filter(Expr::IsNotNull(Box::new(col_expr("a"))))
             .build()
             .unwrap();
@@ -449,8 +443,8 @@ mod tests {
 
     #[test]
     fn test_resolve_in_list() {
-        let stats =
-            MockStats::new().with_contained(BooleanArray::from(vec![Some(true), Some(false)]));
+        let stats = MockStats::new()
+            .with_contained(BooleanArray::from(vec![Some(true), Some(false)]));
         let expr = Expr::InList(datafusion_expr::expr::InList::new(
             Box::new(col_expr("a")),
             vec![
@@ -463,14 +457,14 @@ mod tests {
         assert!(result.is_some());
         let arr = result.unwrap();
         let bool_arr = arr.as_any().downcast_ref::<BooleanArray>().unwrap();
-        assert_eq!(bool_arr.value(0), true);
-        assert_eq!(bool_arr.value(1), false);
+        assert!(bool_arr.value(0));
+        assert!(!bool_arr.value(1));
     }
 
     #[test]
     fn test_resolve_not_in_list() {
-        let stats =
-            MockStats::new().with_contained(BooleanArray::from(vec![Some(true), Some(false)]));
+        let stats = MockStats::new()
+            .with_contained(BooleanArray::from(vec![Some(true), Some(false)]));
         let expr = Expr::InList(datafusion_expr::expr::InList::new(
             Box::new(col_expr("a")),
             vec![Expr::Literal(ScalarValue::Int64(Some(1)), None)],
@@ -481,8 +475,8 @@ mod tests {
         let arr = result.unwrap();
         let bool_arr = arr.as_any().downcast_ref::<BooleanArray>().unwrap();
         // Inverted: true→false, false→true
-        assert_eq!(bool_arr.value(0), false);
-        assert_eq!(bool_arr.value(1), true);
+        assert!(!bool_arr.value(0));
+        assert!(bool_arr.value(1));
     }
 
     #[test]
@@ -506,7 +500,7 @@ mod tests {
         let stats = MockStats::new();
         let min_expr =
             datafusion_functions_aggregate::min_max::min_udaf().call(vec![col_expr("a")]);
-        let resolved = resolve_all_sync(&stats, &[min_expr.clone()]);
+        let resolved = resolve_all_sync(&stats, std::slice::from_ref(&min_expr));
 
         // Existing entry
         let arr = resolved.get_or_null(&min_expr, &DataType::Int64);