Skip to content

Commit 2c272f2

Browse files
adriangbclaude
andcommitted
fix: address CI lint warnings (clippy, rustdoc, deprecated Wildcard)
- Fix broken intra-doc links for Expr, ResolvedStatistics, PruningPredicate - Replace deprecated Expr::Wildcard with Expr::Literal in count expressions - Fix clippy: collapsible if, bool_assert_comparison, uninlined_format_args, cloned_ref_to_slice_refs - Fix unused variable warning in test Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 78b9eac commit 2c272f2

2 files changed

Lines changed: 59 additions & 51 deletions

File tree

datafusion/pruning/src/pruning_predicate.rs

Lines changed: 42 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -578,8 +578,10 @@ impl PruningPredicate {
578578
}
579579

580580
// Phase 2: Min/max/null_count/row_count predicate
581-
let statistics_batch =
582-
build_statistics_record_batch_from_resolved(resolved, &self.required_columns)?;
581+
let statistics_batch = build_statistics_record_batch_from_resolved(
582+
resolved,
583+
&self.required_columns,
584+
)?;
583585
builder.combine_value(self.predicate_expr.evaluate(&statistics_batch)?);
584586

585587
Ok(builder.build())
@@ -1989,7 +1991,7 @@ fn wrap_null_count_check_expr(
19891991
)))
19901992
}
19911993

1992-
/// Convert a [`StatisticsType`] + column into the corresponding logical [`Expr`].
1994+
/// Convert a [`StatisticsType`] + column into the corresponding logical expression.
19931995
fn stat_type_to_expr(
19941996
column: &phys_expr::Column,
19951997
stat_type: StatisticsType,
@@ -1998,12 +2000,10 @@ fn stat_type_to_expr(
19982000
let col_expr = LExpr::Column(Column::new_unqualified(column.name()));
19992001
match stat_type {
20002002
StatisticsType::Min => {
2001-
datafusion_functions_aggregate::min_max::min_udaf()
2002-
.call(vec![col_expr])
2003+
datafusion_functions_aggregate::min_max::min_udaf().call(vec![col_expr])
20032004
}
20042005
StatisticsType::Max => {
2005-
datafusion_functions_aggregate::min_max::max_udaf()
2006-
.call(vec![col_expr])
2006+
datafusion_functions_aggregate::min_max::max_udaf().call(vec![col_expr])
20072007
}
20082008
StatisticsType::NullCount => {
20092009
let count_expr = datafusion_functions_aggregate::count::count_udaf()
@@ -2040,7 +2040,7 @@ fn literal_guarantee_to_in_list(
20402040
))
20412041
}
20422042

2043-
/// Build a statistics [`RecordBatch`] from a [`ResolvedStatistics`] cache,
2043+
/// Build a statistics [`RecordBatch`] from a [`crate::ResolvedStatistics`] cache,
20442044
/// looking up each required column's expression and falling back to null
20452045
/// arrays for missing entries.
20462046
fn build_statistics_record_batch_from_resolved(
@@ -2072,7 +2072,9 @@ fn build_statistics_record_batch_from_resolved(
20722072
let mut options = RecordBatchOptions::default();
20732073
options.row_count = Some(num_containers);
20742074

2075-
trace!("Creating statistics batch from resolved for {required_columns:#?} with {arrays:#?}");
2075+
trace!(
2076+
"Creating statistics batch from resolved for {required_columns:#?} with {arrays:#?}"
2077+
);
20762078

20772079
RecordBatch::try_new_with_options(schema, arrays, &options).map_err(|err| {
20782080
plan_datafusion_err!("Can not create statistics record batch: {err}")
@@ -5587,12 +5589,14 @@ mod tests {
55875589
"i",
55885590
ContainerStats::new_i32(
55895591
vec![Some(1), Some(6), Some(3)], // min
5590-
vec![Some(4), Some(10), Some(8)], // max
5592+
vec![Some(4), Some(10), Some(8)], // max
55915593
),
55925594
);
55935595

55945596
let expr = col("i").gt(lit(5i32));
5595-
let p = PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema)).unwrap();
5597+
let p =
5598+
PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema))
5599+
.unwrap();
55965600

55975601
let prune_result = p.prune(&statistics).unwrap();
55985602
let resolved = crate::statistics::resolve_all_sync(
@@ -5614,17 +5618,16 @@ mod tests {
56145618
let schema = Schema::new(vec![Field::new("i", DataType::Int32, true)]);
56155619
let statistics = TestStatistics::new().with(
56165620
"i",
5617-
ContainerStats::new_i32(
5618-
vec![Some(0), Some(0)],
5619-
vec![Some(0), Some(0)],
5620-
)
5621-
.with_null_counts(vec![Some(10), Some(0)])
5622-
.with_row_counts(vec![Some(10), Some(10)]),
5621+
ContainerStats::new_i32(vec![Some(0), Some(0)], vec![Some(0), Some(0)])
5622+
.with_null_counts(vec![Some(10), Some(0)])
5623+
.with_row_counts(vec![Some(10), Some(10)]),
56235624
);
56245625

56255626
// i = 0: first container is all nulls, should be pruned
56265627
let expr = col("i").eq(lit(0i32));
5627-
let p = PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema)).unwrap();
5628+
let p =
5629+
PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema))
5630+
.unwrap();
56285631

56295632
let prune_result = p.prune(&statistics).unwrap();
56305633
let resolved = crate::statistics::resolve_all_sync(
@@ -5640,16 +5643,15 @@ mod tests {
56405643
#[test]
56415644
fn test_evaluate_missing_cache_entries() {
56425645
let schema = Schema::new(vec![Field::new("i", DataType::Int32, true)]);
5643-
let statistics = TestStatistics::new().with(
5646+
let _statistics = TestStatistics::new().with(
56445647
"i",
5645-
ContainerStats::new_i32(
5646-
vec![Some(1), Some(6)],
5647-
vec![Some(4), Some(10)],
5648-
),
5648+
ContainerStats::new_i32(vec![Some(1), Some(6)], vec![Some(4), Some(10)]),
56495649
);
56505650

56515651
let expr = col("i").gt(lit(5i32));
5652-
let p = PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema)).unwrap();
5652+
let p =
5653+
PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema))
5654+
.unwrap();
56535655

56545656
// Empty resolved stats — everything should be kept (conservative)
56555657
let resolved = crate::statistics::ResolvedStatistics::new_empty(2);
@@ -5662,16 +5664,28 @@ mod tests {
56625664
fn test_all_required_expressions() {
56635665
let schema = Schema::new(vec![Field::new("i", DataType::Int32, true)]);
56645666
let expr = col("i").eq(lit(5i32));
5665-
let p = PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema)).unwrap();
5667+
let p =
5668+
PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema))
5669+
.unwrap();
56665670

56675671
let exprs = p.all_required_expressions();
56685672
// i = 5 requires: min(i), max(i), count(*) filter (where i is null),
56695673
// count(*) filter (where i is not null)
5670-
assert!(exprs.len() >= 2, "Expected at least min and max, got {}", exprs.len());
5674+
assert!(
5675+
exprs.len() >= 2,
5676+
"Expected at least min and max, got {}",
5677+
exprs.len()
5678+
);
56715679

56725680
// Check that we have min and max expressions
56735681
let expr_strings: Vec<String> = exprs.iter().map(|e| e.to_string()).collect();
5674-
assert!(expr_strings.iter().any(|s| s.contains("min")), "Expected min expr in {:?}", expr_strings);
5675-
assert!(expr_strings.iter().any(|s| s.contains("max")), "Expected max expr in {:?}", expr_strings);
5682+
assert!(
5683+
expr_strings.iter().any(|s| s.contains("min")),
5684+
"Expected min expr in {expr_strings:?}"
5685+
);
5686+
assert!(
5687+
expr_strings.iter().any(|s| s.contains("max")),
5688+
"Expected max expr in {expr_strings:?}"
5689+
);
56765690
}
56775691
}

datafusion/pruning/src/statistics.rs

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::{new_null_array, ArrayRef};
18+
use arrow::array::{ArrayRef, new_null_array};
1919
use datafusion_common::pruning::PruningStatistics;
2020
use datafusion_expr::Expr;
2121
use std::collections::{HashMap, HashSet};
@@ -127,7 +127,7 @@ impl<T: PruningStatistics + Send + Sync> StatisticsSource for T {
127127
/// [`PruningPredicate::evaluate`].
128128
///
129129
/// Keyed by [`Expr`] so that a single cache can serve multiple
130-
/// [`PruningPredicate`] instances (e.g., after dynamic filter changes
130+
/// [`PruningPredicate`](crate::PruningPredicate) instances (e.g., after dynamic filter changes
131131
/// rebuild the predicate but reuse the same resolved stats).
132132
/// Missing entries are treated as unknown — safe for pruning
133133
/// (the predicate will conservatively keep the container).
@@ -377,8 +377,8 @@ mod tests {
377377
#[test]
378378
fn test_resolve_min() {
379379
let stats = MockStats::new();
380-
let expr = datafusion_functions_aggregate::min_max::min_udaf()
381-
.call(vec![col_expr("a")]);
380+
let expr =
381+
datafusion_functions_aggregate::min_max::min_udaf().call(vec![col_expr("a")]);
382382
let result = resolve_expression_sync(&stats, &expr);
383383
assert!(result.is_some());
384384
let arr = result.unwrap();
@@ -388,8 +388,8 @@ mod tests {
388388
#[test]
389389
fn test_resolve_max() {
390390
let stats = MockStats::new();
391-
let expr = datafusion_functions_aggregate::min_max::max_udaf()
392-
.call(vec![col_expr("a")]);
391+
let expr =
392+
datafusion_functions_aggregate::min_max::max_udaf().call(vec![col_expr("a")]);
393393
let result = resolve_expression_sync(&stats, &expr);
394394
assert!(result.is_some());
395395
let arr = result.unwrap();
@@ -400,10 +400,7 @@ mod tests {
400400
fn test_resolve_count_null() {
401401
let stats = MockStats::new();
402402
let expr = datafusion_functions_aggregate::count::count_udaf()
403-
.call(vec![Expr::Wildcard {
404-
qualifier: None,
405-
options: Box::default(),
406-
}])
403+
.call(vec![Expr::Literal(ScalarValue::Boolean(Some(true)), None)])
407404
.filter(Expr::IsNull(Box::new(col_expr("a"))))
408405
.build()
409406
.unwrap();
@@ -415,10 +412,7 @@ mod tests {
415412
fn test_resolve_count_not_null() {
416413
let stats = MockStats::new();
417414
let expr = datafusion_functions_aggregate::count::count_udaf()
418-
.call(vec![Expr::Wildcard {
419-
qualifier: None,
420-
options: Box::default(),
421-
}])
415+
.call(vec![Expr::Literal(ScalarValue::Boolean(Some(true)), None)])
422416
.filter(Expr::IsNotNull(Box::new(col_expr("a"))))
423417
.build()
424418
.unwrap();
@@ -449,8 +443,8 @@ mod tests {
449443

450444
#[test]
451445
fn test_resolve_in_list() {
452-
let stats =
453-
MockStats::new().with_contained(BooleanArray::from(vec![Some(true), Some(false)]));
446+
let stats = MockStats::new()
447+
.with_contained(BooleanArray::from(vec![Some(true), Some(false)]));
454448
let expr = Expr::InList(datafusion_expr::expr::InList::new(
455449
Box::new(col_expr("a")),
456450
vec![
@@ -463,14 +457,14 @@ mod tests {
463457
assert!(result.is_some());
464458
let arr = result.unwrap();
465459
let bool_arr = arr.as_any().downcast_ref::<BooleanArray>().unwrap();
466-
assert_eq!(bool_arr.value(0), true);
467-
assert_eq!(bool_arr.value(1), false);
460+
assert!(bool_arr.value(0));
461+
assert!(!bool_arr.value(1));
468462
}
469463

470464
#[test]
471465
fn test_resolve_not_in_list() {
472-
let stats =
473-
MockStats::new().with_contained(BooleanArray::from(vec![Some(true), Some(false)]));
466+
let stats = MockStats::new()
467+
.with_contained(BooleanArray::from(vec![Some(true), Some(false)]));
474468
let expr = Expr::InList(datafusion_expr::expr::InList::new(
475469
Box::new(col_expr("a")),
476470
vec![Expr::Literal(ScalarValue::Int64(Some(1)), None)],
@@ -481,8 +475,8 @@ mod tests {
481475
let arr = result.unwrap();
482476
let bool_arr = arr.as_any().downcast_ref::<BooleanArray>().unwrap();
483477
// Inverted: true→false, false→true
484-
assert_eq!(bool_arr.value(0), false);
485-
assert_eq!(bool_arr.value(1), true);
478+
assert!(!bool_arr.value(0));
479+
assert!(bool_arr.value(1));
486480
}
487481

488482
#[test]
@@ -506,7 +500,7 @@ mod tests {
506500
let stats = MockStats::new();
507501
let min_expr =
508502
datafusion_functions_aggregate::min_max::min_udaf().call(vec![col_expr("a")]);
509-
let resolved = resolve_all_sync(&stats, &[min_expr.clone()]);
503+
let resolved = resolve_all_sync(&stats, std::slice::from_ref(&min_expr));
510504

511505
// Existing entry
512506
let arr = resolved.get_or_null(&min_expr, &DataType::Int64);

0 commit comments

Comments
 (0)