Skip to content

Commit 314acd5

Browse files
adriangbclaude
andcommitted
fix: address CI lint warnings (clippy, rustdoc, deprecated Wildcard)
- Fix broken intra-doc links for Expr, ResolvedStatistics, PruningPredicate - Replace deprecated Expr::Wildcard with Expr::Literal in count expressions - Fix clippy: collapsible if, bool_assert_comparison, uninlined_format_args, cloned_ref_to_slice_refs - Fix unused variable warning in test Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 91fe94e commit 314acd5

2 files changed

Lines changed: 59 additions & 51 deletions

File tree

datafusion/pruning/src/pruning_predicate.rs

Lines changed: 42 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -578,8 +578,10 @@ impl PruningPredicate {
578578
}
579579

580580
// Phase 2: Min/max/null_count/row_count predicate
581-
let statistics_batch =
582-
build_statistics_record_batch_from_resolved(resolved, &self.required_columns)?;
581+
let statistics_batch = build_statistics_record_batch_from_resolved(
582+
resolved,
583+
&self.required_columns,
584+
)?;
583585
builder.combine_value(self.predicate_expr.evaluate(&statistics_batch)?);
584586

585587
Ok(builder.build())
@@ -1989,7 +1991,7 @@ fn wrap_null_count_check_expr(
19891991
)))
19901992
}
19911993

1992-
/// Convert a [`StatisticsType`] + column into the corresponding logical [`Expr`].
1994+
/// Convert a [`StatisticsType`] + column into the corresponding logical expression.
19931995
fn stat_type_to_expr(
19941996
column: &phys_expr::Column,
19951997
stat_type: StatisticsType,
@@ -1998,12 +2000,10 @@ fn stat_type_to_expr(
19982000
let col_expr = LExpr::Column(Column::new_unqualified(column.name()));
19992001
match stat_type {
20002002
StatisticsType::Min => {
2001-
datafusion_functions_aggregate::min_max::min_udaf()
2002-
.call(vec![col_expr])
2003+
datafusion_functions_aggregate::min_max::min_udaf().call(vec![col_expr])
20032004
}
20042005
StatisticsType::Max => {
2005-
datafusion_functions_aggregate::min_max::max_udaf()
2006-
.call(vec![col_expr])
2006+
datafusion_functions_aggregate::min_max::max_udaf().call(vec![col_expr])
20072007
}
20082008
StatisticsType::NullCount => {
20092009
let count_expr = datafusion_functions_aggregate::count::count_udaf()
@@ -2040,7 +2040,7 @@ fn literal_guarantee_to_in_list(
20402040
))
20412041
}
20422042

2043-
/// Build a statistics [`RecordBatch`] from a [`ResolvedStatistics`] cache,
2043+
/// Build a statistics [`RecordBatch`] from a [`crate::ResolvedStatistics`] cache,
20442044
/// looking up each required column's expression and falling back to null
20452045
/// arrays for missing entries.
20462046
fn build_statistics_record_batch_from_resolved(
@@ -2072,7 +2072,9 @@ fn build_statistics_record_batch_from_resolved(
20722072
let mut options = RecordBatchOptions::default();
20732073
options.row_count = Some(num_containers);
20742074

2075-
trace!("Creating statistics batch from resolved for {required_columns:#?} with {arrays:#?}");
2075+
trace!(
2076+
"Creating statistics batch from resolved for {required_columns:#?} with {arrays:#?}"
2077+
);
20762078

20772079
RecordBatch::try_new_with_options(schema, arrays, &options).map_err(|err| {
20782080
plan_datafusion_err!("Can not create statistics record batch: {err}")
@@ -5589,12 +5591,14 @@ mod tests {
55895591
"i",
55905592
ContainerStats::new_i32(
55915593
vec![Some(1), Some(6), Some(3)], // min
5592-
vec![Some(4), Some(10), Some(8)], // max
5594+
vec![Some(4), Some(10), Some(8)], // max
55935595
),
55945596
);
55955597

55965598
let expr = col("i").gt(lit(5i32));
5597-
let p = PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema)).unwrap();
5599+
let p =
5600+
PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema))
5601+
.unwrap();
55985602

55995603
let prune_result = p.prune(&statistics).unwrap();
56005604
let resolved = crate::statistics::resolve_all_sync(
@@ -5616,17 +5620,16 @@ mod tests {
56165620
let schema = Schema::new(vec![Field::new("i", DataType::Int32, true)]);
56175621
let statistics = TestStatistics::new().with(
56185622
"i",
5619-
ContainerStats::new_i32(
5620-
vec![Some(0), Some(0)],
5621-
vec![Some(0), Some(0)],
5622-
)
5623-
.with_null_counts(vec![Some(10), Some(0)])
5624-
.with_row_counts(vec![Some(10), Some(10)]),
5623+
ContainerStats::new_i32(vec![Some(0), Some(0)], vec![Some(0), Some(0)])
5624+
.with_null_counts(vec![Some(10), Some(0)])
5625+
.with_row_counts(vec![Some(10), Some(10)]),
56255626
);
56265627

56275628
// i = 0: first container is all nulls, should be pruned
56285629
let expr = col("i").eq(lit(0i32));
5629-
let p = PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema)).unwrap();
5630+
let p =
5631+
PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema))
5632+
.unwrap();
56305633

56315634
let prune_result = p.prune(&statistics).unwrap();
56325635
let resolved = crate::statistics::resolve_all_sync(
@@ -5642,16 +5645,15 @@ mod tests {
56425645
#[test]
56435646
fn test_evaluate_missing_cache_entries() {
56445647
let schema = Schema::new(vec![Field::new("i", DataType::Int32, true)]);
5645-
let statistics = TestStatistics::new().with(
5648+
let _statistics = TestStatistics::new().with(
56465649
"i",
5647-
ContainerStats::new_i32(
5648-
vec![Some(1), Some(6)],
5649-
vec![Some(4), Some(10)],
5650-
),
5650+
ContainerStats::new_i32(vec![Some(1), Some(6)], vec![Some(4), Some(10)]),
56515651
);
56525652

56535653
let expr = col("i").gt(lit(5i32));
5654-
let p = PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema)).unwrap();
5654+
let p =
5655+
PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema))
5656+
.unwrap();
56555657

56565658
// Empty resolved stats — everything should be kept (conservative)
56575659
let resolved = crate::statistics::ResolvedStatistics::new_empty(2);
@@ -5664,16 +5666,28 @@ mod tests {
56645666
fn test_all_required_expressions() {
56655667
let schema = Schema::new(vec![Field::new("i", DataType::Int32, true)]);
56665668
let expr = col("i").eq(lit(5i32));
5667-
let p = PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema)).unwrap();
5669+
let p =
5670+
PruningPredicate::try_new(logical2physical(&expr, &schema), Arc::new(schema))
5671+
.unwrap();
56685672

56695673
let exprs = p.all_required_expressions();
56705674
// i = 5 requires: min(i), max(i), count(*) filter (where i is null),
56715675
// count(*) filter (where i is not null)
5672-
assert!(exprs.len() >= 2, "Expected at least min and max, got {}", exprs.len());
5676+
assert!(
5677+
exprs.len() >= 2,
5678+
"Expected at least min and max, got {}",
5679+
exprs.len()
5680+
);
56735681

56745682
// Check that we have min and max expressions
56755683
let expr_strings: Vec<String> = exprs.iter().map(|e| e.to_string()).collect();
5676-
assert!(expr_strings.iter().any(|s| s.contains("min")), "Expected min expr in {:?}", expr_strings);
5677-
assert!(expr_strings.iter().any(|s| s.contains("max")), "Expected max expr in {:?}", expr_strings);
5684+
assert!(
5685+
expr_strings.iter().any(|s| s.contains("min")),
5686+
"Expected min expr in {expr_strings:?}"
5687+
);
5688+
assert!(
5689+
expr_strings.iter().any(|s| s.contains("max")),
5690+
"Expected max expr in {expr_strings:?}"
5691+
);
56785692
}
56795693
}

datafusion/pruning/src/statistics.rs

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::{new_null_array, ArrayRef};
18+
use arrow::array::{ArrayRef, new_null_array};
1919
use datafusion_common::pruning::PruningStatistics;
2020
use datafusion_expr::Expr;
2121
use std::collections::{HashMap, HashSet};
@@ -127,7 +127,7 @@ impl<T: PruningStatistics + Send + Sync> StatisticsSource for T {
127127
/// [`PruningPredicate::evaluate`].
128128
///
129129
/// Keyed by [`Expr`] so that a single cache can serve multiple
130-
/// [`PruningPredicate`] instances (e.g., after dynamic filter changes
130+
/// [`PruningPredicate`](crate::PruningPredicate) instances (e.g., after dynamic filter changes
131131
/// rebuild the predicate but reuse the same resolved stats).
132132
/// Missing entries are treated as unknown — safe for pruning
133133
/// (the predicate will conservatively keep the container).
@@ -377,8 +377,8 @@ mod tests {
377377
#[test]
378378
fn test_resolve_min() {
379379
let stats = MockStats::new();
380-
let expr = datafusion_functions_aggregate::min_max::min_udaf()
381-
.call(vec![col_expr("a")]);
380+
let expr =
381+
datafusion_functions_aggregate::min_max::min_udaf().call(vec![col_expr("a")]);
382382
let result = resolve_expression_sync(&stats, &expr);
383383
assert!(result.is_some());
384384
let arr = result.unwrap();
@@ -388,8 +388,8 @@ mod tests {
388388
#[test]
389389
fn test_resolve_max() {
390390
let stats = MockStats::new();
391-
let expr = datafusion_functions_aggregate::min_max::max_udaf()
392-
.call(vec![col_expr("a")]);
391+
let expr =
392+
datafusion_functions_aggregate::min_max::max_udaf().call(vec![col_expr("a")]);
393393
let result = resolve_expression_sync(&stats, &expr);
394394
assert!(result.is_some());
395395
let arr = result.unwrap();
@@ -400,10 +400,7 @@ mod tests {
400400
fn test_resolve_count_null() {
401401
let stats = MockStats::new();
402402
let expr = datafusion_functions_aggregate::count::count_udaf()
403-
.call(vec![Expr::Wildcard {
404-
qualifier: None,
405-
options: Box::default(),
406-
}])
403+
.call(vec![Expr::Literal(ScalarValue::Boolean(Some(true)), None)])
407404
.filter(Expr::IsNull(Box::new(col_expr("a"))))
408405
.build()
409406
.unwrap();
@@ -415,10 +412,7 @@ mod tests {
415412
fn test_resolve_count_not_null() {
416413
let stats = MockStats::new();
417414
let expr = datafusion_functions_aggregate::count::count_udaf()
418-
.call(vec![Expr::Wildcard {
419-
qualifier: None,
420-
options: Box::default(),
421-
}])
415+
.call(vec![Expr::Literal(ScalarValue::Boolean(Some(true)), None)])
422416
.filter(Expr::IsNotNull(Box::new(col_expr("a"))))
423417
.build()
424418
.unwrap();
@@ -449,8 +443,8 @@ mod tests {
449443

450444
#[test]
451445
fn test_resolve_in_list() {
452-
let stats =
453-
MockStats::new().with_contained(BooleanArray::from(vec![Some(true), Some(false)]));
446+
let stats = MockStats::new()
447+
.with_contained(BooleanArray::from(vec![Some(true), Some(false)]));
454448
let expr = Expr::InList(datafusion_expr::expr::InList::new(
455449
Box::new(col_expr("a")),
456450
vec![
@@ -463,14 +457,14 @@ mod tests {
463457
assert!(result.is_some());
464458
let arr = result.unwrap();
465459
let bool_arr = arr.as_any().downcast_ref::<BooleanArray>().unwrap();
466-
assert_eq!(bool_arr.value(0), true);
467-
assert_eq!(bool_arr.value(1), false);
460+
assert!(bool_arr.value(0));
461+
assert!(!bool_arr.value(1));
468462
}
469463

470464
#[test]
471465
fn test_resolve_not_in_list() {
472-
let stats =
473-
MockStats::new().with_contained(BooleanArray::from(vec![Some(true), Some(false)]));
466+
let stats = MockStats::new()
467+
.with_contained(BooleanArray::from(vec![Some(true), Some(false)]));
474468
let expr = Expr::InList(datafusion_expr::expr::InList::new(
475469
Box::new(col_expr("a")),
476470
vec![Expr::Literal(ScalarValue::Int64(Some(1)), None)],
@@ -481,8 +475,8 @@ mod tests {
481475
let arr = result.unwrap();
482476
let bool_arr = arr.as_any().downcast_ref::<BooleanArray>().unwrap();
483477
// Inverted: true→false, false→true
484-
assert_eq!(bool_arr.value(0), false);
485-
assert_eq!(bool_arr.value(1), true);
478+
assert!(!bool_arr.value(0));
479+
assert!(bool_arr.value(1));
486480
}
487481

488482
#[test]
@@ -506,7 +500,7 @@ mod tests {
506500
let stats = MockStats::new();
507501
let min_expr =
508502
datafusion_functions_aggregate::min_max::min_udaf().call(vec![col_expr("a")]);
509-
let resolved = resolve_all_sync(&stats, &[min_expr.clone()]);
503+
let resolved = resolve_all_sync(&stats, std::slice::from_ref(&min_expr));
510504

511505
// Existing entry
512506
let arr = resolved.get_or_null(&min_expr, &DataType::Int64);

0 commit comments

Comments
 (0)