Skip to content

Commit a34ddf1

Browse files
committed
docs: update helper documentation in min_max.rs for dictionary routing responsibility
- Clarified caller responsibility for dictionary routing in the helper docs. - Maintained caller-side correctness routing in min_max.rs at lines 820 and 906. - Added regression coverage with a new test in min_max.rs at line 996 to ensure min/max are computed from logical rows instead of raw dictionary values, including unreferenced entries and null-key positions.
1 parent 0b8592d commit a34ddf1

1 file changed

Lines changed: 25 additions & 3 deletions

File tree

  • datafusion/functions-aggregate-common/src

datafusion/functions-aggregate-common/src/min_max.rs

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -824,9 +824,9 @@ pub fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
824824

825825
/// Finds the min/max by scanning logical rows via `ScalarValue::try_from_array`.
826826
///
827-
/// This path is required for dictionary arrays because comparing
828-
/// `dictionary.values()` is not semantically correct: it can include
829-
/// unreferenced values and ignore null key positions.
827+
/// Callers are responsible for routing dictionary arrays to this helper.
828+
/// Passing `dictionary.values()` is semantically incorrect because it can
829+
/// include unreferenced dictionary entries and ignore null key positions.
830830
fn min_max_batch_generic(values: &ArrayRef, ordering: Ordering) -> Result<ScalarValue> {
831831
let mut index = 0;
832832
let mut extreme = loop {
@@ -911,6 +911,8 @@ pub fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
911911
#[cfg(test)]
912912
mod tests {
913913
use super::*;
914+
use arrow::array::DictionaryArray;
915+
use std::sync::Arc;
914916

915917
#[test]
916918
fn min_max_dictionary_and_scalar_compare_by_inner_value() -> Result<()> {
@@ -989,4 +991,24 @@ mod tests {
989991
);
990992
Ok(())
991993
}
994+
995+
#[test]
996+
fn min_max_batch_dictionary_uses_logical_rows() -> Result<()> {
997+
let keys = Int8Array::from(vec![Some(1), None, Some(1), Some(1)]);
998+
let values = Arc::new(StringArray::from(vec!["zzz", "bbb", "aaa"]));
999+
let array = Arc::new(DictionaryArray::new(keys, values)) as ArrayRef;
1000+
1001+
let min = min_batch(&array)?;
1002+
let max = max_batch(&array)?;
1003+
1004+
let expected = ScalarValue::Dictionary(
1005+
Box::new(DataType::Int8),
1006+
Box::new(ScalarValue::Utf8(Some("bbb".to_string()))),
1007+
);
1008+
1009+
assert_eq!(min, expected);
1010+
assert_eq!(max, expected);
1011+
1012+
Ok(())
1013+
}
9921014
}

0 commit comments

Comments
 (0)