1818//! Basic min/max functionality shared across DataFusion aggregate functions
1919
2020use arrow:: array:: {
21- ArrayRef , AsArray as _ , BinaryArray , BinaryViewArray , BooleanArray , Date32Array ,
22- Date64Array , Decimal32Array , Decimal64Array , Decimal128Array , Decimal256Array ,
21+ ArrayRef , BinaryArray , BinaryViewArray , BooleanArray , Date32Array , Date64Array ,
22+ Decimal32Array , Decimal64Array , Decimal128Array , Decimal256Array ,
2323 DurationMicrosecondArray , DurationMillisecondArray , DurationNanosecondArray ,
2424 DurationSecondArray , FixedSizeBinaryArray , Float16Array , Float32Array , Float64Array ,
2525 Int8Array , Int16Array , Int32Array , Int64Array , IntervalDayTimeArray ,
@@ -457,13 +457,23 @@ macro_rules! min_max {
457457
458458fn dictionary_batch_extreme (
459459 values : & ArrayRef ,
460- extreme_fn : fn ( & ArrayRef ) -> Result < ScalarValue > ,
460+ ordering : Ordering ,
461461) -> Result < ScalarValue > {
462- let DataType :: Dictionary ( key_type, _) = values. data_type ( ) else {
463- unreachable ! ( "dictionary_batch_extreme requires dictionary arrays" )
464- } ;
465- let inner = extreme_fn ( values. as_any_dictionary ( ) . values ( ) ) ?;
466- Ok ( wrap_dictionary_scalar ( key_type. as_ref ( ) , inner) )
462+ let mut extreme: Option < ScalarValue > = None ;
463+
464+ for i in 0 ..values. len ( ) {
465+ let current = ScalarValue :: try_from_array ( values, i) ?;
466+ if current. is_null ( ) {
467+ continue ;
468+ }
469+
470+ match & extreme {
471+ Some ( existing) if existing. try_cmp ( & current) ? != ordering => { }
472+ _ => extreme = Some ( current) ,
473+ }
474+ }
475+
476+ extreme. map_or_else ( || ScalarValue :: try_from ( values. data_type ( ) ) , Ok )
467477}
468478
469479fn wrap_dictionary_scalar ( key_type : & DataType , value : ScalarValue ) -> ScalarValue {
@@ -813,7 +823,9 @@ pub fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
813823 DataType :: FixedSizeList ( _, _) => {
814824 min_max_batch_generic ( values, Ordering :: Greater ) ?
815825 }
816- DataType :: Dictionary ( _, _) => dictionary_batch_extreme ( values, min_batch) ?,
826+ DataType :: Dictionary ( _, _) => {
827+ dictionary_batch_extreme ( values, Ordering :: Greater ) ?
828+ }
817829 _ => min_max_batch ! ( values, min) ,
818830 } )
819831}
@@ -828,7 +840,10 @@ fn min_max_batch_generic(array: &ArrayRef, ordering: Ordering) -> Result<ScalarV
828840 let mut extreme = ScalarValue :: try_from_array ( array, first_idx) ?;
829841 for i in non_null_indices {
830842 let current = ScalarValue :: try_from_array ( array, i) ?;
831- if extreme. try_cmp ( & current) ? == ordering {
843+ if current. is_null ( ) {
844+ continue ;
845+ }
846+ if extreme. is_null ( ) || extreme. try_cmp ( & current) ? == ordering {
832847 extreme = current;
833848 }
834849 }
@@ -885,7 +900,7 @@ pub fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
885900 DataType :: List ( _) => min_max_batch_generic ( values, Ordering :: Less ) ?,
886901 DataType :: LargeList ( _) => min_max_batch_generic ( values, Ordering :: Less ) ?,
887902 DataType :: FixedSizeList ( _, _) => min_max_batch_generic ( values, Ordering :: Less ) ?,
888- DataType :: Dictionary ( _, _) => dictionary_batch_extreme ( values, max_batch ) ?,
903+ DataType :: Dictionary ( _, _) => dictionary_batch_extreme ( values, Ordering :: Less ) ?,
889904 _ => min_max_batch ! ( values, max) ,
890905 } )
891906}
0 commit comments