1818use std:: any:: Any ;
1919use std:: sync:: Arc ;
2020
21- use crate :: function:: error_utils:: {
22- invalid_arg_count_exec_err, unsupported_data_type_exec_err,
23- } ;
2421use arrow:: array:: { Array , StringArray } ;
2522use arrow:: datatypes:: DataType ;
2623use arrow:: {
2724 array:: { as_dictionary_array, as_largestring_array, as_string_array} ,
2825 datatypes:: Int32Type ,
2926} ;
27+ use datafusion_common:: cast:: as_large_binary_array;
3028use datafusion_common:: cast:: as_string_view_array;
29+ use datafusion_common:: types:: { logical_int64, logical_string, NativeType } ;
3130use datafusion_common:: utils:: take_function_args;
3231use datafusion_common:: {
3332 cast:: { as_binary_array, as_fixed_size_binary_array, as_int64_array} ,
3433 exec_err, DataFusionError ,
3534} ;
36- use datafusion_expr:: Signature ;
37- use datafusion_expr:: { ColumnarValue , ScalarFunctionArgs , ScalarUDFImpl , Volatility } ;
35+ use datafusion_expr:: {
36+ Coercion , ColumnarValue , ScalarFunctionArgs , ScalarUDFImpl , Signature , TypeSignature ,
37+ TypeSignatureClass , Volatility ,
38+ } ;
3839use std:: fmt:: Write ;
3940
4041/// <https://spark.apache.org/docs/latest/api/sql/index.html#hex>
@@ -52,8 +53,27 @@ impl Default for SparkHex {
5253
5354impl SparkHex {
5455 pub fn new ( ) -> Self {
56+ let int64 = Coercion :: new_implicit (
57+ TypeSignatureClass :: Native ( logical_int64 ( ) ) ,
58+ vec ! [ TypeSignatureClass :: Numeric ] ,
59+ NativeType :: Int64 ,
60+ ) ;
61+
62+ let string = Coercion :: new_exact ( TypeSignatureClass :: Native ( logical_string ( ) ) ) ;
63+
64+ let binary = Coercion :: new_exact ( TypeSignatureClass :: Binary ) ;
65+
66+ let variants = vec ! [
67+ // accepts numeric types
68+ TypeSignature :: Coercible ( vec![ int64] ) ,
69+ // accepts string types (Utf8, Utf8View, LargeUtf8)
70+ TypeSignature :: Coercible ( vec![ string] ) ,
71+ // accepts binary types (Binary, FixedSizeBinary, LargeBinary)
72+ TypeSignature :: Coercible ( vec![ binary] ) ,
73+ ] ;
74+
5575 Self {
56- signature : Signature :: user_defined ( Volatility :: Immutable ) ,
76+ signature : Signature :: one_of ( variants , Volatility :: Immutable ) ,
5777 aliases : vec ! [ ] ,
5878 }
5979 }
@@ -89,56 +109,6 @@ impl ScalarUDFImpl for SparkHex {
89109 fn aliases ( & self ) -> & [ String ] {
90110 & self . aliases
91111 }
92-
/// Manual argument coercion for `hex`.
///
/// NOTE(review): every line of this method carries a `-` diff marker, i.e. it
/// appears to be removed by this change in favour of the declarative signature
/// built in `SparkHex::new` — confirm before relying on it.
///
/// Behaviour, as written:
/// * Returns an invalid-argument-count error unless exactly one argument type
///   is supplied.
/// * `Int64`, `Utf8`, `Utf8View`, `LargeUtf8`, `Binary` and `LargeBinary` are
///   returned unchanged.
/// * A `Dictionary` whose value type is one of the above is returned unchanged;
///   a `Dictionary` with any other numeric value type is rewritten to a
///   `Dictionary` with the same key type and `Int64` values.
/// * Any other numeric type is coerced to `Int64`.
/// * Everything else yields an unsupported-data-type error whose message names
///   "Numeric, String, or Binary" as the expected kinds.
93- fn coerce_types (
94- & self ,
95- arg_types : & [ DataType ] ,
96- ) -> datafusion_common:: Result < Vec < DataType > > {
// `hex` is strictly unary.
97- if arg_types. len ( ) != 1 {
98- return Err ( invalid_arg_count_exec_err ( "hex" , ( 1 , 1 ) , arg_types. len ( ) ) ) ;
99- }
100- match & arg_types[ 0 ] {
// Types that need no coercion are passed through as-is.
101- DataType :: Int64
102- | DataType :: Utf8
103- | DataType :: Utf8View
104- | DataType :: LargeUtf8
105- | DataType :: Binary
106- | DataType :: LargeBinary => Ok ( vec ! [ arg_types[ 0 ] . clone( ) ] ) ,
// Dictionaries are judged by their value type; the key type is preserved.
107- DataType :: Dictionary ( key_type, value_type) => match value_type. as_ref ( ) {
108- DataType :: Int64
109- | DataType :: Utf8
110- | DataType :: Utf8View
111- | DataType :: LargeUtf8
112- | DataType :: Binary
113- | DataType :: LargeBinary => Ok ( vec ! [ arg_types[ 0 ] . clone( ) ] ) ,
114- other => {
// Remaining numeric dictionary values are widened to Int64.
115- if other. is_numeric ( ) {
116- Ok ( vec ! [ DataType :: Dictionary (
117- key_type. clone( ) ,
118- Box :: new( DataType :: Int64 ) ,
119- ) ] )
120- } else {
121- Err ( unsupported_data_type_exec_err (
122- "hex" ,
123- "Numeric, String, or Binary" ,
124- & arg_types[ 0 ] ,
125- ) )
126- }
127- }
128- } ,
129- other => {
// Remaining numeric scalars/arrays are widened to Int64.
130- if other. is_numeric ( ) {
131- Ok ( vec ! [ DataType :: Int64 ] )
132- } else {
133- Err ( unsupported_data_type_exec_err (
134- "hex" ,
135- "Numeric, String, or Binary" ,
136- & arg_types[ 0 ] ,
137- ) )
138- }
139- }
140- }
141- }
142112}
143113
144114fn hex_int64 ( num : i64 ) -> String {
@@ -240,6 +210,16 @@ pub fn compute_hex(
240210
241211 Ok ( ColumnarValue :: Array ( Arc :: new ( hexed) ) )
242212 }
213+ DataType :: LargeBinary => {
214+ let array = as_large_binary_array ( array) ?;
215+
216+ let hexed: StringArray = array
217+ . iter ( )
218+ . map ( |v| v. map ( |b| hex_bytes ( b, lowercase) ) . transpose ( ) )
219+ . collect :: < Result < _ , _ > > ( ) ?;
220+
221+ Ok ( ColumnarValue :: Array ( Arc :: new ( hexed) ) )
222+ }
243223 DataType :: FixedSizeBinary ( _) => {
244224 let array = as_fixed_size_binary_array ( array) ?;
245225
0 commit comments