Skip to content

Commit 10c933c

Browse files
authored
[datafusion-spark]: Refactor hex's signature away from user_defined (#19235)
## Which issue does this PR close? - Part of #12725. ## Rationale for this change Move `hex` away from a user-defined signature, as that is out of scope, and add a unit test that checks the function is called with the correct number of arguments. ## Are these changes tested? Yes, by the existing tests plus the sqllogictest cases added to `hex.slt` in this commit. ## Are there any user-facing changes? Yes.
1 parent 8601151 commit 10c933c

3 files changed

Lines changed: 52 additions & 57 deletions

File tree

datafusion/expr-common/src/signature.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ pub enum TypeSignatureClass {
337337
Float,
338338
Decimal,
339339
Numeric,
340-
/// Encompasses both the native Binary as well as arbitrarily sized FixedSizeBinary types
340+
/// Encompasses both the native Binary/LargeBinary types as well as arbitrarily sized FixedSizeBinary types
341341
Binary,
342342
}
343343

datafusion/spark/src/function/math/hex.rs

Lines changed: 36 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,24 @@
1818
use std::any::Any;
1919
use std::sync::Arc;
2020

21-
use crate::function::error_utils::{
22-
invalid_arg_count_exec_err, unsupported_data_type_exec_err,
23-
};
2421
use arrow::array::{Array, StringArray};
2522
use arrow::datatypes::DataType;
2623
use arrow::{
2724
array::{as_dictionary_array, as_largestring_array, as_string_array},
2825
datatypes::Int32Type,
2926
};
27+
use datafusion_common::cast::as_large_binary_array;
3028
use datafusion_common::cast::as_string_view_array;
29+
use datafusion_common::types::{logical_int64, logical_string, NativeType};
3130
use datafusion_common::utils::take_function_args;
3231
use datafusion_common::{
3332
cast::{as_binary_array, as_fixed_size_binary_array, as_int64_array},
3433
exec_err, DataFusionError,
3534
};
36-
use datafusion_expr::Signature;
37-
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Volatility};
35+
use datafusion_expr::{
36+
Coercion, ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature,
37+
TypeSignatureClass, Volatility,
38+
};
3839
use std::fmt::Write;
3940

4041
/// <https://spark.apache.org/docs/latest/api/sql/index.html#hex>
@@ -52,8 +53,27 @@ impl Default for SparkHex {
5253

5354
impl SparkHex {
5455
pub fn new() -> Self {
56+
let int64 = Coercion::new_implicit(
57+
TypeSignatureClass::Native(logical_int64()),
58+
vec![TypeSignatureClass::Numeric],
59+
NativeType::Int64,
60+
);
61+
62+
let string = Coercion::new_exact(TypeSignatureClass::Native(logical_string()));
63+
64+
let binary = Coercion::new_exact(TypeSignatureClass::Binary);
65+
66+
let variants = vec![
67+
// accepts numeric types
68+
TypeSignature::Coercible(vec![int64]),
69+
// accepts string types (Utf8, Utf8View, LargeUtf8)
70+
TypeSignature::Coercible(vec![string]),
71+
// accepts binary types (Binary, FixedSizeBinary, LargeBinary)
72+
TypeSignature::Coercible(vec![binary]),
73+
];
74+
5575
Self {
56-
signature: Signature::user_defined(Volatility::Immutable),
76+
signature: Signature::one_of(variants, Volatility::Immutable),
5777
aliases: vec![],
5878
}
5979
}
@@ -89,56 +109,6 @@ impl ScalarUDFImpl for SparkHex {
89109
fn aliases(&self) -> &[String] {
90110
&self.aliases
91111
}
92-
93-
fn coerce_types(
94-
&self,
95-
arg_types: &[DataType],
96-
) -> datafusion_common::Result<Vec<DataType>> {
97-
if arg_types.len() != 1 {
98-
return Err(invalid_arg_count_exec_err("hex", (1, 1), arg_types.len()));
99-
}
100-
match &arg_types[0] {
101-
DataType::Int64
102-
| DataType::Utf8
103-
| DataType::Utf8View
104-
| DataType::LargeUtf8
105-
| DataType::Binary
106-
| DataType::LargeBinary => Ok(vec![arg_types[0].clone()]),
107-
DataType::Dictionary(key_type, value_type) => match value_type.as_ref() {
108-
DataType::Int64
109-
| DataType::Utf8
110-
| DataType::Utf8View
111-
| DataType::LargeUtf8
112-
| DataType::Binary
113-
| DataType::LargeBinary => Ok(vec![arg_types[0].clone()]),
114-
other => {
115-
if other.is_numeric() {
116-
Ok(vec![DataType::Dictionary(
117-
key_type.clone(),
118-
Box::new(DataType::Int64),
119-
)])
120-
} else {
121-
Err(unsupported_data_type_exec_err(
122-
"hex",
123-
"Numeric, String, or Binary",
124-
&arg_types[0],
125-
))
126-
}
127-
}
128-
},
129-
other => {
130-
if other.is_numeric() {
131-
Ok(vec![DataType::Int64])
132-
} else {
133-
Err(unsupported_data_type_exec_err(
134-
"hex",
135-
"Numeric, String, or Binary",
136-
&arg_types[0],
137-
))
138-
}
139-
}
140-
}
141-
}
142112
}
143113

144114
fn hex_int64(num: i64) -> String {
@@ -240,6 +210,16 @@ pub fn compute_hex(
240210

241211
Ok(ColumnarValue::Array(Arc::new(hexed)))
242212
}
213+
DataType::LargeBinary => {
214+
let array = as_large_binary_array(array)?;
215+
216+
let hexed: StringArray = array
217+
.iter()
218+
.map(|v| v.map(|b| hex_bytes(b, lowercase)).transpose())
219+
.collect::<Result<_, _>>()?;
220+
221+
Ok(ColumnarValue::Array(Arc::new(hexed)))
222+
}
243223
DataType::FixedSizeBinary(_) => {
244224
let array = as_fixed_size_binary_array(array)?;
245225

datafusion/sqllogictest/test_files/spark/math/hex.slt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,18 @@ SELECT hex(column1) FROM t_utf8view;
4848
666F6F
4949
NULL
5050
666F6F62617262617A
51+
52+
query T
53+
SELECT hex(column1) FROM VALUES (arrow_cast('hello', 'LargeBinary')), (NULL), (arrow_cast('world', 'LargeBinary'));
54+
----
55+
68656C6C6F
56+
NULL
57+
776F726C64
58+
59+
statement error Function 'hex' expects 1 arguments but received 2
60+
SELECT hex(1, 2);
61+
62+
query T
63+
SELECT hex(arrow_cast('test', 'LargeBinary')) as lar_b;
64+
----
65+
74657374

0 commit comments

Comments
 (0)