Skip to content

Commit 613f87d

Browse files
authored
minor: remove unused crypto functions & narrow public API (#20045)
## Which issue does this PR close?

<!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. -->

N/A

## Rationale for this change

<!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. -->

Remove dead code and remove functions from the public API.

## What changes are included in this PR?

<!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. -->

See comments.

## Are these changes tested?

<!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? -->

Existing tests.

## Are there any user-facing changes?

<!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. -->

Yes, some functions are removed from the public API, but they likely weren't intended to be in our public API.

<!-- If there are any breaking changes to public APIs, please add the `api change` label. -->
1 parent b80bf2c commit 613f87d

2 files changed

Lines changed: 47 additions & 97 deletions

File tree

datafusion/functions/src/crypto/basic.rs

Lines changed: 4 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -17,73 +17,22 @@
1717

1818
//! "crypto" DataFusion functions
1919
20-
use arrow::array::{
21-
Array, ArrayRef, AsArray, BinaryArray, BinaryArrayType, StringViewArray,
22-
};
20+
use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, BinaryArrayType};
2321
use arrow::datatypes::DataType;
2422
use blake2::{Blake2b512, Blake2s256, Digest};
2523
use blake3::Hasher as Blake3;
26-
use datafusion_common::cast::as_binary_array;
2724

2825
use arrow::compute::StringArrayType;
29-
use datafusion_common::{
30-
DataFusionError, Result, ScalarValue, exec_err, internal_err, plan_err,
31-
utils::take_function_args,
32-
};
26+
use datafusion_common::{DataFusionError, Result, ScalarValue, exec_err, plan_err};
3327
use datafusion_expr::ColumnarValue;
3428
use md5::Md5;
3529
use sha2::{Sha224, Sha256, Sha384, Sha512};
3630
use std::fmt;
3731
use std::str::FromStr;
3832
use std::sync::Arc;
3933

40-
macro_rules! define_digest_function {
41-
($NAME: ident, $METHOD: ident, $DOC: expr) => {
42-
#[doc = $DOC]
43-
pub fn $NAME(args: &[ColumnarValue]) -> Result<ColumnarValue> {
44-
let [data] = take_function_args(&DigestAlgorithm::$METHOD.to_string(), args)?;
45-
digest_process(data, DigestAlgorithm::$METHOD)
46-
}
47-
};
48-
}
49-
define_digest_function!(
50-
sha224,
51-
Sha224,
52-
"computes sha224 hash digest of the given input"
53-
);
54-
define_digest_function!(
55-
sha256,
56-
Sha256,
57-
"computes sha256 hash digest of the given input"
58-
);
59-
define_digest_function!(
60-
sha384,
61-
Sha384,
62-
"computes sha384 hash digest of the given input"
63-
);
64-
define_digest_function!(
65-
sha512,
66-
Sha512,
67-
"computes sha512 hash digest of the given input"
68-
);
69-
define_digest_function!(
70-
blake2b,
71-
Blake2b,
72-
"computes blake2b hash digest of the given input"
73-
);
74-
define_digest_function!(
75-
blake2s,
76-
Blake2s,
77-
"computes blake2s hash digest of the given input"
78-
);
79-
define_digest_function!(
80-
blake3,
81-
Blake3,
82-
"computes blake3 hash digest of the given input"
83-
);
84-
8534
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
86-
pub enum DigestAlgorithm {
35+
pub(crate) enum DigestAlgorithm {
8736
Md5,
8837
Sha224,
8938
Sha256,
@@ -135,44 +84,6 @@ impl fmt::Display for DigestAlgorithm {
13584
}
13685
}
13786

138-
/// computes md5 hash digest of the given input
139-
pub fn md5(args: &[ColumnarValue]) -> Result<ColumnarValue> {
140-
let [data] = take_function_args("md5", args)?;
141-
let value = digest_process(data, DigestAlgorithm::Md5)?;
142-
143-
// md5 requires special handling because of its unique utf8view return type
144-
Ok(match value {
145-
ColumnarValue::Array(array) => {
146-
let binary_array = as_binary_array(&array)?;
147-
let string_array: StringViewArray = binary_array
148-
.iter()
149-
.map(|opt| opt.map(hex_encode::<_>))
150-
.collect();
151-
ColumnarValue::Array(Arc::new(string_array))
152-
}
153-
ColumnarValue::Scalar(ScalarValue::Binary(opt)) => {
154-
ColumnarValue::Scalar(ScalarValue::Utf8View(opt.map(hex_encode::<_>)))
155-
}
156-
_ => return internal_err!("Impossibly got invalid results from digest"),
157-
})
158-
}
159-
160-
/// Hex encoding lookup table for fast byte-to-hex conversion
161-
const HEX_CHARS_LOWER: &[u8; 16] = b"0123456789abcdef";
162-
163-
/// Fast hex encoding using a lookup table instead of format strings.
164-
/// This is significantly faster than using `write!("{:02x}")` for each byte.
165-
#[inline]
166-
fn hex_encode<T: AsRef<[u8]>>(data: T) -> String {
167-
let bytes = data.as_ref();
168-
let mut s = String::with_capacity(bytes.len() * 2);
169-
for &b in bytes {
170-
s.push(HEX_CHARS_LOWER[(b >> 4) as usize] as char);
171-
s.push(HEX_CHARS_LOWER[(b & 0x0f) as usize] as char);
172-
}
173-
s
174-
}
175-
17687
macro_rules! digest_to_array {
17788
($METHOD:ident, $INPUT:expr) => {{
17889
let binary_array: BinaryArray = $INPUT
@@ -269,7 +180,7 @@ impl DigestAlgorithm {
269180
}
270181
}
271182

272-
pub fn digest_process(
183+
pub(crate) fn digest_process(
273184
value: &ColumnarValue,
274185
digest_algorithm: DigestAlgorithm,
275186
) -> Result<ColumnarValue> {

datafusion/functions/src/crypto/md5.rs

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,23 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::crypto::basic::md5;
19-
use arrow::datatypes::DataType;
18+
use arrow::{array::StringViewArray, datatypes::DataType};
2019
use datafusion_common::{
21-
Result,
20+
Result, ScalarValue,
21+
cast::as_binary_array,
22+
internal_err,
2223
types::{logical_binary, logical_string},
24+
utils::take_function_args,
2325
};
2426
use datafusion_expr::{
2527
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
2628
TypeSignature, Volatility,
2729
};
2830
use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
2931
use datafusion_macros::user_doc;
30-
use std::any::Any;
32+
use std::{any::Any, sync::Arc};
33+
34+
use crate::crypto::basic::{DigestAlgorithm, digest_process};
3135

3236
#[user_doc(
3337
doc_section(label = "Hashing Functions"),
@@ -97,3 +101,38 @@ impl ScalarUDFImpl for Md5Func {
97101
self.doc()
98102
}
99103
}
104+
105+
/// Hex encoding lookup table for fast byte-to-hex conversion
106+
const HEX_CHARS_LOWER: &[u8; 16] = b"0123456789abcdef";
107+
108+
/// Fast hex encoding using a lookup table instead of format strings.
109+
/// This is significantly faster than using `write!("{:02x}")` for each byte.
110+
#[inline]
111+
fn hex_encode(data: impl AsRef<[u8]>) -> String {
112+
let bytes = data.as_ref();
113+
let mut s = String::with_capacity(bytes.len() * 2);
114+
for &b in bytes {
115+
s.push(HEX_CHARS_LOWER[(b >> 4) as usize] as char);
116+
s.push(HEX_CHARS_LOWER[(b & 0x0f) as usize] as char);
117+
}
118+
s
119+
}
120+
121+
fn md5(args: &[ColumnarValue]) -> Result<ColumnarValue> {
122+
let [data] = take_function_args("md5", args)?;
123+
let value = digest_process(data, DigestAlgorithm::Md5)?;
124+
125+
// md5 requires special handling because of its unique utf8view return type
126+
Ok(match value {
127+
ColumnarValue::Array(array) => {
128+
let binary_array = as_binary_array(&array)?;
129+
let string_array: StringViewArray =
130+
binary_array.iter().map(|opt| opt.map(hex_encode)).collect();
131+
ColumnarValue::Array(Arc::new(string_array))
132+
}
133+
ColumnarValue::Scalar(ScalarValue::Binary(opt)) => {
134+
ColumnarValue::Scalar(ScalarValue::Utf8View(opt.map(hex_encode)))
135+
}
136+
_ => return internal_err!("Impossibly got invalid results from digest"),
137+
})
138+
}

0 commit comments

Comments
 (0)