Skip to content

Commit b6dcd45

Browse files
committed
move tests to slt file
1 parent 6cb99a7 commit b6dcd45

2 files changed

Lines changed: 59 additions & 122 deletions

File tree

datafusion/spark/src/function/string/encode.rs

Lines changed: 2 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,8 @@ impl ScalarUDFImpl for SparkEncode {
337337
#[cfg(test)]
338338
mod tests {
339339
use super::*;
340-
use arrow::array::{Array, BinaryArray, StringArray, StringViewArray};
340+
use arrow::array::{BinaryArray, StringViewArray};
341341
use datafusion_common::config::ConfigOptions;
342-
use datafusion_expr::ScalarUDF;
343342

344343
/// Helper to invoke encode as a scalar with two literal string arguments.
345344
fn eval_encode_scalar_with_ansi(
@@ -376,37 +375,6 @@ mod tests {
376375
}
377376
}
378377

379-
#[test]
380-
fn test_encode_utf8() {
381-
let result =
382-
eval_encode_scalar(ScalarValue::Utf8(Some("Spark SQL".into())), "UTF-8")
383-
.unwrap();
384-
let bytes = expect_binary_scalar(result);
385-
assert_eq!(bytes, b"Spark SQL");
386-
}
387-
388-
#[test]
389-
fn test_encode_us_ascii() {
390-
let result =
391-
eval_encode_scalar(ScalarValue::Utf8(Some("Hello".into())), "US-ASCII")
392-
.unwrap();
393-
assert_eq!(expect_binary_scalar(result), b"Hello");
394-
}
395-
396-
#[test]
397-
fn test_encode_iso_8859_1() {
398-
// "naïve" — U+00EF (ï) is 0xEF in ISO-8859-1
399-
let result = eval_encode_scalar(
400-
ScalarValue::Utf8(Some("na\u{00EF}ve".into())),
401-
"ISO-8859-1",
402-
)
403-
.unwrap();
404-
assert_eq!(
405-
expect_binary_scalar(result),
406-
vec![0x6E, 0x61, 0xEF, 0x76, 0x65]
407-
);
408-
}
409-
410378
#[test]
411379
fn test_encode_utf16() {
412380
let result =
@@ -420,40 +388,13 @@ mod tests {
420388
);
421389
}
422390

423-
#[test]
424-
fn test_encode_utf16be() {
425-
let result =
426-
eval_encode_scalar(ScalarValue::Utf8(Some("AB".into())), "UTF-16BE").unwrap();
427-
assert_eq!(expect_binary_scalar(result), vec![0x00, 0x41, 0x00, 0x42]);
428-
}
429-
430391
#[test]
431392
fn test_encode_utf16le() {
432393
let result =
433394
eval_encode_scalar(ScalarValue::Utf8(Some("AB".into())), "UTF-16LE").unwrap();
434395
assert_eq!(expect_binary_scalar(result), vec![0x41, 0x00, 0x42, 0x00]);
435396
}
436397

437-
#[test]
438-
fn test_encode_case_insensitive_charset() {
439-
let result =
440-
eval_encode_scalar(ScalarValue::Utf8(Some("hello".into())), "utf-8").unwrap();
441-
assert_eq!(expect_binary_scalar(result), b"hello");
442-
}
443-
444-
#[test]
445-
fn test_encode_unsupported_charset() {
446-
let result =
447-
eval_encode_scalar(ScalarValue::Utf8(Some("hello".into())), "EBCDIC");
448-
assert!(result.is_err());
449-
assert!(
450-
result
451-
.unwrap_err()
452-
.to_string()
453-
.contains("Unsupported charset")
454-
);
455-
}
456-
457398
#[test]
458399
fn test_encode_ascii_unmappable_legacy_mode() {
459400
// Legacy mode: non-ASCII chars replaced with '?'
@@ -500,34 +441,6 @@ mod tests {
500441
assert!(result.unwrap_err().to_string().contains("cannot encode"));
501442
}
502443

503-
#[test]
504-
fn test_encode_null_input() {
505-
let func = SparkEncode::new();
506-
let arr: ArrayRef =
507-
Arc::new(StringArray::from(vec![Some("hello"), None, Some("world")]));
508-
let result = func
509-
.invoke_with_args(ScalarFunctionArgs {
510-
args: vec![
511-
ColumnarValue::Array(arr),
512-
ColumnarValue::Scalar(ScalarValue::Utf8(Some("UTF-8".into()))),
513-
],
514-
arg_fields: vec![
515-
Arc::new(Field::new("input", DataType::Utf8, true)),
516-
Arc::new(Field::new("charset", DataType::Utf8, false)),
517-
],
518-
number_rows: 3,
519-
return_field: Arc::new(Field::new("encode", DataType::Binary, true)),
520-
config_options: Arc::new(ConfigOptions::default()),
521-
})
522-
.unwrap();
523-
524-
let arr = result.into_array(3).unwrap();
525-
let binary = arr.as_any().downcast_ref::<BinaryArray>().unwrap();
526-
assert_eq!(binary.value(0), b"hello");
527-
assert!(binary.is_null(1));
528-
assert_eq!(binary.value(2), b"world");
529-
}
530-
531444
#[test]
532445
fn test_encode_utf8view_column() {
533446
let func = SparkEncode::new();
@@ -579,7 +492,7 @@ mod tests {
579492
}
580493

581494
#[test]
582-
fn test_return_field_nullable() {
495+
fn test_encode_return_field_nullable() {
583496
let func = SparkEncode::new();
584497

585498
let nullable = func
@@ -606,18 +519,6 @@ mod tests {
606519
assert!(!non_nullable.is_nullable());
607520
}
608521

609-
#[test]
610-
fn test_function_name() {
611-
let func = SparkEncode::new();
612-
assert_eq!(func.name(), "encode");
613-
}
614-
615-
#[test]
616-
fn test_udf_registration() {
617-
let udf = ScalarUDF::from(SparkEncode::new());
618-
assert_eq!(udf.name(), "encode");
619-
}
620-
621522
#[test]
622523
fn test_encode_large_binary_input() {
623524
let result = eval_encode_scalar(
@@ -671,17 +572,6 @@ mod tests {
671572
);
672573
}
673574

674-
#[test]
675-
fn test_encode_utf32be() {
676-
// 'A' = U+0041 → 00 00 00 41, 'B' = U+0042 → 00 00 00 42
677-
let result =
678-
eval_encode_scalar(ScalarValue::Utf8(Some("AB".into())), "UTF-32BE").unwrap();
679-
assert_eq!(
680-
expect_binary_scalar(result),
681-
vec![0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x42]
682-
);
683-
}
684-
685575
#[test]
686576
fn test_encode_utf32le() {
687577
// 'A' = U+0041 → 41 00 00 00, 'B' = U+0042 → 42 00 00 00

datafusion/sqllogictest/test_files/spark/string/encode.slt

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,60 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
# This file was originally created by a porting script from:
19-
# https://github.com/lakehq/sail/tree/43b6ed8221de5c4c4adbedbb267ae1351158b43c/crates/sail-spark-connect/tests/gold_data/function
20-
# This file is part of the implementation of the datafusion-spark function library.
21-
# For more information, please see:
22-
# https://github.com/apache/datafusion/issues/15914
23-
24-
## Original Query: SELECT encode('abc', 'utf-8');
25-
## PySpark 3.5.5 Result: {'encode(abc, utf-8)': bytearray(b'abc'), 'typeof(encode(abc, utf-8))': 'binary', 'typeof(abc)': 'string', 'typeof(utf-8)': 'string'}
26-
#query
27-
#SELECT encode('abc'::string, 'utf-8'::string);
18+
# UTF-8 encoding
19+
query ?
20+
SELECT encode('Spark SQL'::string, 'utf-8'::string);
21+
----
22+
537061726b2053514c
23+
24+
# US-ASCII encoding
25+
query ?
26+
SELECT encode('Hello'::string, 'us-ascii'::string);
27+
----
28+
48656c6c6f
29+
30+
# ISO-8859-1 encoding (ï = 0xEF in ISO-8859-1)
31+
query ?
32+
SELECT encode('naïve'::string, 'iso-8859-1'::string);
33+
----
34+
6e61ef7665
35+
36+
# UTF-16BE encoding
37+
query ?
38+
SELECT encode('AB'::string, 'utf-16be'::string);
39+
----
40+
00410042
41+
42+
# UTF-32BE encoding
43+
query ?
44+
SELECT encode('A'::string, 'utf-32be'::string);
45+
----
46+
00000041
47+
48+
# Charset name is matched case-insensitively (mixed-case 'Utf-8')
49+
query ?
50+
SELECT encode('hello'::string, 'Utf-8'::string);
51+
----
52+
68656c6c6f
53+
54+
# NULL input
55+
query ?
56+
SELECT encode(NULL::string, 'utf-8'::string);
57+
----
58+
NULL
59+
60+
# Column (multi-row) input containing NULLs
61+
query ?
62+
SELECT encode(s, 'utf-8'::string) FROM (VALUES ('hello'::string), (NULL::string), ('world'::string)) AS t(s);
63+
----
64+
68656c6c6f
65+
NULL
66+
776f726c64
67+
68+
# Error: unsupported charset
69+
statement error Unsupported charset for encode
70+
SELECT encode('hello'::string, 'EBCDIC'::string);
71+
72+
# Error: no arguments
73+
statement error 'encode' does not support zero arguments
74+
SELECT encode();

0 commit comments

Comments
 (0)