Skip to content

Commit a49a20f

Browse files
committed
test: Add LargeBinary, BinaryView, and emoji surrogate pair tests
1 parent d59196e commit a49a20f

1 file changed

Lines changed: 53 additions & 0 deletions

File tree

  • datafusion/spark/src/function/string

datafusion/spark/src/function/string/encode.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,59 @@ mod tests {
490490
assert_eq!(udf.name(), "encode");
491491
}
492492

493+
#[test]
494+
fn test_encode_large_binary_input() {
495+
let result = eval_encode_scalar(
496+
ScalarValue::LargeBinary(Some(b"Hello".to_vec())),
497+
"UTF-8",
498+
)
499+
.unwrap();
500+
assert_eq!(expect_binary_scalar(result), b"Hello");
501+
}
502+
503+
#[test]
504+
fn test_encode_binary_view_input() {
505+
let result =
506+
eval_encode_scalar(ScalarValue::BinaryView(Some(b"Hello".to_vec())), "UTF-8")
507+
.unwrap();
508+
assert_eq!(expect_binary_scalar(result), b"Hello");
509+
}
510+
511+
#[test]
512+
fn test_encode_emoji_utf8() {
513+
// U+1F600 (😀) is 4 bytes in UTF-8: F0 9F 98 80
514+
let result =
515+
eval_encode_scalar(ScalarValue::Utf8(Some("😀".into())), "UTF-8").unwrap();
516+
assert_eq!(expect_binary_scalar(result), vec![0xF0, 0x9F, 0x98, 0x80]);
517+
}
518+
519+
#[test]
520+
fn test_encode_emoji_utf16be() {
521+
// U+1F600 (😀) is a surrogate pair in UTF-16: D83D DE00
522+
let result =
523+
eval_encode_scalar(ScalarValue::Utf8(Some("😀".into())), "UTF-16BE").unwrap();
524+
assert_eq!(expect_binary_scalar(result), vec![0xD8, 0x3D, 0xDE, 0x00]);
525+
}
526+
527+
#[test]
528+
fn test_encode_emoji_utf16le() {
529+
// U+1F600 (😀) surrogate pair in little-endian byte order: 3D D8 00 DE
530+
let result =
531+
eval_encode_scalar(ScalarValue::Utf8(Some("😀".into())), "UTF-16LE").unwrap();
532+
assert_eq!(expect_binary_scalar(result), vec![0x3D, 0xD8, 0x00, 0xDE]);
533+
}
534+
535+
#[test]
536+
fn test_encode_emoji_utf16_with_bom() {
537+
// UTF-16 = BOM (FEFF) + UTF-16BE surrogate pair
538+
let result =
539+
eval_encode_scalar(ScalarValue::Utf8(Some("😀".into())), "UTF-16").unwrap();
540+
assert_eq!(
541+
expect_binary_scalar(result),
542+
vec![0xFE, 0xFF, 0xD8, 0x3D, 0xDE, 0x00]
543+
);
544+
}
545+
493546
/// Simple hex encoding for test assertions.
494547
fn hex_encode(bytes: &[u8]) -> String {
495548
bytes

0 commit comments

Comments
(0)