@@ -490,6 +490,59 @@ mod tests {
490490 assert_eq ! ( udf. name( ) , "encode" ) ;
491491 }
492492
#[test]
fn test_encode_large_binary_input() {
    // A `LargeBinary` scalar holding ASCII bytes, encoded with the "UTF-8"
    // charset, is expected to yield the same bytes back.
    let input = ScalarValue::LargeBinary(Some(b"Hello".to_vec()));
    let encoded = eval_encode_scalar(input, "UTF-8").unwrap();
    assert_eq!(expect_binary_scalar(encoded), b"Hello");
}
502+
#[test]
fn test_encode_binary_view_input() {
    // A `BinaryView` scalar holding ASCII bytes, encoded with the "UTF-8"
    // charset, is expected to yield the same bytes back.
    let input = ScalarValue::BinaryView(Some(b"Hello".to_vec()));
    let encoded = eval_encode_scalar(input, "UTF-8").unwrap();
    assert_eq!(expect_binary_scalar(encoded), b"Hello");
}
510+
#[test]
fn test_encode_emoji_utf8() {
    // U+1F600 (GRINNING FACE) lies outside the BMP and takes 4 bytes in
    // UTF-8: F0 9F 98 80. The code point is written as an escape so the test
    // input cannot be silently altered by a re-encoding of this source file.
    let input = ScalarValue::Utf8(Some("\u{1F600}".into()));
    let encoded = eval_encode_scalar(input, "UTF-8").unwrap();
    assert_eq!(expect_binary_scalar(encoded), vec![0xF0, 0x9F, 0x98, 0x80]);
}
518+
#[test]
fn test_encode_emoji_utf16be() {
    // U+1F600 (GRINNING FACE) is encoded in UTF-16 as the surrogate pair
    // D83D DE00; big-endian byte order keeps each unit's high byte first.
    // The code point is written as an escape so the test input cannot be
    // silently altered by a re-encoding of this source file.
    let input = ScalarValue::Utf8(Some("\u{1F600}".into()));
    let encoded = eval_encode_scalar(input, "UTF-16BE").unwrap();
    assert_eq!(expect_binary_scalar(encoded), vec![0xD8, 0x3D, 0xDE, 0x00]);
}
526+
#[test]
fn test_encode_emoji_utf16le() {
    // U+1F600 (GRINNING FACE) is the surrogate pair D83D DE00 in UTF-16;
    // little-endian byte order swaps the bytes of each unit: 3D D8 00 DE.
    // The code point is written as an escape so the test input cannot be
    // silently altered by a re-encoding of this source file.
    let input = ScalarValue::Utf8(Some("\u{1F600}".into()));
    let encoded = eval_encode_scalar(input, "UTF-16LE").unwrap();
    assert_eq!(expect_binary_scalar(encoded), vec![0x3D, 0xD8, 0x00, 0xDE]);
}
534+
#[test]
fn test_encode_emoji_utf16_with_bom() {
    // The "UTF-16" charset is expected to produce a byte-order mark (FE FF)
    // followed by the big-endian surrogate pair for U+1F600 (D83D DE00).
    // The code point is written as an escape so the test input cannot be
    // silently altered by a re-encoding of this source file.
    let input = ScalarValue::Utf8(Some("\u{1F600}".into()));
    let encoded = eval_encode_scalar(input, "UTF-16").unwrap();
    assert_eq!(
        expect_binary_scalar(encoded),
        vec![0xFE, 0xFF, 0xD8, 0x3D, 0xDE, 0x00]
    );
}
545+
493546 /// Simple hex encoding for test assertions.
494547 fn hex_encode ( bytes : & [ u8 ] ) -> String {
495548 bytes
0 commit comments