1717
1818//! Helper functions for the table implementation
1919
20+ use std:: borrow:: Cow ;
2021use std:: mem;
2122use std:: sync:: Arc ;
2223
@@ -354,7 +355,7 @@ fn try_into_partitioned_file(
354355 . flatten ( )
355356 . zip ( partition_cols)
356357 . map ( |( parsed, ( _, datatype) ) | {
357- ScalarValue :: try_from_string ( parsed. to_string ( ) , datatype)
358+ ScalarValue :: try_from_string ( parsed. into_owned ( ) , datatype)
358359 } )
359360 . collect :: < Result < Vec < _ > > > ( ) ?;
360361
@@ -427,7 +428,7 @@ pub fn parse_partitions_for_path<'a, I>(
427428 table_path : & ListingTableUrl ,
428429 file_path : & ' a Path ,
429430 table_partition_cols : I ,
430- ) -> Option < Vec < String > >
431+ ) -> Option < Vec < Cow < ' a , str > > >
431432where
432433 I : IntoIterator < Item = & ' a str > ,
433434{
@@ -437,8 +438,10 @@ where
437438 for ( part, expected_partition) in subpath. zip ( table_partition_cols) {
438439 match part. split_once ( '=' ) {
439440 Some ( ( name, val) ) if name == expected_partition => {
440- let decoded = percent_decode_str ( val) . decode_utf8 ( ) . ok ( ) ?;
441- part_values. push ( decoded. into_owned ( ) ) ;
441+ let decoded = percent_decode_str ( val)
442+ . decode_utf8 ( )
443+ . unwrap_or ( Cow :: Borrowed ( val) ) ;
444+ part_values. push ( decoded) ;
442445 }
443446 _ => {
444447 debug ! (
@@ -515,7 +518,7 @@ mod tests {
515518 #[ test]
516519 fn test_parse_partitions_for_path ( ) {
517520 assert_eq ! (
518- Some ( vec![ ] as Vec <String >) ,
521+ Some ( vec![ ] as Vec <Cow < ' _ , str > >) ,
519522 parse_partitions_for_path(
520523 & ListingTableUrl :: parse( "file:///bucket/mytable" ) . unwrap( ) ,
521524 & Path :: from( "bucket/mytable/file.csv" ) ,
@@ -539,7 +542,7 @@ mod tests {
539542 )
540543 ) ;
541544 assert_eq ! (
542- Some ( vec![ "v1" . to_string ( ) ] ) ,
545+ Some ( vec![ Cow :: Borrowed ( "v1" ) ] ) ,
543546 parse_partitions_for_path(
544547 & ListingTableUrl :: parse( "file:///bucket/mytable" ) . unwrap( ) ,
545548 & Path :: from( "bucket/mytable/mypartition=v1/file.csv" ) ,
@@ -549,15 +552,15 @@ mod tests {
549552 // URL-encoded partition values should be decoded
550553 // Use Path::parse to avoid double-encoding (Path::from encodes % as %25)
551554 assert_eq ! (
552- Some ( vec![ "v/1" . to_string( ) ] ) ,
555+ Some ( vec![ Cow :: < str > :: Owned ( "v/1" . to_string( ) ) ] ) ,
553556 parse_partitions_for_path(
554557 & ListingTableUrl :: parse( "file:///bucket/mytable" ) . unwrap( ) ,
555558 & Path :: parse( "bucket/mytable/mypartition=v%2F1/file.csv" ) . unwrap( ) ,
556559 vec![ "mypartition" ]
557560 )
558561 ) ;
559562 assert_eq ! (
560- Some ( vec![ "v1" . to_string ( ) ] ) ,
563+ Some ( vec![ Cow :: Borrowed ( "v1" ) ] ) ,
561564 parse_partitions_for_path(
562565 & ListingTableUrl :: parse( "file:///bucket/mytable/" ) . unwrap( ) ,
563566 & Path :: from( "bucket/mytable/mypartition=v1/file.csv" ) ,
@@ -574,55 +577,59 @@ mod tests {
574577 )
575578 ) ;
576579 assert_eq ! (
577- Some ( vec![ "v1" . to_string ( ) , "v2" . to_string ( ) ] ) ,
580+ Some ( vec![ Cow :: Borrowed ( "v1" ) , Cow :: Borrowed ( "v2" ) ] ) ,
578581 parse_partitions_for_path(
579582 & ListingTableUrl :: parse( "file:///bucket/mytable" ) . unwrap( ) ,
580583 & Path :: from( "bucket/mytable/mypartition=v1/otherpartition=v2/file.csv" ) ,
581584 vec![ "mypartition" , "otherpartition" ]
582585 )
583586 ) ;
584587 assert_eq ! (
585- Some ( vec![ "v1" . to_string ( ) ] ) ,
588+ Some ( vec![ Cow :: Borrowed ( "v1" ) ] ) ,
586589 parse_partitions_for_path(
587590 & ListingTableUrl :: parse( "file:///bucket/mytable" ) . unwrap( ) ,
588591 & Path :: from( "bucket/mytable/mypartition=v1/otherpartition=v2/file.csv" ) ,
589592 vec![ "mypartition" ]
590593 )
591594 ) ;
592595 assert_eq ! (
593- Some ( vec![ "John Doe" . to_string( ) ] ) ,
596+ Some ( vec![ Cow :: < str > :: Owned ( "John Doe" . to_string( ) ) ] ) ,
594597 parse_partitions_for_path(
595598 & ListingTableUrl :: parse( "file:///bucket/mytable" ) . unwrap( ) ,
596599 & Path :: parse( "bucket/mytable/name=John%20Doe/file.csv" ) . unwrap( ) ,
597600 vec![ "name" ]
598601 )
599602 ) ;
600603 assert_eq ! (
601- Some ( vec![ "a/b" . to_string( ) , "c d" . to_string( ) ] ) ,
604+ Some ( vec![
605+ Cow :: <str >:: Owned ( "a/b" . to_string( ) ) ,
606+ Cow :: <str >:: Owned ( "c d" . to_string( ) ) ,
607+ ] ) ,
602608 parse_partitions_for_path(
603609 & ListingTableUrl :: parse( "file:///bucket/mytable" ) . unwrap( ) ,
604610 & Path :: parse( "bucket/mytable/p1=a%2Fb/p2=c%20d/file.csv" ) . unwrap( ) ,
605611 vec![ "p1" , "p2" ]
606612 )
607613 ) ;
608614 assert_eq ! (
609- Some ( vec![ "Müller" . to_string( ) ] ) ,
615+ Some ( vec![ Cow :: < str > :: Owned ( "Müller" . to_string( ) ) ] ) ,
610616 parse_partitions_for_path(
611617 & ListingTableUrl :: parse( "file:///bucket/mytable" ) . unwrap( ) ,
612618 & Path :: parse( "bucket/mytable/name=M%C3%BCller/file.csv" ) . unwrap( ) ,
613619 vec![ "name" ]
614620 )
615621 ) ;
616622 assert_eq ! (
617- Some ( vec![ "invalid%XX" . to_string ( ) ] ) ,
623+ Some ( vec![ Cow :: Borrowed ( "invalid%XX" ) ] ) ,
618624 parse_partitions_for_path(
619625 & ListingTableUrl :: parse( "file:///bucket/mytable" ) . unwrap( ) ,
620626 & Path :: parse( "bucket/mytable/p1=invalid%XX/file.csv" ) . unwrap( ) ,
621627 vec![ "p1" ]
622628 )
623629 ) ;
630+ // Invalid UTF-8 after percent-decoding falls back to raw value
624631 assert_eq ! (
625- None ,
632+ Some ( vec! [ Cow :: Borrowed ( "%FF" ) ] ) ,
626633 parse_partitions_for_path(
627634 & ListingTableUrl :: parse( "file:///bucket/mytable" ) . unwrap( ) ,
628635 & Path :: parse( "bucket/mytable/p1=%FF/file.csv" ) . unwrap( ) ,
0 commit comments