@@ -24,12 +24,14 @@ use std::sync::Arc;
2424use std:: vec;
2525
2626use arrow:: array:: {
27- Array , ArrayRef , BinaryArray , Float64Array , Int32Array , LargeBinaryArray ,
28- LargeStringArray , StringArray , TimestampNanosecondArray , UnionArray ,
27+ Array , ArrayRef , BinaryArray , DictionaryArray , Float64Array , Int32Array ,
28+ LargeBinaryArray , LargeStringArray , StringArray , StructArray ,
29+ TimestampNanosecondArray , UInt32Array , UnionArray ,
2930} ;
3031use arrow:: buffer:: ScalarBuffer ;
3132use arrow:: datatypes:: {
32- DataType , Field , FieldRef , Schema , SchemaRef , TimeUnit , UnionFields ,
33+ DataType , Field , FieldRef , Fields , Schema , SchemaRef , TimeUnit , UInt32Type ,
34+ UnionFields ,
3335} ;
3436use arrow:: record_batch:: RecordBatch ;
3537use datafusion:: catalog:: {
@@ -174,6 +176,10 @@ impl TestContext {
174176 info ! ( "Registering table with union column" ) ;
175177 register_union_table ( test_ctx. session_ctx ( ) )
176178 }
179+ "dictionary_struct.slt" => {
180+ info ! ( "Registering table with dictionary-encoded struct column" ) ;
181+ register_dictionary_struct_table ( test_ctx. session_ctx ( ) ) ;
182+ }
177183 "async_udf.slt" => {
178184 info ! ( "Registering dummy async udf" ) ;
179185 register_async_abs_udf ( test_ctx. session_ctx ( ) )
@@ -584,6 +590,103 @@ fn register_union_table(ctx: &SessionContext) {
584590 ctx. register_batch ( "union_table" , batch) . unwrap ( ) ;
585591}
586592
593+ fn register_dictionary_struct_table ( ctx : & SessionContext ) {
594+ // Build deduplicated struct values: 3 unique structs
595+ let names =
596+ Arc :: new ( StringArray :: from ( vec ! [ "Alice" , "Bob" , "Carol" ] ) ) as ArrayRef ;
597+ let ids = Arc :: new ( Int32Array :: from ( vec ! [ 1 , 2 , 3 ] ) ) as ArrayRef ;
598+
599+ let struct_fields: Fields = vec ! [
600+ Field :: new( "name" , DataType :: Utf8 , false ) ,
601+ Field :: new( "id" , DataType :: Int32 , false ) ,
602+ ]
603+ . into ( ) ;
604+
605+ let values_struct = Arc :: new (
606+ StructArray :: try_new ( struct_fields. clone ( ) , vec ! [ names, ids] , None ) . unwrap ( ) ,
607+ ) as ArrayRef ;
608+
609+ // Dictionary keys index into the 3-element struct array.
610+ // 5 rows with repeated references to test dictionary deduplication.
611+ let keys = UInt32Array :: from ( vec ! [ 0u32 , 1 , 2 , 0 , 1 ] ) ;
612+ let dict =
613+ DictionaryArray :: < UInt32Type > :: try_new ( keys, values_struct. clone ( ) ) . unwrap ( ) ;
614+
615+ // Also build a non-dictionary plain struct column for comparison.
616+ let plain_names = Arc :: new ( StringArray :: from ( vec ! [
617+ "Alice" , "Bob" , "Carol" , "Alice" , "Bob" ,
618+ ] ) ) as ArrayRef ;
619+ let plain_ids = Arc :: new ( Int32Array :: from ( vec ! [ 1 , 2 , 3 , 1 , 2 ] ) ) as ArrayRef ;
620+ let plain_struct = StructArray :: try_new (
621+ struct_fields. clone ( ) ,
622+ vec ! [ plain_names, plain_ids] ,
623+ None ,
624+ )
625+ . unwrap ( ) ;
626+
627+ let dict_type = DataType :: Dictionary (
628+ Box :: new ( DataType :: UInt32 ) ,
629+ Box :: new ( DataType :: Struct ( struct_fields. clone ( ) ) ) ,
630+ ) ;
631+
632+ let schema = Schema :: new ( vec ! [
633+ Field :: new( "dict_struct" , dict_type, false ) ,
634+ Field :: new(
635+ "plain_struct" ,
636+ DataType :: Struct ( struct_fields. clone( ) ) ,
637+ false ,
638+ ) ,
639+ ] ) ;
640+
641+ let batch = RecordBatch :: try_new (
642+ Arc :: new ( schema) ,
643+ vec ! [ Arc :: new( dict) as ArrayRef , Arc :: new( plain_struct) as ArrayRef ] ,
644+ )
645+ . unwrap ( ) ;
646+
647+ ctx. register_batch ( "dict_struct_table" , batch) . unwrap ( ) ;
648+
649+ // Second table: dictionary-encoded struct with nullable entries
650+ let names_nullable =
651+ Arc :: new ( StringArray :: from ( vec ! [ "X" , "Y" ] ) ) as ArrayRef ;
652+ let ids_nullable = Arc :: new ( Int32Array :: from ( vec ! [ 10 , 20 ] ) ) as ArrayRef ;
653+ let struct_fields_nullable: Fields = vec ! [
654+ Field :: new( "name" , DataType :: Utf8 , false ) ,
655+ Field :: new( "id" , DataType :: Int32 , false ) ,
656+ ]
657+ . into ( ) ;
658+ let values_struct_nullable = Arc :: new (
659+ StructArray :: try_new (
660+ struct_fields_nullable. clone ( ) ,
661+ vec ! [ names_nullable, ids_nullable] ,
662+ None ,
663+ )
664+ . unwrap ( ) ,
665+ ) as ArrayRef ;
666+ let keys_nullable =
667+ UInt32Array :: from ( vec ! [ Some ( 0 ) , None , Some ( 1 ) , None ] ) ;
668+ let dict_nullable = DictionaryArray :: < UInt32Type > :: try_new (
669+ keys_nullable,
670+ values_struct_nullable,
671+ )
672+ . unwrap ( ) ;
673+
674+ let dict_type_nullable = DataType :: Dictionary (
675+ Box :: new ( DataType :: UInt32 ) ,
676+ Box :: new ( DataType :: Struct ( struct_fields_nullable) ) ,
677+ ) ;
678+
679+ let schema_nullable =
680+ Schema :: new ( vec ! [ Field :: new( "ds" , dict_type_nullable, true ) ] ) ;
681+ let batch_nullable = RecordBatch :: try_new (
682+ Arc :: new ( schema_nullable) ,
683+ vec ! [ Arc :: new( dict_nullable) as ArrayRef ] ,
684+ )
685+ . unwrap ( ) ;
686+ ctx. register_batch ( "dict_struct_nullable" , batch_nullable)
687+ . unwrap ( ) ;
688+ }
689+
587690fn register_async_abs_udf ( ctx : & SessionContext ) {
588691 #[ derive( Debug , PartialEq , Eq , Hash ) ]
589692 struct AsyncAbs {
0 commit comments