@@ -24,11 +24,14 @@ use std::sync::Arc;
2424use std:: vec;
2525
2626use arrow:: array:: {
27- Array , ArrayRef , BinaryArray , Float64Array , Int32Array , LargeBinaryArray ,
28- LargeStringArray , StringArray , TimestampNanosecondArray , UnionArray ,
27+ Array , ArrayRef , BinaryArray , DictionaryArray , Float64Array , Int32Array ,
28+ LargeBinaryArray , LargeStringArray , StringArray , StructArray ,
29+ TimestampNanosecondArray , UInt32Array , UnionArray ,
2930} ;
3031use arrow:: buffer:: ScalarBuffer ;
31- use arrow:: datatypes:: { DataType , Field , Schema , SchemaRef , TimeUnit , UnionFields } ;
32+ use arrow:: datatypes:: {
33+ DataType , Field , Fields , Schema , SchemaRef , TimeUnit , UInt32Type , UnionFields ,
34+ } ;
3235use arrow:: record_batch:: RecordBatch ;
3336use datafusion:: catalog:: {
3437 CatalogProvider , MemoryCatalogProvider , MemorySchemaProvider , SchemaProvider , Session ,
@@ -145,6 +148,10 @@ impl TestContext {
145148 info ! ( "Registering table with union column" ) ;
146149 register_union_table ( test_ctx. session_ctx ( ) )
147150 }
151+ "dictionary_struct.slt" => {
152+ info ! ( "Registering table with dictionary-encoded struct column" ) ;
153+ register_dictionary_struct_table ( test_ctx. session_ctx ( ) ) ;
154+ }
148155 "async_udf.slt" => {
149156 info ! ( "Registering dummy async udf" ) ;
150157 register_async_abs_udf ( test_ctx. session_ctx ( ) )
@@ -555,6 +562,103 @@ fn register_union_table(ctx: &SessionContext) {
555562 ctx. register_batch ( "union_table" , batch) . unwrap ( ) ;
556563}
557564
565+ fn register_dictionary_struct_table ( ctx : & SessionContext ) {
566+ // Build deduplicated struct values: 3 unique structs
567+ let names =
568+ Arc :: new ( StringArray :: from ( vec ! [ "Alice" , "Bob" , "Carol" ] ) ) as ArrayRef ;
569+ let ids = Arc :: new ( Int32Array :: from ( vec ! [ 1 , 2 , 3 ] ) ) as ArrayRef ;
570+
571+ let struct_fields: Fields = vec ! [
572+ Field :: new( "name" , DataType :: Utf8 , false ) ,
573+ Field :: new( "id" , DataType :: Int32 , false ) ,
574+ ]
575+ . into ( ) ;
576+
577+ let values_struct = Arc :: new (
578+ StructArray :: try_new ( struct_fields. clone ( ) , vec ! [ names, ids] , None ) . unwrap ( ) ,
579+ ) as ArrayRef ;
580+
581+ // Dictionary keys index into the 3-element struct array.
582+ // 5 rows with repeated references to test dictionary deduplication.
583+ let keys = UInt32Array :: from ( vec ! [ 0u32 , 1 , 2 , 0 , 1 ] ) ;
584+ let dict =
585+ DictionaryArray :: < UInt32Type > :: try_new ( keys, values_struct. clone ( ) ) . unwrap ( ) ;
586+
587+ // Also build a non-dictionary plain struct column for comparison.
588+ let plain_names = Arc :: new ( StringArray :: from ( vec ! [
589+ "Alice" , "Bob" , "Carol" , "Alice" , "Bob" ,
590+ ] ) ) as ArrayRef ;
591+ let plain_ids = Arc :: new ( Int32Array :: from ( vec ! [ 1 , 2 , 3 , 1 , 2 ] ) ) as ArrayRef ;
592+ let plain_struct = StructArray :: try_new (
593+ struct_fields. clone ( ) ,
594+ vec ! [ plain_names, plain_ids] ,
595+ None ,
596+ )
597+ . unwrap ( ) ;
598+
599+ let dict_type = DataType :: Dictionary (
600+ Box :: new ( DataType :: UInt32 ) ,
601+ Box :: new ( DataType :: Struct ( struct_fields. clone ( ) ) ) ,
602+ ) ;
603+
604+ let schema = Schema :: new ( vec ! [
605+ Field :: new( "dict_struct" , dict_type, false ) ,
606+ Field :: new(
607+ "plain_struct" ,
608+ DataType :: Struct ( struct_fields. clone( ) ) ,
609+ false ,
610+ ) ,
611+ ] ) ;
612+
613+ let batch = RecordBatch :: try_new (
614+ Arc :: new ( schema) ,
615+ vec ! [ Arc :: new( dict) as ArrayRef , Arc :: new( plain_struct) as ArrayRef ] ,
616+ )
617+ . unwrap ( ) ;
618+
619+ ctx. register_batch ( "dict_struct_table" , batch) . unwrap ( ) ;
620+
621+ // Second table: dictionary-encoded struct with nullable entries
622+ let names_nullable =
623+ Arc :: new ( StringArray :: from ( vec ! [ "X" , "Y" ] ) ) as ArrayRef ;
624+ let ids_nullable = Arc :: new ( Int32Array :: from ( vec ! [ 10 , 20 ] ) ) as ArrayRef ;
625+ let struct_fields_nullable: Fields = vec ! [
626+ Field :: new( "name" , DataType :: Utf8 , false ) ,
627+ Field :: new( "id" , DataType :: Int32 , false ) ,
628+ ]
629+ . into ( ) ;
630+ let values_struct_nullable = Arc :: new (
631+ StructArray :: try_new (
632+ struct_fields_nullable. clone ( ) ,
633+ vec ! [ names_nullable, ids_nullable] ,
634+ None ,
635+ )
636+ . unwrap ( ) ,
637+ ) as ArrayRef ;
638+ let keys_nullable =
639+ UInt32Array :: from ( vec ! [ Some ( 0 ) , None , Some ( 1 ) , None ] ) ;
640+ let dict_nullable = DictionaryArray :: < UInt32Type > :: try_new (
641+ keys_nullable,
642+ values_struct_nullable,
643+ )
644+ . unwrap ( ) ;
645+
646+ let dict_type_nullable = DataType :: Dictionary (
647+ Box :: new ( DataType :: UInt32 ) ,
648+ Box :: new ( DataType :: Struct ( struct_fields_nullable) ) ,
649+ ) ;
650+
651+ let schema_nullable =
652+ Schema :: new ( vec ! [ Field :: new( "ds" , dict_type_nullable, true ) ] ) ;
653+ let batch_nullable = RecordBatch :: try_new (
654+ Arc :: new ( schema_nullable) ,
655+ vec ! [ Arc :: new( dict_nullable) as ArrayRef ] ,
656+ )
657+ . unwrap ( ) ;
658+ ctx. register_batch ( "dict_struct_nullable" , batch_nullable)
659+ . unwrap ( ) ;
660+ }
661+
558662fn register_async_abs_udf ( ctx : & SessionContext ) {
559663 #[ derive( Debug , PartialEq , Eq , Hash ) ]
560664 struct AsyncAbs {
0 commit comments