@@ -117,6 +117,11 @@ pub struct DFSchema {
117117 field_qualifiers : Vec < Option < TableReference > > ,
118118 /// Stores functional dependencies in the schema.
119119 functional_dependencies : FunctionalDependencies ,
120+ /// Field names that are ambiguous in this schema because the underlying
121+ /// source (e.g. a derived-table subquery) contained multiple columns with
122+ /// the same unqualified name. Any attempt to reference these names without
123+ /// a qualifier should produce a [`SchemaError::AmbiguousReference`] error.
124+ ambiguous_names : HashSet < String > ,
120125}
121126
122127impl DFSchema {
@@ -126,6 +131,7 @@ impl DFSchema {
126131 inner : Arc :: new ( Schema :: new ( [ ] ) ) ,
127132 field_qualifiers : vec ! [ ] ,
128133 functional_dependencies : FunctionalDependencies :: empty ( ) ,
134+ ambiguous_names : HashSet :: new ( ) ,
129135 }
130136 }
131137
@@ -157,6 +163,7 @@ impl DFSchema {
157163 inner : schema,
158164 field_qualifiers : qualifiers,
159165 functional_dependencies : FunctionalDependencies :: empty ( ) ,
166+ ambiguous_names : HashSet :: new ( ) ,
160167 } ;
161168 dfschema. check_names ( ) ?;
162169 Ok ( dfschema)
@@ -173,6 +180,7 @@ impl DFSchema {
173180 inner : schema,
174181 field_qualifiers : vec ! [ None ; field_count] ,
175182 functional_dependencies : FunctionalDependencies :: empty ( ) ,
183+ ambiguous_names : HashSet :: new ( ) ,
176184 } ;
177185 dfschema. check_names ( ) ?;
178186 Ok ( dfschema)
@@ -191,6 +199,7 @@ impl DFSchema {
191199 inner : schema. clone ( ) . into ( ) ,
192200 field_qualifiers : vec ! [ Some ( qualifier) ; schema. fields. len( ) ] ,
193201 functional_dependencies : FunctionalDependencies :: empty ( ) ,
202+ ambiguous_names : HashSet :: new ( ) ,
194203 } ;
195204 schema. check_names ( ) ?;
196205 Ok ( schema)
@@ -205,6 +214,7 @@ impl DFSchema {
205214 inner : Arc :: clone ( schema) ,
206215 field_qualifiers : qualifiers,
207216 functional_dependencies : FunctionalDependencies :: empty ( ) ,
217+ ambiguous_names : HashSet :: new ( ) ,
208218 } ;
209219 dfschema. check_names ( ) ?;
210220 Ok ( dfschema)
@@ -226,6 +236,7 @@ impl DFSchema {
226236 inner : Arc :: clone ( & self . inner ) ,
227237 field_qualifiers : qualifiers,
228238 functional_dependencies : self . functional_dependencies . clone ( ) ,
239+ ambiguous_names : self . ambiguous_names . clone ( ) ,
229240 } )
230241 }
231242
@@ -275,6 +286,24 @@ impl DFSchema {
275286 }
276287 }
277288
289+ /// Replaces this schema's set of ambiguous field names with `names` and
290+ /// returns the updated schema (any previously stored set is discarded).
291+ ///
292+ /// Ambiguous names correspond to fields that originated from multiple
293+ /// source columns with the same unqualified name (e.g. both sides of a
294+ /// JOIN having an `age` column). Resolving such a name without a table
295+ /// qualifier produces a [`SchemaError::AmbiguousReference`] error.
296+ pub fn with_ambiguous_names ( mut self , names : HashSet < String > ) -> Self {
297+ self . ambiguous_names = names;
298+ self
299+ }
300+
301+ /// Returns a reference to the set of unqualified field names currently
302+ /// marked as ambiguous in this schema. See [`Self::with_ambiguous_names`].
303+ pub fn ambiguous_names ( & self ) -> & HashSet < String > {
304+ & self . ambiguous_names
305+ }
306+
278307 /// Create a new schema that contains the fields from this schema followed by the fields
279308 /// from the supplied schema. An error will be returned if there are duplicate field names.
280309 pub fn join ( & self , schema : & DFSchema ) -> Result < Self > {
@@ -294,6 +323,7 @@ impl DFSchema {
294323 inner : Arc :: new ( new_schema_with_metadata) ,
295324 field_qualifiers : new_qualifiers,
296325 functional_dependencies : FunctionalDependencies :: empty ( ) ,
326+ ambiguous_names : HashSet :: new ( ) ,
297327 } ;
298328 new_self. check_names ( ) ?;
299329 Ok ( new_self)
@@ -506,6 +536,14 @@ impl DFSchema {
506536 & self ,
507537 name : & str ,
508538 ) -> Result < ( Option < & TableReference > , & FieldRef ) > {
539+ // If this field name was marked as ambiguous at schema creation time
540+ // (e.g. because a derived-table subquery produced duplicate column
541+ // names), refuse to resolve it without an explicit qualifier.
542+ if self . ambiguous_names . contains ( name) {
543+ return _schema_err ! ( SchemaError :: AmbiguousReference {
544+ field: Box :: new( Column :: new_unqualified( name. to_string( ) ) )
545+ } ) ;
546+ }
509547 let matches = self . qualified_fields_with_unqualified_name ( name) ;
510548 match matches. len ( ) {
511549 0 => Err ( unqualified_field_not_found ( name, self ) ) ,
@@ -845,6 +883,7 @@ impl DFSchema {
845883 field_qualifiers : vec ! [ None ; self . inner. fields. len( ) ] ,
846884 inner : self . inner ,
847885 functional_dependencies : self . functional_dependencies ,
886+ ambiguous_names : self . ambiguous_names ,
848887 }
849888 }
850889
@@ -855,6 +894,7 @@ impl DFSchema {
855894 field_qualifiers : vec ! [ Some ( qualifier) ; self . inner. fields. len( ) ] ,
856895 inner : self . inner ,
857896 functional_dependencies : self . functional_dependencies ,
897+ ambiguous_names : self . ambiguous_names ,
858898 }
859899 }
860900
@@ -1126,6 +1166,7 @@ impl TryFrom<SchemaRef> for DFSchema {
11261166 inner : schema,
11271167 field_qualifiers : vec ! [ None ; field_count] ,
11281168 functional_dependencies : FunctionalDependencies :: empty ( ) ,
1169+ ambiguous_names : HashSet :: new ( ) ,
11291170 } ;
11301171 // Without checking names, because schema here may have duplicate field names.
11311172 // For example, Partial AggregateMode will generate duplicate field names from
@@ -1187,6 +1228,7 @@ impl ToDFSchema for Vec<Field> {
11871228 inner : schema. into ( ) ,
11881229 field_qualifiers : vec ! [ None ; field_count] ,
11891230 functional_dependencies : FunctionalDependencies :: empty ( ) ,
1231+ ambiguous_names : HashSet :: new ( ) ,
11901232 } ;
11911233 Ok ( dfschema)
11921234 }
@@ -1578,6 +1620,7 @@ mod tests {
15781620 inner : Arc :: clone ( & arrow_schema_ref) ,
15791621 field_qualifiers : vec ! [ None ; arrow_schema_ref. fields. len( ) ] ,
15801622 functional_dependencies : FunctionalDependencies :: empty ( ) ,
1623+ ambiguous_names : HashSet :: new ( ) ,
15811624 } ;
15821625 let df_schema_ref = Arc :: new ( df_schema. clone ( ) ) ;
15831626
@@ -1624,6 +1667,7 @@ mod tests {
16241667 inner : Arc :: clone ( & schema) ,
16251668 field_qualifiers : vec ! [ None ; schema. fields. len( ) ] ,
16261669 functional_dependencies : FunctionalDependencies :: empty ( ) ,
1670+ ambiguous_names : HashSet :: new ( ) ,
16271671 } ;
16281672
16291673 assert_eq ! ( df_schema. inner. metadata( ) , schema. metadata( ) )
0 commit comments