@@ -25,8 +25,9 @@ use super::SendableRecordBatchStream;
2525use crate :: stream:: RecordBatchReceiverStream ;
2626use crate :: { ColumnStatistics , Statistics } ;
2727
28- use arrow:: array:: Array ;
28+ use arrow:: array:: { Array , StringViewArray } ;
2929use arrow:: datatypes:: Schema ;
30+ use arrow:: error:: ArrowError ;
3031use arrow:: record_batch:: RecordBatch ;
3132use datafusion_common:: stats:: Precision ;
3233use datafusion_common:: { Result , plan_err} ;
@@ -190,7 +191,7 @@ pub fn can_project(
190191 . max ( )
191192 . is_some_and ( |& i| i >= schema. fields ( ) . len ( ) )
192193 {
193- Err ( arrow :: error :: ArrowError :: SchemaError ( format ! (
194+ Err ( ArrowError :: SchemaError ( format ! (
194195 "project index {} out of bounds, max field {}" ,
195196 columns. iter( ) . max( ) . unwrap( ) ,
196197 schema. fields( ) . len( )
@@ -204,6 +205,31 @@ pub fn can_project(
204205 }
205206}
206207
208+ /// Return a new `RecordBatch` with [`StringViewArray::gc`] called on such columns (if any).
209+ pub ( crate ) fn gc_stringview_arrays (
210+ batch : RecordBatch ,
211+ ) -> Result < RecordBatch , ArrowError > {
212+ let mut new_columns: Vec < Arc < dyn Array > > = Vec :: with_capacity ( batch. num_columns ( ) ) ;
213+
214+ let mut arr_mutated = false ;
215+ for array in batch. columns ( ) {
216+ if let Some ( string_view_array) = array. as_any ( ) . downcast_ref :: < StringViewArray > ( )
217+ {
218+ let new_array = string_view_array. gc ( ) ;
219+ new_columns. push ( Arc :: new ( new_array) ) ;
220+ arr_mutated = true ;
221+ } else {
222+ new_columns. push ( Arc :: clone ( array) ) ;
223+ }
224+ }
225+
226+ if arr_mutated {
227+ RecordBatch :: try_new ( batch. schema ( ) , new_columns)
228+ } else {
229+ Ok ( batch)
230+ }
231+ }
232+
207233#[ cfg( test) ]
208234mod tests {
209235 use super :: * ;
0 commit comments