Skip to content

Commit c1ad863

Browse files
authored
[Minor] Use buffer_unordered (#20462)
## Which issue does this PR close?

- Closes #.

## Rationale for this change

`buffer_unordered` should be slightly better than `buffered` here: we sort the results by path afterwards anyway, so the order in which the fetches complete does not matter (perhaps we can also reduce the default concurrency). This change also removes some unnecessary allocations.

## What changes are included in this PR?

## Are these changes tested?

## Are there any user-facing changes?
1 parent f488a90 commit c1ad863

1 file changed

Lines changed: 4 additions & 6 deletions

File tree

datafusion/datasource-parquet/src/file_format.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ impl FileFormat for ParquetFormat {
391391
})
392392
.boxed() // Workaround https://github.com/rust-lang/rust/issues/64552
393393
// fetch schemas concurrently, if requested
394-
.buffered(state.config_options().execution.meta_fetch_concurrency)
394+
.buffer_unordered(state.config_options().execution.meta_fetch_concurrency)
395395
.try_collect()
396396
.await?;
397397

@@ -401,12 +401,10 @@ impl FileFormat for ParquetFormat {
401401
// is not deterministic. Thus, to ensure deterministic schema inference
402402
// sort the files first.
403403
// https://github.com/apache/datafusion/pull/6629
404-
schemas.sort_by(|(location1, _), (location2, _)| location1.cmp(location2));
404+
schemas
405+
.sort_unstable_by(|(location1, _), (location2, _)| location1.cmp(location2));
405406

406-
let schemas = schemas
407-
.into_iter()
408-
.map(|(_, schema)| schema)
409-
.collect::<Vec<_>>();
407+
let schemas = schemas.into_iter().map(|(_, schema)| schema);
410408

411409
let schema = if self.skip_metadata() {
412410
Schema::try_merge(clear_metadata(schemas))

0 commit comments

Comments
 (0)