|
25 | 25 | from typing import ( |
26 | 26 | TYPE_CHECKING, |
27 | 27 | Any, |
| 28 | + AsyncIterator, |
28 | 29 | Iterable, |
29 | 30 | Iterator, |
30 | 31 | Literal, |
|
43 | 44 | from datafusion._internal import ParquetWriterOptions as ParquetWriterOptionsInternal |
44 | 45 | from datafusion.expr import Expr, SortExpr, sort_or_default |
45 | 46 | from datafusion.plan import ExecutionPlan, LogicalPlan |
46 | | -from datafusion.record_batch import RecordBatchStream |
| 47 | +from datafusion.record_batch import ( |
| 48 | + RecordBatch, |
| 49 | + RecordBatchStream, |
| 50 | + to_record_batch_stream, |
| 51 | +) |
47 | 52 |
|
48 | 53 | if TYPE_CHECKING: |
49 | 54 | import pathlib |
@@ -1123,15 +1128,20 @@ def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: |
1123 | 1128 | return self.df.__arrow_c_stream__(requested_schema) |
1124 | 1129 |
|
1125 | 1130 | def __iter__(self) -> Iterator[RecordBatch]: |
1126 | | - """Yield DataFusion record batches without materializing results. |
| 1131 | + """Yield record batches from the DataFrame without materializing results. |
| 1132 | +
|
| 1133 | + This implementation delegates to :func:`to_record_batch_stream`, which |
| 1134 | + executes the DataFrame and returns a :class:`RecordBatchStream`. |
| 1135 | + """ |
| 1136 | + return to_record_batch_stream(self).__iter__() |
| 1137 | + |
| 1138 | + def __aiter__(self) -> AsyncIterator[RecordBatch]: |
| 1139 | + """Asynchronously yield record batches from the DataFrame. |
1127 | 1140 |
|
1128 | | - Batches are produced lazily using DataFusion's partitioned streaming |
1129 | | - APIs so ``collect`` is never invoked. Each returned batch exposes the |
1130 | | - Arrow C data interface and can be consumed by downstream libraries that |
1131 | | - support ``__arrow_c_array__``. |
| 1141 | + This delegates to :func:`to_record_batch_stream` to obtain a |
| 1142 | + :class:`RecordBatchStream` and returns its asynchronous iterator. |
1132 | 1143 | """ |
1133 | | - for stream in self.execute_stream_partitioned(): |
1134 | | - yield from stream |
| 1144 | + return to_record_batch_stream(self).__aiter__() |
1135 | 1145 |
|
1136 | 1146 | def transform(self, func: Callable[..., DataFrame], *args: Any) -> DataFrame: |
1137 | 1147 | """Apply a function to the current DataFrame which returns another DataFrame. |
|
0 commit comments