2525from typing import (
2626 TYPE_CHECKING ,
2727 Any ,
28- AsyncIterator ,
2928 Iterable ,
30- Iterator ,
3129 Literal ,
3230 Optional ,
3331 Union ,
4442from datafusion ._internal import ParquetWriterOptions as ParquetWriterOptionsInternal
4543from datafusion .expr import Expr , SortExpr , sort_or_default
4644from datafusion .plan import ExecutionPlan , LogicalPlan
47- from datafusion .record_batch import RecordBatch , RecordBatchStream
45+ from datafusion .record_batch import RecordBatchStream
4846
4947if TYPE_CHECKING :
5048 import pathlib
@@ -298,9 +296,6 @@ def __init__(
298296class DataFrame :
299297 """Two dimensional table representation of data.
300298
301- DataFrame objects are iterable; iterating over a DataFrame yields
302- :class:`datafusion.RecordBatch` instances lazily.
303-
304299 See :ref:`user_guide_concepts` in the online documentation for more information.
305300 """
306301
@@ -317,7 +312,7 @@ def into_view(self) -> pa.Table:
317312 return self .df .into_view ()
318313
319314 def __getitem__ (self , key : str | list [str ]) -> DataFrame :
320- """Return a new :py:class: `DataFrame` with the specified column or columns.
315+ """Return a new :py:class`DataFrame` with the specified column or columns.
321316
322317 Args:
323318 key: Column name or list of column names to select.
@@ -1047,18 +1042,6 @@ def execute_stream_partitioned(self) -> list[RecordBatchStream]:
10471042 streams = self .df .execute_stream_partitioned ()
10481043 return [RecordBatchStream (rbs ) for rbs in streams ]
10491044
1050- @deprecated ("Use execute_stream() instead" )
1051- def to_record_batch_stream (self ) -> RecordBatchStream :
1052- """Return a :py:class:`RecordBatchStream` over this DataFrame's results.
1053-
1054- This method is deprecated. Use :py:meth:`execute_stream` instead.
1055-
1056- Returns:
1057- A ``RecordBatchStream`` representing the lazily generated record
1058- batches for this DataFrame.
1059- """
1060- return self .execute_stream ()
1061-
10621045 def to_pandas (self ) -> pd .DataFrame :
10631046 """Execute the :py:class:`DataFrame` and convert it into a Pandas DataFrame.
10641047
@@ -1122,33 +1105,21 @@ def unnest_columns(self, *columns: str, preserve_nulls: bool = True) -> DataFram
11221105 return DataFrame (self .df .unnest_columns (columns , preserve_nulls = preserve_nulls ))
11231106
11241107 def __arrow_c_stream__ (self , requested_schema : object | None = None ) -> object :
1125- """Export the DataFrame as an Arrow C Stream.
1108+ """Export an Arrow PyCapsule Stream.
11261109
1127- The DataFrame is executed using DataFusion's streaming APIs and exposed via
1128- Arrow's C Stream interface. Record batches are produced incrementally, so the
1129- full result set is never materialized in memory. When ``requested_schema`` is
1130- provided, only straightforward projections such as column selection or
1131- reordering are applied.
 1110+ This will execute and collect the DataFrame. We will attempt to respect the
 1111+ requested schema, but only trivial transformations will be applied, such as
 1112+ returning only the fields listed in the requested schema when their data
 1113+ types match those in the DataFrame.
11321114
11331115 Args:
11341116 requested_schema: Attempt to provide the DataFrame using this schema.
11351117
11361118 Returns:
1137- Arrow PyCapsule object representing an ``ArrowArrayStream`` .
1119+ Arrow PyCapsule object.
11381120 """
1139- # ``DataFrame.__arrow_c_stream__`` in the Rust extension leverages
1140- # ``execute_stream_partitioned`` under the hood to stream batches while
1141- # preserving the original partition order.
11421121 return self .df .__arrow_c_stream__ (requested_schema )
11431122
1144- def __iter__ (self ) -> Iterator [RecordBatch ]:
1145- """Return an iterator over this DataFrame's record batches."""
1146- return iter (self .execute_stream ())
1147-
1148- def __aiter__ (self ) -> AsyncIterator [RecordBatch ]:
1149- """Return an async iterator over this DataFrame's record batches."""
1150- return self .execute_stream ().__aiter__ ()
1151-
11521123 def transform (self , func : Callable [..., DataFrame ], * args : Any ) -> DataFrame :
11531124 """Apply a function to the current DataFrame which returns another DataFrame.
11541125
0 commit comments