
Commit ab6a810

revert branch UNPICK
1 parent f02114a · commit ab6a810

5 files changed

Lines changed: 28 additions & 122 deletions


docs/source/user-guide/dataframe/index.rst

Lines changed: 1 addition & 22 deletions
@@ -145,31 +145,10 @@ To materialize the results of your DataFrame operations:

     # Display results
     df.show() # Print tabular format to console
-
+
     # Count rows
     count = df.count()

-PyArrow Streaming
------------------
-
-DataFusion DataFrames implement the ``__arrow_c_stream__`` protocol, enabling
-zero-copy streaming into libraries like `PyArrow <https://arrow.apache.org/>`_.
-Earlier versions eagerly converted the entire DataFrame when exporting to
-PyArrow, which could exhaust memory on large datasets. With streaming, batches
-are produced lazily so you can process arbitrarily large results without
-out-of-memory errors.
-
-.. code-block:: python
-
-    import pyarrow as pa
-
-    # Create a PyArrow RecordBatchReader without materializing all batches
-    reader = pa.RecordBatchReader._import_from_c(df.__arrow_c_stream__())
-    for batch in reader:
-        ...  # process each batch as it is produced
-
-See :doc:`../io/arrow` for additional details on the Arrow interface.
-
 HTML Rendering
 --------------
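
Note that this commit only removes the documentation of the lazy streaming behaviour; the `__arrow_c_stream__` method itself remains (see src/dataframe.rs below) and now materializes the result eagerly. A minimal usage sketch of the export path that is still available, assuming PyArrow 14.0+ where `pa.table()` accepts any object implementing the Arrow PyCapsule stream interface:

    import pyarrow as pa
    from datafusion import SessionContext

    ctx = SessionContext()
    df = ctx.sql("SELECT 1 AS a")

    # Still works after the revert, but the batches behind __arrow_c_stream__
    # are now collected up front rather than produced lazily.
    table = pa.table(df)
    print(table)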

python/tests/test_dataframe.py

Lines changed: 0 additions & 23 deletions
@@ -1582,29 +1582,6 @@ def test_empty_to_arrow_table(df):
     assert set(pyarrow_table.column_names) == {"a", "b", "c"}


-def test_arrow_c_stream_to_table(monkeypatch):
-    ctx = SessionContext()
-
-    # Create a DataFrame with two separate record batches
-    batch1 = pa.record_batch([pa.array([1])], names=["a"])
-    batch2 = pa.record_batch([pa.array([2])], names=["a"])
-    df = ctx.create_dataframe([[batch1], [batch2]])
-
-    # Fail if the DataFrame is pre-collected
-    def fail_collect(self):  # pragma: no cover - failure path
-        msg = "collect should not be called"
-        raise AssertionError(msg)
-
-    monkeypatch.setattr(DataFrame, "collect", fail_collect)
-
-    table = pa.Table.from_batches(df)
-    expected = pa.Table.from_batches([batch1, batch2])
-
-    assert table.equals(expected)
-    assert table.schema == df.schema()
-    assert table.column("a").num_chunks == 2
-
-
 def test_to_pylist(df):
     # Convert datafusion dataframe to Python list
     pylist = df.to_pylist()
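
The deleted test pinned down that the Arrow C stream export never calls `DataFrame.collect`; with this revert that guarantee is intentionally dropped. A rough sketch of the same round trip without the monkeypatch guard, for illustration only (not a test added by this commit; it assumes a PyArrow version whose `pa.table()` understands `__arrow_c_stream__` objects):

    import pyarrow as pa
    from datafusion import SessionContext

    ctx = SessionContext()
    batch1 = pa.record_batch([pa.array([1])], names=["a"])
    batch2 = pa.record_batch([pa.array([2])], names=["a"])
    df = ctx.create_dataframe([[batch1], [batch2]])

    # The data still round-trips, but it is now collected eagerly inside
    # __arrow_c_stream__ instead of being streamed batch by batch.
    table = pa.table(df)
    assert table == pa.Table.from_batches([batch1, batch2])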

src/context.rs

Lines changed: 7 additions & 2 deletions
@@ -45,7 +45,7 @@ use crate::udaf::PyAggregateUDF;
 use crate::udf::PyScalarUDF;
 use crate::udtf::PyTableFunction;
 use crate::udwf::PyWindowUDF;
-use crate::utils::{get_global_ctx, spawn_stream, validate_pycapsule, wait_for_future};
+use crate::utils::{get_global_ctx, get_tokio_runtime, validate_pycapsule, wait_for_future};
 use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef};
 use datafusion::arrow::pyarrow::PyArrowType;
 use datafusion::arrow::record_batch::RecordBatch;
@@ -74,6 +74,7 @@ use datafusion_ffi::catalog_provider::{FFI_CatalogProvider, ForeignCatalogProvider};
 use datafusion_ffi::table_provider::{FFI_TableProvider, ForeignTableProvider};
 use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple, PyType};
 use pyo3::IntoPyObjectExt;
+use tokio::task::JoinHandle;

 /// Configuration options for a SessionContext
 #[pyclass(name = "SessionConfig", module = "datafusion", subclass)]
@@ -1131,8 +1132,12 @@ impl PySessionContext {
         py: Python,
     ) -> PyDataFusionResult<PyRecordBatchStream> {
         let ctx: TaskContext = TaskContext::from(&self.ctx.state());
+        // create a Tokio runtime to run the async code
+        let rt = &get_tokio_runtime().0;
         let plan = plan.plan.clone();
-        let stream = spawn_stream(py, async move { plan.execute(part, Arc::new(ctx)) })?;
+        let fut: JoinHandle<datafusion::common::Result<SendableRecordBatchStream>> =
+            rt.spawn(async move { plan.execute(part, Arc::new(ctx)) });
+        let stream = wait_for_future(py, async { fut.await.map_err(to_datafusion_err) })???;
         Ok(PyRecordBatchStream::new(stream))
     }
 }

src/dataframe.rs

Lines changed: 17 additions & 60 deletions
@@ -19,13 +19,13 @@ use std::collections::HashMap;
 use std::ffi::CString;
 use std::sync::Arc;

-use arrow::array::{new_null_array, RecordBatch, RecordBatchReader};
+use arrow::array::{new_null_array, RecordBatch, RecordBatchIterator, RecordBatchReader};
 use arrow::compute::can_cast_types;
 use arrow::error::ArrowError;
 use arrow::ffi::FFI_ArrowSchema;
 use arrow::ffi_stream::FFI_ArrowArrayStream;
 use arrow::pyarrow::FromPyArrow;
-use datafusion::arrow::datatypes::{Schema, SchemaRef};
+use datafusion::arrow::datatypes::Schema;
 use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow};
 use datafusion::arrow::util::pretty;
 use datafusion::common::UnnestOptions;
@@ -42,7 +42,7 @@ use pyo3::exceptions::PyValueError;
 use pyo3::prelude::*;
 use pyo3::pybacked::PyBackedStr;
 use pyo3::types::{PyCapsule, PyList, PyTuple, PyTupleMethods};
-use tokio::{runtime::Handle, task::JoinHandle};
+use tokio::task::JoinHandle;

 use crate::catalog::PyTable;
 use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionError};
@@ -51,8 +51,7 @@ use crate::physical_plan::PyExecutionPlan;
 use crate::record_batch::PyRecordBatchStream;
 use crate::sql::logical::PyLogicalPlan;
 use crate::utils::{
-    get_tokio_runtime, is_ipython_env, py_obj_to_scalar_value, spawn_stream, validate_pycapsule,
-    wait_for_future,
+    get_tokio_runtime, is_ipython_env, py_obj_to_scalar_value, validate_pycapsule, wait_for_future,
 };
 use crate::{
     errors::PyDataFusionResult,
@@ -354,47 +353,6 @@ impl PyDataFrame {
         Ok(html_str)
     }
 }
-/// Synchronous wrapper around a [`SendableRecordBatchStream`] used for
-/// the `__arrow_c_stream__` implementation.
-///
-/// It uses `runtime.block_on` to consume the underlying async stream,
-/// providing synchronous iteration. When a `projection` is set, each
-/// batch is converted via `record_batch_into_schema` to apply schema
-/// changes per batch.
-struct DataFrameStreamReader {
-    stream: SendableRecordBatchStream,
-    runtime: Handle,
-    schema: SchemaRef,
-    projection: Option<SchemaRef>,
-}
-
-impl Iterator for DataFrameStreamReader {
-    type Item = Result<RecordBatch, ArrowError>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match self.runtime.block_on(self.stream.next()) {
-            Some(Ok(batch)) => {
-                let batch = if let Some(ref schema) = self.projection {
-                    match record_batch_into_schema(batch, schema.as_ref()) {
-                        Ok(b) => b,
-                        Err(e) => return Some(Err(e)),
-                    }
-                } else {
-                    batch
-                };
-                Some(Ok(batch))
-            }
-            Some(Err(e)) => Some(Err(ArrowError::ExternalError(Box::new(e)))),
-            None => None,
-        }
-    }
-}
-
-impl RecordBatchReader for DataFrameStreamReader {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}

 #[pymethods]
 impl PyDataFrame {
@@ -921,12 +879,8 @@ impl PyDataFrame {
         py: Python<'py>,
         requested_schema: Option<Bound<'py, PyCapsule>>,
     ) -> PyDataFusionResult<Bound<'py, PyCapsule>> {
-        let rt = &get_tokio_runtime().0;
-        let df = self.df.as_ref().clone();
-        let stream = spawn_stream(py, async move { df.execute_stream().await })?;
-
+        let mut batches = wait_for_future(py, self.df.as_ref().clone().collect())??;
         let mut schema: Schema = self.df.schema().to_owned().into();
-        let mut projection: Option<SchemaRef> = None;

         if let Some(schema_capsule) = requested_schema {
             validate_pycapsule(&schema_capsule, "arrow_schema")?;
@@ -935,17 +889,16 @@
             let desired_schema = Schema::try_from(schema_ptr)?;

             schema = project_schema(schema, desired_schema)?;
-            projection = Some(Arc::new(schema.clone()));
+
+            batches = batches
+                .into_iter()
+                .map(|record_batch| record_batch_into_schema(record_batch, &schema))
+                .collect::<Result<Vec<RecordBatch>, ArrowError>>()?;
         }

-        let schema_ref = projection.clone().unwrap_or_else(|| Arc::new(schema));
+        let batches_wrapped = batches.into_iter().map(Ok);

-        let reader = DataFrameStreamReader {
-            stream,
-            runtime: rt.handle().clone(),
-            schema: schema_ref,
-            projection,
-        };
+        let reader = RecordBatchIterator::new(batches_wrapped, Arc::new(schema));
         let reader: Box<dyn RecordBatchReader + Send> = Box::new(reader);

         let ffi_stream = FFI_ArrowArrayStream::new(reader);
@@ -954,8 +907,12 @@
     }

     fn execute_stream(&self, py: Python) -> PyDataFusionResult<PyRecordBatchStream> {
+        // create a Tokio runtime to run the async code
+        let rt = &get_tokio_runtime().0;
         let df = self.df.as_ref().clone();
-        let stream = spawn_stream(py, async move { df.execute_stream().await })?;
+        let fut: JoinHandle<datafusion::common::Result<SendableRecordBatchStream>> =
+            rt.spawn(async move { df.execute_stream().await });
+        let stream = wait_for_future(py, async { fut.await.map_err(to_datafusion_err) })???;
         Ok(PyRecordBatchStream::new(stream))
     }
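
The hunks above make `__arrow_c_stream__` eager again while keeping `execute_stream` as the incremental path returning a `PyRecordBatchStream`. A minimal sketch of that streaming alternative from Python, assuming the bindings' `DataFrame.execute_stream()` and a `RecordBatchStream` that can be iterated to yield batches convertible via `to_pyarrow()` (neither is touched on the Python side by this commit):

    from datafusion import SessionContext

    ctx = SessionContext()
    df = ctx.sql("SELECT 1 AS a")

    # Batches are produced as the plan executes rather than collected up front.
    for batch in df.execute_stream():
        print(batch.to_pyarrow().num_rows)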

src/utils.rs

Lines changed: 3 additions & 15 deletions
@@ -17,17 +17,16 @@

 use crate::{
     common::data_type::PyScalarValue,
-    errors::{to_datafusion_err, PyDataFusionError, PyDataFusionResult},
+    errors::{PyDataFusionError, PyDataFusionResult},
     TokioRuntime,
 };
 use datafusion::{
-    common::ScalarValue, execution::context::SessionContext, execution::SendableRecordBatchStream,
-    logical_expr::Volatility,
+    common::ScalarValue, execution::context::SessionContext, logical_expr::Volatility,
 };
 use pyo3::prelude::*;
 use pyo3::{exceptions::PyValueError, types::PyCapsule};
 use std::{future::Future, sync::OnceLock, time::Duration};
-use tokio::{runtime::Runtime, task::JoinHandle, time::sleep};
+use tokio::{runtime::Runtime, time::sleep};
 /// Utility to get the Tokio Runtime from Python
 #[inline]
 pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime {
@@ -85,17 +84,6 @@ where
     })
 }

-/// Spawn a [`SendableRecordBatchStream`] on the Tokio runtime and wait for completion
-/// while respecting Python signal handling.
-pub(crate) fn spawn_stream<F>(py: Python, fut: F) -> PyDataFusionResult<SendableRecordBatchStream>
-where
-    F: Future<Output = datafusion::common::Result<SendableRecordBatchStream>> + Send + 'static,
-{
-    let rt = &get_tokio_runtime().0;
-    let handle: JoinHandle<datafusion::common::Result<SendableRecordBatchStream>> = rt.spawn(fut);
-    wait_for_future(py, async { handle.await.map_err(to_datafusion_err) })???
-}
-
 pub(crate) fn parse_volatility(value: &str) -> PyDataFusionResult<Volatility> {
     Ok(match value {
         "immutable" => Volatility::Immutable,

0 commit comments
