Skip to content

Commit b5b9759

Browse files
committed
Refactor ArrowArrayStream handling in PyDataFrame to improve memory management and encapsulation
1 parent 1a3fe05 commit b5b9759

1 file changed

Lines changed: 7 additions & 21 deletions

File tree

src/dataframe.rs

Lines changed: 7 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use std::collections::HashMap;
19-
use std::ffi::{c_void, CString};
19+
use std::ffi::CString;
2020
use std::sync::Arc;
2121

2222
use arrow::array::{new_null_array, RecordBatch, RecordBatchReader};
@@ -58,6 +58,8 @@ use crate::{
5858
expr::{sort_expr::PySortExpr, PyExpr},
5959
};
6060

61+
const ARROW_STREAM_NAME: &str = "arrow_array_stream";
62+
6163
// https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116
6264
// - we have not decided on the table_provider approach yet
6365
// this is an interim implementation
@@ -952,26 +954,10 @@ impl PyDataFrame {
952954
};
953955
let reader: Box<dyn RecordBatchReader + Send> = Box::new(reader);
954956

955-
let stream = Box::new(FFI_ArrowArrayStream::new(reader));
956-
let stream_ptr = Box::into_raw(stream);
957-
assert!(
958-
!stream_ptr.is_null(),
959-
"ArrowArrayStream pointer should never be null"
960-
);
961-
let stream_capsule_name = CString::new("arrow_array_stream").unwrap();
962-
unsafe {
963-
PyCapsule::new_bound_with_destructor(
964-
py,
965-
stream_ptr,
966-
Some(stream_capsule_name),
967-
|ptr: *mut FFI_ArrowArrayStream, _| {
968-
if !ptr.is_null() {
969-
unsafe { Box::from_raw(ptr) };
970-
}
971-
},
972-
)
973-
}
974-
.map_err(PyDataFusionError::from)
957+
let stream = FFI_ArrowArrayStream::new(reader);
958+
let name = CString::new(ARROW_STREAM_NAME).unwrap();
959+
let capsule = PyCapsule::new(py, stream, Some(name)).map_err(PyDataFusionError::from)?;
960+
Ok(capsule)
975961
}
976962

977963
fn execute_stream(&self, py: Python) -> PyDataFusionResult<PyRecordBatchStream> {

0 commit comments

Comments
 (0)