@@ -496,6 +496,81 @@ duckdb::pyarrow::RecordBatchReader DuckDBPyResult::FetchRecordBatchReader(idx_t
496496 return py::cast<duckdb::pyarrow::RecordBatchReader>(record_batch_reader);
497497}
498498
499+ // Wraps pre-built Arrow arrays from an ArrowQueryResult into an ArrowArrayStream.
500+ // This avoids the double-materialization that happens when using ResultArrowArrayStreamWrapper
501+ // with an ArrowQueryResult (which throws NotImplementedException from FetchInternal).
502+ struct ArrowQueryResultStreamWrapper {
503+ ArrowQueryResultStreamWrapper (unique_ptr<QueryResult> result_p) : result(std::move(result_p)), index(0 ) {
504+ auto &arrow_result = result->Cast <ArrowQueryResult>();
505+ arrays = arrow_result.ConsumeArrays ();
506+ types = result->types ;
507+ names = result->names ;
508+ client_properties = result->client_properties ;
509+
510+ stream.private_data = this ;
511+ stream.get_schema = GetSchema;
512+ stream.get_next = GetNext;
513+ stream.release = Release;
514+ stream.get_last_error = GetLastError;
515+ }
516+
517+ static int GetSchema (ArrowArrayStream *stream, ArrowSchema *out) {
518+ if (!stream->release ) {
519+ return -1 ;
520+ }
521+ auto self = reinterpret_cast <ArrowQueryResultStreamWrapper *>(stream->private_data );
522+ out->release = nullptr ;
523+ try {
524+ ArrowConverter::ToArrowSchema (out, self->types , self->names , self->client_properties );
525+ } catch (std::runtime_error &e) {
526+ self->last_error = e.what ();
527+ return -1 ;
528+ }
529+ return 0 ;
530+ }
531+
532+ static int GetNext (ArrowArrayStream *stream, ArrowArray *out) {
533+ if (!stream->release ) {
534+ return -1 ;
535+ }
536+ auto self = reinterpret_cast <ArrowQueryResultStreamWrapper *>(stream->private_data );
537+ if (self->index >= self->arrays .size ()) {
538+ out->release = nullptr ;
539+ return 0 ;
540+ }
541+ *out = self->arrays [self->index ]->arrow_array ;
542+ self->arrays [self->index ]->arrow_array .release = nullptr ;
543+ self->index ++;
544+ return 0 ;
545+ }
546+
547+ static void Release (ArrowArrayStream *stream) {
548+ if (!stream || !stream->release ) {
549+ return ;
550+ }
551+ stream->release = nullptr ;
552+ delete reinterpret_cast <ArrowQueryResultStreamWrapper *>(stream->private_data );
553+ }
554+
555+ static const char *GetLastError (ArrowArrayStream *stream) {
556+ if (!stream->release ) {
557+ return " stream was released" ;
558+ }
559+ auto self = reinterpret_cast <ArrowQueryResultStreamWrapper *>(stream->private_data );
560+ return self->last_error .c_str ();
561+ }
562+
563+ ArrowArrayStream stream;
564+ unique_ptr<QueryResult> result;
565+ vector<unique_ptr<ArrowArrayWrapper>> arrays;
566+ vector<LogicalType> types;
567+ vector<string> names;
568+ ClientProperties client_properties;
569+ idx_t index;
570+ string last_error;
571+ };
572+
573+ // Destructor for capsules that own a heap-allocated ArrowArrayStream (slow path).
499574static void ArrowArrayStreamPyCapsuleDestructor (PyObject *object) {
500575 auto data = PyCapsule_GetPointer (object, " arrow_array_stream" );
501576 if (!data) {
@@ -508,7 +583,28 @@ static void ArrowArrayStreamPyCapsuleDestructor(PyObject *object) {
508583 delete stream;
509584}
510585
586+ // Destructor for capsules pointing at an embedded ArrowArrayStream (fast path).
587+ // The stream is owned by an ArrowQueryResultStreamWrapper; Release() frees both.
588+ static void ArrowArrayStreamEmbeddedPyCapsuleDestructor (PyObject *object) {
589+ auto data = PyCapsule_GetPointer (object, " arrow_array_stream" );
590+ if (!data) {
591+ return ;
592+ }
593+ auto stream = reinterpret_cast <ArrowArrayStream *>(data);
594+ if (stream->release ) {
595+ stream->release (stream);
596+ }
597+ }
598+
511599py::object DuckDBPyResult::FetchArrowCapsule (idx_t rows_per_batch) {
600+ if (result && result->type == QueryResultType::ARROW_RESULT) {
601+ // Fast path: yield pre-built Arrow arrays directly.
602+ // The wrapper is heap-allocated; Release() deletes it via private_data.
603+ // The capsule points at the embedded stream field — no separate heap allocation needed.
604+ auto wrapper = new ArrowQueryResultStreamWrapper (std::move (result));
605+ return py::capsule (&wrapper->stream , " arrow_array_stream" , ArrowArrayStreamEmbeddedPyCapsuleDestructor);
606+ }
607+ // Existing slow path for MaterializedQueryResult / StreamQueryResult
512608 auto stream_p = FetchArrowArrayStream (rows_per_batch);
513609 auto stream = new ArrowArrayStream ();
514610 *stream = stream_p;
0 commit comments