@@ -332,8 +332,8 @@ def _repr_html_(self) -> str:
332332
333333 @staticmethod
334334 def default_str_repr (
335- batches : list [pa . RecordBatch ],
336- schema : pa .Schema ,
335+ batches : list [RecordBatch ],
336+ schema : " pa.Schema" ,
337337 has_more : bool ,
338338 table_uuid : str | None = None ,
339339 ) -> str :
@@ -342,7 +342,13 @@ def default_str_repr(
342342 This method is used by the default formatter and implemented in Rust for
343343 performance reasons.
344344 """
345- return DataFrameInternal .default_str_repr (batches , schema , has_more , table_uuid )
345+ import pyarrow as pa
346+
347+ py_batches = [b .to_pyarrow () for b in batches ]
348+ schema = pa .schema (schema )
349+ return DataFrameInternal .default_str_repr (
350+ py_batches , schema , has_more , table_uuid
351+ )
346352
347353 def describe (self ) -> DataFrame :
348354 """Return the statistics for this DataFrame.
@@ -589,17 +595,17 @@ def tail(self, n: int = 5) -> DataFrame:
589595 """
590596 return DataFrame (self .df .limit (n , max (0 , self .count () - n )))
591597
def collect(self) -> list[RecordBatch]:
    """Execute this :py:class:`DataFrame` and gather the results in memory.

    Until ``collect`` is called, modifying a DataFrame only updates its plan
    and no computation is performed. Calling ``collect`` triggers the
    computation.

    Returns:
        List of :py:class:`RecordBatch` collected from the DataFrame.
    """
    # Wrap every batch handed back by the internal DataFrame.
    raw_batches = self.df.collect()
    return list(map(RecordBatch, raw_batches))
603609
604610 def cache (self ) -> DataFrame :
605611 """Cache the DataFrame as a memory table.
@@ -609,17 +615,19 @@ def cache(self) -> DataFrame:
609615 """
610616 return DataFrame (self .df .cache ())
611617
def collect_partitioned(self) -> list[list[RecordBatch]]:
    """Execute this DataFrame and collect the results of every partition.

    The returned :py:class:`RecordBatch` objects keep the input
    partitioning: the outer list has one entry per partition.

    Returns:
        List of list of :py:class:`RecordBatch` collected from the
        DataFrame.
    """
    partitions = self.df.collect_partitioned()
    # One inner list per partition, each raw batch wrapped in RecordBatch.
    return [list(map(RecordBatch, partition)) for partition in partitions]
623631
624632 def show (self , num : int = 20 ) -> None :
625633 """Execute the DataFrame and print the result to the console.
@@ -1047,13 +1055,15 @@ def execute_stream_partitioned(self) -> list[RecordBatchStream]:
10471055 streams = self .df .execute_stream_partitioned ()
10481056 return [RecordBatchStream (rbs ) for rbs in streams ]
10491057
def to_pandas(self) -> "pd.DataFrame":
    """Execute the :py:class:`DataFrame` and convert it into a Pandas DataFrame.

    The collected batches are converted to pyarrow record batches, assembled
    into a ``pyarrow.Table`` and converted with ``Table.to_pandas``.

    Returns:
        Pandas DataFrame.
    """
    import pyarrow as pa

    batches = [rb.to_pyarrow() for rb in self.collect()]
    if batches:
        table = pa.Table.from_batches(batches)
    else:
        # ``Table.from_batches`` cannot infer a schema from an empty list and
        # raises ValueError, so an empty result must supply the schema itself.
        # NOTE(review): assumes ``self.schema()`` exists on this class and
        # returns something ``pa.schema`` accepts -- confirm.
        table = pa.Table.from_batches([], schema=pa.schema(self.schema()))
    return table.to_pandas()
10571067
10581068 def to_pylist (self ) -> list [dict [str , Any ]]:
10591069 """Execute the :py:class:`DataFrame` and convert it into a list of dictionaries.
@@ -1071,13 +1081,15 @@ def to_pydict(self) -> dict[str, list[Any]]:
10711081 """
10721082 return self .df .to_pydict ()
10731083
def to_polars(self) -> "pl.DataFrame":
    """Execute the :py:class:`DataFrame` and convert it into a Polars DataFrame.

    The collected batches are converted to pyarrow record batches, assembled
    into a ``pyarrow.Table`` and passed to ``polars.from_arrow``.

    Returns:
        Polars DataFrame.
    """
    import polars as pl
    import pyarrow as pa

    batches = [rb.to_pyarrow() for rb in self.collect()]
    if batches:
        table = pa.Table.from_batches(batches)
    else:
        # ``Table.from_batches`` cannot infer a schema from an empty list and
        # raises ValueError, so an empty result must supply the schema itself.
        # NOTE(review): assumes ``self.schema()`` exists on this class and
        # returns something ``pa.schema`` accepts -- confirm.
        table = pa.Table.from_batches([], schema=pa.schema(self.schema()))
    return pl.from_arrow(table)
10811093
10821094 def count (self ) -> int :
10831095 """Return the total number of rows in this :py:class:`DataFrame`.
0 commit comments