Skip to content

Commit 460d2d0

Browse files
committed
test: add test for converting Arrow C stream to DataFrame
1 parent c4e4ed2 commit 460d2d0

1 file changed

Lines changed: 23 additions & 0 deletions

File tree

python/tests/test_dataframe.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1582,6 +1582,29 @@ def test_empty_to_arrow_table(df):
15821582
assert set(pyarrow_table.column_names) == {"a", "b", "c"}
15831583

15841584

1585+
def test_arrow_c_stream_to_table(monkeypatch):
    """Build a pyarrow Table from a DataFrame while ``collect`` is forbidden.

    A DataFrame is created from two single-row record batches, then
    ``DataFrame.collect`` is patched to raise so that any conversion path
    relying on it fails the test. The resulting table must equal a table
    built directly from the same batches, share the DataFrame's schema, and
    keep column ``"a"`` as two separate chunks.
    """
    session = SessionContext()

    # Two one-row batches for column "a"; each is handed to the context in
    # its own inner list.
    source_batches = [
        pa.record_batch([pa.array([value])], names=["a"]) for value in (1, 2)
    ]
    df = session.create_dataframe([[batch] for batch in source_batches])

    # Any call to DataFrame.collect from here on is treated as a failure.
    def _forbid_collect(self):  # pragma: no cover - failure path
        reason = "collect should not be called"
        raise AssertionError(reason)

    monkeypatch.setattr(DataFrame, "collect", _forbid_collect)

    actual_table = pa.Table.from_batches(df)
    reference_table = pa.Table.from_batches(source_batches)

    assert actual_table.equals(reference_table)
    assert actual_table.schema == df.schema()
    # The two input batches must not have been merged into a single chunk.
    assert actual_table.column("a").num_chunks == 2
1606+
1607+
15851608
def test_to_pylist(df):
15861609
# Convert datafusion dataframe to Python list
15871610
pylist = df.to_pylist()

0 commit comments

Comments
 (0)