What happens?
DuckDB Python fails to read a valid empty Avro container (schema-only, 0 records). Reading a 1-record Avro file works. The same empty file appears to be readable in the DuckDB CLI, but reading it from Python fails with `Invalid Input Error: Cannot read file block count: ...`.
To Reproduce
#!/usr/bin/env python3
"""
Repro: DuckDB Python read_avro fails on empty Avro container.
Requires:
- fastavro
- duckdb python package installed
"""
import tempfile
from pathlib import Path
import duckdb
def _print_avro_extension_info(con: "duckdb.DuckDBPyConnection") -> None:
    """Print metadata about the avro extension, on a best-effort basis.

    Queries ``duckdb_extensions()`` for the row named ``'avro'`` and prints
    its version, install state, origin and path. Any failure is reported and
    swallowed so that a problem with this diagnostic query cannot mask the
    actual reproduction.
    """
    try:
        row = con.execute(
            "SELECT extension_version, installed, installed_from, install_path "
            "FROM duckdb_extensions() WHERE extension_name = 'avro'"
        ).fetchone()
        if row:
            ext_version, installed, installed_from, install_path = row
            print(
                "DuckDB avro extension: "
                f"version={ext_version}, installed={installed}, "
                f"from={installed_from}, path={install_path}"
            )
        else:
            print("DuckDB avro extension: not listed")
    except Exception as e:
        # Deliberately broad: this is diagnostic output only.
        print(f"Could not read DuckDB avro extension version: {e}")


def _try_read_avro(con: "duckdb.DuckDBPyConnection", path: Path) -> None:
    """Count the rows of *path* via ``read_avro`` and print OK or FAIL.

    The exception is printed rather than re-raised so that the remaining
    files are still attempted after a failure.
    """
    label = path.name
    try:
        print(f"Reading {label}...")
        con.execute("SELECT count(*) FROM read_avro(?)", [str(path)]).fetchall()
        print(f"OK: {label}")
    except Exception as e:
        # Deliberately broad: the error text itself is the repro's result.
        print(f"FAIL: {label}:", e)


def main() -> None:
    """Write a one-record and a zero-record Avro file, then read both.

    Both files are written with ``fastavro.writer`` using the same
    single-field record schema and are then read back through DuckDB's
    ``read_avro``. The outcome of each read is printed instead of raised.

    Raises:
        RuntimeError: if ``fastavro`` cannot be imported.
    """
    # The temporary directory is not removed: its path is printed at the end
    # so the generated files can be inspected afterwards.
    tmpdir = Path(tempfile.mkdtemp(prefix="duckdb_avro_empty_"))
    try:
        empty_avro = tmpdir / "empty.avro"
        one_avro = tmpdir / "one.avro"
        schema = {
            "type": "record",
            "name": "TestRecord",
            "fields": [
                {"name": "id", "type": "string"}
            ]
        }

        try:
            import fastavro  # type: ignore
        except ImportError as e:
            raise RuntimeError("fastavro is required for this repro script") from e

        # Non-empty file via fastavro
        with open(one_avro, "wb") as out_f:
            fastavro.writer(out_f, schema, [{"id": "1"}])

        # Empty container via fastavro (same schema, no records)
        with open(empty_avro, "wb") as out_f:
            fastavro.writer(out_f, schema, [])

        print(f"DuckDB version: {duckdb.__version__}")

        con = duckdb.connect()
        try:
            # A single try/finally covers every use of the connection, so it
            # is closed no matter which step exits early.
            _print_avro_extension_info(con)
            _try_read_avro(con, one_avro)
            _try_read_avro(con, empty_avro)
        finally:
            con.close()
    finally:
        print(f"Artifacts in: {tmpdir}")


if __name__ == "__main__":
    main()
Output:
DuckDB version: 1.2.2
DuckDB avro extension: version=ed18629, installed=True, from=core, path=/Users/sagi/.duckdb/extensions/v1.2.2/osx_arm64/avro.duckdb_extension
Reading one.avro...
OK: one.avro
Reading empty.avro...
FAIL: empty.avro: Invalid Input Error: Cannot read file block count: Cannot read 1 bytes from memory bufferCannot read 16 bytes from memory bufferCannot read file block count: Cannot read 1 bytes from memory buffer
Artifacts in: /var/folders/gn/pzxmt7f93zx0q1c66vybfd740000gn/T/duckdb_avro_empty_vijp14hz
OS:
OS X 15.7.3
DuckDB Package Version:
1.2.2
Python Version:
3.9.6
Full Name:
Sagi Bashari
Affiliation:
MyHeritage
What is the latest build you tested with? If possible, we recommend testing with the latest nightly build.
I have tested with a stable release
Did you include all relevant data sets for reproducing the issue?
Yes
Did you include all code required to reproduce the issue?
Did you include all relevant configuration to reproduce the issue?
What happens?
DuckDB Python fails to read a valid empty Avro container (schema-only, 0 records). Reading a 1-record Avro file works. The same empty file appears to be readable in the DuckDB CLI, but reading it from Python fails with `Invalid Input Error: Cannot read file block count: ...`.
To Reproduce
Output:
OS:
OS X 15.7.3
DuckDB Package Version:
1.2.2
Python Version:
3.9.6
Full Name:
Sagi Bashari
Affiliation:
MyHeritage
What is the latest build you tested with? If possible, we recommend testing with the latest nightly build.
I have tested with a stable release
Did you include all relevant data sets for reproducing the issue?
Yes
Did you include all code required to reproduce the issue?
Did you include all relevant configuration to reproduce the issue?