Skip to content

Commit cc274da

Browse files
committed
Add regression test for Python UDF return value refcount leak.
1 parent 7acec08 commit cc274da

1 file changed

Lines changed: 38 additions & 0 deletions

File tree

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import sys
2+
import gc
3+
import platform
4+
import duckdb
5+
import pytest
6+
7+
8+
@pytest.mark.parametrize("rows, iters", [(1000, 20)])
def test_python_scalar_udf_return_value_refcount_does_not_leak(rows, iters):
    """Check that values returned from a Python scalar UDF do not leak references.

    The UDF always returns the very same ``bytes`` object, so any reference
    that is taken per returned row and never released shows up as growth in
    ``sys.getrefcount`` of that object after the queries have run.

    Args:
        rows: Number of rows produced by each query (one UDF call per row).
        iters: Number of times the query is executed.
    """
    if platform.python_implementation() != "CPython":
        pytest.skip("refcount-based test requires CPython")

    # Large-ish bytes to mimic the reported issue.
    payload = b"processed_data_" + b"x" * 8192

    def udf_bytes(_):
        # Always return the exact same object so we can track its refcount.
        return payload

    # Baseline refcount (note: getrefcount adds a temporary ref; the same
    # temporary is present in the later measurement, so it cancels out).
    baseline = sys.getrefcount(payload)

    con = duckdb.connect()
    try:
        con.create_function("udf_bytes", udf_bytes, ["BIGINT"], "VARCHAR")

        for _ in range(iters):
            con.execute(f"SELECT udf_bytes(range) FROM range({rows})")
            res = con.fetchall()
            # Drop the result ASAP so we don't keep any refs alive in Python.
            del res
            gc.collect()

        # Re-check refcount. In the buggy version this grows by rows*iters (huge).
        after = sys.getrefcount(payload)

        # Allow a tiny tolerance for transient references/caches.
        # In the presence of the leak, this will be thousands+ higher.
        assert after <= baseline + 10, (baseline, after)
    finally:
        # Close the connection even when the assertion (or a query) fails,
        # so a failing run does not leave the connection open.
        con.close()

0 commit comments

Comments
 (0)