Skip to content

Commit 4315493

Browse files
fix: make WriteBuffer.write_padded emit a single bytearray.extend
Previously write_padded called bytearray.extend() twice — once for the payload, once for the NUL padding. Under concurrent access the interpreter could switch threads between the two extends, interleaving another thread's payload with the first thread's padding. Output words like b'BBBBBAAA' were observable within a few thousand iterations at a tight switch interval. Build the padded bytes locally and emit a single extend so no thread can slip between payload and padding. This does not make WriteBuffer thread-safe in any strong sense (the single-owner contract from issue 021 still applies), but it closes the torn payload/pad split that was visible even to callers who believed "bytearray.extend is atomic". Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9f412b6 commit 4315493

2 files changed

Lines changed: 57 additions & 3 deletions

File tree

src/dqlitewire/buffer.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,20 @@ def write(self, data: bytes) -> None:
1515
self._data.extend(data)
1616

1717
def write_padded(self, data: bytes) -> None:
18-
"""Append data with padding to word boundary."""
19-
self._data.extend(data)
18+
"""Append data with padding to word boundary.
19+
20+
The padded bytes are built locally and emitted via a single
21+
``bytearray.extend`` so that under accidental concurrent misuse
22+
(see issue 021 for the single-owner contract) two threads'
23+
payloads and padding cannot interleave. This still does not make
24+
``WriteBuffer`` thread-safe in any strong sense, but it removes
25+
the torn payload/pad split that used to be visible to callers.
26+
"""
2027
remainder = len(data) % WORD_SIZE
2128
if remainder:
22-
self._data.extend(b"\x00" * (WORD_SIZE - remainder))
29+
self._data.extend(data + b"\x00" * (WORD_SIZE - remainder))
30+
else:
31+
self._data.extend(data)
2332

2433
def getvalue(self) -> bytes:
2534
"""Get buffer contents."""

tests/test_buffer.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,51 @@ def test_clear(self) -> None:
4141
assert len(buf) == 0
4242
assert buf.getvalue() == b""
4343

44+
def test_write_padded_no_interleaved_tearing_under_contention(self) -> None:
45+
"""Regression for issue 034.
46+
47+
``write_padded`` used to do two separate ``bytearray.extend`` calls
48+
(one for the payload, one for the NUL padding). Under concurrent
49+
access a thread switch between the two calls could land another
50+
thread's payload bytes inside the first thread's padding window,
51+
producing a torn word like ``b'BBBBBAAA'`` instead of a clean
52+
``b'AAAAA\\x00\\x00\\x00'`` or ``b'BBBBB\\x00\\x00\\x00'``.
53+
54+
The fix is to build the padded bytes locally and emit a single
55+
extend, so no thread can interleave between the payload and its
56+
padding. This test drives the race at a tight switch interval and
57+
asserts every output word is one of the two expected shapes.
58+
"""
59+
import sys
60+
import threading
61+
62+
old_interval = sys.getswitchinterval()
63+
sys.setswitchinterval(0.0000001)
64+
try:
65+
buf = WriteBuffer()
66+
67+
def worker(payload: bytes) -> None:
68+
for _ in range(5000):
69+
buf.write_padded(payload)
70+
71+
t1 = threading.Thread(target=worker, args=(b"A" * 5,))
72+
t2 = threading.Thread(target=worker, args=(b"B" * 5,))
73+
t1.start()
74+
t2.start()
75+
t1.join()
76+
t2.join()
77+
78+
out = buf.getvalue()
79+
assert len(out) == 2 * 5000 * 8
80+
for i in range(0, len(out), 8):
81+
word = out[i : i + 8]
82+
assert word in (
83+
b"AAAAA\x00\x00\x00",
84+
b"BBBBB\x00\x00\x00",
85+
), f"torn word at offset {i}: {bytes(word)!r}"
86+
finally:
87+
sys.setswitchinterval(old_interval)
88+
4489

4590
class TestReadBuffer:
4691
def test_empty(self) -> None:

0 commit comments

Comments
 (0)