Skip to content

Commit c67b745

Browse files
fix: poison buffer on torn header size reads
The wire header size field is exactly 4 bytes (uint32 little-endian), so any ``size_words`` value > ``0xFFFFFFFF`` cannot come from a well-formed header. It can only come from a ``bytearray`` slice that observed torn ``ob_size``/``ob_start`` during a concurrent realloc on a free-threaded build — the exact failure mode that issue 033 acknowledged with its hex-formatted error message but left on the non-poisoning ``DecodeError`` path meant for legitimate oversized server messages. Distinguish the two in ``peek_header``, ``read_message``, and ``skip_message``: wire-legal oversized values keep their recoverable-via-``skip_message`` contract, while structurally- impossible torn values poison the buffer so the next caller fails fast rather than calling ``skip_message`` on a buffer whose offset is unknowable. ``has_message`` remains a total predicate — it reports torn headers as "something to consume" so the caller's loop proceeds to ``read_message``, which does the actual poison. The check is a dead branch on GIL builds (slicing 4 bytes is atomic under the GIL) and only fires under ``3.13t`` concurrent misuse. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 97c74e6 commit c67b745

2 files changed

Lines changed: 164 additions & 0 deletions

File tree

src/dqlitewire/buffer.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,13 @@ def has_message(self) -> bool:
161161

162162
# Read size from header (first 4 bytes = size in words)
163163
size_words = int.from_bytes(self._data[self._pos : self._pos + 4], "little")
164+
# Torn-read sanity (issue 051): if the slice was widened by
165+
# a concurrent realloc, report "something to consume" so the
166+
# caller's while-loop proceeds to read_message(), which then
167+
# poisons. has_message() itself is a total predicate and
168+
# must not raise.
169+
if size_words > 0xFFFFFFFF:
170+
return True
164171
total_size = HEADER_SIZE + (size_words * WORD_SIZE)
165172

166173
if total_size > self._max_message_size:
@@ -196,6 +203,23 @@ def peek_header(self) -> tuple[int, int, int] | None:
196203
return None
197204

198205
size_words = int.from_bytes(self._data[self._pos : self._pos + 4], "little")
206+
# Sanity check for torn reads (issue 051). The wire size
207+
# field is exactly 4 bytes (uint32 little-endian), so any
208+
# value > 0xFFFFFFFF cannot come from a well-formed header
209+
# — it can only come from a ``bytearray`` slice that
210+
# observed torn ``ob_size``/``ob_start`` during a concurrent
211+
# realloc on a free-threaded build, returning more than 4
212+
# bytes. Distinguish this from legitimate oversized messages
213+
# so the non-poisoning ``DecodeError`` recovery contract
214+
# still applies to real wire-oversized messages while torn
215+
# reads poison the buffer.
216+
if size_words > 0xFFFFFFFF:
217+
err = DecodeError(
218+
f"torn header read: size_words={size_words:#x} (>32 bits, "
219+
"indicates concurrent misuse on a free-threaded build)"
220+
)
221+
self.poison(err)
222+
raise err
199223
total_size = HEADER_SIZE + (size_words * WORD_SIZE)
200224
if total_size > self._max_message_size:
201225
# Format size in hex: under concurrent misuse (see issue 033)
@@ -225,6 +249,23 @@ def read_message(self) -> bytes | None:
225249
return None
226250

227251
size_words = int.from_bytes(self._data[self._pos : self._pos + 4], "little")
252+
# Sanity check for torn reads (issue 051). The wire size
253+
# field is exactly 4 bytes (uint32 little-endian), so any
254+
# value > 0xFFFFFFFF cannot come from a well-formed header
255+
# — it can only come from a ``bytearray`` slice that
256+
# observed torn ``ob_size``/``ob_start`` during a concurrent
257+
# realloc on a free-threaded build, returning more than 4
258+
# bytes. Distinguish this from legitimate oversized messages
259+
# so the non-poisoning ``DecodeError`` recovery contract
260+
# still applies to real wire-oversized messages while torn
261+
# reads poison the buffer.
262+
if size_words > 0xFFFFFFFF:
263+
err = DecodeError(
264+
f"torn header read: size_words={size_words:#x} (>32 bits, "
265+
"indicates concurrent misuse on a free-threaded build)"
266+
)
267+
self.poison(err)
268+
raise err
228269
total_size = HEADER_SIZE + (size_words * WORD_SIZE)
229270

230271
if total_size > self._max_message_size:
@@ -266,6 +307,23 @@ def skip_message(self) -> bool:
266307
return False
267308

268309
size_words = int.from_bytes(self._data[self._pos : self._pos + 4], "little")
310+
# Sanity check for torn reads (issue 051). The wire size
311+
# field is exactly 4 bytes (uint32 little-endian), so any
312+
# value > 0xFFFFFFFF cannot come from a well-formed header
313+
# — it can only come from a ``bytearray`` slice that
314+
# observed torn ``ob_size``/``ob_start`` during a concurrent
315+
# realloc on a free-threaded build, returning more than 4
316+
# bytes. Distinguish this from legitimate oversized messages
317+
# so the non-poisoning ``DecodeError`` recovery contract
318+
# still applies to real wire-oversized messages while torn
319+
# reads poison the buffer.
320+
if size_words > 0xFFFFFFFF:
321+
err = DecodeError(
322+
f"torn header read: size_words={size_words:#x} (>32 bits, "
323+
"indicates concurrent misuse on a free-threaded build)"
324+
)
325+
self.poison(err)
326+
raise err
269327
total_size = HEADER_SIZE + (size_words * WORD_SIZE)
270328

271329
if total_size <= self._max_message_size:

tests/test_buffer_signal_safety.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,3 +323,109 @@ def test_happy_path_feed_still_works(self) -> None:
323323
assert not buf.is_poisoned
324324
assert buf.available() == 10
325325
assert buf.read_bytes(10) == b"helloworld"
326+
327+
328+
class _TornSizeBytearray(bytearray):
329+
"""Simulates a free-threaded torn bytearray read.
330+
331+
On free-threaded CPython (3.13t), ``bytearray.__getitem__`` can
332+
observe ``ob_size``/``ob_start`` inconsistently during a
333+
concurrent realloc/extend from another thread, producing a slice
334+
wider than the caller asked for. Under the GIL this is
335+
structurally impossible, so we synthesize the observable symptom
336+
by subclassing bytearray and widening only the 4-byte header
337+
size-field slice. All other slices (body reads, etc.) pass
338+
through unchanged.
339+
340+
The widened slice is 9 bytes: the original 4 plus 5 extra
341+
``\\xff`` bytes. ``int.from_bytes(..., "little")`` on that
342+
produces a value whose low 32 bits match the wire size field
343+
but whose total is > ``0xFFFFFFFF``, which is the structurally
344+
impossible condition that issue 051's sanity check is meant to
345+
detect.
346+
"""
347+
348+
def __getitem__(self, key: Any) -> Any:
349+
result = super().__getitem__(key)
350+
if isinstance(key, slice):
351+
start, stop, _ = key.indices(len(self))
352+
if stop - start == 4:
353+
return bytes(result) + b"\xff\xff\xff\xff\xff"
354+
return result
355+
356+
357+
class TestTornHeaderSizeSanityCheck:
358+
"""Regression tests for issue 051.
359+
360+
On free-threaded CPython, concurrent misuse of a ``ReadBuffer``
361+
can cause the 4-byte header-size slice to observably return more
362+
than 4 bytes (torn read during a realloc). The resulting
363+
``int.from_bytes`` value is a Python bigint wider than 32 bits —
364+
structurally impossible for a wire-legal ``size_words`` field
365+
(which is uint32 little-endian). The package already knew about
366+
this (issue 033 added hex formatting to avoid the
367+
bigint-to-decimal cap), but the torn-read case was routed
368+
through the non-poisoning ``DecodeError`` path meant for
369+
legitimate oversized server messages, leaving the caller free to
370+
call ``skip_message()`` on a buffer whose offset is unknowable.
371+
372+
The fix: any ``size_words > 0xFFFFFFFF`` is a torn-read indicator,
373+
not a legitimate oversized message. Poison the buffer and raise a
374+
diagnostic ``DecodeError`` identifying the torn read.
375+
376+
These tests use a ``bytearray`` subclass to synthesize the torn
377+
slice on GIL builds. On a real free-threaded build the symptom
378+
arises organically from concurrent access.
379+
"""
380+
381+
def _buf_with_torn_header(self) -> ReadBuffer:
382+
buf = ReadBuffer()
383+
# Put a wire-legal size_words=1 in the header plus 8 bytes of
384+
# body. The _TornSizeBytearray will widen the 4-byte slice to
385+
# 9 bytes on read, producing a size_words > 32 bits.
386+
buf._data = _TornSizeBytearray(b"\x01\x00\x00\x00" + b"\x00" * 12)
387+
return buf
388+
389+
def test_read_message_poisons_on_torn_header(self) -> None:
390+
buf = self._buf_with_torn_header()
391+
with pytest.raises(DecodeError, match="torn"):
392+
buf.read_message()
393+
assert buf.is_poisoned, (
394+
"torn header read must poison the buffer so subsequent "
395+
"callers fail fast rather than calling skip_message() on "
396+
"an unknowable offset"
397+
)
398+
with pytest.raises(ProtocolError, match="poisoned"):
399+
buf.read_message()
400+
401+
def test_peek_header_poisons_on_torn_header(self) -> None:
402+
buf = self._buf_with_torn_header()
403+
with pytest.raises(DecodeError, match="torn"):
404+
buf.peek_header()
405+
assert buf.is_poisoned
406+
407+
def test_skip_message_poisons_on_torn_header(self) -> None:
408+
buf = self._buf_with_torn_header()
409+
with pytest.raises(DecodeError, match="torn"):
410+
buf.skip_message()
411+
assert buf.is_poisoned
412+
413+
def test_wire_legal_max_size_still_routes_through_oversized_path(self) -> None:
414+
"""Counter-test: a legitimate ``size_words = 0xFFFFFFFF`` (a
415+
pathological but structurally-valid value) must NOT trip the
416+
torn-read sanity check — it must route through the existing
417+
non-poisoning ``DecodeError`` path so ``skip_message`` can
418+
recover. The check is strictly ``> 0xFFFFFFFF``.
419+
"""
420+
buf = ReadBuffer()
421+
# A real, non-torn 4-byte size field at its maximum value.
422+
buf._data = bytearray(b"\xff\xff\xff\xff" + b"\x00" * 4)
423+
with pytest.raises(DecodeError) as ei:
424+
buf.read_message()
425+
assert "torn" not in str(ei.value), (
426+
"wire-legal size_words=0xFFFFFFFF must not be misdiagnosed as torn"
427+
)
428+
assert not buf.is_poisoned, (
429+
"oversized-but-wire-legal DecodeError must remain non-poisoning "
430+
"to preserve the skip_message() recovery contract"
431+
)

0 commit comments

Comments
 (0)