Skip to content

Commit fe68156

Browse files
fix: validate size in ReadBuffer.peek_header()
peek_header() previously returned raw size_words from the wire with no bound check. The post-025 contract is "has_message() is total, read_message() raises on oversized" — but peek_header() sits in a different category from has_message(): its return tuple includes the raw size_words value, which a caller can plausibly trust for allocation, timeout, or routing decisions. Silently returning an unbounded attacker-controlled integer would defeat the max_message_size guard the caller is presumably relying on. Apply the same DecodeError check used by read_message(): raise when the header claims a total size larger than max_message_size. This introduces a deliberate asymmetry with has_message() (which is total) — the asymmetry is documented in the new docstring and locked in by a pinning test so any future "consistency fix" has to confront the trade-off explicitly. Tests cover: peek_header raises on oversized, returns the parsed tuple on a valid header, returns None on partial data, does not advance _pos, accepts the exact max_message_size boundary, raises one byte over the boundary, and is intentionally inconsistent with has_message() on the same oversized bytes. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2d19e03 commit fe68156

2 files changed

Lines changed: 123 additions & 1 deletion

File tree

src/dqlitewire/buffer.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,33 @@ def has_message(self) -> bool:
123123
def peek_header(self) -> tuple[int, int, int] | None:
124124
"""Peek at the message header without consuming it.
125125
126-
Returns (size_in_words, message_type, schema_version) or None if not enough data.
126+
Returns (size_in_words, message_type, schema_version) or None if not
127+
enough data. Raises ``DecodeError`` if the header claims a total
128+
message size larger than ``max_message_size``.
129+
130+
Note the deliberate asymmetry with ``has_message()``: that predicate
131+
is total (returns ``True`` for oversized so the documented
132+
``while has_message(): decode()`` loop proceeds to the consume call,
133+
which then raises). ``peek_header()`` instead raises immediately
134+
because its return value is an attacker-controlled integer that a
135+
caller might use directly for allocation, timeout, or routing
136+
decisions — silently returning an unbounded value would defeat the
137+
``max_message_size`` guard the caller is presumably relying on. The
138+
error message and exception type match ``read_message()`` so the
139+
consume-side recovery path (``skip_message()``) applies the same way.
127140
"""
128141
available = len(self._data) - self._pos
129142

130143
if available < HEADER_SIZE:
131144
return None
132145

133146
size_words = int.from_bytes(self._data[self._pos : self._pos + 4], "little")
147+
total_size = HEADER_SIZE + (size_words * WORD_SIZE)
148+
if total_size > self._max_message_size:
149+
raise DecodeError(
150+
f"Message size {total_size} bytes exceeds maximum {self._max_message_size}"
151+
)
152+
134153
msg_type = self._data[self._pos + 4]
135154
schema_version = self._data[self._pos + 5]
136155

tests/test_buffer.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,109 @@ def test_has_message_is_total_on_oversized(self) -> None:
168168
with pytest.raises(DecodeError, match="exceeds maximum"):
169169
buf.read_message()
170170

171+
def test_peek_header_validates_size(self) -> None:
172+
"""peek_header() must reject oversized headers, matching has_message().
173+
174+
peek_header() is a sibling inspection API; returning a raw unvalidated
175+
size_words lets callers accidentally trust an attacker-controlled value
176+
(e.g. preallocate based on it). Fail loudly and consistently.
177+
"""
178+
import struct
179+
180+
import pytest
181+
182+
from dqlitewire.exceptions import DecodeError
183+
184+
buf = ReadBuffer(max_message_size=1024)
185+
# size_words=1000 -> 8000-byte body > 1024 limit
186+
header = struct.pack("<IBBH", 1000, 0, 0, 0)
187+
buf.feed(header)
188+
189+
with pytest.raises(DecodeError, match="exceeds maximum"):
190+
buf.peek_header()
191+
192+
def test_peek_header_on_valid_header(self) -> None:
193+
"""peek_header() returns the parsed tuple for a valid header and
194+
does not advance _pos.
195+
"""
196+
import struct
197+
198+
buf = ReadBuffer(max_message_size=1024)
199+
# size_words=1 -> 8-byte body, type=5, schema=0, within limit
200+
header = struct.pack("<IBBH", 1, 5, 0, 0)
201+
buf.feed(header)
202+
result = buf.peek_header()
203+
assert result == (1, 5, 0)
204+
# Non-consuming — a second peek returns the same tuple.
205+
assert buf.peek_header() == (1, 5, 0)
206+
207+
def test_peek_header_partial_returns_none(self) -> None:
208+
"""With fewer than HEADER_SIZE bytes buffered, peek_header() returns None."""
209+
buf = ReadBuffer()
210+
buf.feed(b"\x00\x00") # only 2 bytes
211+
assert buf.peek_header() is None
212+
213+
def test_peek_header_does_not_advance_pos(self) -> None:
214+
"""peek_header() must leave the buffer position untouched."""
215+
import struct
216+
217+
buf = ReadBuffer(max_message_size=1024)
218+
buf.feed(struct.pack("<IBBH", 1, 5, 0, 0))
219+
assert buf.available() == 8
220+
buf.peek_header()
221+
assert buf.available() == 8
222+
buf.peek_header()
223+
assert buf.available() == 8
224+
225+
def test_peek_header_at_exact_size_boundary(self) -> None:
226+
"""A header whose total_size equals max_message_size must be accepted;
227+
one byte over must raise. Locks in the off-by-one boundary.
228+
"""
229+
import struct
230+
231+
import pytest
232+
233+
from dqlitewire.exceptions import DecodeError
234+
235+
# max_message_size = 16 means HEADER_SIZE (8) + 1 word (8) is the
236+
# exact boundary. size_words=1 -> total_size=16 -> ok.
237+
buf_ok = ReadBuffer(max_message_size=16)
238+
buf_ok.feed(struct.pack("<IBBH", 1, 0, 0, 0))
239+
assert buf_ok.peek_header() == (1, 0, 0)
240+
241+
# size_words=2 -> total_size=24 -> over the limit by 8.
242+
buf_over = ReadBuffer(max_message_size=16)
243+
buf_over.feed(struct.pack("<IBBH", 2, 0, 0, 0))
244+
with pytest.raises(DecodeError, match="exceeds maximum"):
245+
buf_over.peek_header()
246+
247+
def test_peek_header_and_has_message_disagree_on_oversized(self) -> None:
248+
"""Pinning test for the deliberate asymmetry between the two
249+
inspection methods on the same buffer.
250+
251+
has_message() is total (returns True for oversized so the consume
252+
call surfaces the error). peek_header() raises immediately because
253+
its return value is an attacker-controlled integer that callers
254+
might use for allocation. This test locks in the contract so a
255+
future refactor that "fixes the inconsistency" by making one
256+
match the other has to confront the trade-off explicitly.
257+
"""
258+
import struct
259+
260+
import pytest
261+
262+
from dqlitewire.exceptions import DecodeError
263+
264+
buf = ReadBuffer(max_message_size=1024)
265+
buf.feed(struct.pack("<IBBH", 1000, 0, 0, 0)) # 8000 > 1024
266+
267+
# Predicate: total, returns True.
268+
assert buf.has_message() is True
269+
270+
# Inspection-with-payload: raises.
271+
with pytest.raises(DecodeError, match="exceeds maximum"):
272+
buf.peek_header()
273+
171274
def test_rejects_oversized_message(self) -> None:
172275
"""Oversized messages surface at read_message(), not has_message()."""
173276
import struct

0 commit comments

Comments
 (0)