Skip to content

Commit 8daee9d

Browse files
Cap blob size at encoder and decoder
The blob decoder previously computed total_size = 8 + length + pad_to_word(length) from an attacker-controlled uint64 length before the bounds check. A peer claiming length=2**62 forces the decoder to do the arithmetic (and later allocation up to the 64 MiB frame cap) before rejecting. Add a per-field _MAX_BLOB_SIZE = 16 MiB cap, enforced at decode before any arithmetic with length and mirrored at encode for symmetry with _MAX_PARAM_COUNT in tuples.py. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 4625b15 commit 8daee9d

2 files changed

Lines changed: 39 additions & 0 deletions

File tree

src/dqlitewire/types.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,16 @@
16  16  	from dqlitewire.constants import WORD_SIZE, ValueType
17  17  	from dqlitewire.exceptions import DecodeError, EncodeError
18  18  	
    19 +	# Per-BLOB byte cap. The overall frame-size cap in ``buffer.py`` (64 MiB)
    20 +	# already bounds any single message, but a hostile or buggy peer can
    21 +	# otherwise pack a single BLOB field that consumes the whole frame. The
    22 +	# cap is a defensive ceiling — real applications do not send
    23 +	# multi-megabyte blobs over the wire — and keeps the decoder fast-failing
    24 +	# well before large allocations or arithmetic on attacker-controlled
    25 +	# lengths. Sits beside ``_MAX_PARAM_COUNT`` / ``_MAX_COLUMN_COUNT`` /
    26 +	# ``_MAX_FILE_COUNT`` / ``_MAX_NODE_COUNT`` in spirit.
    27 +	_MAX_BLOB_SIZE = 16 * 1024 * 1024  # 16 MiB
    28 +	
19  29  	
20  30  	def encode_uint64(value: int) -> bytes:
21  31  	    """Encode an unsigned 64-bit integer (little-endian)."""
@@ -202,6 +212,8 @@ def encode_blob(value: bytes) -> bytes:
202 212 	    Format: uint64 length + data + padding
203 213 	    """
204 214 	    length = len(value)
    215 +	    if length > _MAX_BLOB_SIZE:
    216 +	        raise EncodeError(f"Blob length {length} exceeds maximum ({_MAX_BLOB_SIZE})")
205 217 	    padding = pad_to_word(length)
206 218 	    return encode_uint64(length) + value + (b"\x00" * padding)
207 219 	
@@ -216,6 +228,8 @@ def decode_blob(data: bytes | memoryview) -> tuple[bytes, int]:
216 228 	        raise DecodeError("Not enough data for blob length")
217 229 	
218 230 	    length = decode_uint64(data[:8])
    231 +	    if length > _MAX_BLOB_SIZE:
    232 +	        raise DecodeError(f"Blob length {length} exceeds maximum ({_MAX_BLOB_SIZE})")
219 233 	    total_size = 8 + length + pad_to_word(length)
220 234 	
221 235 	    if len(data) < total_size:

tests/test_types.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
7   7  	from dqlitewire.constants import ValueType
8   8  	from dqlitewire.exceptions import DecodeError, EncodeError
9   9  	from dqlitewire.types import (
    10 +	    _MAX_BLOB_SIZE,
10  11  	    decode_blob,
11  12  	    decode_double,
12  13  	    decode_int64,
@@ -304,6 +305,30 @@ def test_decode_blob_truncated_data(self) -> None:
304 305 	        with pytest.raises(DecodeError, match="Not enough data for blob"):
305 306 	            decode_blob(data)
306 307 	
    308 +	    def test_decode_blob_rejects_length_beyond_cap(self) -> None:
    309 +	        """Crafted buffer claiming a length beyond the per-field cap must be
    310 +	        rejected before the decoder allocates or does total-size arithmetic
    311 +	        with the attacker-controlled length."""
    312 +	        oversized = _MAX_BLOB_SIZE + 1
    313 +	        data = encode_uint64(oversized)
    314 +	        with pytest.raises(DecodeError, match="exceeds maximum"):
    315 +	            decode_blob(data)
    316 +	
    317 +	    def test_decode_blob_accepts_length_at_cap(self) -> None:
    318 +	        """Length exactly at the cap is still accepted; the body check is
    319 +	        what fires on a truncated buffer."""
    320 +	        # Claim exactly the cap but provide a short buffer. The cap check
    321 +	        # must pass; the later "not enough data" check is what rejects.
    322 +	        data = encode_uint64(_MAX_BLOB_SIZE) + b"\x00" * 8
    323 +	        with pytest.raises(DecodeError, match="Not enough data for blob"):
    324 +	            decode_blob(data)
    325 +	
    326 +	    def test_encode_blob_rejects_oversize(self) -> None:
    327 +	        """encode_blob mirrors the decode cap so callers fail fast on an
    328 +	        accidental giant bytes input instead of burning allocations."""
    329 +	        with pytest.raises(EncodeError, match="exceeds maximum"):
    330 +	            encode_blob(b"\x00" * (_MAX_BLOB_SIZE + 1))
307 332 	
308 333 	
309 334 	class TestValue:
310 335 	    def test_encode_integer(self) -> None:

0 commit comments

Comments (0)