Skip to content

Commit dbf1ea5

Browse files
perf: wrap body in memoryview to eliminate quadratic decode
Every body decoder in responses.py (RowsResponse, FilesResponse, ServersResponse) and tuples.py walked the body with an integer offset but passed `data[offset:]` into helpers. Each slice of a `bytes` object copies the remaining tail, producing O(N²) cumulative memcpy on messages with many small rows. Measured quadratic scaling: 50k rows took 2s, 100k took 11s, 200k took 48s. Wrap the body in a `memoryview` at the top of each `decode_body` so per-iteration slices are O(1) views. Widen the primitive decoder signatures (decode_uint32/uint64/int64/double/text/blob, decode_row_header, decode_row_values, decode_value) to accept `bytes | memoryview`. Use `struct.unpack` (already memoryview-compatible) and a chunked accumulating scan for `decode_text`'s NUL search — memoryview has no `.index(bytes)` method, so the scan reads 4 KiB chunks at a time via `bytes(data[a:b]).find(b"\x00")` and accumulates into a list. The chunked accumulation supports arbitrarily long TEXT values (SQLite TEXT columns routinely exceed 4 KiB for JSON / prose) while keeping per-chunk copy cost bounded; because the terminator is a single byte, it can never straddle a chunk boundary. Post-fix: 10x input takes ~10x time, not ~100x. Regression tests check both the linear scaling ratio and long-text roundtrip for memoryview inputs. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 9db8390 commit dbf1ea5

4 files changed

Lines changed: 252 additions & 46 deletions

File tree

src/dqlitewire/messages/responses.py

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -322,17 +322,23 @@ def encode_body(self) -> bytes:
322322
def decode_body(
323323
cls, data: bytes, schema: int = 0, max_rows: int = DEFAULT_MAX_ROWS
324324
) -> "RowsResponse":
325+
# Wrap in memoryview so per-iteration slices are O(1) rather
326+
# than O(remaining). Without this, a body with many small rows
327+
# triggers quadratic-time decode (issue 228): each
328+
# ``data[offset:]`` allocates a fresh ``bytes`` copy of the
329+
# tail. Memoryview slicing is a view, so slicing is free.
330+
view = memoryview(data)
325331
offset = 0
326332

327333
# Column count
328-
column_count = decode_uint64(data[offset:])
334+
column_count = decode_uint64(view[offset:])
329335
offset += 8
330336

331337
if column_count > _MAX_COLUMN_COUNT:
332338
raise DecodeError(f"Column count {column_count} exceeds maximum {_MAX_COLUMN_COUNT}")
333339

334340
# Bounds check: each column name is at least 8 bytes (null + padding)
335-
remaining = len(data) - offset
341+
remaining = len(view) - offset
336342
if column_count > remaining // 8:
337343
raise DecodeError(
338344
f"Column count {column_count} exceeds maximum possible in "
@@ -342,7 +348,7 @@ def decode_body(
342348
# Column names
343349
column_names: list[str] = []
344350
for _ in range(column_count):
345-
name, consumed = decode_text(data[offset:])
351+
name, consumed = decode_text(view[offset:])
346352
column_names.append(name)
347353
offset += consumed
348354

@@ -354,11 +360,11 @@ def decode_body(
354360
# Zero-column results cannot have row data (each row would be zero
355361
# bytes), so skip the row loop and consume the end marker directly.
356362
if column_count == 0:
357-
if offset + WORD_SIZE > len(data):
363+
if offset + WORD_SIZE > len(view):
358364
raise DecodeError(
359365
"RowsResponse body exhausted without end marker (zero-column result)"
360366
)
361-
marker_byte = data[offset]
367+
marker_byte = view[offset]
362368
if marker_byte == ROW_DONE_BYTE:
363369
has_more = False
364370
elif marker_byte == ROW_PART_BYTE:
@@ -375,9 +381,9 @@ def decode_body(
375381
has_more=has_more,
376382
)
377383

378-
while offset < len(data):
384+
while offset < len(view):
379385
# Read row header; markers are detected byte-by-byte inside
380-
result, consumed = decode_row_header(data[offset:], column_count)
386+
result, consumed = decode_row_header(view[offset:], column_count)
381387
offset += consumed
382388

383389
if result is RowMarker.DONE:
@@ -405,7 +411,7 @@ def decode_body(
405411
column_types = types
406412

407413
# Read row values
408-
values, consumed = decode_row_values(data[offset:], types)
414+
values, consumed = decode_row_values(view[offset:], types)
409415
rows.append(values)
410416
offset += consumed
411417

@@ -414,7 +420,7 @@ def decode_body(
414420

415421
raise DecodeError(
416422
f"RowsResponse body exhausted without end marker "
417-
f"(decoded {len(rows)} rows, consumed {offset} of {len(data)} bytes)"
423+
f"(decoded {len(rows)} rows, consumed {offset} of {len(view)} bytes)"
418424
)
419425

420426

@@ -464,30 +470,32 @@ def encode_body(self) -> bytes:
464470

465471
@classmethod
466472
def decode_body(cls, data: bytes, schema: int = 0) -> "FilesResponse":
473+
# Memoryview for O(1) slicing in the per-file loop (issue 228).
474+
view = memoryview(data)
467475
files: dict[str, bytes] = {}
468476
offset = 0
469-
count = decode_uint64(data[offset:])
477+
count = decode_uint64(view[offset:])
470478
offset += 8
471479
if count > _MAX_FILE_COUNT:
472480
raise DecodeError(f"File count {count} exceeds maximum {_MAX_FILE_COUNT}")
473481
# Bounds check: each file is at least 16 bytes (name + size)
474-
remaining = len(data) - offset
482+
remaining = len(view) - offset
475483
if count > remaining // 16:
476484
raise DecodeError(
477485
f"File count {count} exceeds maximum possible in "
478486
f"{remaining} bytes of remaining data"
479487
)
480488
for _ in range(count):
481-
name, consumed = decode_text(data[offset:])
489+
name, consumed = decode_text(view[offset:])
482490
offset += consumed
483-
size = decode_uint64(data[offset:])
491+
size = decode_uint64(view[offset:])
484492
offset += 8
485-
if offset + size > len(data):
493+
if offset + size > len(view):
486494
raise DecodeError(
487495
f"FilesResponse file content truncated: expected {size} bytes "
488-
f"at offset {offset}, but only {len(data) - offset} bytes available"
496+
f"at offset {offset}, but only {len(view) - offset} bytes available"
489497
)
490-
content = data[offset : offset + size]
498+
content = bytes(view[offset : offset + size])
491499
# No padding after content — matches Go's byte-by-byte read.
492500
offset += size
493501
files[name] = content
@@ -524,25 +532,27 @@ def encode_body(self) -> bytes:
524532

525533
@classmethod
526534
def decode_body(cls, data: bytes, schema: int = 0) -> "ServersResponse":
535+
# Memoryview for O(1) slicing in the per-node loop (issue 228).
536+
view = memoryview(data)
527537
nodes: list[NodeInfo] = []
528538
offset = 0
529-
count = decode_uint64(data[offset:])
539+
count = decode_uint64(view[offset:])
530540
offset += 8
531541
if count > _MAX_NODE_COUNT:
532542
raise DecodeError(f"Node count {count} exceeds maximum {_MAX_NODE_COUNT}")
533543
# Bounds check: each node is at least 24 bytes (id + address + role)
534-
remaining = len(data) - offset
544+
remaining = len(view) - offset
535545
if count > remaining // 24:
536546
raise DecodeError(
537547
f"Node count {count} exceeds maximum possible in "
538548
f"{remaining} bytes of remaining data"
539549
)
540550
for _ in range(count):
541-
node_id = decode_uint64(data[offset:])
551+
node_id = decode_uint64(view[offset:])
542552
offset += 8
543-
address, consumed = decode_text(data[offset:])
553+
address, consumed = decode_text(view[offset:])
544554
offset += consumed
545-
role = decode_uint64(data[offset:])
555+
role = decode_uint64(view[offset:])
546556
offset += 8
547557
nodes.append(NodeInfo(node_id, address, role))
548558
return cls(nodes)

src/dqlitewire/tuples.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,9 @@ def encode_row_header(types: Sequence[ValueType]) -> bytes:
203203
return bytes(header)
204204

205205

206-
def decode_row_header(data: bytes, column_count: int) -> tuple[list[ValueType] | RowMarker, int]:
206+
def decode_row_header(
207+
data: bytes | memoryview, column_count: int
208+
) -> tuple[list[ValueType] | RowMarker, int]:
207209
"""Decode row column type header.
208210
209211
Format: 4-bit type codes packed two per byte, padded to word boundary.
@@ -255,7 +257,9 @@ def encode_row_values(values: Sequence[Any], types: Sequence[ValueType]) -> byte
255257
return bytes(result)
256258

257259

258-
def decode_row_values(data: bytes, types: Sequence[ValueType]) -> tuple[list[Any], int]:
260+
def decode_row_values(
261+
data: bytes | memoryview, types: Sequence[ValueType]
262+
) -> tuple[list[Any], int]:
259263
"""Decode row values according to column types.
260264
261265
Returns (values, bytes_consumed).

src/dqlitewire/types.py

Lines changed: 74 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@ def encode_uint64(value: int) -> bytes:
1919
return struct.pack("<Q", value)
2020

2121

22-
def decode_uint64(data: bytes) -> int:
23-
"""Decode an unsigned 64-bit integer (little-endian)."""
22+
def decode_uint64(data: bytes | memoryview) -> int:
23+
"""Decode an unsigned 64-bit integer (little-endian).
24+
25+
Accepts ``bytes`` or ``memoryview`` so hot-path body decoders
26+
(issue 228) can pass memoryview slices without copying.
27+
"""
2428
if len(data) < 8:
2529
raise DecodeError(f"Need 8 bytes for uint64, got {len(data)}")
2630
result: int = struct.unpack("<Q", data[:8])[0]
@@ -34,8 +38,11 @@ def encode_int64(value: int) -> bytes:
3438
return struct.pack("<q", value)
3539

3640

37-
def decode_int64(data: bytes) -> int:
38-
"""Decode a signed 64-bit integer (little-endian)."""
41+
def decode_int64(data: bytes | memoryview) -> int:
42+
"""Decode a signed 64-bit integer (little-endian).
43+
44+
Accepts ``bytes`` or ``memoryview`` (issue 228).
45+
"""
3946
if len(data) < 8:
4047
raise DecodeError(f"Need 8 bytes for int64, got {len(data)}")
4148
result: int = struct.unpack("<q", data[:8])[0]
@@ -49,8 +56,11 @@ def encode_uint32(value: int) -> bytes:
4956
return struct.pack("<I", value)
5057

5158

52-
def decode_uint32(data: bytes) -> int:
53-
"""Decode an unsigned 32-bit integer (little-endian)."""
59+
def decode_uint32(data: bytes | memoryview) -> int:
60+
"""Decode an unsigned 32-bit integer (little-endian).
61+
62+
Accepts ``bytes`` or ``memoryview`` (issue 228).
63+
"""
5464
if len(data) < 4:
5565
raise DecodeError(f"Need 4 bytes for uint32, got {len(data)}")
5666
result: int = struct.unpack("<I", data[:4])[0]
@@ -66,11 +76,12 @@ def encode_double(value: float) -> bytes:
6676
return struct.pack("<d", value)
6777

6878

69-
def decode_double(data: bytes) -> float:
79+
def decode_double(data: bytes | memoryview) -> float:
7080
"""Decode a 64-bit floating point number (little-endian).
7181
7282
All IEEE 754 values are accepted, including NaN and infinity,
73-
matching the Go reference implementation behavior.
83+
matching the Go reference implementation behavior. Accepts
84+
``bytes`` or ``memoryview`` (issue 228).
7485
"""
7586
if len(data) < 8:
7687
raise DecodeError(f"Need 8 bytes for double, got {len(data)}")
@@ -103,21 +114,60 @@ def encode_text(value: str) -> bytes:
103114
return encoded + (b"\x00" * padding)
104115

105116

106-
def decode_text(data: bytes) -> tuple[str, int]:
117+
_TEXT_SCAN_CHUNK = 4096
118+
119+
120+
def decode_text(data: bytes | memoryview) -> tuple[str, int]:
107121
"""Decode null-terminated UTF-8 text.
108122
109-
Returns the decoded string and the number of bytes consumed (including padding).
123+
Accepts either ``bytes`` or ``memoryview``. Returns the decoded
124+
string and the number of bytes consumed (including padding).
125+
126+
The decoder's hot body loops (RowsResponse, FilesResponse,
127+
ServersResponse) wrap the body in a ``memoryview`` so
128+
per-iteration slices are O(1) rather than O(remaining) — see
129+
issue 228. ``bytes`` inputs use zero-copy ``.index(b"\\x00")``.
130+
``memoryview`` inputs walk the buffer in fixed-size chunks so the
131+
per-chunk ``bytes(...)`` copy is bounded; arbitrarily long text
132+
values (e.g. multi-KiB SQL strings or TEXT column values) are
133+
supported because the scan simply visits more chunks. Per-call
134+
cost scales with the actual text length, not with the remaining
135+
body.
110136
"""
111-
# Find null terminator
112-
try:
113-
null_pos = data.index(b"\x00")
114-
except ValueError as e:
115-
raise DecodeError("Text not null-terminated") from e
137+
if isinstance(data, memoryview):
138+
# Memoryview has no ``.index(bytes)``. Scan in fixed chunks and
139+
# accumulate so we can decode the full text without re-copying
140+
# after the NUL is found.
141+
chunks: list[bytes] = []
142+
scanned = 0
143+
null_pos = -1
144+
data_len = len(data)
145+
while scanned < data_len:
146+
chunk_end = min(scanned + _TEXT_SCAN_CHUNK, data_len)
147+
chunk = bytes(data[scanned:chunk_end])
148+
local = chunk.find(b"\x00")
149+
if local >= 0:
150+
chunks.append(chunk[:local])
151+
null_pos = scanned + local
152+
break
153+
chunks.append(chunk)
154+
scanned = chunk_end
155+
if null_pos < 0:
156+
raise DecodeError("Text not null-terminated")
157+
try:
158+
text = b"".join(chunks).decode("utf-8")
159+
except UnicodeDecodeError as e:
160+
raise DecodeError(f"Invalid UTF-8 in text field: {e}") from e
161+
else:
162+
try:
163+
null_pos = data.index(b"\x00")
164+
except ValueError as e:
165+
raise DecodeError("Text not null-terminated") from e
166+
try:
167+
text = data[:null_pos].decode("utf-8")
168+
except UnicodeDecodeError as e:
169+
raise DecodeError(f"Invalid UTF-8 in text field: {e}") from e
116170

117-
try:
118-
text = data[:null_pos].decode("utf-8")
119-
except UnicodeDecodeError as e:
120-
raise DecodeError(f"Invalid UTF-8 in text field: {e}") from e
121171
# Calculate total size including padding
122172
total_size = null_pos + 1 + pad_to_word(null_pos + 1)
123173
if len(data) < total_size:
@@ -135,10 +185,11 @@ def encode_blob(value: bytes) -> bytes:
135185
return encode_uint64(length) + value + (b"\x00" * padding)
136186

137187

138-
def decode_blob(data: bytes) -> tuple[bytes, int]:
188+
def decode_blob(data: bytes | memoryview) -> tuple[bytes, int]:
139189
"""Decode a blob.
140190
141-
Returns the blob data and the number of bytes consumed.
191+
Accepts either ``bytes`` or ``memoryview``. Returns the blob data
192+
(always as ``bytes``) and the number of bytes consumed.
142193
"""
143194
if len(data) < 8:
144195
raise DecodeError("Not enough data for blob length")
@@ -149,7 +200,7 @@ def decode_blob(data: bytes) -> tuple[bytes, int]:
149200
if len(data) < total_size:
150201
raise DecodeError(f"Not enough data for blob: need {total_size}, got {len(data)}")
151202

152-
return data[8 : 8 + length], total_size
203+
return bytes(data[8 : 8 + length]), total_size
153204

154205

155206
def _format_datetime_iso8601(value: datetime.datetime) -> str:
@@ -276,7 +327,7 @@ def _parse_iso8601(text: str) -> datetime.datetime:
276327
raise DecodeError(f"Cannot parse ISO 8601 datetime: {text!r}")
277328

278329

279-
def decode_value(data: bytes, value_type: ValueType) -> tuple[Any, int]:
330+
def decode_value(data: bytes | memoryview, value_type: ValueType) -> tuple[Any, int]:
280331
"""Decode a value from wire format.
281332
282333
Returns (value, bytes_consumed).

0 commit comments

Comments
 (0)