Skip to content

Commit 9dd78cd

Browse files
Cover decode_text chunked branch error paths
The memoryview one-shot fast path (<= 64 KiB) had tests for the "not null-terminated" and "Invalid UTF-8" error cases, but the chunked fallback was only exercised for the happy path. Add tests that feed a > 64 KiB payload with no null byte, a > 64 KiB payload whose tail contains a truncated multi-byte UTF-8 sequence, and a payload with a 3-byte codepoint straddling the internal chunk boundary — the last one guards the accumulator against decoding chunks independently. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e13c53a commit 9dd78cd

1 file changed

Lines changed: 62 additions & 0 deletions

File tree

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
"""Error-path tests for the memoryview chunked branch of ``decode_text``.
2+
3+
The one-shot branch (payloads <= 64 KiB) has null-terminator and
4+
invalid-UTF-8 tests already. The chunked branch is only exercised for
5+
the happy path. Without dedicated error-path tests, a regression in
6+
the accumulator — wrong ``scanned`` update, dropped partial multi-byte
7+
UTF-8 split across chunk boundaries — would silently corrupt large
8+
CLOB / JSON-text columns.
9+
"""
10+
11+
import pytest
12+
13+
from dqlitewire.exceptions import DecodeError
14+
from dqlitewire.types import decode_text
15+
16+
17+
def _align(n: int) -> int:
    """Return the number of pad bytes that round ``n`` up to a multiple of 8.

    The result is always in ``range(8)`` and is ``0`` when ``n`` is already
    word-aligned.
    """
    # Python's ``%`` takes the sign of the divisor, so ``-n % 8`` is exactly
    # the distance from ``n`` up to the next multiple of 8.
    return -n % 8
21+
22+
23+
class TestDecodeTextChunkedErrors:
    """Error and boundary cases for ``decode_text`` on large memoryviews.

    Every payload here is larger than the one-shot threshold so that the
    chunked scanning branch is the one being exercised.
    """

    def test_chunked_missing_null_terminator_raises(self) -> None:
        """A large payload with no null byte anywhere must be rejected."""
        # 70 KiB sits above the 64 KiB one-shot limit, forcing chunked mode.
        body = b"a" * (70 * 1024)
        # 70 KiB is already a multiple of 8, so this adds no zero bytes and
        # the buffer really does lack a terminator.
        word_pad = b"\x00" * _align(len(body))
        buf = memoryview(body + word_pad)

        with pytest.raises(DecodeError, match="not null-terminated"):
            decode_text(buf)

    def test_chunked_invalid_utf8_before_null_raises(self) -> None:
        """A dangling UTF-8 lead byte just before the terminator must fail."""
        # 70 KiB of ASCII, then a lone 0xC3 lead byte with no continuation
        # byte, then the null terminator.
        ascii_run = b"a" * (70 * 1024)
        body = ascii_run + b"\xc3" + b"\x00"
        word_pad = b"\x00" * _align(len(body))
        buf = memoryview(body + word_pad)

        with pytest.raises(DecodeError, match="Invalid UTF-8"):
            decode_text(buf)

    def test_chunked_multibyte_codepoint_across_boundary(self) -> None:
        """A 3-byte codepoint split by the scan-chunk boundary decodes intact.

        Guards against an implementation that decodes each scanned chunk on
        its own instead of joining the chunks first.
        """
        from dqlitewire.types import _TEXT_ONE_SHOT_MAX, _TEXT_SCAN_CHUNK

        # Stop the ASCII filler one byte short of the chunk boundary so the
        # first byte of the codepoint is the last byte of the first chunk.
        lead_len = _TEXT_SCAN_CHUNK - 1
        euro = "€".encode()  # 3 bytes: e2 82 ac
        assert len(euro) == 3

        # Trailing filler keeps the total above the one-shot threshold so
        # the chunked branch is taken.
        trailer_len = _TEXT_ONE_SHOT_MAX + 1 - lead_len - len(euro)
        trailer = b"z" * trailer_len

        body = b"a" * lead_len + euro + trailer + b"\x00"
        word_pad = b"\x00" * _align(len(body))
        buf = memoryview(body + word_pad)

        decoded, _ = decode_text(buf)

        expected = "".join(("a" * lead_len, "€", "z" * len(trailer)))
        assert decoded == expected

0 commit comments

Comments
 (0)