Skip to content

Commit fce2d11

Browse files
Add golden-byte tests to verify wire encoding against protocol spec
Hand-constructed byte sequences for key message types verify that the Python encoder/decoder matches the dqlite wire protocol specification. These catch symmetric bugs that round-trip tests cannot detect (wrong nibble order, padding miscalculation, etc.). Covers: LeaderRequest, ClientRequest, OpenRequest, FinalizeRequest, ExecRequest with params tuple, FailureResponse, ResultResponse, RowsResponse with nibble packing (including BOOLEAN=11 which uses all 4 bits), handshake, text/blob encoding edge cases. Fixes #018 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 8eaefdd commit fce2d11

1 file changed

Lines changed: 315 additions & 0 deletions

File tree

tests/test_golden_bytes.py

Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
"""Golden-byte tests: verify wire encoding matches the dqlite protocol spec.
2+
3+
Each test contains a hand-constructed byte sequence built from the protocol
4+
rules (header format, little-endian integers, null-terminated padded text,
5+
nibble-packed row headers, etc.). These are NOT generated by encoding with
6+
Python and copying the output — they are independently derived from the spec
7+
at https://canonical.com/dqlite/docs/reference/wire-protocol and the Go/C
8+
reference implementations.
9+
10+
This catches symmetric bugs where both encoder and decoder share the same
11+
mistake (e.g. wrong nibble order, wrong padding base).
12+
"""
13+
14+
import struct
15+
16+
from dqlitewire.codec import decode_message, encode_message
17+
from dqlitewire.constants import ValueType
18+
from dqlitewire.messages import (
19+
ClientRequest,
20+
ExecRequest,
21+
FailureResponse,
22+
FinalizeRequest,
23+
LeaderRequest,
24+
OpenRequest,
25+
ResultResponse,
26+
RowsResponse,
27+
)
28+
29+
30+
def _header(size_words: int, msg_type: int, schema: int = 0) -> bytes:
31+
"""Build an 8-byte message header from spec fields."""
32+
return struct.pack("<IBBH", size_words, msg_type, schema, 0)
33+
34+
35+
def _u64(v: int) -> bytes:
36+
"""Little-endian uint64."""
37+
return struct.pack("<Q", v)
38+
39+
40+
def _i64(v: int) -> bytes:
41+
"""Little-endian int64."""
42+
return struct.pack("<q", v)
43+
44+
45+
def _u32(v: int) -> bytes:
46+
"""Little-endian uint32."""
47+
return struct.pack("<I", v)
48+
49+
50+
def _text(s: str) -> bytes:
51+
"""Null-terminated UTF-8, padded to 8-byte boundary."""
52+
raw = s.encode("utf-8") + b"\x00"
53+
pad = (8 - len(raw) % 8) % 8
54+
return raw + b"\x00" * pad
55+
56+
57+
class TestGoldenRequests:
    """Golden-byte checks for request messages.

    Each test assembles the expected wire bytes by hand from the module's
    little-endian helpers, asserts that the encoder emits exactly those bytes,
    and then feeds the same bytes to the decoder to check the recovered fields.
    """

    def test_leader_request(self) -> None:
        """LeaderRequest is type 0 with a one-word body holding uint64(0)."""
        wire = b"".join([_header(1, 0), _u64(0)])
        assert encode_message(LeaderRequest()) == wire

        decoded = decode_message(wire, is_request=True)
        assert isinstance(decoded, LeaderRequest)

    def test_client_request(self) -> None:
        """ClientRequest(client_id=42) is type 1 with body uint64(42)."""
        wire = b"".join([_header(1, 1), _u64(42)])
        request = ClientRequest(client_id=42)
        assert encode_message(request) == wire

        decoded = decode_message(wire, is_request=True)
        assert isinstance(decoded, ClientRequest)
        assert decoded.client_id == 42

    def test_open_request(self) -> None:
        """OpenRequest(name="test.db", flags=0, vfs="unix") is type 3.

        Body = text("test.db") + uint64(0) + text("unix"):
          * "test.db" + NUL is exactly 8 bytes, so no padding is added
          * "unix" + NUL is 5 bytes, padded with 3 zero bytes to 8
        Body size: 8 + 8 + 8 = 24 bytes = 3 words.
        """
        wire = b"".join([_header(3, 3), _text("test.db"), _u64(0), _text("unix")])
        request = OpenRequest(name="test.db", flags=0, vfs="unix")
        assert encode_message(request) == wire

        decoded = decode_message(wire, is_request=True)
        assert isinstance(decoded, OpenRequest)
        assert decoded.name == "test.db"
        assert decoded.vfs == "unix"

    def test_finalize_request(self) -> None:
        """FinalizeRequest(db_id=1, stmt_id=2) is type 7.

        Body = uint32(1) + uint32(2), i.e. 8 bytes = 1 word.
        """
        wire = b"".join([_header(1, 7), _u32(1), _u32(2)])
        request = FinalizeRequest(db_id=1, stmt_id=2)
        assert encode_message(request) == wire

        decoded = decode_message(wire, is_request=True)
        assert isinstance(decoded, FinalizeRequest)
        assert decoded.db_id == 1
        assert decoded.stmt_id == 2

    def test_exec_request_with_params(self) -> None:
        """ExecRequest(db_id=1, stmt_id=2, params=[42, "hello"]) is type 5, schema 0.

        Body layout:
          * uint32(1) + uint32(2)                          -> 8 bytes
          * params tuple header: count byte 0x02, then one type byte per
            parameter (INTEGER=1, TEXT=3), zero-padded to a word -> 8 bytes
          * int64(42)                                      -> 8 bytes
          * text("hello") = "hello" + NUL + 2 pad bytes    -> 8 bytes
        Body size: 32 bytes = 4 words.
        """
        # Three meaningful bytes (count, INTEGER, TEXT) followed by zero padding.
        tuple_header = bytes([0x02, 0x01, 0x03]).ljust(8, b"\x00")
        wire = b"".join(
            [
                _header(4, 5, schema=0),
                _u32(1),
                _u32(2),
                tuple_header,
                _i64(42),
                _text("hello"),
            ]
        )
        request = ExecRequest(db_id=1, stmt_id=2, params=[42, "hello"])
        assert encode_message(request) == wire

        roundtrip = decode_message(wire, is_request=True)
        assert isinstance(roundtrip, ExecRequest)
        assert roundtrip.db_id == 1
        assert roundtrip.stmt_id == 2
        assert roundtrip.params == [42, "hello"]
132+
133+
134+
class TestGoldenResponses:
    """Verify response message encoding matches hand-built spec bytes.

    Each test builds the expected wire bytes from the module's little-endian
    helpers, checks the encoder output against them, and then decodes the same
    bytes (``is_request=False``) to check the recovered fields.
    """

    def test_failure_response(self) -> None:
        """FailureResponse(code=1, message="not leader"): type=0.

        Body: uint64(1) + text("not leader")
          text("not leader") = 10 chars + NUL = 11 bytes, padded to 16
        Total body = 8 + 16 = 24 bytes = 3 words
        """
        expected = _header(3, 0) + _u64(1) + _text("not leader")
        assert encode_message(FailureResponse(code=1, message="not leader")) == expected

        msg = decode_message(expected, is_request=False)
        assert isinstance(msg, FailureResponse)
        assert msg.code == 1
        assert msg.message == "not leader"

    def test_result_response(self) -> None:
        """ResultResponse(last_insert_id=5, rows_affected=3): type=6.

        Body: uint64(5) + uint64(3) = 16 bytes = 2 words
        """
        expected = _header(2, 6) + _u64(5) + _u64(3)
        assert encode_message(ResultResponse(last_insert_id=5, rows_affected=3)) == expected

        msg = decode_message(expected, is_request=False)
        assert isinstance(msg, ResultResponse)
        assert msg.last_insert_id == 5
        assert msg.rows_affected == 3

    def test_rows_response_nibble_packing(self) -> None:
        """RowsResponse with INTEGER and TEXT columns: type=7.

        Verifies row-header nibble packing order: low nibble = first column.

        Body layout:
          uint64(2)            = 8 bytes (column_count)
          text("id")           = "id" + NUL + 5 pad = 8 bytes
          text("name")         = "name" + NUL + 3 pad = 8 bytes
          row header: 2 cols [INTEGER=1, TEXT=3]
            byte 0 = (1 & 0x0F) | ((3 & 0x0F) << 4) = 0x31
            + 7 pad = 8 bytes
          int64(1)             = 8 bytes
          text("alice")        = "alice" + NUL + 2 pad = 8 bytes
          DONE marker          = uint64(0xFFFFFFFFFFFFFFFF) = 8 bytes
        Total body = 56 bytes = 7 words
        """
        row_header = bytes([0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])
        done_marker = _u64(0xFFFFFFFFFFFFFFFF)

        expected = (
            _header(7, 7)
            + _u64(2)
            + _text("id")
            + _text("name")
            + row_header
            + _i64(1)
            + _text("alice")
            + done_marker
        )

        msg_obj = RowsResponse(
            column_names=["id", "name"],
            column_types=[ValueType.INTEGER, ValueType.TEXT],
            rows=[[1, "alice"]],
        )
        assert encode_message(msg_obj) == expected

        msg = decode_message(expected, is_request=False)
        assert isinstance(msg, RowsResponse)
        assert msg.column_names == ["id", "name"]
        assert msg.rows == [[1, "alice"]]
        assert msg.has_more is False

    def test_rows_response_boolean_nibble(self) -> None:
        """RowsResponse with a BOOLEAN column needs all 4 nibble bits.

        BOOLEAN = 11 = 0x0B has bit 3 set, so a 3-bit truncation bug would
        turn 0x0B (11) into 0x03 (TEXT).

        Single column: row header byte 0 = 0x0B (low nibble only, high nibble 0)

        Body: uint64(1) + text("flag") + row header + int64(1) + DONE marker
        = 5 * 8 bytes = 5 words
        """
        row_header = bytes([0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])
        done_marker = _u64(0xFFFFFFFFFFFFFFFF)

        expected = (
            _header(5, 7)
            + _u64(1)  # column_count
            + _text("flag")
            + row_header
            + _i64(1)  # BOOLEAN True encoded as int64(1)
            + done_marker
        )

        msg_obj = RowsResponse(
            column_names=["flag"],
            column_types=[ValueType.BOOLEAN],
            rows=[[True]],
        )
        assert encode_message(msg_obj) == expected

        msg = decode_message(expected, is_request=False)
        assert isinstance(msg, RowsResponse)
        assert msg.rows == [[True]]
        # bool is an int subclass, so [[1]] == [[True]] also holds. The identity
        # check fails if the cell was decoded as a plain integer instead of a
        # boolean, which the equality above cannot detect.
        assert msg.rows[0][0] is True
239+
240+
241+
class TestGoldenHandshake:
    """Golden-byte checks for the bytes produced by ``encode_handshake``."""

    def test_protocol_version_handshake(self) -> None:
        """A default encoder emits version 1 as an 8-byte little-endian uint64."""
        from dqlitewire.codec import MessageEncoder

        wire = b"\x01\x00\x00\x00\x00\x00\x00\x00"
        encoder = MessageEncoder()
        assert encoder.encode_handshake() == wire

    def test_legacy_version_handshake(self) -> None:
        """An encoder built with PROTOCOL_VERSION_LEGACY emits 0x86104dd760433fe5."""
        from dqlitewire.codec import MessageEncoder
        from dqlitewire.constants import PROTOCOL_VERSION_LEGACY

        wire = struct.pack("<Q", 0x86104DD760433FE5)
        encoder = MessageEncoder(version=PROTOCOL_VERSION_LEGACY)
        assert encoder.encode_handshake() == wire
258+
259+
260+
class TestGoldenTextEncoding:
    """Golden-byte checks for ``encode_text`` at word-boundary edge cases."""

    def test_text_exact_word_boundary(self) -> None:
        """Seven characters plus the NUL fill one 8-byte word; no padding is added."""
        from dqlitewire.types import encode_text

        encoded = encode_text("abcdefg")
        # 7 payload bytes + terminator = 8 bytes
        assert encoded == b"abcdefg\x00"
        assert len(encoded) == 8

    def test_text_needs_padding(self) -> None:
        """Eight characters plus the NUL make 9 bytes, which pad out to 16."""
        from dqlitewire.types import encode_text

        encoded = encode_text("abcdefgh")
        assert encoded == b"abcdefgh\x00" + b"\x00" * 7
        assert len(encoded) == 16

    def test_empty_text(self) -> None:
        """The empty string is just the NUL terminator plus 7 pad bytes."""
        from dqlitewire.types import encode_text

        encoded = encode_text("")
        assert encoded == b"\x00" + b"\x00" * 7
        assert len(encoded) == 8
287+
288+
289+
class TestGoldenBlobEncoding:
    """Golden-byte checks for ``encode_blob`` / ``decode_blob``."""

    def test_blob_with_padding(self) -> None:
        """A blob is a uint64 length prefix, the data, then padding to a word boundary."""
        from dqlitewire.types import decode_blob, encode_blob

        payload = b"\xde\xad"  # 2 bytes
        encoded = encode_blob(payload)
        # 8-byte length prefix + 2 data bytes + 6 pad bytes = 16 bytes
        wire = _u64(2) + b"\xde\xad" + b"\x00" * 6
        assert encoded == wire
        assert len(encoded) == 16

        recovered, used = decode_blob(encoded)
        assert recovered == payload
        assert used == 16

    def test_blob_exact_word(self) -> None:
        """An 8-byte blob already ends on a word boundary, so no padding follows it."""
        from dqlitewire.types import encode_blob

        payload = b"\x01\x02\x03\x04\x05\x06\x07\x08"
        encoded = encode_blob(payload)
        wire = _u64(8) + payload
        assert encoded == wire
        assert len(encoded) == 16

0 commit comments

Comments
 (0)