Skip to content

Commit 4625b15

Browse files
Sanitize server-supplied strings at decoder boundary
FailureResponse.message, LeaderResponse.address, and ServersResponse.nodes[*].address are decoded from server-supplied bytes and flow directly into exception messages and log records. A malicious or compromised peer could embed ANSI escape sequences to corrupt operator terminals, CR/LF to forge log entries, or NUL bytes that upset some log backends. Replace C0 control characters (except tab 0x09 and LF 0x0A) and DEL (0x7F) with the literal '?' in these three fields. CR is dropped because it is the log-injection vector alongside LF, and LF alone is enough to render legitimate multi-line server diagnostics. The sanitization lives in a small _sanitize_server_text helper and is called only from the three decode_body sites — SQL text, column names, and other application strings are intentionally untouched. Tests cover ANSI clear-screen, CR forging, raw NUL (hand-built body to bypass encode_text's strict encoding), tab/LF preservation, and all three message types. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 2f34907 commit 4625b15

2 files changed

Lines changed: 103 additions & 3 deletions

File tree

src/dqlitewire/messages/responses.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Server to client response messages."""
22

3+
import re
34
from dataclasses import dataclass, field
45
from typing import Any, ClassVar
56

@@ -39,6 +40,29 @@
3940
_MAX_FILE_COUNT = 100
4041
_MAX_NODE_COUNT = 10_000
4142

43+
# Sanitize server-supplied text destined for exception messages and
44+
# logs. The C server promises UTF-8 but makes no promise about terminal
45+
# escapes or log-injection characters: a malicious or compromised peer
46+
# can embed ANSI colour/clear sequences, CR/LF to forge log lines, or
47+
# NUL bytes that upset some log backends. Replace C0 controls (except
48+
# tab 0x09 and LF 0x0A) and DEL (0x7F) with a literal "?". CR (0x0D) is
49+
# dropped — it is the log-injection vector alongside LF, and LF alone
50+
# is enough to represent legitimate multi-line server messages in
51+
# journald / file handlers.
52+
_CONTROL_CHARS_RE = re.compile(r"[\x00-\x08\x0b-\x1f\x7f]")
53+
54+
55+
def _sanitize_server_text(s: str) -> str:
56+
"""Replace C0 control characters and DEL with '?' in server strings.
57+
58+
Applied at the decoder boundary for text fields that flow directly
59+
into exception messages and logs (FailureResponse.message,
60+
LeaderResponse.address, ServersResponse.nodes[*].address). Leaves
61+
tab and LF untouched so multi-line server diagnostics render
62+
correctly.
63+
"""
64+
return _CONTROL_CHARS_RE.sub("?", s)
65+
4266

4367
@dataclass
4468
class FailureResponse(Message):
@@ -66,7 +90,7 @@ def encode_body(self) -> bytes:
6690
def decode_body(cls, data: bytes, schema: int = 0) -> "FailureResponse":
6791
code = decode_uint64(data)
6892
message, _ = decode_text(data[8:])
69-
return cls(code, message)
93+
return cls(code, _sanitize_server_text(message))
7094

7195

7296
@dataclass
@@ -94,7 +118,7 @@ def decode_body(cls, data: bytes, schema: int = 0) -> "LeaderResponse":
94118
"""
95119
node_id = decode_uint64(data)
96120
address, _ = decode_text(data[8:])
97-
return cls(node_id, address)
121+
return cls(node_id, _sanitize_server_text(address))
98122

99123
@classmethod
100124
def decode_body_legacy(cls, data: bytes) -> "LeaderResponse":
@@ -104,7 +128,7 @@ def decode_body_legacy(cls, data: bytes) -> "LeaderResponse":
104128
Go reference: DecodeNodeLegacy in internal/protocol/message.go.
105129
"""
106130
address, _ = decode_text(data)
107-
return cls(node_id=0, address=address)
131+
return cls(node_id=0, address=_sanitize_server_text(address))
108132

109133

110134
@dataclass
@@ -613,6 +637,7 @@ def decode_body(cls, data: bytes, schema: int = 0) -> "ServersResponse":
613637
node_id = decode_uint64(view[offset:])
614638
offset += 8
615639
address, consumed = decode_text(view[offset:])
640+
address = _sanitize_server_text(address)
616641
offset += consumed
617642
raw_role = decode_uint64(view[offset:])
618643
offset += 8

tests/test_messages_responses.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1311,6 +1311,81 @@ def test_leader_response_legacy_missing_null_terminator_raises(self) -> None:
13111311
LeaderResponse.decode_body_legacy(b"abc")
13121312

13131313

1314+
class TestServerTextSanitization:
1315+
"""Server-supplied strings that flow into exception messages and
1316+
logs must have C0 control characters and DEL replaced with '?' so a
1317+
malicious server cannot inject ANSI escape sequences, CR/LF
1318+
log-forgery, or NUL bytes into operator-facing output.
1319+
1320+
Tab (0x09) and LF (0x0A) are preserved so multi-line diagnostics
1321+
render as real newlines. CR (0x0D) is dropped because it is the
1322+
log-injection vector.
1323+
"""
1324+
1325+
def test_failure_response_sanitizes_ansi(self) -> None:
1326+
# Encode bytes directly because encode_text(str) stringifies.
1327+
from dqlitewire.types import encode_text, encode_uint64
1328+
1329+
payload = encode_uint64(42) + encode_text("\x1b[2J\x1b[Hwiped!")
1330+
decoded = FailureResponse.decode_body(payload)
1331+
assert decoded.message == "?[2J?[Hwiped!"
1332+
1333+
def test_failure_response_sanitizes_cr(self) -> None:
1334+
from dqlitewire.types import encode_text, encode_uint64
1335+
1336+
payload = encode_uint64(1) + encode_text("ok\r\nFAKE log line")
1337+
decoded = FailureResponse.decode_body(payload)
1338+
# LF preserved; CR replaced with '?' so it cannot forge a log line.
1339+
assert decoded.message == "ok?\nFAKE log line"
1340+
1341+
def test_failure_response_sanitizes_raw_nul(self) -> None:
1342+
# Construct a body whose text field contains a raw NUL byte
1343+
# *before* the real NUL terminator. The legitimate encoder
1344+
# rejects this, so hand-build the body to model a malicious
1345+
# peer that bypasses encode_text.
1346+
from dqlitewire.types import encode_uint64
1347+
1348+
# uint64 code (8 bytes) + raw "ab\x01cd" + NUL terminator + pad.
1349+
body = encode_uint64(1) + b"ab\x01cd\x00\x00\x00"
1350+
decoded = FailureResponse.decode_body(body)
1351+
assert decoded.message == "ab?cd"
1352+
1353+
def test_failure_response_preserves_tab_and_lf(self) -> None:
1354+
from dqlitewire.types import encode_text, encode_uint64
1355+
1356+
payload = encode_uint64(1) + encode_text("line1\nline2\tkey=val")
1357+
decoded = FailureResponse.decode_body(payload)
1358+
assert decoded.message == "line1\nline2\tkey=val"
1359+
1360+
def test_leader_response_sanitizes_address(self) -> None:
1361+
from dqlitewire.types import encode_text, encode_uint64
1362+
1363+
payload = encode_uint64(5) + encode_text("evil.com:9001\r\nHost: x")
1364+
decoded = LeaderResponse.decode_body(payload)
1365+
assert decoded.address == "evil.com:9001?\nHost: x"
1366+
1367+
def test_leader_response_legacy_sanitizes(self) -> None:
1368+
from dqlitewire.types import encode_text
1369+
1370+
payload = encode_text("\x1b[31mred\x1b[0m")
1371+
decoded = LeaderResponse.decode_body_legacy(payload)
1372+
assert decoded.address == "?[31mred?[0m"
1373+
1374+
def test_servers_response_sanitizes_node_addresses(self) -> None:
1375+
# Build a SERVERS body with a single node whose address contains
1376+
# an ANSI clear-screen sequence.
1377+
from dqlitewire.constants import NodeRole
1378+
from dqlitewire.types import encode_text, encode_uint64
1379+
1380+
body = encode_uint64(1)
1381+
body += encode_uint64(1)
1382+
body += encode_text("node1\x1b[2J:9001")
1383+
body += encode_uint64(NodeRole.VOTER)
1384+
decoded = ServersResponse.decode_body(body)
1385+
assert len(decoded.nodes) == 1
1386+
assert decoded.nodes[0].address == "node1?[2J:9001"
1387+
1388+
13141389
class TestReservedFieldValidation:
13151390
"""Non-zero reserved bytes must fail to decode across all message
13161391
types that declare a reserved field, matching LeaderRequest's

0 commit comments

Comments
 (0)