Skip to content

Commit d2d6c13

Browse files
Drop outer _truncate_error wrap on handshake FailureResponse rendering
The handshake FailureResponse arm in protocol.py composed: raise ProtocolError( f"Handshake failed: [{response.code}] " f"{_truncate_error(self._failure_text(response))}" ) The outer _truncate_error wrapped a string that _failure_text had already pre-truncated (body capped at 200, then addr suffix appended). With a long FailureResponse.message, the outer call saw the result as oversized again and re-truncated to 200 chars — silently stripping the " to host:port" addr suffix the inner pre-truncation just preserved, and reporting a confused "[truncated, N chars]" marker against the post-inner-truncation length rather than the original server-message length. Drop the outer wrap. _failure_text already bounds the result at ~240 chars, and the handshake ProtocolError is a per-peer diagnostic (not an aggregate) where peer attribution is precisely what the operator paged on needs. Pin the composition shape and the addr-suffix preservation against regression — including a source-level guard that prevents re-introduction of the outer wrap. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent d6dd7bf commit d2d6c13

2 files changed

Lines changed: 89 additions & 10 deletions

File tree

src/dqliteclient/protocol.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -233,17 +233,14 @@ async def handshake(self, client_id: int | None = None) -> int:
233233
# server-reported code and peer address so log aggregators
234234
# can group on the numeric code (DQLITE_PARSE,
235235
# DQLITE_NOTLEADER, etc.) rather than on text alone.
236-
# Truncate the failure body so a hostile-or-buggy peer
237-
# cannot inflate the handshake error into a multi-KiB
238-
# log line — the rest of the protocol module's
239-
# OperationalError raise sites get this for free via
240-
# ``OperationalError._MAX_DISPLAY_MESSAGE``; the raw
241-
# ProtocolError shape doesn't.
242-
from dqliteclient.cluster import _truncate_error
243-
236+
# ``_failure_text`` already pre-truncates the body before
237+
# appending the addr suffix, bounding the result at
238+
# ~240 chars; do NOT wrap it in an outer ``_truncate_error``,
239+
# which would re-truncate over the addr suffix and silently
240+
# strip the peer attribution exactly when the operator
241+
# needs it most (a long handshake failure).
244242
raise ProtocolError(
245-
f"Handshake failed: [{response.code}] "
246-
f"{_truncate_error(self._failure_text(response))}"
243+
f"Handshake failed: [{response.code}] {self._failure_text(response)}"
247244
)
248245

249246
if not isinstance(response, WelcomeResponse):
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
"""Pin: handshake-time ``ProtocolError`` raised on a ``FailureResponse``
2+
preserves the peer-address attribution suffix even when the server
3+
message is long.
4+
5+
``DqliteProtocol._failure_text`` pre-truncates the body before
6+
appending the addr suffix; the handshake raise site MUST NOT wrap the
7+
result in an outer truncator that would re-truncate over the suffix
8+
and silently strip the peer attribution. The most-likely-paged class
9+
of handshake error (a flaky / mis-configured peer) is exactly the one
10+
that produces a long FailureResponse body — losing the suffix exactly
11+
when an operator needs to know "which node?" defeats the purpose of
12+
the suffix.
13+
"""
14+
15+
from __future__ import annotations
16+
17+
from dqliteclient.exceptions import ProtocolError
18+
from dqliteclient.protocol import DqliteProtocol
19+
from dqlitewire.messages import FailureResponse
20+
21+
22+
def _make_proto_with_address(address: str) -> DqliteProtocol:
23+
proto = DqliteProtocol.__new__(DqliteProtocol)
24+
proto._address = address
25+
return proto
26+
27+
28+
def test_handshake_protocolerror_composition_preserves_addr_suffix() -> None:
29+
"""The production handshake raise site composes:
30+
31+
ProtocolError(f"Handshake failed: [{code}] {self._failure_text(response)}")
32+
33+
With a long FailureResponse.message, the result must end with the
34+
addr suffix that ``_failure_text`` appends — i.e. NO outer
35+
truncator can wrap _failure_text and strip the suffix.
36+
"""
37+
proto = _make_proto_with_address("host-a:9001")
38+
body = "X" * 1024
39+
response = FailureResponse(code=1001, message=body)
40+
41+
rendered = f"Handshake failed: [{response.code}] {proto._failure_text(response)}"
42+
43+
# The ProtocolError text the operator sees must end with the
44+
# addr suffix — that is the load-bearing peer attribution.
45+
err = ProtocolError(rendered)
46+
text = str(err)
47+
assert text.endswith(" to host-a:9001"), f"Addr suffix dropped from handshake error: {text!r}"
48+
# Defensive cap on the body still applies.
49+
assert "[truncated," in text
50+
51+
52+
def test_handshake_protocolerror_composition_short_message_no_truncation() -> None:
53+
"""Short messages do not need truncation; the suffix is still
54+
appended verbatim. Pin that the no-truncation path also keeps
55+
the suffix (defensive against an over-eager outer truncator
56+
being re-introduced)."""
57+
proto = _make_proto_with_address("host-b:19002")
58+
response = FailureResponse(code=1001, message="boom")
59+
rendered = f"Handshake failed: [{response.code}] {proto._failure_text(response)}"
60+
text = str(ProtocolError(rendered))
61+
assert text.endswith(" to host-b:19002")
62+
assert "[truncated," not in text
63+
64+
65+
def test_handshake_protocolerror_does_not_use_outer_truncate_error() -> None:
66+
"""Source-level pin: the handshake raise site at protocol.py
67+
must NOT wrap _failure_text in _truncate_error — that wrapping
68+
silently strips the addr suffix the inner pre-truncation just
69+
preserved. This guard catches a regression that would re-introduce
70+
the outer wrap.
71+
"""
72+
import inspect
73+
74+
from dqliteclient import protocol
75+
76+
# Locate the _handshake method and read its source.
77+
src = inspect.getsource(protocol.DqliteProtocol.handshake)
78+
# The handshake's FailureResponse arm must not call
79+
# _truncate_error on top of self._failure_text.
80+
assert "_truncate_error(self._failure_text" not in src, (
81+
"Handshake raise site must not wrap _failure_text in _truncate_error"
82+
)

0 commit comments

Comments
 (0)