Skip to content

Commit 1653dab

Browse files
Truncate per-node error snippets in the ClusterError payload
find_leader concatenates ``{address}: {exception}`` strings across every probed node. A server that emits a multi-megabyte FailureResponse message would otherwise inflate the final ClusterError to O(N * M) characters (N probed nodes, M characters per message), which gets copied into every traceback and log record. Cap each per-node snippet at 200 characters and append a "... [truncated, L chars]" marker, where L is the length of the original message. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 179eced commit 1653dab

2 files changed

Lines changed: 41 additions & 1 deletion

File tree

src/dqliteclient/cluster.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,18 @@
2929
# max_attempts=...).
3030
_DEFAULT_CONNECT_MAX_ATTEMPTS = 3
3131

32+
# Cap per-node error messages at this length before concatenating them
33+
# into the final ClusterError. A failing peer that returns a multi-MB
34+
# FailureResponse message would otherwise produce an O(N * M) string
35+
# held in memory and serialised into every traceback.
36+
_MAX_ERROR_MESSAGE_SNIPPET = 200
37+
38+
39+
def _truncate_error(message: str) -> str:
40+
if len(message) <= _MAX_ERROR_MESSAGE_SNIPPET:
41+
return message
42+
return message[:_MAX_ERROR_MESSAGE_SNIPPET] + f"... [truncated, {len(message)} chars]"
43+
3244

3345
class ClusterClient:
3446
"""Client with automatic leader detection and failover."""
@@ -134,7 +146,7 @@ async def find_leader(self, *, trust_server_heartbeat: bool = False) -> str:
134146
# Narrow the catch so programming bugs (TypeError, KeyError,
135147
# etc.) propagate directly instead of being stringified into
136148
# a retryable ClusterError.
137-
errors.append(f"{node.address}: {e}")
149+
errors.append(f"{node.address}: {_truncate_error(str(e))}")
138150
last_exc = e
139151
continue
140152

tests/test_cluster.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,3 +601,31 @@ async def get_leader(self) -> tuple[int, str]:
601601
result = await client._query_leader("localhost:9001")
602602

603603
assert result is None
604+
605+
606+
class TestClusterErrorMessageTruncation:
    """Guard against oversized per-node errors in ``ClusterError``.

    Each node's error text is shortened before the snippets are
    concatenated, so one verbose server message cannot grow the combined
    payload to O(N * M).
    """

    async def test_large_per_node_error_is_truncated(self) -> None:
        node_store = MemoryNodeStore(["a:9001", "b:9001", "c:9001"])
        client = ClusterClient(node_store, timeout=1.0)
        oversized = "x" * 50_000

        async def fake_query(_address: str, **_kwargs: object) -> str | None:
            # Every probed node fails with the same very large message.
            raise DqliteConnectionError(oversized)

        with patch.object(client, "_query_leader", side_effect=fake_query):
            with pytest.raises(ClusterError) as exc_info:
                await client.find_leader()

        rendered = str(exc_info.value)
        # Three nodes, each contributing roughly 200 characters plus the
        # truncation marker, stay far below the 150k of raw payload.
        assert len(rendered) < 3_000
        assert "truncated" in rendered

0 commit comments

Comments
 (0)