Skip to content

Commit fbfe06a

Browse files
Cap the aggregate of per-node errors in ClusterClient.find_leader
The per-node snippet cap (_MAX_ERROR_MESSAGE_SNIPPET = 200) already bounds the M axis of error-message size on a hostile peer. The N axis (configured node-store size) is operator-controlled and unbounded — a 500-node store of failing peers produces ~100 KB of error text held in the ClusterError args, in every traceback render, and in every __cause__ walk on a long-lived process's pool retry loops. Cap the joined aggregate at _MAX_AGGREGATE_ERROR_PAYLOAD = 16 KiB (= ~80 nodes' worth of detail at the per-node cap; enough for diagnostic utility on any realistic cluster). Truncation marker mirrors the per-node ``_truncate_error`` phrasing for symmetry. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent cc1c21b commit fbfe06a

2 files changed

Lines changed: 75 additions & 1 deletion

File tree

src/dqliteclient/cluster.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,17 @@
5151
# held in memory and serialised into every traceback.
5252
_MAX_ERROR_MESSAGE_SNIPPET: Final[int] = 200
5353

54+
# Cap the aggregate-of-all-per-node-errors payload before raising the
55+
# final ``ClusterError``. The per-node cap above bounds the M axis, but
56+
# the N axis (configured node-store size) is still operator-controlled
57+
# and unbounded — a 500-node store all returning hostile-cap-sized
58+
# messages produces ~100 KB of error text held in the ClusterError's
59+
# args, in every traceback render, and in every ``__cause__`` walk.
60+
# 16 KiB / 200 codepoints/snippet ≈ 80 nodes' worth of detail before
61+
# truncation, which is enough for diagnostic utility on any realistic
62+
# cluster while keeping the exception payload bounded.
63+
_MAX_AGGREGATE_ERROR_PAYLOAD: Final[int] = 16 * 1024
64+
5465
# Use OS-entropy randomness for the per-sweep node shuffle so that the
5566
# stampede-avoidance is not defeated by a downstream call to
5667
# ``random.seed(...)``. Test suites and some libraries seed the global
@@ -389,7 +400,13 @@ async def _find_leader_impl(self, *, trust_server_heartbeat: bool) -> str:
389400
last_exc = e
390401
continue
391402

392-
raise ClusterError(f"Could not find leader. Errors: {'; '.join(errors)}") from last_exc
403+
joined = "; ".join(errors)
404+
if len(joined) > _MAX_AGGREGATE_ERROR_PAYLOAD:
405+
kept = len(joined) - _MAX_AGGREGATE_ERROR_PAYLOAD
406+
joined = (
407+
joined[:_MAX_AGGREGATE_ERROR_PAYLOAD] + f"... [aggregate truncated, {kept} chars]"
408+
)
409+
raise ClusterError(f"Could not find leader. Errors: {joined}") from last_exc
393410

394411
async def _query_leader(
395412
self, address: str, *, trust_server_heartbeat: bool = False
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
"""Pin: ``ClusterClient.find_leader``'s aggregate-of-per-node-errors
2+
payload is capped at ``_MAX_AGGREGATE_ERROR_PAYLOAD`` so the final
3+
``ClusterError`` does not grow O(N) in operator-configured node count.
4+
5+
The per-node snippet cap (``_MAX_ERROR_MESSAGE_SNIPPET = 200``)
6+
already bounds M (per-node failure message size). Without the
7+
aggregate cap, a 500-node store of failing peers produced ≥100 KB
8+
held in the ClusterError args, in every traceback render, and in
9+
every ``__cause__`` walk on a long-lived process's pool retry
10+
loops.
11+
"""
12+
13+
from __future__ import annotations
14+
15+
import pytest
16+
17+
from dqliteclient.cluster import (
18+
_MAX_AGGREGATE_ERROR_PAYLOAD,
19+
ClusterClient,
20+
)
21+
from dqliteclient.exceptions import ClusterError, DqliteConnectionError
22+
from dqliteclient.node_store import MemoryNodeStore
23+
24+
25+
@pytest.mark.asyncio
async def test_find_leader_aggregate_error_payload_is_capped(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The joined per-node error text in the final ``ClusterError`` stays
    bounded by ``_MAX_AGGREGATE_ERROR_PAYLOAD`` regardless of node count.
    """
    # 500 configured nodes, every one failing: the N axis the aggregate
    # cap is meant to bound.
    node_addresses = [f"10.0.{idx // 256}.{idx % 256}:9001" for idx in range(500)]
    client = ClusterClient(MemoryNodeStore(node_addresses), timeout=0.01)

    hostile_message = "x" * 50_000  # per-node hostile message size

    async def _always_fail(self: ClusterClient, address: str, **kwargs: object) -> str:
        # Stand-in for a peer whose failure text is attacker-sized.
        raise DqliteConnectionError(hostile_message)

    monkeypatch.setattr(ClusterClient, "_query_leader", _always_fail)

    with pytest.raises(ClusterError) as raised:
        await client.find_leader()

    rendered = str(raised.value)
    # The error string is "Could not find leader. Errors: <joined>"
    # plus a small truncation marker. Bound the test loosely on the
    # aggregate cap plus a small fixed overhead (prefix +
    # truncation marker).
    assert len(rendered) <= _MAX_AGGREGATE_ERROR_PAYLOAD + 256, (
        f"Aggregate error payload {len(rendered)} exceeds "
        f"_MAX_AGGREGATE_ERROR_PAYLOAD {_MAX_AGGREGATE_ERROR_PAYLOAD} "
        f"+ overhead. The N axis (configured node-store size) is "
        f"operator-controlled and unbounded — without the aggregate "
        f"cap, a 500-node store of failing peers produced >100 KB."
    )
    # Sanity: the truncation marker should be present (the test
    # configuration is designed to exceed the cap).
    assert "[aggregate truncated" in rendered

0 commit comments

Comments
 (0)