Skip to content

Commit 179eced

Browse files
Reject LEADER responses with non-zero id and empty address
Upstream raft_leader sets id and address atomically; the C server substitutes "" for NULL so the only wire-legal shapes are (0, "") and (nonzero, nonempty). The "non-zero id, empty address" branch was silently substituting the queried node's own address as the leader, which trusts a peer that has already violated the wire contract. Raise ProtocolError instead so find_leader skips the bad peer via its existing except-tuple and moves on. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 51a55e9 commit 179eced

2 files changed

Lines changed: 75 additions & 8 deletions

File tree

src/dqliteclient/cluster.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -164,15 +164,20 @@ async def _query_leader(
164164
await protocol.handshake()
165165
node_id, leader_addr = await protocol.get_leader()
166166

167-
if not leader_addr and node_id != 0:
168-
# Non-zero node_id with empty address: this node is the leader
169-
return address
170-
elif leader_addr:
171-
# Non-empty address: redirect to the reported leader
167+
# Upstream dqlite's ``raft_leader`` sets ``id`` and ``address``
168+
# atomically: either both are filled in (a leader is known) or
169+
# both are zero/NULL. The server substitutes ``""`` for NULL,
170+
# so the only wire-legal shapes are ``(0, "")`` and
171+
# ``(nonzero, nonempty)``.
172+
if node_id != 0 and not leader_addr:
173+
raise ProtocolError(
174+
f"server {address} returned node_id={node_id} with empty "
175+
f"leader address; expected both or neither"
176+
)
177+
if leader_addr:
172178
return leader_addr
173-
else:
174-
# node_id=0 and empty address: no leader known
175-
return None
179+
# node_id=0 and empty address: no leader known
180+
return None
176181
finally:
177182
writer.close()
178183

tests/test_cluster.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,3 +539,65 @@ async def get_leader(self) -> tuple[int, str]:
539539

540540
assert captured.get("trust_server_heartbeat") is False
541541

542+
543+
class TestQueryLeaderRejectsUnreachableCombo:
544+
"""A (nonzero id, empty address) response is a protocol violation;
545+
reject it instead of silently substituting the queried address.
546+
"""
547+
548+
async def test_nonzero_id_with_empty_address_raises_protocol_error(self) -> None:
549+
from dqliteclient.exceptions import ProtocolError
550+
551+
store = MemoryNodeStore(["localhost:9001"])
552+
client = ClusterClient(store, timeout=1.0)
553+
554+
mock_reader = AsyncMock()
555+
mock_writer = MagicMock()
556+
mock_writer.drain = AsyncMock()
557+
mock_writer.close = MagicMock()
558+
mock_writer.wait_closed = AsyncMock()
559+
560+
class FakeProto:
561+
def __init__(self, *args: object, **kwargs: object) -> None:
562+
pass
563+
564+
async def handshake(self) -> None:
565+
pass
566+
567+
async def get_leader(self) -> tuple[int, str]:
568+
return (42, "")
569+
570+
with (
571+
patch("asyncio.open_connection", return_value=(mock_reader, mock_writer)),
572+
patch("dqliteclient.cluster.DqliteProtocol", FakeProto),
573+
pytest.raises(ProtocolError),
574+
):
575+
await client._query_leader("localhost:9001")
576+
577+
async def test_zero_id_empty_address_returns_none(self) -> None:
578+
store = MemoryNodeStore(["localhost:9001"])
579+
client = ClusterClient(store, timeout=1.0)
580+
581+
mock_reader = AsyncMock()
582+
mock_writer = MagicMock()
583+
mock_writer.drain = AsyncMock()
584+
mock_writer.close = MagicMock()
585+
mock_writer.wait_closed = AsyncMock()
586+
587+
class FakeProto:
588+
def __init__(self, *args: object, **kwargs: object) -> None:
589+
pass
590+
591+
async def handshake(self) -> None:
592+
pass
593+
594+
async def get_leader(self) -> tuple[int, str]:
595+
return (0, "")
596+
597+
with (
598+
patch("asyncio.open_connection", return_value=(mock_reader, mock_writer)),
599+
patch("dqliteclient.cluster.DqliteProtocol", FakeProto),
600+
):
601+
result = await client._query_leader("localhost:9001")
602+
603+
assert result is None

0 commit comments

Comments
 (0)