Skip to content

Commit bf8ddf3

Browse files
fix: classify leader-change errors during connect()
connect() called handshake()/open_database() directly, bypassing _run_protocol's leader-error classification. A NOT_LEADER (10250) or LEADERSHIP_LOST (10506) response on OPEN surfaced as a generic OperationalError — indistinguishable from a SQL error — and the caller lost the signal to re-resolve the leader. Re-raise leader-coded OperationalError as DqliteConnectionError with the original chained as __cause__, so retry layers and cluster failover treat it as a transport issue. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e4beb34 commit bf8ddf3

2 files changed

Lines changed: 42 additions & 0 deletions

File tree

src/dqliteclient/connection.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,17 @@ async def connect(self) -> None:
136136
try:
137137
await self._protocol.handshake()
138138
self._db_id = await self._protocol.open_database(self._database)
139+
except OperationalError as e:
140+
self._protocol.close()
141+
self._protocol = None
142+
if e.code in _LEADER_ERROR_CODES:
143+
# Leader-change errors during OPEN are transport-level
144+
# problems — the caller needs to reconnect elsewhere, not
145+
# treat this as a SQL error.
146+
raise DqliteConnectionError(
147+
f"Node {self._address} is no longer leader: {e.message}"
148+
) from e
149+
raise
139150
except BaseException:
140151
self._protocol.close()
141152
self._protocol = None

tests/test_connection.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,37 @@ async def use_conn() -> None:
727727
assert isinstance(error_from_thread, InterfaceError)
728728
assert "event loop" in str(error_from_thread).lower()
729729

730+
async def test_connect_open_not_leader_surfaces_as_connection_error(self) -> None:
731+
"""If OPEN returns a leader-change code during connect(), callers must
732+
see it as a transport-level DqliteConnectionError, not as a generic
733+
OperationalError indistinguishable from a SQL error. Retry layers
734+
and cluster failover hinge on this classification.
735+
"""
736+
from dqlitewire.messages import FailureResponse, WelcomeResponse
737+
738+
conn = DqliteConnection("localhost:9001")
739+
740+
mock_reader = AsyncMock()
741+
mock_writer = MagicMock()
742+
mock_writer.drain = AsyncMock()
743+
mock_writer.close = MagicMock()
744+
mock_writer.wait_closed = AsyncMock()
745+
746+
# Handshake succeeds, OPEN returns SQLITE_IOERR_NOT_LEADER (10250).
747+
mock_reader.read.side_effect = [
748+
WelcomeResponse(heartbeat_timeout=15000).encode(),
749+
FailureResponse(code=10250, message="not leader").encode(),
750+
]
751+
752+
with (
753+
patch("asyncio.open_connection", return_value=(mock_reader, mock_writer)),
754+
pytest.raises(DqliteConnectionError, match="leader"),
755+
):
756+
await conn.connect()
757+
758+
# Socket must also be cleaned up.
759+
assert not conn.is_connected
760+
730761
async def test_cross_event_loop_raises_interface_error(self) -> None:
731762
"""Using a connection from a different event loop must raise InterfaceError."""
732763
import asyncio

0 commit comments

Comments
 (0)