Skip to content

Commit 5a49e37

Browse files
Reorder pool.close() fork check before _closed early-return
The fork-after-init short-circuit in ConnectionPool.close() was positioned AFTER the "second-caller-waits-on-_close_done" arm. A child that inherited _closed=True with _close_done set but not yet set() (parent forked while mid-drain) would await an asyncio.Event bound to the parent's defunct loop — hanging forever in the child's fresh loop. Move the pid check to the top of close() so the local-state flip runs before any code path that can suspend on inherited primitives. Add tests for both the "_closed with pending Event" and "_closed without Event" cases. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 21ce632 commit 5a49e37

2 files changed

Lines changed: 96 additions & 4 deletions

File tree

src/dqliteclient/pool.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,20 +1306,26 @@ async def close(self) -> None:
13061306
re-entry after completion) waits on the first caller's drain
13071307
via ``_close_done`` rather than racing ``_drain_idle``.
13081308
"""
1309-
if self._closed:
1310-
if self._close_done is not None:
1311-
await self._close_done.wait()
1312-
return
13131309
# Fork-after-init: the inherited connection FDs are shared with
13141310
# the parent. Draining and writer.close() in the child would
13151311
# send FIN on sockets the parent still uses. Flip the closed
13161312
# flag so the child's references can be GC'd quietly without
13171313
# touching the wire. The child cannot acquire new connections
13181314
# either way (pid-aware ``acquire`` rejects). Symmetric with
13191315
# ``DqliteConnection.close``'s fork short-circuit.
1316+
#
1317+
# The fork-pid check runs BEFORE the ``_closed`` early-return
1318+
# so a child whose parent forked while mid-close (with
1319+
# ``_close_done`` set but not yet ``set()``) does not block on
1320+
# an Event bound to the parent's loop. Awaiting that Event in
1321+
# the child's fresh loop hangs forever.
13201322
if os.getpid() != self._creator_pid:
13211323
self._closed = True
13221324
return
1325+
if self._closed:
1326+
if self._close_done is not None:
1327+
await self._close_done.wait()
1328+
return
13231329
# Publish the drain-done event BEFORE flipping the closed flag
13241330
# so any second caller observing ``_closed=True`` is guaranteed
13251331
# to see a valid ``_close_done`` to wait on. Under single-task
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""Pin: ``ConnectionPool.close()`` fork short-circuit runs BEFORE the
2+
``_closed`` early-return so a child whose parent forked mid-close
3+
(with ``_close_done`` set but not yet ``set()``) does not hang on
4+
a parent-loop Event.
5+
6+
The fork-after-init guard short-circuits ``close()`` to a local-state
7+
flip without touching inherited FDs. The pid check has to run BEFORE
8+
the ``if self._closed:`` arm that awaits ``_close_done`` — otherwise a
9+
child that fell into the "second-caller-waits" arm would await an
10+
``asyncio.Event`` bound to the parent's defunct event loop, blocking
11+
forever in the child's fresh loop.
12+
13+
Reproduce by simulating a fork-during-close: stage a pool where
14+
``_closed=True`` and ``_close_done`` is an Event bound to a different
15+
loop, then call ``close()`` from a fresh loop with a fake-mismatched
16+
pid, and confirm it returns immediately instead of hanging.
17+
"""
18+
19+
from __future__ import annotations
20+
21+
import asyncio
22+
from unittest.mock import MagicMock, patch
23+
24+
import pytest
25+
26+
from dqliteclient.cluster import ClusterClient
27+
from dqliteclient.pool import ConnectionPool
28+
29+
30+
def _make_pool() -> ConnectionPool:
31+
cluster = MagicMock(spec=ClusterClient)
32+
return ConnectionPool(
33+
addresses=["localhost:9001"],
34+
min_size=0,
35+
max_size=1,
36+
timeout=1.0,
37+
cluster=cluster,
38+
)
39+
40+
41+
@pytest.mark.asyncio
42+
async def test_close_with_pid_mismatch_returns_even_when_closed_with_pending_event() -> None:
43+
"""``close()`` in a forked child must short-circuit to a local-
44+
state flip and return immediately, even if ``_closed=True`` and
45+
``_close_done`` is set but not yet completed (i.e. the parent's
46+
``_drain_idle`` was in flight at fork time).
47+
48+
Without the fix the child enters the ``if self._closed:`` arm
49+
and awaits ``_close_done.wait()`` forever, because the Event was
50+
bound to the parent's loop and will never be set in the child.
51+
"""
52+
pool = _make_pool()
53+
pool._closed = True
54+
# Stage an Event the child would inherit — bound to *our* loop, but
55+
# we will pretend it is the parent's via the pid mismatch. The
56+
# important property is that nobody will ``set()`` it during this
57+
# test, so a hang would be observable as a TimeoutError.
58+
pool._close_done = asyncio.Event()
59+
fake_parent_pid = pool._creator_pid + 1
60+
pool._creator_pid = fake_parent_pid # Simulate "we are the child."
61+
62+
# Bound the await so a regression manifests as a clean failure
63+
# instead of a hung test.
64+
with patch("dqliteclient.pool.os.getpid", return_value=fake_parent_pid + 1):
65+
await asyncio.wait_for(pool.close(), timeout=1.0)
66+
67+
# Local state flipped, no wait happened.
68+
assert pool._closed is True
69+
70+
71+
@pytest.mark.asyncio
72+
async def test_close_with_pid_mismatch_returns_even_when_closed_with_no_event() -> None:
73+
"""Sibling case: ``_closed=True`` but ``_close_done`` was never
74+
published (synchronous-close-and-set window). Child must still
75+
short-circuit and return — without entering the ``if
76+
self._closed:`` arm at all."""
77+
pool = _make_pool()
78+
pool._closed = True
79+
pool._close_done = None
80+
fake_parent_pid = pool._creator_pid + 1
81+
pool._creator_pid = fake_parent_pid
82+
83+
with patch("dqliteclient.pool.os.getpid", return_value=fake_parent_pid + 1):
84+
await asyncio.wait_for(pool.close(), timeout=1.0)
85+
86+
assert pool._closed is True

0 commit comments

Comments
 (0)