Skip to content

Commit ffa5bb9

Browse files
Narrow retry_with_backoff's default retryable tuple
The helper defaulted to retrying any Exception, which meant a programming bug (TypeError, AttributeError, KeyError, ...) got retried up to max_attempts times with exponential backoff before finally propagating. Restrict the default to the transport- and cluster-level error set that callers actually want auto-retried (OSError, TimeoutError, DqliteError); callers that genuinely want "retry anything" can still override the argument. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 5a3758b commit ffa5bb9

2 files changed

Lines changed: 71 additions & 5 deletions

File tree

src/dqliteclient/retry.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,27 @@
44
import random
55
from collections.abc import Awaitable, Callable
66

7+
from dqliteclient.exceptions import DqliteError
8+
9+
# Default retry set: transport- and cluster-level errors that a caller
10+
# could reasonably want auto-retried. Leaves programming bugs
11+
# (TypeError, AttributeError, KeyError, …) to propagate on first call
12+
# so debugging is not buried under exponential-backoff pauses. Callers
13+
# that genuinely want "retry anything" can pass their own tuple.
14+
_DEFAULT_RETRYABLE: tuple[type[BaseException], ...] = (
15+
OSError,
16+
TimeoutError,
17+
DqliteError,
18+
)
19+
720

821
async def retry_with_backoff[T](
922
func: Callable[[], Awaitable[T]],
1023
max_attempts: int = 5,
1124
base_delay: float = 0.1,
1225
max_delay: float = 10.0,
1326
jitter: float = 0.1,
14-
retryable_exceptions: tuple[type[Exception], ...] = (Exception,),
27+
retryable_exceptions: tuple[type[BaseException], ...] = _DEFAULT_RETRYABLE,
1528
) -> T:
1629
"""Retry an async function with exponential backoff.
1730
@@ -33,7 +46,7 @@ async def retry_with_backoff[T](
3346
if max_attempts < 1:
3447
raise ValueError("max_attempts must be at least 1")
3548

36-
last_error: Exception | None = None
49+
last_error: BaseException | None = None
3750

3851
for attempt in range(max_attempts):
3952
try:

tests/test_retry.py

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ async def fail_then_succeed() -> str:
3030
raise ValueError("not yet")
3131
return "ok"
3232

33-
result = await retry_with_backoff(fail_then_succeed, base_delay=0.01)
33+
result = await retry_with_backoff(
34+
fail_then_succeed,
35+
base_delay=0.01,
36+
retryable_exceptions=(ValueError,),
37+
)
3438
assert result == "ok"
3539
assert call_count == 3
3640

@@ -43,7 +47,12 @@ async def always_fail() -> str:
4347
raise ValueError("fail")
4448

4549
with pytest.raises(ValueError, match="fail"):
46-
await retry_with_backoff(always_fail, max_attempts=3, base_delay=0.01)
50+
await retry_with_backoff(
51+
always_fail,
52+
max_attempts=3,
53+
base_delay=0.01,
54+
retryable_exceptions=(ValueError,),
55+
)
4756

4857
assert call_count == 3
4958

@@ -111,7 +120,12 @@ async def mock_sleep(delay: float) -> None:
111120

112121
with patch("dqliteclient.retry.asyncio.sleep", side_effect=mock_sleep):
113122
await retry_with_backoff(
114-
fail_twice, max_attempts=3, base_delay=0.1, max_delay=0.05, jitter=0
123+
fail_twice,
124+
max_attempts=3,
125+
base_delay=0.1,
126+
max_delay=0.05,
127+
jitter=0,
128+
retryable_exceptions=(ValueError,),
115129
)
116130

117131
assert call_count == 3
@@ -151,9 +165,48 @@ def max_jitter(_low: float, high: float) -> float:
151165
base_delay=1.0,
152166
max_delay=2.0,
153167
jitter=0.1,
168+
retryable_exceptions=(ValueError,),
154169
)
155170

156171
# Several attempts will hit the cap; none should exceed max_delay.
157172
assert sleep_args, "expected at least one sleep"
158173
for d in sleep_args:
159174
assert d <= 2.0, f"delay {d} exceeded max_delay=2.0"
175+
176+
177+
class TestRetryDefaults:
178+
"""The default retryable set is restricted to transport- / cluster-
179+
level exceptions; programming bugs propagate on first call."""
180+
181+
async def test_default_retries_oserror(self) -> None:
182+
call_count = 0
183+
184+
async def fail_once() -> str:
185+
nonlocal call_count
186+
call_count += 1
187+
if call_count < 2:
188+
raise OSError("transient")
189+
return "ok"
190+
191+
result = await retry_with_backoff(fail_once, base_delay=0.01)
192+
assert result == "ok"
193+
assert call_count == 2
194+
195+
async def test_default_does_not_retry_programming_errors(self) -> None:
196+
import pytest as _pytest
197+
198+
from dqliteclient.retry import retry_with_backoff
199+
200+
call_count = 0
201+
202+
async def buggy() -> str:
203+
nonlocal call_count
204+
call_count += 1
205+
raise TypeError("bug")
206+
207+
with _pytest.raises(TypeError, match="bug"):
208+
await retry_with_backoff(buggy, max_attempts=5, base_delay=0.01)
209+
210+
assert call_count == 1, (
211+
"TypeError is outside the default retryable set and must propagate on the first call"
212+
)

0 commit comments

Comments
 (0)