Skip to content

Commit c8344ea

Browse files
authored
Merge pull request #1304 from khushthecoder/fix/issue-1303-cloud-storage-retry-backoff
fix: add retry, timeout, and backoff defaults for cloud storage access
2 parents 6e15613 + 9295ec4 commit c8344ea

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

malariagen_data/util.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,12 @@ def _init_filesystem(url, **kwargs):
479479

480480
kwargs.setdefault("token", credentials)
481481

482+
# Set retry and timeout defaults for GCS to handle transient errors
483+
# on unreliable networks (e.g., field stations in endemic regions).
484+
# gcsfs supports retry (number of retries) and timeout (seconds).
485+
kwargs.setdefault("retry", 3)
486+
kwargs.setdefault("timeout", 60)
487+
482488
# Ensure options are passed through to gcsfs, even if URL is chained.
483489
if url.startswith("gs://") or url.startswith("gcs://"):
484490
storage_options = kwargs
@@ -501,6 +507,13 @@ def _init_filesystem(url, **kwargs):
501507
kwargs.setdefault("endpoint_url", "https://cog.sanger.ac.uk")
502508
kwargs.setdefault("config_kwargs", config)
503509

510+
# Set retry and timeout defaults for S3 to handle transient errors.
511+
# s3fs supports retries (max retry attempts) and
512+
# config_kwargs for connect/read timeouts.
513+
kwargs.setdefault("retries", 3)
514+
config.setdefault("connect_timeout", 60)
515+
config.setdefault("read_timeout", 60)
516+
504517
if url.startswith("s3://"):
505518
storage_options = kwargs
506519
else:
@@ -509,6 +522,9 @@ def _init_filesystem(url, **kwargs):
509522

510523
else:
511524
# Some other kind of URL, pass through kwargs as-is.
525+
# Set a default timeout for remote HTTP/HTTPS filesystems.
526+
if url.startswith("http://") or url.startswith("https://"):
527+
kwargs.setdefault("timeout", 60)
512528
storage_options = kwargs
513529

514530
if simplecache_options is not None:

tests/anoph/test_frq.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def check_plot_frequencies_heatmap(api, frq_df):
1616
# Test index parameter - if None, should use dataframe index.
1717
fig = api.plot_frequencies_heatmap(frq_df, show=False, index=None, max_len=None)
1818

19-
if "contig" in list(frq_df.columns):
19+
if "contig" in list(frq_df.columns) and not frq_df["contig"].is_unique:
2020
# Not unique.
2121
with pytest.raises(ValueError):
2222
api.plot_frequencies_heatmap(

0 commit comments

Comments
 (0)