Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions malariagen_data/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,12 @@ def _init_filesystem(url, **kwargs):

kwargs.setdefault("token", credentials)

# Set retry and timeout defaults for GCS to handle transient errors
# on unreliable networks (e.g., field stations in endemic regions).
# gcsfs supports retry (number of retries) and timeout (seconds).
kwargs.setdefault("retry", 3)
kwargs.setdefault("timeout", 60)

# Ensure options are passed through to gcsfs, even if URL is chained.
if url.startswith("gs://") or url.startswith("gcs://"):
storage_options = kwargs
Expand All @@ -501,6 +507,13 @@ def _init_filesystem(url, **kwargs):
kwargs.setdefault("endpoint_url", "https://cog.sanger.ac.uk")
kwargs.setdefault("config_kwargs", config)

# Set retry and timeout defaults for S3 to handle transient errors.
# s3fs supports retries (max retry attempts) and
# config_kwargs for connect/read timeouts.
kwargs.setdefault("retries", 3)
config.setdefault("connect_timeout", 60)
config.setdefault("read_timeout", 60)

if url.startswith("s3://"):
storage_options = kwargs
else:
Expand All @@ -509,6 +522,9 @@ def _init_filesystem(url, **kwargs):

else:
# Some other kind of URL: pass kwargs through unchanged, except that
# remote HTTP/HTTPS filesystems get a default timeout if none was given.
if url.startswith("http://") or url.startswith("https://"):
kwargs.setdefault("timeout", 60)
storage_options = kwargs

if simplecache_options is not None:
Expand Down
2 changes: 1 addition & 1 deletion tests/anoph/test_frq.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def check_plot_frequencies_heatmap(api, frq_df):
# Test index parameter - if None, should use dataframe index.
fig = api.plot_frequencies_heatmap(frq_df, show=False, index=None, max_len=None)

if "contig" in list(frq_df.columns):
if "contig" in list(frq_df.columns) and not frq_df["contig"].is_unique:
# The "contig" column has duplicate values, so the call below is expected to raise ValueError.
with pytest.raises(ValueError):
api.plot_frequencies_heatmap(
Expand Down
Loading