diff --git a/malariagen_data/util.py b/malariagen_data/util.py index ca7b38677..3cec8dee4 100644 --- a/malariagen_data/util.py +++ b/malariagen_data/util.py @@ -479,6 +479,12 @@ def _init_filesystem(url, **kwargs): kwargs.setdefault("token", credentials) + # Set retry and timeout defaults for GCS to handle transient errors + # on unreliable networks (e.g., field stations in endemic regions). + # gcsfs supports retry (number of retries) and timeout (seconds). + kwargs.setdefault("retry", 3) + kwargs.setdefault("timeout", 60) + # Ensure options are passed through to gcsfs, even if URL is chained. if url.startswith("gs://") or url.startswith("gcs://"): storage_options = kwargs @@ -501,6 +507,13 @@ def _init_filesystem(url, **kwargs): kwargs.setdefault("endpoint_url", "https://cog.sanger.ac.uk") kwargs.setdefault("config_kwargs", config) + # Set retry and timeout defaults for S3 to handle transient errors. + # s3fs supports retries (max retry attempts) and + # config_kwargs for connect/read timeouts. + kwargs.setdefault("retries", 3) + config.setdefault("connect_timeout", 60) + config.setdefault("read_timeout", 60) + if url.startswith("s3://"): storage_options = kwargs else: @@ -509,6 +522,9 @@ def _init_filesystem(url, **kwargs): else: # Some other kind of URL, pass through kwargs as-is. + # Set a default timeout for remote HTTP/HTTPS filesystems. + if url.startswith("http://") or url.startswith("https://"): + kwargs.setdefault("timeout", 60) storage_options = kwargs if simplecache_options is not None: diff --git a/tests/anoph/test_frq.py b/tests/anoph/test_frq.py index dfebb9bf6..05c29060c 100644 --- a/tests/anoph/test_frq.py +++ b/tests/anoph/test_frq.py @@ -16,7 +16,7 @@ def check_plot_frequencies_heatmap(api, frq_df): # Test index parameter - if None, should use dataframe index. fig = api.plot_frequencies_heatmap(frq_df, show=False, index=None, max_len=None) - if "contig" in list(frq_df.columns): + if "contig" in list(frq_df.columns) and not frq_df["contig"].is_unique: # Not unique. with pytest.raises(ValueError): api.plot_frequencies_heatmap(