Skip to content

Commit a05bab0

Browse files
committed
fix: gracefully handle oversized window_size with warning and adjustment
Replace hard error when window_size exceeds available SNPs with a warning, and automatically adjust window_size to the maximum valid value so that computation can proceed. Signed-off-by: Suhrid Marwah <suhridmarwah07@gmail.com>
1 parent 472d074 commit a05bab0

2 files changed

Lines changed: 22 additions & 12 deletions

File tree

malariagen_data/anoph/fst.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import warnings
12
from typing import Tuple, Optional
23

34
import numpy as np
@@ -81,20 +82,23 @@ def _fst_gwss(
8182
chunks=chunks,
8283
).compute()
8384

85+
n_snps = len(pos)
86+
if window_size > n_snps:
87+
warnings.warn(
88+
f"window_size ({window_size}) is larger than the number of SNP sites "
89+
f"available ({n_snps}); adjusting window_size to {n_snps}.",
90+
UserWarning,
91+
stacklevel=2,
92+
)
93+
window_size = n_snps
94+
8495
with self._spinner(desc="Compute Fst"):
8596
with np.errstate(divide="ignore", invalid="ignore"):
8697
fst = allel.moving_hudson_fst(ac1, ac2, size=window_size)
8798
# Sometimes Fst can be very slightly below zero, clip for simplicity.
8899
fst = np.clip(fst, a_min=clip_min, a_max=1)
89100
x = allel.moving_statistic(pos, statistic=np.mean, size=window_size)
90101

91-
if len(x) == 0:
92-
raise ValueError(
93-
f"No Fst windows could be computed: window_size={window_size!r} is "
94-
f"larger than the number of SNP sites available ({len(pos)}) in the "
95-
"selected region. Try reducing window_size or selecting a larger region."
96-
)
97-
98102
results = dict(x=x, fst=fst)
99103

100104
return results
@@ -103,7 +107,9 @@ def _fst_gwss(
103107
@doc(
104108
summary="""
105109
Run a Fst genome-wide scan to investigate genetic differentiation
106-
between two cohorts.
110+
between two cohorts. If window_size exceeds the number of available
111+
SNP sites, a UserWarning is issued and window_size is automatically
112+
reduced to the number of available sites.
107113
""",
108114
returns=dict(
109115
x="An array containing the window centre point genomic positions",

tests/anoph/test_fst.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,15 +142,15 @@ def test_fst_gwss(fixture, api: AnophelesFstAnalysis):
142142

143143
@parametrize_with_cases("fixture,api", cases=".")
144144
def test_fst_gwss_window_size_too_large(fixture, api: AnophelesFstAnalysis):
145-
# Use a window_size larger than the number of available SNPs to trigger the
146-
# ValueError guard added in _fst_gwss.
145+
# When window_size exceeds available SNPs, a UserWarning must be issued and
146+
# the function must still return a valid result using the adjusted window_size.
147147
all_sample_sets = api.sample_sets()["sample_set"].to_list()
148148
all_countries = api.sample_metadata()["country"].dropna().unique().tolist()
149149
countries = random.sample(all_countries, 2)
150150
cohort1_query = f"country == {countries[0]!r}"
151151
cohort2_query = f"country == {countries[1]!r}"
152-
with pytest.raises(ValueError, match="window_size"):
153-
api.fst_gwss(
152+
with pytest.warns(UserWarning, match="window_size"):
153+
x, fst = api.fst_gwss(
154154
contig=random.choice(api.contigs),
155155
sample_sets=all_sample_sets,
156156
cohort1_query=cohort1_query,
@@ -159,6 +159,10 @@ def test_fst_gwss_window_size_too_large(fixture, api: AnophelesFstAnalysis):
159159
window_size=10_000_000, # far larger than any fixture SNP count
160160
min_cohort_size=1,
161161
)
162+
assert isinstance(x, np.ndarray)
163+
assert isinstance(fst, np.ndarray)
164+
assert len(x) > 0
165+
assert x.shape == fst.shape
162166

163167

164168
@parametrize_with_cases("fixture,api", cases=".")

0 commit comments

Comments
 (0)