fix: raise ValueError when window_size exceeds available SNPs in fst_gwss

suhr25 · suhr25 · commit 472d074a81cf · 2026-03-18T15:07:37.000Z
Signed-off-by: Suhrid Marwah <suhridmarwah07@gmail.com>
diff --git a/malariagen_data/anoph/fst.py b/malariagen_data/anoph/fst.py
@@ -88,6 +88,13 @@ def _fst_gwss(
                 fst = np.clip(fst, a_min=clip_min, a_max=1)
                 x = allel.moving_statistic(pos, statistic=np.mean, size=window_size)
 
+        if len(x) == 0:
+            raise ValueError(
+                f"No Fst windows could be computed: window_size={window_size!r} is "
+                f"larger than the number of SNP sites available ({len(pos)}) in the "
+                "selected region. Try reducing window_size or selecting a larger region."
+            )
+
         results = dict(x=x, fst=fst)
 
         return results
diff --git a/tests/anoph/test_fst.py b/tests/anoph/test_fst.py
@@ -140,6 +140,27 @@ def test_fst_gwss(fixture, api: AnophelesFstAnalysis):
     assert isinstance(fig, bokeh.models.GridPlot)
 
 
+@parametrize_with_cases("fixture,api", cases=".")
+def test_fst_gwss_window_size_too_large(fixture, api: AnophelesFstAnalysis):
+    # Regression test: a window_size exceeding the available SNP sites must make
+    # fst_gwss raise a ValueError mentioning "window_size" (guard in _fst_gwss).
+    all_sample_sets = api.sample_sets()["sample_set"].to_list()
+    all_countries = api.sample_metadata()["country"].dropna().unique().tolist()
+    countries = random.sample(all_countries, 2)
+    cohort1_query = f"country == {countries[0]!r}"
+    cohort2_query = f"country == {countries[1]!r}"
+    with pytest.raises(ValueError, match="window_size"):
+        api.fst_gwss(
+            contig=random.choice(api.contigs),
+            sample_sets=all_sample_sets,
+            cohort1_query=cohort1_query,
+            cohort2_query=cohort2_query,
+            site_mask=random.choice(api.site_mask_ids),
+            window_size=10_000_000,  # far larger than any fixture SNP count
+            min_cohort_size=1,
+        )
+
+
 @parametrize_with_cases("fixture,api", cases=".")
 def test_average_fst(fixture, api: AnophelesFstAnalysis):
     # Set up test parameters.