Skip to content

Commit c3f42ab

Browse files
committed
fix: gracefully handle oversized window_size by auto-adjusting
1 parent a05bab0 commit c3f42ab

1 file changed

Lines changed: 17 additions & 6 deletions

File tree

malariagen_data/anoph/fst.py

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -83,14 +83,24 @@ def _fst_gwss(
8383
).compute()
8484

8585
n_snps = len(pos)
86-
if window_size > n_snps:
86+
_min_snps_threshold = 1000
87+
_window_adjustment_factor = 10
88+
if n_snps < _min_snps_threshold:
89+
raise ValueError(
90+
f"Too few SNP sites ({n_snps}) available for Fst GWSS. "
91+
f"At least {_min_snps_threshold} sites are required. "
92+
"Try a larger genomic region or different site selection criteria."
93+
)
94+
if window_size >= n_snps:
95+
adjusted_window_size = max(1, n_snps // _window_adjustment_factor)
8796
warnings.warn(
88-
f"window_size ({window_size}) is larger than the number of SNP sites "
89-
f"available ({n_snps}); adjusting window_size to {n_snps}.",
97+
f"window_size ({window_size}) is >= the number of SNP sites "
98+
f"available ({n_snps}); automatically adjusting window_size to "
99+
f"{adjusted_window_size} (= {n_snps} // {_window_adjustment_factor}).",
90100
UserWarning,
91101
stacklevel=2,
92102
)
93-
window_size = n_snps
103+
window_size = adjusted_window_size
94104

95105
with self._spinner(desc="Compute Fst"):
96106
with np.errstate(divide="ignore", invalid="ignore"):
@@ -107,9 +117,10 @@ def _fst_gwss(
107117
@doc(
108118
summary="""
109119
Run a Fst genome-wide scan to investigate genetic differentiation
110-
between two cohorts. If window_size exceeds the number of available
120+
between two cohorts. If window_size is >= the number of available
111121
SNP sites, a UserWarning is issued and window_size is automatically
112-
reduced to the number of available sites.
122+
adjusted to number_of_snps // 10. A ValueError is raised if the
123+
number of available SNP sites is below 1000.
113124
""",
114125
returns=dict(
115126
x="An array containing the window centre point genomic positions",

0 commit comments

Comments
 (0)