Skip to content

Commit 16252c1

Browse files
author
suhr25
committed
refactor: expose SNP threshold and window adjustment as function parameters
1 parent f2f4f91 commit 16252c1

1 file changed

Lines changed: 18 additions & 9 deletions

File tree

malariagen_data/anoph/fst.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ def _fst_gwss(
4444
inline_array,
4545
chunks,
4646
clip_min,
47+
min_snps_threshold,
48+
window_adjustment_factor,
4749
):
4850
# Compute allele counts.
4951
ac1 = self.snp_allele_counts(
@@ -83,20 +85,18 @@ def _fst_gwss(
8385
).compute()
8486

8587
n_snps = len(pos)
86-
_min_snps_threshold = 1000
87-
_window_adjustment_factor = 10
88-
if n_snps < _min_snps_threshold:
88+
if n_snps < min_snps_threshold:
8989
raise ValueError(
9090
f"Too few SNP sites ({n_snps}) available for Fst GWSS. "
91-
f"At least {_min_snps_threshold} sites are required. "
91+
f"At least {min_snps_threshold} sites are required. "
9292
"Try a larger genomic region or different site selection criteria."
9393
)
9494
if window_size >= n_snps:
95-
adjusted_window_size = max(1, n_snps // _window_adjustment_factor)
95+
adjusted_window_size = max(1, n_snps // window_adjustment_factor)
9696
warnings.warn(
9797
f"window_size ({window_size}) is >= the number of SNP sites "
9898
f"available ({n_snps}); automatically adjusting window_size to "
99-
f"{adjusted_window_size} (= {n_snps} // {_window_adjustment_factor}).",
99+
f"{adjusted_window_size} (= {n_snps} // {window_adjustment_factor}).",
100100
UserWarning,
101101
stacklevel=2,
102102
)
@@ -119,8 +119,9 @@ def _fst_gwss(
119119
Run a Fst genome-wide scan to investigate genetic differentiation
120120
between two cohorts. If window_size is >= the number of available
121121
SNP sites, a UserWarning is issued and window_size is automatically
122-
adjusted to number_of_snps // 10. A ValueError is raised if the
123-
number of available SNP sites is below 1000.
122+
adjusted to number_of_snps // window_adjustment_factor. A ValueError
123+
is raised if the number of available SNP sites is below
124+
min_snps_threshold.
124125
""",
125126
returns=dict(
126127
x="An array containing the window centre point genomic positions",
@@ -147,6 +148,8 @@ def fst_gwss(
147148
inline_array: base_params.inline_array = base_params.inline_array_default,
148149
chunks: base_params.chunks = base_params.native_chunks,
149150
clip_min: fst_params.clip_min = 0.0,
151+
min_snps_threshold: int = 1000,
152+
window_adjustment_factor: int = 10,
150153
) -> Tuple[np.ndarray, np.ndarray]:
151154
# Change this name if you ever change the behaviour of this function, to
152155
# invalidate any previously cached data.
@@ -171,7 +174,13 @@ def fst_gwss(
171174
results = self.results_cache_get(name=name, params=params)
172175

173176
except CacheMiss:
174-
results = self._fst_gwss(**params, inline_array=inline_array, chunks=chunks)
177+
results = self._fst_gwss(
178+
**params,
179+
inline_array=inline_array,
180+
chunks=chunks,
181+
min_snps_threshold=min_snps_threshold,
182+
window_adjustment_factor=window_adjustment_factor,
183+
)
175184
self.results_cache_set(name=name, params=params, results=results)
176185

177186
x = results["x"]

0 commit comments

Comments
 (0)