Skip to content

Commit 8a50ccc

Browse files
author
suhr25
committed
fix: document new params via Annotated types and @doc parameters dict
- Define min_snps_threshold and window_adjustment_factor as Annotated TypeAlias definitions in fst_params.py so that numpydoc_decorator 2.2.1 can extract their documentation automatically (plain int parameters without Annotated or an explicit parameters dict cause a DocumentationError at import time).
- Add explicit parameters dict entries to @doc(fst_gwss) as belt-and-suspenders documentation for both new parameters.
- Switch the fst_gwss signature from plain int to fst_params.min_snps_threshold and fst_params.window_adjustment_factor (consistent with codebase style).
- Replace unseeded random.choice/random.sample in the new tests with seeded np.random.choice (matching all existing tests, which use np.random seeded with 42 via the autouse session fixture).
- Remove import random from test_fst.py (no longer needed).
1 parent a6e0bdf commit 8a50ccc

File tree

3 files changed

+35
-9
lines changed

3 files changed

+35
-9
lines changed

malariagen_data/anoph/fst.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,17 @@ def _fst_gwss(
123123
is raised if the number of available SNP sites is below
124124
min_snps_threshold.
125125
""",
126+
parameters=dict(
127+
min_snps_threshold="""
128+
Minimum number of SNP sites required. If fewer sites are
129+
available a ValueError is raised.
130+
""",
131+
window_adjustment_factor="""
132+
If window_size is >= the number of available SNP sites,
133+
window_size is automatically set to
134+
number_of_snps // window_adjustment_factor.
135+
""",
136+
),
126137
returns=dict(
127138
x="An array containing the window centre point genomic positions",
128139
fst="An array with Fst statistic values for each window.",
@@ -148,8 +159,8 @@ def fst_gwss(
148159
inline_array: base_params.inline_array = base_params.inline_array_default,
149160
chunks: base_params.chunks = base_params.native_chunks,
150161
clip_min: fst_params.clip_min = 0.0,
151-
min_snps_threshold: int = 1000,
152-
window_adjustment_factor: int = 10,
162+
min_snps_threshold: fst_params.min_snps_threshold = 1000,
163+
window_adjustment_factor: fst_params.window_adjustment_factor = 10,
153164
) -> Tuple[np.ndarray, np.ndarray]:
154165
# Change this name if you ever change the behaviour of this function, to
155166
# invalidate any previously cached data.

malariagen_data/anoph/fst_params.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,22 @@
3434
""",
3535
]
3636

37+
min_snps_threshold: TypeAlias = Annotated[
38+
int,
39+
"""
40+
Minimum number of SNP sites required for the Fst GWSS computation. If
41+
fewer sites are available, a ValueError is raised.
42+
""",
43+
]
44+
45+
window_adjustment_factor: TypeAlias = Annotated[
46+
int,
47+
"""
48+
If window_size is >= the number of available SNP sites, the window_size
49+
is automatically adjusted to number_of_snps // window_adjustment_factor.
50+
""",
51+
]
52+
3753
annotation: TypeAlias = Annotated[
3854
Optional[Literal["standard error", "Z score", "lower triangle"]],
3955
"""

tests/anoph/test_fst.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import itertools
2-
import random
32
import pytest
43
from pytest_cases import parametrize_with_cases
54
import numpy as np
@@ -146,16 +145,16 @@ def test_fst_gwss_window_size_too_large(fixture, api: AnophelesFstAnalysis):
146145
# the function must still return a valid result using the adjusted window_size.
147146
all_sample_sets = api.sample_sets()["sample_set"].to_list()
148147
all_countries = api.sample_metadata()["country"].dropna().unique().tolist()
149-
countries = random.sample(all_countries, 2)
148+
countries = np.random.choice(all_countries, size=2, replace=False).tolist()
150149
cohort1_query = f"country == {countries[0]!r}"
151150
cohort2_query = f"country == {countries[1]!r}"
152151
with pytest.warns(UserWarning, match="window_size"):
153152
x, fst = api.fst_gwss(
154-
contig=random.choice(api.contigs),
153+
contig=str(np.random.choice(api.contigs)),
155154
sample_sets=all_sample_sets,
156155
cohort1_query=cohort1_query,
157156
cohort2_query=cohort2_query,
158-
site_mask=random.choice(api.site_mask_ids),
157+
site_mask=str(np.random.choice(api.site_mask_ids)),
159158
window_size=10_000_000, # far larger than any fixture SNP count
160159
min_cohort_size=1,
161160
)
@@ -170,16 +169,16 @@ def test_fst_gwss_too_few_snps(fixture, api: AnophelesFstAnalysis):
170169
# When min_snps_threshold exceeds available SNPs, a ValueError must be raised.
171170
all_sample_sets = api.sample_sets()["sample_set"].to_list()
172171
all_countries = api.sample_metadata()["country"].dropna().unique().tolist()
173-
countries = random.sample(all_countries, 2)
172+
countries = np.random.choice(all_countries, size=2, replace=False).tolist()
174173
cohort1_query = f"country == {countries[0]!r}"
175174
cohort2_query = f"country == {countries[1]!r}"
176175
with pytest.raises(ValueError, match="Too few SNP sites"):
177176
api.fst_gwss(
178-
contig=random.choice(api.contigs),
177+
contig=str(np.random.choice(api.contigs)),
179178
sample_sets=all_sample_sets,
180179
cohort1_query=cohort1_query,
181180
cohort2_query=cohort2_query,
182-
site_mask=random.choice(api.site_mask_ids),
181+
site_mask=str(np.random.choice(api.site_mask_ids)),
183182
window_size=100,
184183
min_cohort_size=1,
185184
min_snps_threshold=10_000_000, # far larger than any fixture SNP count (~28k-70k)

0 commit comments

Comments
 (0)