Skip to content

Commit f9e68a8

Browse files
authored
Merge branch 'master' into fix/pr817-ci-cleanup
2 parents f135b2d + 8545ce1 commit f9e68a8

3 files changed

Lines changed: 44 additions & 8 deletions

File tree

malariagen_data/anoph/fst.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -404,8 +404,14 @@ def average_fst(
404404
)
405405

406406
# Calculate block length for jackknife.
407-
n_sites = ac1.shape[0] # number of sites
408-
block_length = n_sites // n_jack # number of sites in each block
407+
n_sites = ac1.shape[0]
408+
block_length = n_sites // n_jack
409+
410+
if block_length < 1:
411+
raise ValueError(
412+
f"Not enough sites ({n_sites}) for {n_jack} jackknife blocks. "
413+
"Choose a larger region or reduce n_jack."
414+
)
409415

410416
# Calculate average Fst.
411417
fst, se, _, _ = allel.blockwise_hudson_fst(ac1, ac2, blen=block_length)
@@ -530,7 +536,7 @@ def plot_pairwise_average_fst(
530536

531537
# Set up plot title.
532538
title = "<i>F</i><sub>ST</sub>"
533-
if annotation is not None:
539+
if annotation is not None and annotation != "lower triangle":
534540
title += " ⧅ " + annotation
535541

536542
# Fill the figure dataframe from the Fst dataframe.
@@ -543,12 +549,15 @@ def plot_pairwise_average_fst(
543549
fig_df.loc[cohort1, cohort2] = np.nan
544550
else:
545551
fig_df.loc[cohort1, cohort2] = fst / se
552+
elif annotation == "lower triangle":
553+
# Leave the upper triangle as NaN (empty).
554+
pass
546555
else:
547556
fig_df.loc[cohort1, cohort2] = fst
548557

549558
# Don't colour the plot if the upper triangle is SE or Z score,
550559
# as the colouring doesn't really make sense.
551-
if annotation is not None and zmax is None:
560+
if annotation is not None and annotation != "lower triangle" and zmax is None:
552561
zmax = 1e9
553562

554563
# Dynamically size the figure based on number of cohorts.

malariagen_data/anoph/fst_params.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,12 @@
3535
]
3636

3737
annotation: TypeAlias = Annotated[
38-
Optional[Literal["standard error", "Z score"]],
38+
Optional[Literal["standard error", "Z score", "lower triangle"]],
3939
"""
40-
How to annotate the upper-right corner of the plot. Default behaviour (None) is using Fst, other options
41-
are using the standard error (if annotation is 'standard error') or the Z score of the two
42-
cohorts being the same (if annotation is 'Z score').
40+
How to annotate the upper-right corner of the plot. Default behaviour (None)
41+
is using Fst, other options are using the standard error (if annotation is
42+
'standard error'), the Z score of the two cohorts being the same (if
43+
annotation is 'Z score'), or leaving the upper triangle empty (if annotation
44+
is 'lower triangle').
4345
""",
4446
]

tests/anoph/test_fst.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,27 @@ def test_average_fst_with_min_cohort_size(fixture, api: AnophelesFstAnalysis):
190190
api.average_fst(**fst_params)
191191

192192

193+
@parametrize_with_cases("fixture,api", cases=".")
194+
def test_average_fst_region_too_small(fixture, api: AnophelesFstAnalysis):
195+
"""ValueError should be raised when block_length == 0 (n_jack > n_sites)."""
196+
all_sample_sets = api.sample_sets()["sample_set"].to_list()
197+
all_countries = api.sample_metadata()["country"].dropna().unique().tolist()
198+
countries = random.sample(all_countries, 2)
199+
cohort1_query = f"country == {countries[0]!r}"
200+
cohort2_query = f"country == {countries[1]!r}"
201+
fst_params = dict(
202+
region=random.choice(api.contigs),
203+
sample_sets=all_sample_sets,
204+
cohort1_query=cohort1_query,
205+
cohort2_query=cohort2_query,
206+
site_mask=random.choice(api.site_mask_ids),
207+
min_cohort_size=1,
208+
n_jack=1_000_000, # deliberately exceeds available sites
209+
)
210+
with pytest.raises(ValueError, match="Not enough sites"):
211+
api.average_fst(**fst_params)
212+
213+
193214
def check_pairwise_average_fst(api: AnophelesFstAnalysis, fst_params):
194215
# Run main function under test.
195216
fst_df = api.pairwise_average_fst(**fst_params)
@@ -236,6 +257,10 @@ def check_pairwise_average_fst(api: AnophelesFstAnalysis, fst_params):
236257
assert isinstance(fig, go.Figure)
237258
fig = api.plot_pairwise_average_fst(fst_df, annotation="Z score", show=False)
238259
assert isinstance(fig, go.Figure)
260+
fig = api.plot_pairwise_average_fst(
261+
fst_df, annotation="lower triangle", show=False
262+
)
263+
assert isinstance(fig, go.Figure)
239264

240265

241266
@pytest.mark.parametrize("cohorts", ["country", "admin1_year", "cohort_admin2_month"])

0 commit comments

Comments
 (0)