Skip to content

Commit 772c135

Browse files
authored
Merge pull request #921 from bleedblack1/fix/778-njt-descriptive-errors
Fix #778: descriptive errors in njt/plot_njt for insufficient data
2 parents 84de83d + 40966c4 commit 772c135

3 files changed

Lines changed: 60 additions & 19 deletions

File tree

malariagen_data/anoph/distance.py

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -365,24 +365,43 @@ def _njt(
365365
from scipy.spatial.distance import squareform # type: ignore
366366

367367
# Compute pairwise distances.
368-
dist, samples, n_snps = self.biallelic_diplotype_pairwise_distances(
369-
region=region,
370-
n_snps=n_snps,
371-
metric=metric,
372-
sample_sets=sample_sets,
373-
sample_indices=sample_indices,
374-
site_mask=site_mask,
375-
site_class=site_class,
376-
inline_array=inline_array,
377-
chunks=chunks,
378-
cohort_size=cohort_size,
379-
min_cohort_size=min_cohort_size,
380-
max_cohort_size=max_cohort_size,
381-
random_seed=random_seed,
382-
max_missing_an=max_missing_an,
383-
min_minor_ac=min_minor_ac,
384-
thin_offset=thin_offset,
385-
)
368+
try:
369+
dist, samples, n_snps_used = self.biallelic_diplotype_pairwise_distances(
370+
region=region,
371+
n_snps=n_snps,
372+
metric=metric,
373+
sample_sets=sample_sets,
374+
sample_indices=sample_indices,
375+
site_mask=site_mask,
376+
site_class=site_class,
377+
inline_array=inline_array,
378+
chunks=chunks,
379+
cohort_size=cohort_size,
380+
min_cohort_size=min_cohort_size,
381+
max_cohort_size=max_cohort_size,
382+
random_seed=random_seed,
383+
max_missing_an=max_missing_an,
384+
min_minor_ac=min_minor_ac,
385+
thin_offset=thin_offset,
386+
)
387+
388+
except ValueError as e:
389+
raise ValueError(
390+
f"Unable to construct neighbour-joining tree. {e} "
391+
f"This could be because the selected region does not "
392+
f"contain enough polymorphic SNPs for the given sample "
393+
f"sets and query parameters."
394+
) from e
395+
396+
# Validate enough samples for a tree.
397+
n_samples = len(samples)
398+
if n_samples < 3:
399+
raise ValueError(
400+
f"Not enough samples to construct a neighbour-joining tree. "
401+
f"A minimum of 3 samples is required, but only {n_samples} "
402+
f"were found for the given region and sample sets."
403+
)
404+
386405
D = squareform(dist)
387406

388407
# anjl supports passing in a progress bar function to get progress on the

malariagen_data/anoph/snp_data.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1903,7 +1903,12 @@ def biallelic_snp_calls(
19031903
ds_out = ds_out.isel(variants=loc_thin)
19041904

19051905
elif ds_out.sizes["variants"] < n_snps:
1906-
raise ValueError("Not enough SNPs.")
1906+
raise ValueError(
1907+
f"Not enough SNPs. Requested {n_snps} SNPs but only "
1908+
f"{ds_out.sizes['variants']} were found in the selected "
1909+
f"region after applying filters. Try using a larger region, "
1910+
f"relaxing site filters, or reducing the n_snps parameter."
1911+
)
19071912

19081913
return ds_out
19091914

tests/anoph/test_distance.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,20 @@ def test_plot_njt(fixture, api: AnophelesDistanceAnalysis):
269269
**data_params,
270270
)
271271
assert isinstance(fig, go.Figure)
272+
273+
274+
@parametrize_with_cases("fixture,api", cases=".")
275+
def test_njt_not_enough_snps(fixture, api: AnophelesDistanceAnalysis):
276+
all_sample_sets = api.sample_sets()["sample_set"].to_list()
277+
with pytest.raises(
278+
ValueError,
279+
match="Unable to construct neighbour-joining tree|Not enough SNPs",
280+
):
281+
api.njt(
282+
region=random.choice(api.contigs),
283+
n_snps=1_000_000_000, # impossibly high to guarantee failure
284+
sample_sets=random.sample(all_sample_sets, 1),
285+
site_mask=random.choice((None,) + api.site_mask_ids),
286+
min_minor_ac=pca_params.min_minor_ac_default,
287+
max_missing_an=pca_params.max_missing_an_default,
288+
)

0 commit comments

Comments
 (0)