Skip to content

Commit ee87fca

Browse files
committed
Fix #778: raise descriptive errors in njt/plot_njt
When plot_njt() or njt() fails due to insufficient SNPs or samples, the error messages now explain what went wrong and suggest possible remedies, instead of showing an uninformative 'Not enough SNPs.' message buried under CacheMiss tracebacks.

Changes:
- distance.py: wrap the distance computation in _njt() to catch ValueError and re-raise it with tree-building context
- distance.py: validate the minimum sample count before building the tree
- snp_data.py: improve the 'Not enough SNPs' message with counts and suggestions
- test_distance.py: add a test for the descriptive error messages
1 parent 509ad40 commit ee87fca

3 files changed

Lines changed: 60 additions & 19 deletions

File tree

malariagen_data/anoph/distance.py

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -362,24 +362,43 @@ def _njt(
362362
from scipy.spatial.distance import squareform # type: ignore
363363

364364
# Compute pairwise distances.
365-
dist, samples, n_snps = self.biallelic_diplotype_pairwise_distances(
366-
region=region,
367-
n_snps=n_snps,
368-
metric=metric,
369-
sample_sets=sample_sets,
370-
sample_indices=sample_indices,
371-
site_mask=site_mask,
372-
site_class=site_class,
373-
inline_array=inline_array,
374-
chunks=chunks,
375-
cohort_size=cohort_size,
376-
min_cohort_size=min_cohort_size,
377-
max_cohort_size=max_cohort_size,
378-
random_seed=random_seed,
379-
max_missing_an=max_missing_an,
380-
min_minor_ac=min_minor_ac,
381-
thin_offset=thin_offset,
382-
)
365+
try:
366+
dist, samples, n_snps_used = self.biallelic_diplotype_pairwise_distances(
367+
region=region,
368+
n_snps=n_snps,
369+
metric=metric,
370+
sample_sets=sample_sets,
371+
sample_indices=sample_indices,
372+
site_mask=site_mask,
373+
site_class=site_class,
374+
inline_array=inline_array,
375+
chunks=chunks,
376+
cohort_size=cohort_size,
377+
min_cohort_size=min_cohort_size,
378+
max_cohort_size=max_cohort_size,
379+
random_seed=random_seed,
380+
max_missing_an=max_missing_an,
381+
min_minor_ac=min_minor_ac,
382+
thin_offset=thin_offset,
383+
)
384+
385+
except ValueError as e:
386+
raise ValueError(
387+
f"Unable to construct neighbour-joining tree. {e} "
388+
f"This could be because the selected region does not "
389+
f"contain enough polymorphic SNPs for the given sample "
390+
f"sets and query parameters."
391+
) from e
392+
393+
# Validate enough samples for a tree.
394+
n_samples = len(samples)
395+
if n_samples < 3:
396+
raise ValueError(
397+
f"Not enough samples to construct a neighbour-joining tree. "
398+
f"A minimum of 3 samples is required, but only {n_samples} "
399+
f"were found for the given region and sample sets."
400+
)
401+
383402
D = squareform(dist)
384403

385404
# anjl supports passing in a progress bar function to get progress on the

malariagen_data/anoph/snp_data.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1886,7 +1886,12 @@ def biallelic_snp_calls(
18861886
ds_out = ds_out.isel(variants=loc_thin)
18871887

18881888
elif ds_out.sizes["variants"] < n_snps:
1889-
raise ValueError("Not enough SNPs.")
1889+
raise ValueError(
1890+
f"Not enough SNPs. Requested {n_snps} SNPs but only "
1891+
f"{ds_out.sizes['variants']} were found in the selected "
1892+
f"region after applying filters. Try using a larger region, "
1893+
f"relaxing site filters, or reducing the n_snps parameter."
1894+
)
18901895

18911896
return ds_out
18921897

tests/anoph/test_distance.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,20 @@ def test_plot_njt(fixture, api: AnophelesDistanceAnalysis):
269269
**data_params,
270270
)
271271
assert isinstance(fig, go.Figure)
272+
273+
274+
@parametrize_with_cases("fixture,api", cases=".")
275+
def test_njt_not_enough_snps(fixture, api: AnophelesDistanceAnalysis):
276+
all_sample_sets = api.sample_sets()["sample_set"].to_list()
277+
with pytest.raises(
278+
ValueError,
279+
match="Unable to construct neighbour-joining tree|Not enough SNPs",
280+
):
281+
api.njt(
282+
region=random.choice(api.contigs),
283+
n_snps=1_000_000_000, # impossibly high to guarantee failure
284+
sample_sets=random.sample(all_sample_sets, 1),
285+
site_mask=random.choice((None,) + api.site_mask_ids),
286+
min_minor_ac=pca_params.min_minor_ac_default,
287+
max_missing_an=pca_params.max_missing_an_default,
288+
)

0 commit comments

Comments
 (0)