Skip to content

Commit 69c858f

Browse files
committed
fix: validate taxon_by column in _prep_samples_for_cohort_grouping
Signed-off-by: Suhrid Marwah <suhridmarwah07@gmail.com>
1 parent 63a8201 commit 69c858f

2 files changed

Lines changed: 24 additions & 0 deletions

File tree

malariagen_data/anoph/frq_base.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,13 @@ def _prep_samples_for_cohort_grouping(
2929
# Users can explicitly override with True/False.
3030
filter_unassigned = taxon_by == "taxon"
3131

32+
# Validate taxon_by.
33+
if taxon_by not in df_samples.columns:
34+
raise ValueError(
35+
f"Invalid value for `taxon_by`: {taxon_by!r}. "
36+
f"Must be the name of an existing column in the sample metadata."
37+
)
38+
3239
if filter_unassigned:
3340
# Remove samples with "intermediate" or "unassigned" taxon values,
3441
# as we only want cohorts with clean taxon calls.

tests/anoph/test_frq_base.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,23 @@ def test_invalid_area_by_raises_value_error(self):
118118
)
119119

120120

121+
class TestPrepSamplesTaxonByValidation:
122+
"""Tests for taxon_by validation in _prep_samples_for_cohort_grouping."""
123+
124+
def test_invalid_taxon_by_raises_value_error(self):
125+
"""A non-existent taxon_by column should raise ValueError, not KeyError."""
126+
import pytest
127+
128+
df = _make_test_df()
129+
with pytest.raises(ValueError, match="Invalid value for `taxon_by`"):
130+
_prep_samples_for_cohort_grouping(
131+
df_samples=df,
132+
area_by="admin1_iso",
133+
period_by="year",
134+
taxon_by="nonexistent_column",
135+
)
136+
137+
121138
class TestPlotFrequenciesTimeSeriesMissingCI:
122139
"""Tests for plot_frequencies_time_series when CI variables are absent.
123140

0 commit comments

Comments
 (0)