File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -29,6 +29,13 @@ def _prep_samples_for_cohort_grouping(
2929 # Users can explicitly override with True/False.
3030 filter_unassigned = taxon_by == "taxon"
3131
32+ # Validate taxon_by.
33+ if taxon_by not in df_samples .columns :
34+ raise ValueError (
35+ f"Invalid value for `taxon_by`: { taxon_by !r} . "
36+ f"Must be the name of an existing column in the sample metadata."
37+ )
38+
3239 if filter_unassigned :
3340 # Remove samples with "intermediate" or "unassigned" taxon values,
3441 # as we only want cohorts with clean taxon calls.
Original file line number Diff line number Diff line change @@ -118,6 +118,23 @@ def test_invalid_area_by_raises_value_error(self):
118118 )
119119
120120
121+ class TestPrepSamplesTaxonByValidation :
122+ """Tests for taxon_by validation in _prep_samples_for_cohort_grouping."""
123+
124+ def test_invalid_taxon_by_raises_value_error (self ):
125+ """A non-existent taxon_by column should raise ValueError, not KeyError."""
126+ import pytest
127+
128+ df = _make_test_df ()
129+ with pytest .raises (ValueError , match = "Invalid value for `taxon_by`" ):
130+ _prep_samples_for_cohort_grouping (
131+ df_samples = df ,
132+ area_by = "admin1_iso" ,
133+ period_by = "year" ,
134+ taxon_by = "nonexistent_column" ,
135+ )
136+
137+
121138class TestPlotFrequenciesTimeSeriesMissingCI :
122139 """Tests for plot_frequencies_time_series when CI variables are absent.
123140
You can’t perform that action at this time.
0 commit comments