Skip to content

Commit b860bf9

Browse files
authored
Merge pull request #1140 from suhr25/fix/area-by-validation
fix: validate area_by column in _prep_samples_for_cohort_grouping
2 parents eae786e + 7ff11c7 commit b860bf9

2 files changed

Lines changed: 24 additions & 0 deletions

File tree

malariagen_data/anoph/frq_base.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,13 @@ def _prep_samples_for_cohort_grouping(
7878
# Apply the matching period_by function to create a new "period" column.
7979
df_samples["period"] = df_samples.apply(period_by_func, axis="columns")
8080

81+
# Validate area_by.
82+
if area_by not in df_samples.columns:
83+
raise ValueError(
84+
f"Invalid value for `area_by`: {area_by!r}. "
85+
f"Must be the name of an existing column in the sample metadata."
86+
)
87+
8188
# Copy the specified area_by column to a new "area" column.
8289
df_samples["area"] = df_samples[area_by]
8390

tests/anoph/test_frq_base.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,23 @@ def test_does_not_modify_original(self):
101101
assert df["taxon"].tolist() == original_values
102102

103103

104+
class TestPrepSamplesAreaByValidation:
105+
"""Tests for area_by validation in _prep_samples_for_cohort_grouping."""
106+
107+
def test_invalid_area_by_raises_value_error(self):
108+
"""A non-existent area_by column should raise ValueError, not KeyError."""
109+
import pytest
110+
111+
df = _make_test_df()
112+
with pytest.raises(ValueError, match="Invalid value for `area_by`"):
113+
_prep_samples_for_cohort_grouping(
114+
df_samples=df,
115+
area_by="nonexistent_column",
116+
period_by="year",
117+
taxon_by="taxon",
118+
)
119+
120+
104121
class TestPlotFrequenciesTimeSeriesMissingCI:
105122
"""Tests for plot_frequencies_time_series when CI variables are absent.
106123

0 commit comments

Comments
 (0)