Skip to content

Commit 19e5efc

Browse files
committed
Extend test_allele_frequencies_advanced_with_period_by() to include period_by as random_year
1 parent 3c2ce1d commit 19e5efc

5 files changed

Lines changed: 60 additions & 16 deletions

File tree

malariagen_data/anoph/cnv_frq.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -447,10 +447,12 @@ def gene_cnv_frequencies_advanced(
447447
inline_array: base_params.inline_array = base_params.inline_array_default,
448448
taxon_by: Optional[frq_params.taxon_by] = frq_params.taxon_by_default,
449449
) -> xr.Dataset:
450-
# Check that the taxon_by default hasn't been subverted.
451-
# This avoids type-checking errors, e.g. with `getattr`.
450+
# Check that the taxon_by default hasn't been subverted, e.g. via `taxon_by=None`.
451+
# This also satisfies type-checking, e.g. with `getattr`.
452452
if taxon_by is None:
453-
raise ValueError("`taxon_by` cannot be set to `None`.")
453+
raise ValueError(
454+
f"`taxon_by` cannot be set to `None`. The default would be {frq_params.taxon_by_default!r}."
455+
)
454456

455457
regions: List[Region] = parse_multi_region(self, region)
456458
del region

malariagen_data/anoph/frq_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def prep_samples_for_cohort_grouping(*, df_samples, area_by, period_by, taxon_by
5757
)
5858
):
5959
raise TypeError(
60-
"Invalid values in {period_by!r} column. Must be either pandas.Period or null."
60+
f"Invalid values in {period_by!r} column. Must be either pandas.Period or null."
6161
)
6262

6363
# Copy the specified period_by column to a new "period" column.

malariagen_data/anoph/hap_frq.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,12 @@ def haplotypes_frequencies_advanced(
155155
inline_array: base_params.inline_array = base_params.inline_array_default,
156156
taxon_by: Optional[frq_params.taxon_by] = frq_params.taxon_by_default,
157157
) -> xr.Dataset:
158-
# Check that the taxon_by default hasn't been subverted.
159-
# This avoids type-checking errors, e.g. with `getattr`.
158+
# Check that the taxon_by default hasn't been subverted, e.g. via `taxon_by=None`.
159+
# This also satisfies type-checking, e.g. with `getattr`.
160160
if taxon_by is None:
161-
raise ValueError("`taxon_by` cannot be set to `None`.")
161+
raise ValueError(
162+
f"`taxon_by` cannot be set to `None`. The default would be {frq_params.taxon_by_default!r}."
163+
)
162164

163165
# Load sample metadata.
164166
df_samples = self.sample_metadata(

malariagen_data/anoph/snp_frq.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -453,10 +453,12 @@ def snp_allele_frequencies_advanced(
453453
inline_array: base_params.inline_array = base_params.inline_array_default,
454454
taxon_by: Optional[frq_params.taxon_by] = frq_params.taxon_by_default,
455455
) -> xr.Dataset:
456-
# Check that the taxon_by default hasn't been subverted.
457-
# This avoids type-checking errors, e.g. with `getattr`.
456+
# Check that the taxon_by default hasn't been subverted, e.g. via `taxon_by=None`.
457+
# This also satisfies type-checking, e.g. with `getattr`.
458458
if taxon_by is None:
459-
raise ValueError("`taxon_by` cannot be set to `None`.")
459+
raise ValueError(
460+
f"`taxon_by` cannot be set to `None`. The default would be {frq_params.taxon_by_default!r}."
461+
)
460462

461463
# Load sample metadata.
462464
df_samples = self.sample_metadata(
@@ -687,10 +689,12 @@ def aa_allele_frequencies_advanced(
687689
inline_array: base_params.inline_array = base_params.inline_array_default,
688690
taxon_by: Optional[frq_params.taxon_by] = frq_params.taxon_by_default,
689691
) -> xr.Dataset:
690-
# Check that the taxon_by default hasn't been subverted.
691-
# This avoids type-checking errors, e.g. with `getattr`.
692+
# Check that the taxon_by default hasn't been subverted, e.g. via `taxon_by=None`.
693+
# This also satisfies type-checking, e.g. with `getattr`.
692694
if taxon_by is None:
693-
raise ValueError("`taxon_by` cannot be set to `None`.")
695+
raise ValueError(
696+
f"`taxon_by` cannot be set to `None`. The default would be {frq_params.taxon_by_default!r}."
697+
)
694698

695699
# Begin by computing SNP allele frequencies.
696700
ds_snp_frq = self.snp_allele_frequencies_advanced(

tests/anoph/test_snp_frq.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,30 @@ def random_transcript(*, api):
114114
return transcript
115115

116116

117+
def add_random_year(*, api):
118+
# Add a 'random_year' column to the sample_metadata, if it doesn't exist.
119+
120+
# Get the existing sample metadata.
121+
sample_metadata_df = api.sample_metadata()
122+
123+
# Only create the new column if it doesn't already exist.
124+
# Otherwise we'll get multiple columns with different suffixes, e.g. 'random_year_x' and 'random_year_y'.
125+
if "random_year" not in sample_metadata_df.columns:
126+
random_years_as_list = np.random.choice(
127+
range(1900, 2100), len(sample_metadata_df)
128+
)
129+
random_years_as_period_index = pd.PeriodIndex(random_years_as_list, freq="Y")
130+
extra_metadata_df = pd.DataFrame(
131+
{
132+
"sample_id": sample_metadata_df["sample_id"],
133+
"random_year": random_years_as_period_index,
134+
}
135+
)
136+
api.add_extra_metadata(extra_metadata_df)
137+
138+
return api
139+
140+
117141
@parametrize_with_cases("fixture,api", cases=".")
118142
def test_snp_effects(fixture, api: AnophelesSnpFrequencyAnalysis):
119143
# Pick a random transcript.
@@ -888,7 +912,7 @@ def check_snp_allele_frequencies_advanced(
888912
if area_by is None:
889913
area_by = random.choice(["country", "admin1_iso", "admin2_name"])
890914
if period_by is None:
891-
period_by = random.choice(["year", "quarter", "month"])
915+
period_by = random.choice(["year", "quarter", "month", "random_year"])
892916
if sample_sets is None:
893917
all_sample_sets = api.sample_sets()["sample_set"].to_list()
894918
sample_sets = random.choice(all_sample_sets)
@@ -897,6 +921,10 @@ def check_snp_allele_frequencies_advanced(
897921
if site_mask is None:
898922
site_mask = random.choice(api.site_mask_ids + (None,))
899923

924+
if period_by == "random_year":
925+
# Add a random_year column to the sample metadata, if there isn't one already.
926+
api = add_random_year(api=api)
927+
900928
# Run function under test.
901929
ds = api.snp_allele_frequencies_advanced(
902930
transcript=transcript,
@@ -1002,6 +1030,8 @@ def check_snp_allele_frequencies_advanced(
10021030
expected_freqstr = "M"
10031031
elif period_by == "quarter":
10041032
expected_freqstr = "Q-DEC"
1033+
elif period_by == "random_year":
1034+
expected_freqstr = "Y-DEC"
10051035
else:
10061036
assert False, "not implemented"
10071037
for p in period_values:
@@ -1082,13 +1112,17 @@ def check_aa_allele_frequencies_advanced(
10821112
if area_by is None:
10831113
area_by = random.choice(["country", "admin1_iso", "admin2_name"])
10841114
if period_by is None:
1085-
period_by = random.choice(["year", "quarter", "month"])
1115+
period_by = random.choice(["year", "quarter", "month", "random_year"])
10861116
if sample_sets is None:
10871117
all_sample_sets = api.sample_sets()["sample_set"].to_list()
10881118
sample_sets = random.choice(all_sample_sets)
10891119
if min_cohort_size is None:
10901120
min_cohort_size = random.randint(0, 2)
10911121

1122+
if period_by == "random_year":
1123+
# Add a random_year column to the sample metadata, if there isn't one already.
1124+
api = add_random_year(api=api)
1125+
10921126
# Run function under test.
10931127
ds = api.aa_allele_frequencies_advanced(
10941128
transcript=transcript,
@@ -1185,6 +1219,8 @@ def check_aa_allele_frequencies_advanced(
11851219
expected_freqstr = "M"
11861220
elif period_by == "quarter":
11871221
expected_freqstr = "Q-DEC"
1222+
elif period_by == "random_year":
1223+
expected_freqstr = "Y-DEC"
11881224
else:
11891225
assert False, "not implemented"
11901226
for p in period_values:
@@ -1266,7 +1302,7 @@ def test_allele_frequencies_advanced_with_area_by(
12661302
)
12671303

12681304

1269-
@pytest.mark.parametrize("period_by", ["year", "quarter", "month"])
1305+
@pytest.mark.parametrize("period_by", ["year", "quarter", "month", "random_year"])
12701306
@parametrize_with_cases("fixture,api", cases=".")
12711307
def test_allele_frequencies_advanced_with_period_by(
12721308
fixture,

0 commit comments

Comments
 (0)