Extend test_gene_cnv_frequencies_advanced_with_period_by() to include period_by as random_year

leehart · leehart · commit 27e4b7293a2c · 2025-03-06T15:29:05.000Z
diff --git a/tests/anoph/test_cnv_frq.py b/tests/anoph/test_cnv_frq.py
@@ -17,6 +17,7 @@
     check_plot_frequencies_time_series_with_taxa,
     check_plot_frequencies_time_series_with_areas,
     check_plot_frequencies_interactive_map,
+    add_random_year,
 )
 
 
@@ -479,7 +480,7 @@ def test_gene_cnv_frequencies_advanced_with_area_by(
     )
 
 
-@pytest.mark.parametrize("period_by", ["year", "quarter", "month"])
+@pytest.mark.parametrize("period_by", ["year", "quarter", "month", "random_year"])
 @parametrize_with_cases("fixture,api", cases=".")
 def test_gene_cnv_frequencies_advanced_with_period_by(
     fixture,
@@ -711,13 +712,17 @@ def check_gene_cnv_frequencies_advanced(
     if area_by is None:
         area_by = random.choice(["country", "admin1_iso", "admin2_name"])
     if period_by is None:
-        period_by = random.choice(["year", "quarter", "month"])
+        period_by = random.choice(["year", "quarter", "month", "random_year"])
     if sample_sets is None:
         all_sample_sets = api.sample_sets()["sample_set"].to_list()
         sample_sets = random.choice(all_sample_sets)
     if min_cohort_size is None:
         min_cohort_size = random.randint(0, 2)
 
+    if period_by == "random_year":
+        # Add a random_year column to the sample metadata, if there isn't one already.
+        api = add_random_year(api=api)
+
     # Run function under test.
     ds = api.gene_cnv_frequencies_advanced(
         region=region,
@@ -816,6 +821,8 @@ def check_gene_cnv_frequencies_advanced(
         expected_freqstr = "M"
     elif period_by == "quarter":
         expected_freqstr = "Q-DEC"
+    elif period_by == "random_year":
+        expected_freqstr = "Y-DEC"
     else:
         assert False, "not implemented"
     for p in period_values:
diff --git a/tests/anoph/test_frq.py b/tests/anoph/test_frq.py
@@ -1,6 +1,8 @@
 import pytest
 import plotly.graph_objects as go  # type: ignore
 
+import numpy as np
+import pandas as pd
 import random
 
 
@@ -91,3 +93,27 @@ def check_plot_frequencies_interactive_map(api, ds):
 
     # Test.
     assert isinstance(fig, ipywidgets.Widget)
+
+
+def add_random_year(*, api):
+    # Ensure the api's sample metadata has a 'random_year' column (added via add_extra_metadata if absent), then return the same api object.
+
+    # Get the existing sample metadata, both to check its columns and to obtain the sample IDs.
+    sample_metadata_df = api.sample_metadata()
+
+    # Only create the new column if it doesn't already exist.
+    # Otherwise we'll get multiple columns with different suffixes, e.g. 'random_year_x' and 'random_year_y'.
+    if "random_year" not in sample_metadata_df.columns:
+        # Avoid "ValueError: No cohorts available" by drawing every sample's year from just 3 distinct years in 1900-2099, picked at random.
+        selected_years = random.sample(range(1900, 2100), 3)
+        random_years_as_list = np.random.choice(selected_years, len(sample_metadata_df))
+        random_years_as_period_index = pd.PeriodIndex(random_years_as_list, freq="Y")
+        extra_metadata_df = pd.DataFrame(
+            {
+                "sample_id": sample_metadata_df["sample_id"],
+                "random_year": random_years_as_period_index,
+            }
+        )
+        api.add_extra_metadata(extra_metadata_df)
+
+    return api
diff --git a/tests/anoph/test_snp_frq.py b/tests/anoph/test_snp_frq.py
@@ -18,6 +18,7 @@
     check_plot_frequencies_time_series_with_taxa,
     check_plot_frequencies_time_series_with_areas,
     check_plot_frequencies_interactive_map,
+    add_random_year,
 )
 
 
@@ -114,30 +115,6 @@ def random_transcript(*, api):
     return transcript
 
 
-def add_random_year(*, api):
-    # Add a 'random_year' column to the sample_metadata, if it doesn't exist.
-
-    # Get the existing sample metadata.
-    sample_metadata_df = api.sample_metadata()
-
-    # Only create the new column if it doesn't already exist.
-    # Otherwise we'll get multiple columns with different suffixes, e.g. 'random_year_x' and 'random_year_y'.
-    if "random_year" not in sample_metadata_df.columns:
-        # Avoid "ValueError: No cohorts available" by selecting only a few different years at random.
-        selected_years = random.sample(range(1900, 2100), 3)
-        random_years_as_list = np.random.choice(selected_years, len(sample_metadata_df))
-        random_years_as_period_index = pd.PeriodIndex(random_years_as_list, freq="Y")
-        extra_metadata_df = pd.DataFrame(
-            {
-                "sample_id": sample_metadata_df["sample_id"],
-                "random_year": random_years_as_period_index,
-            }
-        )
-        api.add_extra_metadata(extra_metadata_df)
-
-    return api
-
-
 @parametrize_with_cases("fixture,api", cases=".")
 def test_snp_effects(fixture, api: AnophelesSnpFrequencyAnalysis):
     # Pick a random transcript.