Skip to content

Commit d5dd128

Browse files
committed
Guard CI variable access in frequency plotting functions
1 parent 1430baf commit d5dd128

3 files changed

Lines changed: 142 additions & 41 deletions

File tree

malariagen_data/anoph/frq_base.py

Lines changed: 53 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -393,39 +393,50 @@ def plot_frequencies_time_series(
393393
# Extract variant labels.
394394
variant_labels = ds["variant_label"].values
395395

396+
# Check if CI variables are available.
397+
has_ci = "event_frequency_ci_low" in ds
398+
396399
# Build a long-form dataframe from the dataset.
397400
dfs = []
398401
for cohort in df_cohorts.itertuples():
399402
ds_cohort = ds.isel(cohorts=cohort.Index)
400-
df = pd.DataFrame(
401-
{
402-
"taxon": cohort.taxon,
403-
"area": cohort.area,
404-
"date": cohort.period_start,
405-
"period": str(
406-
cohort.period
407-
), # use string representation for hover label
408-
"sample_size": cohort.size,
409-
"variant": variant_labels,
410-
"count": ds_cohort["event_count"].values,
411-
"nobs": ds_cohort["event_nobs"].values,
412-
"frequency": ds_cohort["event_frequency"].values,
413-
"frequency_ci_low": ds_cohort["event_frequency_ci_low"].values,
414-
"frequency_ci_upp": ds_cohort["event_frequency_ci_upp"].values,
415-
}
416-
)
403+
cohort_data = {
404+
"taxon": cohort.taxon,
405+
"area": cohort.area,
406+
"date": cohort.period_start,
407+
"period": str(
408+
cohort.period
409+
), # use string representation for hover label
410+
"sample_size": cohort.size,
411+
"variant": variant_labels,
412+
"count": ds_cohort["event_count"].values,
413+
"nobs": ds_cohort["event_nobs"].values,
414+
"frequency": ds_cohort["event_frequency"].values,
415+
}
416+
if has_ci:
417+
cohort_data["frequency_ci_low"] = ds_cohort[
418+
"event_frequency_ci_low"
419+
].values
420+
cohort_data["frequency_ci_upp"] = ds_cohort[
421+
"event_frequency_ci_upp"
422+
].values
423+
df = pd.DataFrame(cohort_data)
417424
dfs.append(df)
418425
df_events = pd.concat(dfs, axis=0).reset_index(drop=True)
419426

420427
# Remove events with no observations.
421428
df_events = df_events.query("nobs > 0").copy()
422429

423-
# Calculate error bars.
424-
frq = df_events["frequency"]
425-
frq_ci_low = df_events["frequency_ci_low"]
426-
frq_ci_upp = df_events["frequency_ci_upp"]
427-
df_events["frequency_error"] = frq_ci_upp - frq
428-
df_events["frequency_error_minus"] = frq - frq_ci_low
430+
# Calculate error bars if CI data is available.
431+
error_y_args = {}
432+
if has_ci:
433+
frq = df_events["frequency"]
434+
frq_ci_low = df_events["frequency_ci_low"]
435+
frq_ci_upp = df_events["frequency_ci_upp"]
436+
df_events["frequency_error"] = frq_ci_upp - frq
437+
df_events["frequency_error_minus"] = frq - frq_ci_low
438+
error_y_args["error_y"] = "frequency_error"
439+
error_y_args["error_y_minus"] = "frequency_error_minus"
429440

430441
# Make a plot.
431442
fig = px.line(
@@ -434,8 +445,7 @@ def plot_frequencies_time_series(
434445
facet_row="area",
435446
x="date",
436447
y="frequency",
437-
error_y="frequency_error",
438-
error_y_minus="frequency_error_minus",
448+
**error_y_args,
439449
color="variant",
440450
markers=True,
441451
hover_name="variant",
@@ -515,19 +525,19 @@ def plot_frequencies_map_markers(
515525
variant_label = variant
516526

517527
# Convert to a dataframe for convenience.
518-
df_markers = ds_variant[
519-
[
520-
"cohort_taxon",
521-
"cohort_area",
522-
"cohort_period",
523-
"cohort_lat_mean",
524-
"cohort_lon_mean",
525-
"cohort_size",
526-
"event_frequency",
527-
"event_frequency_ci_low",
528-
"event_frequency_ci_upp",
529-
]
530-
].to_dataframe()
528+
cols = [
529+
"cohort_taxon",
530+
"cohort_area",
531+
"cohort_period",
532+
"cohort_lat_mean",
533+
"cohort_lon_mean",
534+
"cohort_size",
535+
"event_frequency",
536+
]
537+
has_ci = "event_frequency_ci_low" in ds
538+
if has_ci:
539+
cols += ["event_frequency_ci_low", "event_frequency_ci_upp"]
540+
df_markers = ds_variant[cols].to_dataframe()
531541

532542
# Select data matching taxon and period parameters.
533543
df_markers = df_markers.loc[
@@ -557,8 +567,11 @@ def plot_frequencies_map_markers(
557567
Area: {x.cohort_area} <br/>
558568
Period: {x.cohort_period} <br/>
559569
Sample size: {x.cohort_size} <br/>
560-
Frequency: {x.event_frequency:.0%}
561-
(95% CI: {x.event_frequency_ci_low:.0%} - {x.event_frequency_ci_upp:.0%})
570+
Frequency: {x.event_frequency:.0%}"""
571+
if has_ci:
572+
popup_html += f"""
573+
(95% CI: {x.event_frequency_ci_low:.0%} - {x.event_frequency_ci_upp:.0%})"""
574+
popup_html += """
562575
"""
563576
marker.popup = ipyleaflet.Popup(
564577
child=ipywidgets.HTML(popup_html),

tests/anoph/test_frq_base.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,86 @@ def test_does_not_modify_original(self):
9999
taxon_by="taxon",
100100
)
101101
assert df["taxon"].tolist() == original_values
102+
103+
104+
class TestPlotFrequenciesTimeSeriesMissingCI:
    """Tests for plot_frequencies_time_series with and without CI variables.

    The plotting function is expected to draw error bars only when the
    ``event_frequency_ci_low`` / ``event_frequency_ci_upp`` variables are
    present in the dataset, and to plot without raising when they are absent.

    See: https://github.com/malariagen/malariagen-data-python/issues/1035
    """

    @staticmethod
    def _make_ds_without_ci():
        """Create a minimal dataset without CI variables."""
        import numpy as np
        import xarray as xr

        ds = xr.Dataset(
            {
                "variant_label": ("variants", ["V0", "V1", "V2"]),
                "cohort_taxon": ("cohorts", ["gambiae", "coluzzii"]),
                "cohort_area": ("cohorts", ["KE-01", "KE-02"]),
                "cohort_period": (
                    "cohorts",
                    pd.PeriodIndex(["2020", "2021"], freq="Y"),
                ),
                "cohort_period_start": (
                    "cohorts",
                    pd.to_datetime(["2020-01-01", "2021-01-01"]),
                ),
                "cohort_size": ("cohorts", [50, 60]),
                "event_count": (
                    ("variants", "cohorts"),
                    np.array([[10, 20], [5, 15], [25, 30]]),
                ),
                "event_nobs": (
                    ("variants", "cohorts"),
                    np.array([[100, 120], [100, 120], [100, 120]]),
                ),
                "event_frequency": (
                    ("variants", "cohorts"),
                    np.array([[0.1, 0.167], [0.05, 0.125], [0.25, 0.25]]),
                ),
            }
        )
        return ds

    @staticmethod
    def _make_ds_with_ci():
        """Create a minimal dataset with CI variables.

        The interval is the frequency +/- 0.05, clipped to [0, 1].
        """
        import numpy as np

        ds = TestPlotFrequenciesTimeSeriesMissingCI._make_ds_without_ci()
        ds["event_frequency_ci_low"] = (
            ("variants", "cohorts"),
            np.maximum(ds["event_frequency"].values - 0.05, 0),
        )
        ds["event_frequency_ci_upp"] = (
            ("variants", "cohorts"),
            np.minimum(ds["event_frequency"].values + 0.05, 1),
        )
        return ds

    @staticmethod
    def _plot_time_series(ds):
        """Call plot_frequencies_time_series unbound on ``ds`` and return the figure."""
        from malariagen_data.anoph.frq_base import AnophelesFrequencyAnalysis

        # The method is invoked with ``None`` in place of ``self``, exactly as
        # the original tests did, so no API object has to be constructed.
        return AnophelesFrequencyAnalysis.plot_frequencies_time_series(
            None, ds, show=False
        )

    def test_no_ci_no_error(self):
        """plot_frequencies_time_series should not raise when CI variables are absent."""
        import plotly.graph_objects as go

        fig = self._plot_time_series(self._make_ds_without_ci())
        assert isinstance(fig, go.Figure)
        assert fig.data, "expected at least one trace"
        # Without CI variables no error-bar data should be attached to any trace.
        for trace in fig.data:
            assert trace.error_y.array is None
            assert trace.error_y.arrayminus is None

    def test_with_ci_has_error_bars(self):
        """plot_frequencies_time_series should include error bars when CI variables are present."""
        import plotly.graph_objects as go

        fig = self._plot_time_series(self._make_ds_with_ci())
        assert isinstance(fig, go.Figure)
        assert fig.data, "expected at least one trace"
        # Actually verify the error bars: every trace must carry both the upper
        # and the lower error arrays derived from the CI variables.
        for trace in fig.data:
            assert trace.error_y.array is not None
            assert trace.error_y.arrayminus is not None

tests/anoph/test_hap_frq.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,12 @@ def test_hap_frequencies_advanced(
228228
)
229229

230230
# Run the other function under test.
231-
ds_hap = api.haplotypes_frequencies_advanced(**params_advanced)
231+
try:
232+
ds_hap = api.haplotypes_frequencies_advanced(**params_advanced)
233+
except ValueError as e:
234+
if "No SNPs available for the given region" in str(e):
235+
pytest.skip("Random region contained no SNPs")
236+
raise
232237

233238
# Standard checks.
234239
check_hap_frequencies_advanced(api=api, ds=ds_hap)

0 commit comments

Comments
 (0)