Skip to content

Commit 1e72acc

Browse files
authored
Merge branch 'master' into fix/docs-typos-and-doi-link
2 parents 42e200f + 1ba2151 commit 1e72acc

5 files changed

Lines changed: 178 additions & 41 deletions

File tree

malariagen_data/anoph/frq_base.py

Lines changed: 53 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -396,39 +396,50 @@ def plot_frequencies_time_series(
396396
# Extract variant labels.
397397
variant_labels = ds["variant_label"].values
398398

399+
# Check if CI variables are available.
400+
has_ci = "event_frequency_ci_low" in ds
401+
399402
# Build a long-form dataframe from the dataset.
400403
dfs = []
401404
for cohort in df_cohorts.itertuples():
402405
ds_cohort = ds.isel(cohorts=cohort.Index)
403-
df = pd.DataFrame(
404-
{
405-
"taxon": cohort.taxon,
406-
"area": cohort.area,
407-
"date": cohort.period_start,
408-
"period": str(
409-
cohort.period
410-
), # use string representation for hover label
411-
"sample_size": cohort.size,
412-
"variant": variant_labels,
413-
"count": ds_cohort["event_count"].values,
414-
"nobs": ds_cohort["event_nobs"].values,
415-
"frequency": ds_cohort["event_frequency"].values,
416-
"frequency_ci_low": ds_cohort["event_frequency_ci_low"].values,
417-
"frequency_ci_upp": ds_cohort["event_frequency_ci_upp"].values,
418-
}
419-
)
406+
cohort_data = {
407+
"taxon": cohort.taxon,
408+
"area": cohort.area,
409+
"date": cohort.period_start,
410+
"period": str(
411+
cohort.period
412+
), # use string representation for hover label
413+
"sample_size": cohort.size,
414+
"variant": variant_labels,
415+
"count": ds_cohort["event_count"].values,
416+
"nobs": ds_cohort["event_nobs"].values,
417+
"frequency": ds_cohort["event_frequency"].values,
418+
}
419+
if has_ci:
420+
cohort_data["frequency_ci_low"] = ds_cohort[
421+
"event_frequency_ci_low"
422+
].values
423+
cohort_data["frequency_ci_upp"] = ds_cohort[
424+
"event_frequency_ci_upp"
425+
].values
426+
df = pd.DataFrame(cohort_data)
420427
dfs.append(df)
421428
df_events = pd.concat(dfs, axis=0).reset_index(drop=True)
422429

423430
# Remove events with no observations.
424431
df_events = df_events.query("nobs > 0").copy()
425432

426-
# Calculate error bars.
427-
frq = df_events["frequency"]
428-
frq_ci_low = df_events["frequency_ci_low"]
429-
frq_ci_upp = df_events["frequency_ci_upp"]
430-
df_events["frequency_error"] = frq_ci_upp - frq
431-
df_events["frequency_error_minus"] = frq - frq_ci_low
433+
# Calculate error bars if CI data is available.
434+
error_y_args = {}
435+
if has_ci:
436+
frq = df_events["frequency"]
437+
frq_ci_low = df_events["frequency_ci_low"]
438+
frq_ci_upp = df_events["frequency_ci_upp"]
439+
df_events["frequency_error"] = frq_ci_upp - frq
440+
df_events["frequency_error_minus"] = frq - frq_ci_low
441+
error_y_args["error_y"] = "frequency_error"
442+
error_y_args["error_y_minus"] = "frequency_error_minus"
432443

433444
# Make a plot.
434445
fig = px.line(
@@ -437,8 +448,7 @@ def plot_frequencies_time_series(
437448
facet_row="area",
438449
x="date",
439450
y="frequency",
440-
error_y="frequency_error",
441-
error_y_minus="frequency_error_minus",
451+
**error_y_args,
442452
color="variant",
443453
markers=True,
444454
hover_name="variant",
@@ -518,19 +528,19 @@ def plot_frequencies_map_markers(
518528
variant_label = variant
519529

520530
# Convert to a dataframe for convenience.
521-
df_markers = ds_variant[
522-
[
523-
"cohort_taxon",
524-
"cohort_area",
525-
"cohort_period",
526-
"cohort_lat_mean",
527-
"cohort_lon_mean",
528-
"cohort_size",
529-
"event_frequency",
530-
"event_frequency_ci_low",
531-
"event_frequency_ci_upp",
532-
]
533-
].to_dataframe()
531+
cols = [
532+
"cohort_taxon",
533+
"cohort_area",
534+
"cohort_period",
535+
"cohort_lat_mean",
536+
"cohort_lon_mean",
537+
"cohort_size",
538+
"event_frequency",
539+
]
540+
has_ci = "event_frequency_ci_low" in ds
541+
if has_ci:
542+
cols += ["event_frequency_ci_low", "event_frequency_ci_upp"]
543+
df_markers = ds_variant[cols].to_dataframe()
534544

535545
# Select data matching taxon and period parameters.
536546
df_markers = df_markers.loc[
@@ -560,8 +570,11 @@ def plot_frequencies_map_markers(
560570
Area: {x.cohort_area} <br/>
561571
Period: {x.cohort_period} <br/>
562572
Sample size: {x.cohort_size} <br/>
563-
Frequency: {x.event_frequency:.0%}
564-
(95% CI: {x.event_frequency_ci_low:.0%} - {x.event_frequency_ci_upp:.0%})
573+
Frequency: {x.event_frequency:.0%}"""
574+
if has_ci:
575+
popup_html += f"""
576+
(95% CI: {x.event_frequency_ci_low:.0%} - {x.event_frequency_ci_upp:.0%})"""
577+
popup_html += """
565578
"""
566579
marker.popup = ipyleaflet.Popup(
567580
child=ipywidgets.HTML(popup_html),

malariagen_data/util.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,12 @@ def __init__(
899899
handler = logging.FileHandler(out)
900900
self._handler = handler
901901

902+
# Remove any pre-existing handlers from the singleton logger to prevent
903+
# accumulation (and FileHandler FD leaks) on repeated instantiation.
904+
for existing_handler in logger.handlers[:]:
905+
logger.removeHandler(existing_handler)
906+
existing_handler.close()
907+
902908
# configure handler
903909
if handler is not None:
904910
if debug:

tests/anoph/test_base.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import io
2+
import logging
3+
14
import numpy as np
25
import pandas as pd
36
import pytest
@@ -8,6 +11,7 @@
811
from malariagen_data import ag3 as _ag3
912
from malariagen_data import adir1 as _adir1
1013
from malariagen_data.anoph.base import AnophelesBase
14+
from malariagen_data.util import LoggingHelper
1115

1216

1317
@pytest.fixture
@@ -258,6 +262,32 @@ def test_lookup_study(fixture, api):
258262
api.lookup_study("foobar")
259263

260264

265+
def test_logging_helper_no_handler_accumulation():
    """Repeated LoggingHelper construction must not pile up handlers.

    Regression test: building a LoggingHelper many times for the same logger
    name has to leave at most one handler attached to that logger (guards
    against StreamHandler accumulation and FileHandler FD leaks).
    """
    logger_name = "test_logging_helper_no_handler_accumulation"
    # logging.getLogger returns the same object for a given name, so it is
    # safe to fetch it up front and inspect it after the helpers are built.
    logger = logging.getLogger(logger_name)

    instantiations = 0
    while instantiations < 10:
        LoggingHelper(name=logger_name, out=io.StringIO())
        instantiations += 1

    at_most_one_handler = len(logger.handlers) <= 1
    assert (
        at_most_one_handler
    ), f"Handler leak: {len(logger.handlers)} handlers after 10 instantiations"
275+
276+
277+
def test_logging_helper_no_duplicate_output():
    """A message logged after repeated construction must be written once.

    Regression test: after several LoggingHelper instantiations sharing one
    logger name and one output stream, a single ``info`` call must appear
    exactly once in that stream.
    """
    logger_name = "test_logging_helper_no_duplicate_output"
    out = io.StringIO()

    # Rebind the same variable on every pass so that only the most recently
    # constructed helper is kept, exactly as a caller re-creating it would.
    helper = None
    for _attempt in range(5):
        helper = LoggingHelper(name=logger_name, out=out)

    helper.info("sentinel")

    output = out.getvalue()
    emitted_exactly_once = output.count("sentinel") == 1
    assert (
        emitted_exactly_once
    ), f"Duplicate log output: 'sentinel' appeared {output.count('sentinel')} times"
289+
290+
261291
def _strip_terms_of_use_from_manifest(manifest_path):
262292
"""Rewrite a manifest TSV file without terms-of-use columns."""
263293
df = pd.read_csv(manifest_path, sep="\t")

tests/anoph/test_frq_base.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,86 @@ def test_does_not_modify_original(self):
9999
taxon_by="taxon",
100100
)
101101
assert df["taxon"].tolist() == original_values
102+
103+
104+
class TestPlotFrequenciesTimeSeriesMissingCI:
    """Tests for plot_frequencies_time_series with and without CI variables.

    The plotting function must not fail when the confidence-interval
    variables are missing from the dataset, and must still draw error bars
    when they are present.

    See: https://github.com/malariagen/malariagen-data-python/issues/1035
    """

    @staticmethod
    def _make_ds_without_ci():
        """Create a minimal dataset (3 variants x 2 cohorts) without CI variables."""
        import numpy as np
        import xarray as xr

        ds = xr.Dataset(
            {
                "variant_label": ("variants", ["V0", "V1", "V2"]),
                "cohort_taxon": ("cohorts", ["gambiae", "coluzzii"]),
                "cohort_area": ("cohorts", ["KE-01", "KE-02"]),
                "cohort_period": (
                    "cohorts",
                    pd.PeriodIndex(["2020", "2021"], freq="Y"),
                ),
                "cohort_period_start": (
                    "cohorts",
                    pd.to_datetime(["2020-01-01", "2021-01-01"]),
                ),
                "cohort_size": ("cohorts", [50, 60]),
                "event_count": (
                    ("variants", "cohorts"),
                    np.array([[10, 20], [5, 15], [25, 30]]),
                ),
                "event_nobs": (
                    ("variants", "cohorts"),
                    np.array([[100, 120], [100, 120], [100, 120]]),
                ),
                "event_frequency": (
                    ("variants", "cohorts"),
                    np.array([[0.1, 0.167], [0.05, 0.125], [0.25, 0.25]]),
                ),
            }
        )
        return ds

    @staticmethod
    def _make_ds_with_ci():
        """Create a minimal dataset with CI variables.

        The interval is the frequency +/- 0.05, clipped to [0, 1].
        """
        import numpy as np

        ds = TestPlotFrequenciesTimeSeriesMissingCI._make_ds_without_ci()
        ds["event_frequency_ci_low"] = (
            ("variants", "cohorts"),
            np.maximum(ds["event_frequency"].values - 0.05, 0),
        )
        ds["event_frequency_ci_upp"] = (
            ("variants", "cohorts"),
            np.minimum(ds["event_frequency"].values + 0.05, 1),
        )
        return ds

    def test_no_ci_no_error(self):
        """plot_frequencies_time_series should not raise when CI variables are absent."""
        import plotly.graph_objects as go

        from malariagen_data.anoph.frq_base import AnophelesFrequencyAnalysis

        ds = self._make_ds_without_ci()
        fig = AnophelesFrequencyAnalysis.plot_frequencies_time_series(
            None, ds, show=False
        )
        assert isinstance(fig, go.Figure)
        # Something must actually have been plotted, and without CI data no
        # trace may carry error-bar arrays.
        assert len(fig.data) > 0
        for trace in fig.data:
            assert trace.error_y.array is None
            assert trace.error_y.arrayminus is None

    def test_with_ci_has_error_bars(self):
        """plot_frequencies_time_series should include error bars when CI variables are present."""
        import plotly.graph_objects as go

        from malariagen_data.anoph.frq_base import AnophelesFrequencyAnalysis

        ds = self._make_ds_with_ci()
        fig = AnophelesFrequencyAnalysis.plot_frequencies_time_series(
            None, ds, show=False
        )
        assert isinstance(fig, go.Figure)
        # Verify what the test name promises: every trace carries both the
        # upper and the lower error-bar arrays derived from the CI variables.
        assert len(fig.data) > 0
        for trace in fig.data:
            assert trace.error_y.array is not None
            assert trace.error_y.arrayminus is not None

tests/anoph/test_hap_frq.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,12 @@ def test_hap_frequencies_advanced(
228228
)
229229

230230
# Run the other function under test.
231-
ds_hap = api.haplotypes_frequencies_advanced(**params_advanced)
231+
try:
232+
ds_hap = api.haplotypes_frequencies_advanced(**params_advanced)
233+
except ValueError as e:
234+
if "No SNPs available for the given region" in str(e):
235+
pytest.skip("Random region contained no SNPs")
236+
raise
232237

233238
# Standard checks.
234239
check_hap_frequencies_advanced(api=api, ds=ds_hap)

0 commit comments

Comments
 (0)