Skip to content

Commit 47583be

Browse files
authored
Merge branch 'master' into GH1005-fix-cnv-fixed-denominator
2 parents f43c70d + c6a63ee commit 47583be

5 files changed

Lines changed: 90 additions & 4 deletions

File tree

.github/workflows/tests.yml

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ jobs:
1212
fail-fast: true
1313
matrix:
1414
python-version: ["3.10", "3.11", "3.12"]
15+
numpy-spec:
16+
# Keep this aligned with pyproject.toml: numpy = ">=2.0.2,<2.1"
17+
- "==2.0.2" # locked baseline
18+
- ">=2.0.2,<2.1" # latest allowed in declared range
1519
runs-on: ubuntu-latest
1620

1721
steps:
@@ -23,8 +27,26 @@ jobs:
2327
with:
2428
python-version: ${{ matrix.python-version }}
2529

26-
- name: Verify NumPy version
27-
run: poetry run python -c "import numpy; print('NumPy version:', numpy.__version__)"
30+
- name: Install matrix NumPy version
31+
run: poetry run pip install --upgrade --no-deps "numpy${{ matrix.numpy-spec }}"
32+
33+
- name: Verify NumPy version and spec
34+
env:
35+
NUMPY_SPEC: ${{ matrix.numpy-spec }}
36+
run: |
37+
poetry run python - <<'PY'
38+
import os
39+
import numpy
40+
from packaging.specifiers import SpecifierSet
41+
42+
spec = SpecifierSet(os.environ["NUMPY_SPEC"])
43+
version = numpy.__version__
44+
if version not in spec:
45+
raise RuntimeError(
46+
f"NumPy version {version} does not satisfy matrix spec {spec}"
47+
)
48+
print("NumPy version:", version, "| spec:", spec)
49+
PY
2850
2951
- name: Run unit tests
3052
run: poetry run pytest -v tests --ignore tests/integration --typeguard-packages=malariagen_data,malariagen_data.anoph

malariagen_data/anoph/cnv_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def open_cnv_coverage_calls(
296296
marker = path + "/.zmetadata"
297297
if not self._fs.exists(marker):
298298
raise ValueError(
299-
f"CNV coverage calls analysis f{analysis!r} not implemented for sample set {sample_set!r}"
299+
f"CNV coverage calls analysis {analysis!r} not implemented for sample set {sample_set!r}"
300300
)
301301
store = _init_zarr_store(fs=self._fs, path=path)
302302
root = zarr.open_consolidated(store=store)

malariagen_data/anoph/h12.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,8 @@ def plot_h12_gwss_multi_overlay_track(
558558
show: gplt_params.show = True,
559559
x_range: Optional[gplt_params.x_range] = None,
560560
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
561+
chunks: base_params.chunks = base_params.native_chunks,
562+
inline_array: base_params.inline_array = base_params.inline_array_default,
561563
) -> gplt_params.optional_figure:
562564
cohort_queries = self._setup_cohort_queries(
563565
cohorts=cohorts,
@@ -585,8 +587,11 @@ def plot_h12_gwss_multi_overlay_track(
585587
min_cohort_size=min_cohort_size,
586588
max_cohort_size=max_cohort_size,
587589
sample_query=cohort_query,
590+
sample_query_options=sample_query_options,
588591
sample_sets=sample_sets,
589592
random_seed=random_seed,
593+
chunks=chunks,
594+
inline_array=inline_array,
590595
)
591596

592597
# Determine X axis range.
@@ -679,6 +684,8 @@ def plot_h12_gwss_multi_overlay(
679684
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
680685
gene_labels: Optional[gplt_params.gene_labels] = None,
681686
gene_labelset: Optional[gplt_params.gene_labelset] = None,
687+
chunks: base_params.chunks = base_params.native_chunks,
688+
inline_array: base_params.inline_array = base_params.inline_array_default,
682689
) -> gplt_params.optional_figure:
683690
# Plot GWSS track.
684691
fig1 = self.plot_h12_gwss_multi_overlay_track(
@@ -700,6 +707,8 @@ def plot_h12_gwss_multi_overlay(
700707
height=track_height,
701708
show=False,
702709
output_backend=output_backend,
710+
chunks=chunks,
711+
inline_array=inline_array,
703712
)
704713

705714
fig1.xaxis.visible = False
@@ -764,6 +773,8 @@ def plot_h12_gwss_multi_panel(
764773
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
765774
gene_labels: Optional[gplt_params.gene_labels] = None,
766775
gene_labelset: Optional[gplt_params.gene_labelset] = None,
776+
chunks: base_params.chunks = base_params.native_chunks,
777+
inline_array: base_params.inline_array = base_params.inline_array_default,
767778
) -> gplt_params.optional_figure:
768779
cohort_queries = self._setup_cohort_queries(
769780
cohorts=cohorts,
@@ -789,6 +800,7 @@ def plot_h12_gwss_multi_panel(
789800
window_size=window_size[cohort_label],
790801
sample_sets=sample_sets,
791802
sample_query=cohort_query,
803+
sample_query_options=sample_query_options,
792804
cohort_size=cohort_size,
793805
min_cohort_size=min_cohort_size,
794806
max_cohort_size=max_cohort_size,
@@ -799,6 +811,8 @@ def plot_h12_gwss_multi_panel(
799811
height=track_height,
800812
show=False,
801813
output_backend=output_backend,
814+
chunks=chunks,
815+
inline_array=inline_array,
802816
)
803817
if i > 0:
804818
track = self.plot_h12_gwss_track(x_range=figs[0].x_range, **params)

malariagen_data/veff.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import collections
22
import operator
33

4+
import pandas as pd
45
from Bio.Seq import Seq # type: ignore
56

67
VariantEffect = collections.namedtuple(
@@ -62,7 +63,15 @@ def get_feature(self, feature_id):
6263
return self._idx_feature_id.loc[feature_id]
6364

6465
def get_children(self, feature_id):
65-
return self._idx_parent_id.loc[feature_id]
66+
result = self._idx_parent_id.loc[feature_id]
67+
# When there is only one child, pandas .loc returns a Series
68+
# instead of a DataFrame. Ensure we always return a DataFrame
69+
# so downstream code (e.g. .sort_values, column filtering) works.
70+
if isinstance(result, pd.Series):
71+
result = result.to_frame().T
72+
# Preserve the index name from the parent DataFrame.
73+
result.index.name = self._idx_parent_id.index.name
74+
return result
6675

6776
def get_ref_seq(self, chrom, start, stop):
6877
"""Accepts 1-based coords."""
@@ -104,6 +113,17 @@ def get_effects(self, transcript, variants, progress=None):
104113
utr3 = list(children[children.type == "three_prime_UTR"].itertuples())
105114
introns = [(x.end + 1, y.start - 1) for x, y in zip(exons[:-1], exons[1:])]
106115

116+
# Guard: raise an informative error if the transcript has no CDS or UTR
117+
# regions, as variant effect annotation is not meaningful for
118+
# non-coding transcripts.
119+
if len(cdss) == 0 and len(utr5) == 0 and len(utr3) == 0:
120+
raise ValueError(
121+
f"Transcript {transcript!r} has no CDS or UTR children. "
122+
f"Variant effect annotation is only supported for "
123+
f"protein-coding transcripts. This may indicate "
124+
f"incomplete or incorrect genome annotations."
125+
)
126+
107127
effect_values = []
108128
impact_values = []
109129
ref_codon_values = []

tests/anoph/test_h12.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,36 @@ def test_h12_gwss_multi_with_analysis(fixture, api: AnophelesH12Analysis):
330330
api.plot_h12_gwss_multi_panel(**params)
331331

332332

333+
@parametrize_with_cases("fixture,api", cases=".")
334+
def test_h12_gwss_multi_param_forwarding(fixture, api: AnophelesH12Analysis):
335+
"""Verify sample_query_options, chunks, and inline_array are
336+
forwarded through multi-cohort H12 plotting functions."""
337+
all_sample_sets = api.sample_sets()["sample_set"].to_list()
338+
all_countries = api.sample_metadata()["country"].unique().tolist()
339+
country1, country2 = random.sample(all_countries, 2)
340+
cohort1_query = f"country == '{country1}'"
341+
cohort2_query = f"country == '{country2}'"
342+
343+
h12_params = dict(
344+
contig=random.choice(api.contigs),
345+
sample_sets=all_sample_sets,
346+
window_size=200,
347+
min_cohort_size=1,
348+
cohorts={"cohort1": cohort1_query, "cohort2": cohort2_query},
349+
sample_query_options={"engine": "python"},
350+
chunks="auto",
351+
inline_array=False,
352+
)
353+
354+
# Test multi-overlay — should not raise.
355+
fig = api.plot_h12_gwss_multi_overlay(**h12_params, show=False)
356+
assert isinstance(fig, bokeh.models.GridPlot)
357+
358+
# Test multi-panel — should not raise.
359+
fig = api.plot_h12_gwss_multi_panel(**h12_params, show=False)
360+
assert isinstance(fig, bokeh.models.GridPlot)
361+
362+
333363
def test_garud_h12_empty_window():
334364
import numpy as np
335365
from malariagen_data.anoph.h12 import _garud_h12

0 commit comments

Comments
 (0)