Skip to content

Commit 77851ee

Browse files
authored
Merge branch 'master' into issue-848-numpy-matrix
2 parents 9f21e09 + 0c9aa64 commit 77851ee

2 files changed

Lines changed: 22 additions & 2 deletions

File tree

malariagen_data/anoph/cnv_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def open_cnv_coverage_calls(
296296
marker = path + "/.zmetadata"
297297
if not self._fs.exists(marker):
298298
raise ValueError(
299-
f"CNV coverage calls analysis f{analysis!r} not implemented for sample set {sample_set!r}"
299+
f"CNV coverage calls analysis {analysis!r} not implemented for sample set {sample_set!r}"
300300
)
301301
store = _init_zarr_store(fs=self._fs, path=path)
302302
root = zarr.open_consolidated(store=store)

malariagen_data/veff.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import collections
22
import operator
33

4+
import pandas as pd
45
from Bio.Seq import Seq # type: ignore
56

67
VariantEffect = collections.namedtuple(
@@ -62,7 +63,15 @@ def get_feature(self, feature_id):
6263
return self._idx_feature_id.loc[feature_id]
6364

6465
def get_children(self, feature_id):
    """Return the rows of ``self._idx_parent_id`` whose index label is *feature_id*.

    The result is always a ``pandas.DataFrame``, including when exactly one
    row matches, so callers can filter on columns and call ``.sort_values``
    without special-casing the single-child case.

    Raises
    ------
    KeyError
        If *feature_id* is not present in the index (raised by pandas).
    """
    # Selecting with a one-element list (rather than a scalar label) makes
    # pandas return a DataFrame in every case. Unlike converting a Series
    # via ``to_frame().T``, this keeps the original column dtypes and the
    # index name intact.
    return self._idx_parent_id.loc[[feature_id]]
6675

6776
def get_ref_seq(self, chrom, start, stop):
6877
"""Accepts 1-based coords."""
@@ -104,6 +113,17 @@ def get_effects(self, transcript, variants, progress=None):
104113
utr3 = list(children[children.type == "three_prime_UTR"].itertuples())
105114
introns = [(x.end + 1, y.start - 1) for x, y in zip(exons[:-1], exons[1:])]
106115

116+
# Guard: raise an informative error if the transcript has no CDS or UTR
117+
# regions, as variant effect annotation is not meaningful for
118+
# non-coding transcripts.
119+
if len(cdss) == 0 and len(utr5) == 0 and len(utr3) == 0:
120+
raise ValueError(
121+
f"Transcript {transcript!r} has no CDS or UTR children. "
122+
f"Variant effect annotation is only supported for "
123+
f"protein-coding transcripts. This may indicate "
124+
f"incomplete or incorrect genome annotations."
125+
)
126+
107127
effect_values = []
108128
impact_values = []
109129
ref_codon_values = []

0 commit comments

Comments
 (0)