Merge branch 'master' into issue-848-numpy-matrix

jonbrenas · web-flow · commit 77851ee584c3 · 2026-03-03T10:39:26.000Z
diff --git a/malariagen_data/anoph/cnv_data.py b/malariagen_data/anoph/cnv_data.py
@@ -296,7 +296,7 @@ def open_cnv_coverage_calls(
             marker = path + "/.zmetadata"
             if not self._fs.exists(marker):
                 raise ValueError(
-                    f"CNV coverage calls analysis f{analysis!r} not implemented for sample set {sample_set!r}"
+                    f"CNV coverage calls analysis {analysis!r} not implemented for sample set {sample_set!r}"
                 )
             store = _init_zarr_store(fs=self._fs, path=path)
             root = zarr.open_consolidated(store=store)
diff --git a/malariagen_data/veff.py b/malariagen_data/veff.py
@@ -1,6 +1,7 @@
 import collections
 import operator
 
+import pandas as pd
 from Bio.Seq import Seq  # type: ignore
 
 VariantEffect = collections.namedtuple(
@@ -62,7 +63,15 @@ def get_feature(self, feature_id):
         return self._idx_feature_id.loc[feature_id]
 
     def get_children(self, feature_id):
-        return self._idx_parent_id.loc[feature_id]
+        result = self._idx_parent_id.loc[feature_id]
+        # When there is only one child, pandas .loc returns a Series
+        # instead of a DataFrame. Ensure we always return a DataFrame
+        # so downstream code (e.g. .sort_values, column filtering) works.
+        if isinstance(result, pd.Series):
+            result = result.to_frame().T
+            # Preserve the index name from the parent DataFrame.
+            result.index.name = self._idx_parent_id.index.name
+        return result
 
     def get_ref_seq(self, chrom, start, stop):
         """Accepts 1-based coords."""
@@ -104,6 +113,17 @@ def get_effects(self, transcript, variants, progress=None):
         utr3 = list(children[children.type == "three_prime_UTR"].itertuples())
         introns = [(x.end + 1, y.start - 1) for x, y in zip(exons[:-1], exons[1:])]
 
+        # Guard: raise an informative error if the transcript has no CDS
+        # or UTR children, as variant effect annotation is not meaningful
+        # for non-coding transcripts.
+        if len(cdss) == 0 and len(utr5) == 0 and len(utr3) == 0:
+            raise ValueError(
+                f"Transcript {transcript!r} has no CDS or UTR children. "
+                f"Variant effect annotation is only supported for "
+                f"protein-coding transcripts. This may indicate "
+                f"incomplete or incorrect genome annotations."
+            )
+
         effect_values = []
         impact_values = []
         ref_codon_values = []

Original file line number	Diff line number	Diff line change
`@@ -296,7 +296,7 @@ def open_cnv_coverage_calls(`
`296`	`296`	`marker = path + "/.zmetadata"`
`297`	`297`	`if not self._fs.exists(marker):`
`298`	`298`	`raise ValueError(`
`299`		`- f"CNV coverage calls analysis f{analysis!r} not implemented for sample set {sample_set!r}"`
	`299`	`+ f"CNV coverage calls analysis {analysis!r} not implemented for sample set {sample_set!r}"`
`300`	`300`	`)`
`301`	`301`	`store = _init_zarr_store(fs=self._fs, path=path)`
`302`	`302`	`root = zarr.open_consolidated(store=store)`