|
1 | 1 | import collections |
2 | 2 | import operator |
3 | 3 |
|
| 4 | +import pandas as pd |
4 | 5 | from Bio.Seq import Seq # type: ignore |
5 | 6 |
|
6 | 7 | VariantEffect = collections.namedtuple( |
@@ -62,7 +63,15 @@ def get_feature(self, feature_id): |
62 | 63 | return self._idx_feature_id.loc[feature_id] |
63 | 64 |
|
64 | 65 | def get_children(self, feature_id): |
65 | | - return self._idx_parent_id.loc[feature_id] |
| 66 | + result = self._idx_parent_id.loc[feature_id] |
| 67 | + # When there is only one child, pandas .loc returns a Series |
| 68 | + # instead of a DataFrame. Ensure we always return a DataFrame |
| 69 | + # so downstream code (e.g. .sort_values, column filtering) works. |
| 70 | + if isinstance(result, pd.Series): |
| 71 | + result = result.to_frame().T |
| 72 | + # Preserve the index name from the parent DataFrame. |
| 73 | + result.index.name = self._idx_parent_id.index.name |
| 74 | + return result |
66 | 75 |
|
67 | 76 | def get_ref_seq(self, chrom, start, stop): |
68 | 77 | """Accepts 1-based coords.""" |
@@ -104,6 +113,17 @@ def get_effects(self, transcript, variants, progress=None): |
104 | 113 | utr3 = list(children[children.type == "three_prime_UTR"].itertuples()) |
105 | 114 | introns = [(x.end + 1, y.start - 1) for x, y in zip(exons[:-1], exons[1:])] |
106 | 115 |
|
| 116 | + # Guard: raise an informative error if the transcript has neither |
| 117 | + # CDS nor UTR children, as variant effect annotation is not |
| 118 | + # meaningful for non-coding transcripts. |
| 119 | + if len(cdss) == 0 and len(utr5) == 0 and len(utr3) == 0: |
| 120 | + raise ValueError( |
| 121 | + f"Transcript {transcript!r} has no CDS or UTR children. " |
| 122 | + f"Variant effect annotation is only supported for " |
| 123 | + f"protein-coding transcripts. This may indicate " |
| 124 | + f"incomplete or incorrect genome annotations." |
| 125 | + ) |
| 126 | + |
107 | 127 | effect_values = [] |
108 | 128 | impact_values = [] |
109 | 129 | ref_codon_values = [] |
|
0 commit comments