From ede67b37af5da4e2588b215933ea442f5fc35c03 Mon Sep 17 00:00:00 2001 From: Tanisha Date: Sun, 19 Apr 2026 01:32:16 +0530 Subject: [PATCH 1/2] fix: classify in-frame complex variants (MNP+INDEL) as CODON_CHANGE in veff.py --- malariagen_data/veff.py | 7 +++++-- tests/test_veff.py | 45 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/malariagen_data/veff.py b/malariagen_data/veff.py index f3699792c..3b08825d6 100644 --- a/malariagen_data/veff.py +++ b/malariagen_data/veff.py @@ -444,9 +444,12 @@ def _get_within_cds_effect(ann, base_effect, cds, cdss): effect = base_effect._replace(effect="CODON_CHANGE", impact="MODERATE") else: - # TODO in-frame complex variation (MNP + INDEL) + # In-frame complex variation (MNP + INDEL combined), where the + # net length change is a multiple of 3, so no frameshift occurs. + # Classified as CODON_CHANGE with MODERATE impact, consistent + # with how pure MNPs are handled above. effect = base_effect._replace( - effect="TODO in-frame complex variation (MNP + INDEL)", impact="UNKNOWN" + effect="CODON_CHANGE", impact="MODERATE" ) return effect diff --git a/tests/test_veff.py b/tests/test_veff.py index dfe036979..48776d8a0 100644 --- a/tests/test_veff.py +++ b/tests/test_veff.py @@ -683,3 +683,48 @@ def test_isolation(self): ann1.clear_genome_cache() assert ann1._load_genome_seq.cache_info().currsize == 0 assert ann2._load_genome_seq.cache_info().currsize == 1 + + +# ── TestInFrameComplexVariant ───────────────────────────────────────────────── + + +class TestInFrameComplexVariant: + """ + Verify that in-frame complex variants (MNP + INDEL combined, where the + net length change is a multiple of 3) are classified as CODON_CHANGE + with MODERATE impact — not the old TODO string with UNKNOWN impact. + + The forward-strand CDS is: ATG|GCC|TTA|CAG|TGA (pos 4-18) + We use pos 7 (start of GCC codon) as our test position. 
+ + Example variant: + ref = "GCC" (3 bp, the whole Ala codon at pos 7-9) + alt = "GCCATG" (6 bp — the same Ala codon, followed by a newly inserted Met codon) + net length change = 6 - 3 = 3, which is a multiple of 3 → in-frame + len(ref) = 3 > 1, len(alt) = 6 > 1 → not a simple insertion or deletion + len(ref) != len(alt) → not a pure MNP + This hits the previously-unhandled else branch. + """ + + def setup_method(self): + self.ann = Annotator(make_genome(("chr1", FWD_SEQ)), FEATURES_BASIC_FWD.copy()) + + def test_inframe_complex_classified_as_codon_change(self): + # ref="GCC" (3 bp), alt="GCCATG" (6 bp): net change = +3, in-frame + row = _run(self.ann, "tx1", 7, "GCC", "GCCATG") + assert row["effect"] == "CODON_CHANGE" + assert row["impact"] == "MODERATE" + + def test_inframe_complex_not_todo_string(self): + # Make sure the old TODO string is never returned + row = _run(self.ann, "tx1", 7, "GCC", "GCCATG") + assert "TODO" not in str(row["effect"]) + assert row["impact"] != "UNKNOWN" + + def test_inframe_complex_deletion_classified_as_codon_change(self): + # ref="GCCTTA" (6 bp), alt="GCC" (3 bp): net change = -3, in-frame + # len(ref)=6 > 1, len(alt)=3 > 1 → not a simple deletion + row = _run(self.ann, "tx1", 7, "GCCTTA", "GCC") + assert row["effect"] == "CODON_CHANGE" + assert row["impact"] == "MODERATE" + assert "TODO" not in str(row["effect"]) \ No newline at end of file From 5342a95f3d27962a52dacf20ccca434b3f1e262e Mon Sep 17 00:00:00 2001 From: Tanisha Date: Sun, 19 Apr 2026 22:28:54 +0530 Subject: [PATCH 2/2] style: fix ruff-format and end-of-file issues --- malariagen_data/veff.py | 4 +--- tests/test_veff.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/malariagen_data/veff.py b/malariagen_data/veff.py index 3b08825d6..32c5e16c2 100644 --- a/malariagen_data/veff.py +++ b/malariagen_data/veff.py @@ -448,9 +448,7 @@ def _get_within_cds_effect(ann, base_effect, cds, cdss): # net length change is a multiple of 3, so no frameshift occurs. 
# Classified as CODON_CHANGE with MODERATE impact, consistent # with how pure MNPs are handled above. - effect = base_effect._replace( - effect="CODON_CHANGE", impact="MODERATE" - ) + effect = base_effect._replace(effect="CODON_CHANGE", impact="MODERATE") return effect diff --git a/tests/test_veff.py b/tests/test_veff.py index 48776d8a0..d4b2b753f 100644 --- a/tests/test_veff.py +++ b/tests/test_veff.py @@ -727,4 +727,4 @@ def test_inframe_complex_deletion_classified_as_codon_change(self): row = _run(self.ann, "tx1", 7, "GCCTTA", "GCC") assert row["effect"] == "CODON_CHANGE" assert row["impact"] == "MODERATE" - assert "TODO" not in str(row["effect"]) \ No newline at end of file + assert "TODO" not in str(row["effect"])