|
| 1 | +"""Tests for the Annotator genome cache in veff.py.""" |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pandas as pd |
| 5 | + |
| 6 | +from malariagen_data.veff import Annotator |
| 7 | + |
| 8 | + |
def _make_genome(contigs):
    """Build a stand-in genome for the cache tests.

    The result is a plain dict keyed by contig name. Each value is a NumPy
    array of single bytes (dtype ``S1``) spelling ``ATCGATCG`` followed by
    the contig name, so every contig gets a distinct sequence.
    """
    return {
        contig: np.frombuffer(f"ATCGATCG{contig}".encode(), dtype="S1")
        for contig in contigs
    }
| 17 | + |
| 18 | + |
def _make_genome_features():
    """Build a one-row genome_features DataFrame for the cache tests.

    The frame describes a single ``gene`` feature (``gene1``) on the ``+``
    strand of ``chr1`` spanning positions 1 to 100, with ``root`` as parent.
    """
    # One feature record; each value becomes a one-element column below.
    record = {
        "ID": "gene1",
        "Parent": "root",
        "type": "gene",
        "start": 1,
        "end": 100,
        "contig": "chr1",
        "strand": "+",
    }
    return pd.DataFrame({column: [value] for column, value in record.items()})
| 32 | + |
| 33 | + |
class TestGenomeCacheDefaultMaxsize:
    """Check the cache size limit of an Annotator built without one."""

    def test_default_maxsize(self):
        annotator = Annotator(
            genome=_make_genome(["chr1"]),
            genome_features=_make_genome_features(),
        )
        # No genome_cache_maxsize is passed, so the reported limit is the
        # default, which this test expects to be 5.
        assert annotator._load_genome_seq.cache_info().maxsize == 5
| 42 | + |
| 43 | + |
class TestGenomeCacheLRUEviction:
    """Check that a full cache drops an entry to make room for a new one."""

    def test_eviction(self):
        names = ["chr1", "chr2", "chr3"]
        annotator = Annotator(
            genome=_make_genome(names),
            genome_features=_make_genome_features(),
            genome_cache_maxsize=2,
        )

        def stats():
            # Look the attribute up afresh each time rather than holding a
            # reference to it.
            return annotator._load_genome_seq.cache_info()

        # Request one more contig than the cache can hold, in order.
        for name in names:
            annotator._load_genome_seq(name)

        after_fill = stats()
        # The cache is capped at two entries ...
        assert after_fill.currsize == 2
        # ... and every first-time request counted as a miss.
        assert after_fill.misses == 3

        # chr1 was loaded first and is expected to have been evicted, so
        # asking for it again must register as another miss.
        annotator._load_genome_seq("chr1")
        assert stats().misses == 4
| 70 | + |
| 71 | + |
class TestClearGenomeCache:
    """Check that clear_genome_cache() leaves the cache with no entries."""

    def test_clear(self):
        annotator = Annotator(
            genome=_make_genome(["chr1", "chr2"]),
            genome_features=_make_genome_features(),
        )

        # Populate the cache with both contigs.
        for name in ("chr1", "chr2"):
            annotator._load_genome_seq(name)
        assert annotator._load_genome_seq.cache_info().currsize == 2

        # After clearing, the cache must report zero entries.
        annotator.clear_genome_cache()
        assert annotator._load_genome_seq.cache_info().currsize == 0
| 85 | + |
| 86 | + |
class TestGenomeCacheUnbounded:
    """Check that genome_cache_maxsize=None removes the cache size limit."""

    def test_unbounded(self):
        n_contigs = 20
        names = [f"chr{index}" for index in range(n_contigs)]
        annotator = Annotator(
            genome=_make_genome(names),
            genome_features=_make_genome_features(),
            genome_cache_maxsize=None,
        )

        # Load every contig once.
        for name in names:
            annotator._load_genome_seq(name)

        stats = annotator._load_genome_seq.cache_info()
        # No limit is reported, and all loaded contigs are still cached.
        assert stats.maxsize is None
        assert stats.currsize == n_contigs
| 105 | + |
| 106 | + |
class TestPerInstanceCacheIsolation:
    """Check that each Annotator keeps its own cache, even on shared inputs."""

    def test_isolation(self):
        shared_genome = _make_genome(["chr1", "chr2"])
        shared_features = _make_genome_features()

        first = Annotator(genome=shared_genome, genome_features=shared_features)
        second = Annotator(genome=shared_genome, genome_features=shared_features)

        def cache_sizes():
            # Current entry counts as (first, second), read in that order.
            return (
                first._load_genome_seq.cache_info().currsize,
                second._load_genome_seq.cache_info().currsize,
            )

        # Loading through one instance must not populate the other.
        first._load_genome_seq("chr1")
        assert cache_sizes() == (1, 0)

        # Each instance tracks only what it loaded itself.
        second._load_genome_seq("chr2")
        assert cache_sizes() == (1, 1)

        # Clearing one instance leaves the other's entry in place.
        first.clear_genome_cache()
        assert cache_sizes() == (0, 1)
0 commit comments