Skip to content

Commit d078091

Browse files
authored
Merge pull request #967 from suhr25/fix-snp-cache-leaks
Fix unbounded _cache_locate_site_class growth and instance retention via class-level lru_cache
2 parents 8e2fce4 + a5a5ccd commit d078091

2 files changed

Lines changed: 78 additions & 10 deletions

File tree

malariagen_data/anoph/snp_data.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@
3939
from .genome_sequence import AnophelesGenomeSequenceData
4040
from .sample_metadata import AnophelesSampleMetadata
4141

42+
# Maximum number of entries kept in the per-instance _cache_locate_site_class
43+
# dict. The natural ceiling is n_contigs × n_site_classes (≈ 45 for Ag3), so
44+
# 64 gives comfortable headroom without allowing unbounded growth.
45+
_LOCATE_SITE_CLASS_CACHE_MAXSIZE = 64
46+
4247

4348
class AnophelesSnpData(
4449
AnophelesSampleMetadata, AnophelesGenomeFeaturesData, AnophelesGenomeSequenceData
@@ -69,6 +74,14 @@ def __init__(
6974
self._cache_site_annotations = None
7075
self._cache_locate_site_class: Dict = dict()
7176

77+
# Create the SNP-calls cache as a per-instance lru_cache wrapping the
78+
# bound method. Storing it on the instance (rather than using a
79+
# class-level @lru_cache decorator) means:
80+
# 1. `self` is not part of the cache key, so old instances are freed
81+
# normally when the caller drops their reference.
82+
# 2. Different instances have independent, non-interfering caches.
83+
self._cached_snp_calls = lru_cache(maxsize=2)(self._raw_snp_calls)
84+
7285
@property
7386
def _site_filters_analysis(self) -> Optional[str]:
7487
if self._site_filters_analysis_override:
@@ -929,6 +942,13 @@ def _locate_site_class(
929942

930943
self._cache_locate_site_class[cache_key] = loc_ann
931944

945+
# Evict the oldest entry when the cache exceeds its size limit.
946+
# Plain dicts preserve insertion order (Python 3.7+), so the first
947+
# key is always the oldest.
948+
while len(self._cache_locate_site_class) > _LOCATE_SITE_CLASS_CACHE_MAXSIZE:
949+
oldest = next(iter(self._cache_locate_site_class))
950+
del self._cache_locate_site_class[oldest]
951+
932952
return loc_ann
933953

934954
def _snp_calls_for_contig(
@@ -1089,16 +1109,7 @@ def snp_calls(
10891109
chunks=chunks,
10901110
)
10911111

1092-
# Here we cache to improve performance for functions which
1093-
# access SNP calls more than once. For example, this currently
1094-
# happens during access of biallelic SNP calls, because a
1095-
# first computation of allele counts is required, before
1096-
# then using that to filter SNP calls.
1097-
#
1098-
# We only cache up to 2 items because otherwise we can see
1099-
# high memory usage.
1100-
@lru_cache(maxsize=2)
1101-
def _cached_snp_calls(
1112+
def _raw_snp_calls(
11021113
self,
11031114
*,
11041115
regions: Tuple[Region, ...],

tests/anoph/test_snp_data.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,63 @@ def test_snp_calls_with_site_class_param(ag3_sim_api: AnophelesSnpData, site_cla
950950
assert ds2.sizes["variants"] < ds1.sizes["variants"]
951951

952952

953+
def test_locate_site_class_cache_is_bounded(ag3_sim_api: AnophelesSnpData):
954+
"""_cache_locate_site_class must never grow beyond _LOCATE_SITE_CLASS_CACHE_MAXSIZE
955+
even when all contigs and site classes are exercised in a single session."""
956+
from malariagen_data.anoph.snp_data import _LOCATE_SITE_CLASS_CACHE_MAXSIZE
957+
958+
site_classes = [
959+
"CDS_DEG_4",
960+
"CDS_DEG_2_SIMPLE",
961+
"CDS_DEG_0",
962+
"INTRON_SHORT",
963+
"INTRON_LONG",
964+
"INTRON_SPLICE_5PRIME",
965+
"INTRON_SPLICE_3PRIME",
966+
"UTR_5PRIME",
967+
"UTR_3PRIME",
968+
"INTERGENIC",
969+
]
970+
for contig in ag3_sim_api.contigs:
971+
for site_class in site_classes:
972+
ag3_sim_api.snp_calls(region=contig, site_class=site_class)
973+
974+
assert len(ag3_sim_api._cache_locate_site_class) <= _LOCATE_SITE_CLASS_CACHE_MAXSIZE
975+
976+
977+
def test_snp_calls_cache_is_per_instance(ag3_sim_api: AnophelesSnpData):
978+
"""_cached_snp_calls must be a per-instance lru_cache, not a class-level one.
979+
980+
A class-level @lru_cache stores `self` as a key in a class-global dict,
981+
which prevents garbage collection of stale API instances and leaks all their
982+
subcaches. The fix stores the cache on the instance in __init__, so each
983+
object has its own independent cache that is freed with the object.
984+
"""
985+
# (1) The cache wrapper must live on the instance, not on the class.
986+
assert "_cached_snp_calls" in ag3_sim_api.__dict__, (
987+
"_cached_snp_calls should be an instance attribute (per-instance lru_cache), "
988+
"not a class-level descriptor"
989+
)
990+
991+
# (2) It must be a real lru_cache wrapper (exposes cache_info / cache_clear).
992+
assert hasattr(ag3_sim_api._cached_snp_calls, "cache_info")
993+
assert hasattr(ag3_sim_api._cached_snp_calls, "cache_clear")
994+
995+
# (3) Populate the cache and confirm it registers hits.
996+
ag3_sim_api.snp_calls(region="3L")
997+
ag3_sim_api.snp_calls(region="3L") # second call — should be a cache hit
998+
info = ag3_sim_api._cached_snp_calls.cache_info()
999+
assert info.currsize > 0
1000+
assert info.hits >= 1
1001+
1002+
# (4) The class itself must NOT own _cached_snp_calls (it must not be a
1003+
# class-level descriptor installed by @lru_cache).
1004+
assert "_cached_snp_calls" not in AnophelesSnpData.__dict__, (
1005+
"_cached_snp_calls must not be a class-level attribute; "
1006+
"a class-level @lru_cache would pin `self` in a global cache dict"
1007+
)
1008+
1009+
9531010
@pytest.mark.parametrize("chrom", ["2RL", "3RL"])
9541011
def test_snp_calls_with_virtual_contigs(ag3_sim_api, chrom):
9551012
api = ag3_sim_api

0 commit comments

Comments
 (0)