|
| 1 | +import pytest |
| 2 | + |
| 3 | +from malariagen_data import Adar1, Region |
| 4 | +from malariagen_data.util import _locate_region, _resolve_region |
| 5 | + |
| 6 | + |
def setup_adar1(url="simplecache::gs://vo_adar_release_master_us_central1/", **kwargs):
    """Construct an ``Adar1`` client configured for use in the tests.

    Parameters
    ----------
    url : str or None
        Storage location handed to ``Adar1`` as its first positional
        argument. When ``None``, ``Adar1`` is constructed without a URL.
    **kwargs
        Extra keyword arguments forwarded to ``Adar1``. ``check_location``
        and ``show_progress`` default to ``False`` unless the caller
        supplies them.

    Returns
    -------
    Adar1
        The configured client.
    """
    kwargs.setdefault("check_location", False)
    kwargs.setdefault("show_progress", False)
    if url is None:
        # No URL is forwarded here, so Adar1 is built from keyword arguments
        # only. Passing url=None bypasses the setup_adar1 default URL above.
        # Per the original note, the test_anopheles setup_subclass tests
        # cover the true defaults.
        return Adar1(**kwargs)
    if url.startswith("simplecache::"):
        # Configure the directory on the local file system used to cache
        # data. setdefault (rather than plain assignment) keeps an explicit
        # caller-supplied simplecache configuration intact, matching how the
        # other options above are defaulted.
        kwargs.setdefault("simplecache", dict(cache_storage="gcs_cache"))
    return Adar1(url, **kwargs)
| 19 | + |
| 20 | + |
def test_repr():
    """Check that an Adar1 client can be built and has a string repr."""
    client = setup_adar1(check_location=True)
    assert isinstance(client, Adar1)
    # repr() must succeed and yield a plain string.
    text = repr(client)
    assert isinstance(text, str)
| 26 | + |
| 27 | + |
@pytest.mark.parametrize(
    "region_raw",
    [
        "2",
        "gene-LOC125950257",
        "2:4871446-4871535",
        "2:2,630,355-2,633,221",
        Region("2", 4871446, 4871535),
    ],
)
def test_locate_region(region_raw):
    """Resolve each raw region spec and locate it within the SNP positions.

    Every parametrized input is passed through ``_resolve_region`` and then
    ``_locate_region``; the resulting types are checked for all inputs, and
    input-specific expectations are checked afterwards.
    """
    # TODO Migrate this test.
    client = setup_adar1()
    genes = client.geneset(attributes=["ID"])
    resolved = _resolve_region(client, region_raw)
    positions = client.snp_sites(region=resolved.contig, field="POS")
    # The REF field is needed by a check that has not been added yet:
    # ref = client.snp_sites(region=resolved.contig, field="REF")
    located = _locate_region(resolved, positions)

    # Type expectations that hold for every input.
    assert isinstance(located, slice)
    assert isinstance(resolved, Region)

    # A bare contig resolves to a Region with no start/end bounds.
    if region_raw == "2":
        assert resolved.contig == "2"
        assert resolved.start is None
        assert resolved.end is None

    # A Region instance passes through the resolver unchanged.
    if isinstance(region_raw, Region):
        assert resolved == region_raw

    # A gene ID resolves to the coordinates recorded in the geneset, and the
    # located slice of positions begins and ends on those coordinates.
    if region_raw == "gene-LOC125950257":
        record = genes.query("ID == 'gene-LOC125950257'").squeeze()
        expected = Region(record.contig, record.start, record.end)
        assert resolved == expected
        selected = positions[located]
        assert selected[0] == record.start
        assert selected[-1] == record.end
        # To be checked once REF is loaded above:
        # assert (
        #     ref[located][:5].compute() == np.array(["T", "T", "G", "T", "T"])
        # ).all()

    # Region strings, with and without thousands separators, parse to the
    # expected coordinates.
    if region_raw == "2:4871446-4871535":
        assert resolved == Region("2", 4871446, 4871535)
    if region_raw == "2:2,630,355-2,633,221":
        assert resolved == Region("2", 2630355, 2633221)
0 commit comments