|
| 1 | +import numpy as np |
| 2 | +import pytest |
| 3 | + |
| 4 | +from malariagen_data import Adir1, Region |
| 5 | +from malariagen_data.util import locate_region, resolve_region |
| 6 | + |
| 7 | + |
def setup_adir1(
    url="simplecache::gs://vo_adir_production_us_central1/release/", **kwargs
):
    """Build an ``Adir1`` instance configured for use in these tests.

    Parameters
    ----------
    url : str or None
        Data release location. When ``None``, ``Adir1`` is constructed
        without an explicit URL.
    **kwargs
        Forwarded to ``Adir1``. ``check_location`` and ``show_progress``
        default to ``False`` unless the caller supplies them, and a
        ``simplecache`` option is filled in for ``simplecache::`` URLs
        unless the caller supplies one.

    Returns
    -------
    Adir1
        The constructed instance.
    """
    kwargs.setdefault("check_location", False)
    kwargs.setdefault("show_progress", False)
    if url is None:
        # Construct Adir1 without passing a URL.
        # Note that calling setup_adir1() with no arguments uses this
        # helper's default url, not the Adir1 default; per the original
        # note, the test_anopheles setup_subclass tests cover true defaults.
        return Adir1(**kwargs)
    if url.startswith("simplecache::"):
        # Configure the directory on the local file system to cache data.
        # setdefault (rather than assignment) keeps a caller-supplied
        # simplecache configuration instead of silently overwriting it.
        kwargs.setdefault("simplecache", dict(cache_storage="gcs_cache"))
    return Adir1(url, **kwargs)
| 22 | + |
| 23 | + |
def test_repr():
    """An Adir1 built with check_location=True has a str repr."""
    client = setup_adir1(check_location=True)
    assert isinstance(client, Adir1)

    # repr() must succeed and hand back a str.
    text = repr(client)
    assert isinstance(text, str)
| 29 | + |
| 30 | + |
@pytest.mark.parametrize(
    "region_raw",
    [
        "ADIR015707",
        "KB672490",
        "KB672490:4871446-4871535",
        "KB672490:2,630,355-2,633,221",
        Region("KB672490", 4871446, 4871535),
    ],
)
def test_locate_region(region_raw):
    """Resolve each raw region form and locate it within the SNP positions.

    Each parametrized input is passed through ``resolve_region`` and
    ``locate_region``; the result types are checked for every input, and
    input-specific expectations are checked in the branches below.
    """
    # TODO Migrate this test.
    client = setup_adir1()
    genes = client.geneset(attributes=["ID"])
    resolved = resolve_region(client, region_raw)
    contig = resolved.contig
    positions = client.snp_sites(region=contig, field="POS")
    ref_alleles = client.snp_sites(region=contig, field="REF")
    located = locate_region(resolved, positions)

    # Result types hold for every input form.
    assert isinstance(located, slice)
    assert isinstance(resolved, Region)

    # A bare contig name resolves to a Region with no start or end.
    if region_raw == "KB672490":
        assert resolved.contig == "KB672490"
        assert resolved.start is None
        assert resolved.end is None

    # A Region instance is returned unchanged.
    if isinstance(region_raw, Region):
        assert resolved == region_raw

    # A gene ID resolves to the gene's coordinates in the geneset, and the
    # located slice spans those coordinates and the expected reference bases.
    if region_raw == "ADIR015707":
        gene = genes.query("ID == 'ADIR015707'").squeeze()
        assert resolved == Region(gene.contig, gene.start, gene.end)
        gene_positions = positions[located]
        assert gene_positions[0] == gene.start
        assert gene_positions[-1] == gene.end
        observed_ref = ref_alleles[located][:5].compute()
        expected_ref = np.array(["T", "T", "T", "C", "T"], dtype="S1")
        assert (observed_ref == expected_ref).all()

    # Region strings parse to coordinates, with or without thousands commas.
    if region_raw == "KB672490:4871446-4871535":
        assert resolved == Region("KB672490", 4871446, 4871535)
    if region_raw == "KB672490:2,630,355-2,633,221":
        assert resolved == Region("KB672490", 2630355, 2633221)
0 commit comments