Skip to content

Commit ad4f661

Browse files
add initial tests for adir1
1 parent 10e0548 commit ad4f661

1 file changed

Lines changed: 79 additions & 0 deletions

File tree

tests/integration/test_adir1.py

Lines changed: 79 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,79 @@
1+
import numpy as np
2+
import pytest
3+
4+
from malariagen_data import Adir1, Region
5+
from malariagen_data.util import locate_region, resolve_region
6+
7+
8+
def setup_adir1(
    url="simplecache::gs://vo_adir_production_us_central1/release/", **kwargs
):
    """Construct an ``Adir1`` instance for use in these tests.

    Parameters
    ----------
    url : str or None
        Location passed to ``Adir1`` as its first positional argument.
        When ``None``, ``Adir1`` is constructed without a URL argument.
    **kwargs
        Forwarded to ``Adir1``. ``check_location`` and ``show_progress``
        are set to ``False`` unless the caller supplies them.

    Returns
    -------
    Adir1
    """
    # Apply the test defaults without clobbering anything the caller passed.
    for option in ("check_location", "show_progress"):
        kwargs.setdefault(option, False)

    if url is None:
        # Test the default URL.
        # This only exercises the case where setup_adir1 is given no URL; it
        # does not cover the Adir1 default itself. The test_anopheles
        # setup_subclass tests cover the true defaults.
        return Adir1(**kwargs)

    if url.startswith("simplecache::"):
        # Configure the directory on the local file system used to cache data.
        kwargs["simplecache"] = {"cache_storage": "gcs_cache"}

    return Adir1(url, **kwargs)
22+
23+
24+
def test_repr():
    """An ``Adir1`` built with location checking on has a string ``repr``."""
    client = setup_adir1(check_location=True)
    assert isinstance(client, Adir1)
    assert isinstance(repr(client), str)
29+
30+
31+
@pytest.mark.parametrize(
    "region_raw",
    [
        "ADIR015707",
        "KB672490",
        "KB672490:4871446-4871535",
        "KB672490:2,630,355-2,633,221",
        Region("KB672490", 4871446, 4871535),
    ],
)
def test_locate_region(region_raw):
    """Check ``resolve_region`` and ``locate_region`` against Adir1 data.

    Each parametrized ``region_raw`` (gene ID, bare contig, coordinate
    strings with and without thousands separators, or a ``Region`` object)
    is resolved to a ``Region`` and then located within the SNP positions
    of its contig.
    """
    # TODO Migrate this test.
    client = setup_adir1()
    genes = client.geneset(attributes=["ID"])
    region = resolve_region(client, region_raw)
    contig = region.contig
    pos = client.snp_sites(region=contig, field="POS")
    ref = client.snp_sites(region=contig, field="REF")
    loc_region = locate_region(region, pos)

    # Both helpers must return the expected types.
    assert isinstance(loc_region, slice)
    assert isinstance(region, Region)

    # A bare contig name resolves to a Region with open-ended coordinates.
    if region_raw == "KB672490":
        assert region.contig == "KB672490"
        assert region.start is None
        assert region.end is None

    # A Region given as input must come back unchanged.
    if isinstance(region_raw, Region):
        assert region == region_raw

    # A gene ID must resolve to the coordinates recorded in the geneset, and
    # the located slice must span the gene and match its reference sequence.
    if region_raw == "ADIR015707":
        gene = genes.query("ID == 'ADIR015707'").squeeze()
        assert region == Region(gene.contig, gene.start, gene.end)
        located_pos = pos[loc_region]
        assert located_pos[0] == gene.start
        assert located_pos[-1] == gene.end
        observed_bases = ref[loc_region][:5].compute()
        expected_bases = np.array(["T", "T", "T", "C", "T"], dtype="S1")
        assert (observed_bases == expected_bases).all()

    # Coordinate strings must parse, with or without thousands separators.
    # Compared with ``==`` rather than a dict lookup so that a Region input
    # is never hashed.
    parse_cases = (
        ("KB672490:4871446-4871535", ("KB672490", 4871446, 4871535)),
        ("KB672490:2,630,355-2,633,221", ("KB672490", 2630355, 2633221)),
    )
    for text, coords in parse_cases:
        if region_raw == text:
            assert region == Region(*coords)

0 commit comments

Comments
 (0)