Skip to content

Commit d946f3f

Browse files
authored
Merge branch 'master' into fix/veff-replace-asserts-with-exceptions
2 parents 2291062 + 1430baf commit d946f3f

13 files changed

Lines changed: 1176 additions & 5 deletions

File tree

.github/workflows/tests.yml

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ jobs:
1212
fail-fast: true
1313
matrix:
1414
python-version: ["3.10", "3.11", "3.12"]
15+
numpy-spec:
16+
# Keep this aligned with pyproject.toml: numpy = ">=2.0.2,<2.1"
17+
- "==2.0.2" # locked baseline
18+
- ">=2.0.2,<2.1" # latest allowed in declared range
1519
runs-on: ubuntu-latest
1620

1721
steps:
@@ -23,8 +27,26 @@ jobs:
2327
with:
2428
python-version: ${{ matrix.python-version }}
2529

26-
- name: Verify NumPy version
27-
run: poetry run python -c "import numpy; print('NumPy version:', numpy.__version__)"
30+
- name: Install matrix NumPy version
31+
run: poetry run pip install --upgrade --no-deps "numpy${{ matrix.numpy-spec }}"
32+
33+
- name: Verify NumPy version and spec
34+
env:
35+
NUMPY_SPEC: ${{ matrix.numpy-spec }}
36+
run: |
37+
poetry run python - <<'PY'
38+
import os
39+
import numpy
40+
from packaging.specifiers import SpecifierSet
41+
42+
spec = SpecifierSet(os.environ["NUMPY_SPEC"])
43+
version = numpy.__version__
44+
if version not in spec:
45+
raise RuntimeError(
46+
f"NumPy version {version} does not satisfy matrix spec {spec}"
47+
)
48+
print("NumPy version:", version, "| spec:", spec)
49+
PY
2850
2951
- name: Run unit tests
3052
run: poetry run pytest -v tests --ignore tests/integration --typeguard-packages=malariagen_data,malariagen_data.anoph

malariagen_data/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# flake8: noqa
2+
from .adar1 import Adar1
23
from .adir1 import Adir1
34
from .af1 import Af1
45
from .ag3 import Ag3

malariagen_data/adar1.py

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
import sys
2+
3+
import plotly.express as px # type: ignore
4+
5+
import malariagen_data
6+
from .anopheles import AnophelesDataResource
7+
8+
# Release layout: major version number, its path component, and the
# name of the release configuration file.
MAJOR_VERSION_NUMBER = 1
MAJOR_VERSION_PATH = "v1.0"
CONFIG_PATH = "v1.0-config.json"

# Google Cloud Storage locations. The default and public URLs currently
# point at the same bucket; the region map holds one entry per region.
GCS_DEFAULT_URL = "gs://vo_adar_release_master_us_central1/"
GCS_DEFAULT_PUBLIC_URL = "gs://vo_adar_release_master_us_central1/"
GCS_REGION_URLS = {"us-central1": "gs://vo_adar_release_master_us_central1"}

# Plot colours: a single taxon, mapped to the first colour of the
# qualitative Plotly palette.
TAXON_PALETTE = px.colors.qualitative.Plotly
TAXON_COLORS = {"darlingi": TAXON_PALETTE[0]}

# Names under which results of longer-running analyses are cached.
XPEHH_GWSS_CACHE_NAME = "adar1_xpehh_gwss_v1"
IHS_GWSS_CACHE_NAME = "adar1_ihs_gwss_v1"
ROH_HMM_CACHE_NAME = "adar1_roh_hmm_v1"
25+
26+
27+
class Adar1(AnophelesDataResource):
    """Provides access to data from Adar1.0 releases.

    Parameters
    ----------
    url : str, optional
        Base path to data. Defaults to use Google Cloud Storage, or can
        be a local path on your file system if data have been downloaded.
    public_url : str, optional
        Forwarded unchanged to the base class. Defaults to
        ``GCS_DEFAULT_PUBLIC_URL``.
    bokeh_output_notebook : bool, optional
        If True (default), configure bokeh to output plots to the notebook.
    results_cache : str, optional
        Path to directory on local file system to save results.
    log : str or stream, optional
        File path or stream output for logging messages.
    debug : bool, optional
        Set to True to enable debug level logging.
    show_progress : bool, optional
        If True, show a progress bar during longer-running computations. The default can be overridden using an environmental variable named MGEN_SHOW_PROGRESS.
    check_location : bool, optional
        If True, use ipinfo to check the location of the client system.
    cohorts_analysis : str, optional
        Cohorts analysis version.
    site_filters_analysis : str, optional
        Site filters analysis version.
    discordant_read_calls_analysis : str, optional
        Forwarded unchanged to the base class.
    pre : bool, optional
        Forwarded unchanged to the base class.
    tqdm_class : optional
        Forwarded unchanged to the base class.
    unrestricted_use_only : bool, optional
        Forwarded unchanged to the base class; reported in the client
        summary as "Data filtered to unrestricted use only".
    surveillance_use_only : bool, optional
        Forwarded unchanged to the base class; reported in the client
        summary as "Data filtered to surveillance use only".
    **storage_options
        Passed through to fsspec when setting up file system access.

    Examples
    --------
    Access data from Google Cloud Storage (default):

        >>> import malariagen_data
        >>> adar1 = malariagen_data.Adar1()

    Access data downloaded to a local file system:

        >>> adar1 = malariagen_data.Adar1("/local/path/to/vo_adar_release/")

    Access data from Google Cloud Storage, with caching on the local file system
    in a directory named "gcs_cache":

        >>> adar1 = malariagen_data.Adar1(
        ...     "simplecache::gs://vo_adar_release_master_us_central1",
        ...     simplecache=dict(cache_storage="gcs_cache"),
        ... )

    Set up caching of some longer-running computations on the local file system,
    in a directory named "results_cache":

        >>> adar1 = malariagen_data.Adar1(results_cache="results_cache")

    """

    # Cache names, taken from the module-level constants.
    _xpehh_gwss_cache_name = XPEHH_GWSS_CACHE_NAME
    _ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME
    _roh_hmm_cache_name = ROH_HMM_CACHE_NAME

    def __init__(
        self,
        url=None,
        public_url=GCS_DEFAULT_PUBLIC_URL,
        bokeh_output_notebook=True,
        results_cache=None,
        log=sys.stdout,
        debug=False,
        show_progress=None,
        check_location=True,
        cohorts_analysis=None,
        site_filters_analysis=None,
        discordant_read_calls_analysis=None,
        pre=False,
        tqdm_class=None,
        unrestricted_use_only=False,
        surveillance_use_only=False,
        **storage_options,
    ):
        """Set up the client by forwarding everything to the base class.

        Caller-supplied arguments are passed through unchanged; the
        remaining base-class arguments are fixed here to the values
        specific to the Adar1 resource (see the class docstring for the
        caller-facing parameters).
        """
        super().__init__(
            url=url,
            public_url=public_url,
            config_path=CONFIG_PATH,
            cohorts_analysis=cohorts_analysis,
            # No AIM settings are supplied for this resource.
            aim_analysis=None,
            aim_metadata_dtype=None,
            aim_ids=None,
            aim_palettes=None,
            site_filters_analysis=site_filters_analysis,
            discordant_read_calls_analysis=discordant_read_calls_analysis,
            default_site_mask="darlingi",
            default_phasing_analysis="darlingi",
            default_coverage_calls_analysis="darlingi",
            bokeh_output_notebook=bokeh_output_notebook,
            results_cache=results_cache,
            log=log,
            debug=debug,
            show_progress=show_progress,
            check_location=check_location,
            pre=pre,
            gcs_default_url=GCS_DEFAULT_URL,
            gcs_region_urls=GCS_REGION_URLS,
            major_version_number=MAJOR_VERSION_NUMBER,
            major_version_path=MAJOR_VERSION_PATH,
            gff_gene_type="gene",
            gff_gene_name_attribute="Note",
            gff_default_attributes=("ID", "Parent", "Note", "description"),
            storage_options=storage_options,
            tqdm_class=tqdm_class,
            taxon_colors=TAXON_COLORS,
            # No virtual contigs, gene names or inversion tags are supplied.
            virtual_contigs=None,
            gene_names=None,
            inversion_tag_path=None,
            unrestricted_use_only=unrestricted_use_only,
            surveillance_use_only=surveillance_use_only,
        )

    def __repr__(self):
        """Return a plain-text summary of the client configuration.

        The summary ends with the terms-of-use notice and a link to the
        Adar1 API documentation for the installed package version.
        """
        text = (
            "<MalariaGEN Adar1 API client>\n"
            f"Storage URL             : {self._url}\n"
            f"Data releases available : {', '.join(self._available_releases)}\n"
            f"Results cache           : {self._results_cache}\n"
            f"Cohorts analysis        : {self._cohorts_analysis}\n"
            f"Site filters analysis   : {self._site_filters_analysis}\n"
            f"Software version        : malariagen_data {malariagen_data.__version__}\n"
            f"Client location         : {self.client_location}\n"
            f"Data filtered to unrestricted use only: {self._unrestricted_use_only}\n"
            f"Data filtered to surveillance use only: {self._surveillance_use_only}\n"
            f"Relevant data releases  : {', '.join(self.releases)}\n"
            "---\n"
            "Please note that data are subject to terms of use,\n"
            "for more information see https://www.malariagen.net/data\n"
            "or contact support@malariagen.net. For API documentation see \n"
            # Link to the Adar1 page (previously pointed at Adir1.html).
            f"https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/Adar1.html"
        )
        return text

    def _repr_html_(self):
        """Return an HTML table summarising the client configuration.

        Carries the same fields as ``__repr__``, with the terms-of-use
        notice and the Adar1 API documentation link in the table header.
        """
        # The documentation link targets the Adar1 page (previously it
        # pointed at Adir1.html and was labelled "Adir1 API docs").
        html = f"""
            <table class="malariagen-adar1">
                <thead>
                    <tr>
                        <th style="text-align: left" colspan="2">MalariaGEN Adar1 API client</th>
                    </tr>
                    <tr><td colspan="2" style="text-align: left">
                        Please note that data are subject to terms of use,
                        for more information see <a href="https://www.malariagen.net/data">
                        the MalariaGEN website</a> or contact support@malariagen.net.
                        See also the <a href="https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/Adar1.html">Adar1 API docs</a>.
                    </td></tr>
                </thead>
                <tbody>
                    <tr>
                        <th style="text-align: left">
                            Storage URL
                        </th>
                        <td>{self._url}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Data releases available
                        </th>
                        <td>{', '.join(self._available_releases)}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Results cache
                        </th>
                        <td>{self._results_cache}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Cohorts analysis
                        </th>
                        <td>{self._cohorts_analysis}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Site filters analysis
                        </th>
                        <td>{self._site_filters_analysis}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Software version
                        </th>
                        <td>malariagen_data {malariagen_data.__version__}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Client location
                        </th>
                        <td>{self.client_location}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Data filtered for unrestricted use only
                        </th>
                        <td>{self._unrestricted_use_only}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Data filtered for surveillance use only
                        </th>
                        <td>{self._surveillance_use_only}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Relevant data releases
                        </th>
                        <td>{', '.join(self.releases)}</td>
                    </tr>
                </tbody>
            </table>
        """
        return html

0 commit comments

Comments
 (0)