Skip to content

Commit 31a0c31

Browse files
init As1 class file hooray
1 parent 3c2ee64 commit 31a0c31

1 file changed

Lines changed: 237 additions & 0 deletions

File tree

malariagen_data/as1.py

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
import sys
2+
3+
import plotly.express as px # type: ignore
4+
5+
import malariagen_data
6+
from .anopheles import AnophelesDataResource
7+
8+
# Release identification and storage locations for the As1 data resource.
MAJOR_VERSION_NUMBER = 1
MAJOR_VERSION_PATH = "v1.0"
CONFIG_PATH = "v1.0-config.json"
GCS_DEFAULT_URL = "gs://vo_aste_release_master_us_central1/"
GCS_DEFAULT_PUBLIC_URL = "gs://vo_aste_release_master_us_central1/"
GCS_REGION_URLS = {
    "us-central1": "gs://vo_aste_release_master_us_central1",
}

# Colours assigned to taxa, drawn from plotly's qualitative "Plotly" palette.
TAXON_PALETTE = px.colors.qualitative.Plotly
TAXON_COLORS = {
    # Keyed by "stephensi" to agree with the "stephensi" defaults that
    # As1.__init__ passes for the site mask, phasing analysis and coverage
    # calls analysis. The previous key, "dirus", appears to have been carried
    # over from another resource module.
    # NOTE(review): confirm this matches the taxon labels in the As1 sample
    # metadata.
    "stephensi": TAXON_PALETTE[0],
}

# Names under which As1 registers its cached results (see the matching
# ``_*_cache_name`` class attributes on As1).
XPEHH_GWSS_CACHE_NAME = "as1_xpehh_gwss_v1"
IHS_GWSS_CACHE_NAME = "as1_ihs_gwss_v1"
ROH_HMM_CACHE_NAME = "as1_roh_hmm_v1"
25+
26+
27+
class As1(AnophelesDataResource):
    """Provides access to data from As1.0 releases.

    Parameters
    ----------
    url : str, optional
        Base path to data. Defaults to use Google Cloud Storage, or can
        be a local path on your file system if data have been downloaded.
    public_url : str, optional
        Forwarded unchanged to the parent class. Defaults to
        ``GCS_DEFAULT_PUBLIC_URL``.
    bokeh_output_notebook : bool, optional
        If True (default), configure bokeh to output plots to the notebook.
    results_cache : str, optional
        Path to directory on local file system to save results.
    log : str or stream, optional
        File path or stream output for logging messages.
    debug : bool, optional
        Set to True to enable debug level logging.
    show_progress : bool, optional
        If True, show a progress bar during longer-running computations.
        The default can be overridden using an environmental variable
        named MGEN_SHOW_PROGRESS.
    check_location : bool, optional
        If True, use ipinfo to check the location of the client system.
    cohorts_analysis : str, optional
        Cohorts analysis version.
    site_filters_analysis : str, optional
        Site filters analysis version.
    discordant_read_calls_analysis : str, optional
        Discordant read calls analysis version.
    pre : bool, optional
        Forwarded unchanged to the parent class.
        NOTE(review): presumably controls access to pre-release data;
        confirm against AnophelesDataResource.
    tqdm_class : optional
        Forwarded unchanged to the parent class.
        NOTE(review): presumably a custom progress-bar class; confirm
        against AnophelesDataResource.
    unrestricted_use_only : bool, optional
        Forwarded unchanged to the parent class; shown in the client
        summary as "Data filtered to unrestricted use only".
    surveillance_use_only : bool, optional
        Forwarded unchanged to the parent class; shown in the client
        summary as "Data filtered to surveillance use only".
    **storage_options
        Passed through to fsspec when setting up file system access.

    Examples
    --------
    Access data from Google Cloud Storage (default):

        >>> import malariagen_data
        >>> as1 = malariagen_data.As1()

    Access data downloaded to a local file system:

        >>> as1 = malariagen_data.As1("/local/path/to/vo_aste_release/")

    Access data from Google Cloud Storage, with caching on the local file system
    in a directory named "gcs_cache":

        >>> as1 = malariagen_data.As1(
        ...     "simplecache::gs://vo_aste_release_master_us_central1",
        ...     simplecache=dict(cache_storage="gcs_cache"),
        ... )

    Set up caching of some longer-running computations on the local file system,
    in a directory named "results_cache":

        >>> as1 = malariagen_data.As1(results_cache="results_cache")

    """

    # Cache names for this resource, taken from the module-level constants.
    _xpehh_gwss_cache_name = XPEHH_GWSS_CACHE_NAME
    _ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME
    _roh_hmm_cache_name = ROH_HMM_CACHE_NAME

    def __init__(
        self,
        url=None,
        public_url=GCS_DEFAULT_PUBLIC_URL,
        bokeh_output_notebook=True,
        results_cache=None,
        log=sys.stdout,
        debug=False,
        show_progress=None,
        check_location=True,
        cohorts_analysis=None,
        site_filters_analysis=None,
        discordant_read_calls_analysis=None,
        pre=False,
        tqdm_class=None,
        unrestricted_use_only=False,
        surveillance_use_only=False,
        **storage_options,
    ):
        """Set up the client by delegating to AnophelesDataResource.

        Caller-supplied arguments are forwarded unchanged; the As1-specific
        settings (config path, version, storage URLs, "stephensi" defaults,
        GFF attributes and taxon colours) are fixed here. See the class
        docstring for parameter descriptions.
        """
        super().__init__(
            url=url,
            public_url=public_url,
            config_path=CONFIG_PATH,
            cohorts_analysis=cohorts_analysis,
            # No AIM (ancestry-informative marker) settings are supplied for As1.
            aim_analysis=None,
            aim_metadata_dtype=None,
            aim_ids=None,
            aim_palettes=None,
            site_filters_analysis=site_filters_analysis,
            discordant_read_calls_analysis=discordant_read_calls_analysis,
            default_site_mask="stephensi",
            default_phasing_analysis="stephensi",
            default_coverage_calls_analysis="stephensi",
            bokeh_output_notebook=bokeh_output_notebook,
            results_cache=results_cache,
            log=log,
            debug=debug,
            show_progress=show_progress,
            check_location=check_location,
            pre=pre,
            gcs_default_url=GCS_DEFAULT_URL,
            gcs_region_urls=GCS_REGION_URLS,
            major_version_number=MAJOR_VERSION_NUMBER,
            major_version_path=MAJOR_VERSION_PATH,
            gff_gene_type="protein_coding_gene",
            gff_gene_name_attribute="Note",
            gff_default_attributes=("ID", "Parent", "Note", "description"),
            # Extra keyword arguments are handed over as a single dict.
            storage_options=storage_options,
            tqdm_class=tqdm_class,
            taxon_colors=TAXON_COLORS,
            # No virtual contigs or inversion tag path are supplied for As1.
            virtual_contigs=None,
            inversion_tag_path=None,
            unrestricted_use_only=unrestricted_use_only,
            surveillance_use_only=surveillance_use_only,
        )

    def __repr__(self):
        """Return a plain-text, multi-line summary of this client.

        The summary lists the storage URL, releases, cache and analysis
        settings, software version, client location and data-use filters,
        followed by a terms-of-use notice and a link to the API docs.
        """
        text = (
            f"<MalariaGEN As1 API client>\n"
            f"Storage URL : {self._url}\n"
            f"Data releases available : {', '.join(self._available_releases)}\n"
            f"Results cache : {self._results_cache}\n"
            f"Cohorts analysis : {self._cohorts_analysis}\n"
            f"Site filters analysis : {self._site_filters_analysis}\n"
            f"Software version : malariagen_data {malariagen_data.__version__}\n"
            f"Client location : {self.client_location}\n"
            f"Data filtered to unrestricted use only: {self._unrestricted_use_only}\n"
            f"Data filtered to surveillance use only: {self._surveillance_use_only}\n"
            f"Relevant data releases : {', '.join(self.releases)}\n"
            f"---\n"
            f"Please note that data are subject to terms of use,\n"
            f"for more information see https://www.malariagen.net/data\n"
            f"or contact support@malariagen.net. For API documentation see \n"
            f"https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/As1.html"
        )
        return text

    def _repr_html_(self):
        """Return an HTML table summarising this client.

        Carries the same information as ``__repr__``, laid out as a table
        (``_repr_html_`` is the hook name used by IPython-style rich
        display). Every interpolated value is HTML-escaped so that
        characters such as ``&`` or ``<`` in a path, release name or client
        location are shown literally instead of being parsed as markup.
        """
        # Local import keeps this method self-contained.
        from html import escape

        def cell(value):
            # Convert any value to text, then make it safe to embed in HTML.
            return escape(str(value))

        available_releases = cell(", ".join(self._available_releases))
        relevant_releases = cell(", ".join(self.releases))

        markup = f"""
            <table class="malariagen-as1">
                <thead>
                    <tr>
                        <th style="text-align: left" colspan="2">MalariaGEN As1 API client</th>
                    </tr>
                    <tr><td colspan="2" style="text-align: left">
                        Please note that data are subject to terms of use,
                        for more information see <a href="https://www.malariagen.net/data">
                        the MalariaGEN website</a> or contact support@malariagen.net.
                        See also the <a href="https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/As1.html">As1 API docs</a>.
                    </td></tr>
                </thead>
                <tbody>
                    <tr>
                        <th style="text-align: left">
                            Storage URL
                        </th>
                        <td>{cell(self._url)}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Data releases available
                        </th>
                        <td>{available_releases}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Results cache
                        </th>
                        <td>{cell(self._results_cache)}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Cohorts analysis
                        </th>
                        <td>{cell(self._cohorts_analysis)}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Site filters analysis
                        </th>
                        <td>{cell(self._site_filters_analysis)}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Software version
                        </th>
                        <td>malariagen_data {malariagen_data.__version__}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Client location
                        </th>
                        <td>{cell(self.client_location)}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Data filtered for unrestricted use only
                        </th>
                        <td>{cell(self._unrestricted_use_only)}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Data filtered for surveillance use only
                        </th>
                        <td>{cell(self._surveillance_use_only)}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Relevant data releases
                        </th>
                        <td>{relevant_releases}</td>
                    </tr>
                </tbody>
            </table>
        """
        return markup

0 commit comments

Comments
 (0)