Skip to content

Commit 784ad48

Browse files
Jon Brenas
authored and committed
Created the main file - no tests yet
1 parent 2d63f0d commit 784ad48

1 file changed

Lines changed: 230 additions & 0 deletions

File tree

malariagen_data/adar1.py

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
import sys
2+
3+
import plotly.express as px # type: ignore
4+
5+
import malariagen_data
6+
from .anopheles import AnophelesDataResource
7+
8+
# Release series identifiers; these are handed to AnophelesDataResource
# by the Adar1 constructor.
MAJOR_VERSION_NUMBER = 1
MAJOR_VERSION_PATH = "v1.0"
CONFIG_PATH = "v1.0-config.json"

# Google Cloud Storage locations for the Adar release data.
GCS_DEFAULT_URL = "gs://vo_adar_release_master_us_central1/"
GCS_DEFAULT_PUBLIC_URL = "gs://vo_adar_release_master_us_central1/"
GCS_REGION_URLS = {"us-central1": "gs://vo_adar_release_master_us_central1"}

# Colour assigned to each taxon, drawn from plotly's qualitative palette.
TAXON_PALETTE = px.colors.qualitative.Plotly
TAXON_COLORS = dict(darlingi=TAXON_PALETTE[0])
21+
22+
23+
class Adar1(AnophelesDataResource):
    """Provides access to data from Adar1.0 releases.

    Parameters
    ----------
    url : str, optional
        Base path to data. Defaults to use Google Cloud Storage, or can
        be a local path on your file system if data have been downloaded.
    public_url : str, optional
        Forwarded to the base class; defaults to GCS_DEFAULT_PUBLIC_URL.
    bokeh_output_notebook : bool, optional
        If True (default), configure bokeh to output plots to the notebook.
    results_cache : str, optional
        Path to directory on local file system to save results.
    log : str or stream, optional
        File path or stream output for logging messages.
    debug : bool, optional
        Set to True to enable debug level logging.
    show_progress : bool, optional
        If True, show a progress bar during longer-running computations.
        The default can be overridden using an environmental variable
        named MGEN_SHOW_PROGRESS.
    check_location : bool, optional
        If True, use ipinfo to check the location of the client system.
    cohorts_analysis : str, optional
        Cohorts analysis version.
    site_filters_analysis : str, optional
        Site filters analysis version.
    discordant_read_calls_analysis : str, optional
        Discordant read calls analysis version.
    pre : bool, optional
        Forwarded unchanged to AnophelesDataResource.
    tqdm_class : optional
        Forwarded unchanged to AnophelesDataResource.
    unrestricted_use_only : bool, optional
        Forwarded unchanged to AnophelesDataResource; its value is shown
        in the client summary as "Data filtered to unrestricted use only".
    surveillance_use_only : bool, optional
        Forwarded unchanged to AnophelesDataResource; its value is shown
        in the client summary as "Data filtered to surveillance use only".
    **storage_options
        Passed through to fsspec when setting up file system access.

    Examples
    --------
    Access data from Google Cloud Storage (default):

        >>> import malariagen_data
        >>> adar1 = malariagen_data.Adar1()

    Access data downloaded to a local file system:

        >>> adar1 = malariagen_data.Adar1("/local/path/to/vo_adar_release/")

    Access data from Google Cloud Storage, with caching on the local file system
    in a directory named "gcs_cache":

        >>> adar1 = malariagen_data.Adar1(
        ...     "simplecache::gs://vo_adar_release_master_us_central1",
        ...     simplecache=dict(cache_storage="gcs_cache"),
        ... )

    Set up caching of some longer-running computations on the local file system,
    in a directory named "results_cache":

        >>> adar1 = malariagen_data.Adar1(results_cache="results_cache")

    """

    def __init__(
        self,
        url=None,
        public_url=GCS_DEFAULT_PUBLIC_URL,
        bokeh_output_notebook=True,
        results_cache=None,
        log=sys.stdout,
        debug=False,
        show_progress=None,
        check_location=True,
        cohorts_analysis=None,
        site_filters_analysis=None,
        discordant_read_calls_analysis=None,
        pre=False,
        tqdm_class=None,
        unrestricted_use_only=False,
        surveillance_use_only=False,
        **storage_options,
    ):
        """Configure the base resource with the Adar1-specific settings."""
        super().__init__(
            url=url,
            public_url=public_url,
            config_path=CONFIG_PATH,
            cohorts_analysis=cohorts_analysis,
            # AIM-related options are explicitly set to None for this resource.
            aim_analysis=None,
            aim_metadata_dtype=None,
            aim_ids=None,
            aim_palettes=None,
            site_filters_analysis=site_filters_analysis,
            discordant_read_calls_analysis=discordant_read_calls_analysis,
            # "darlingi" is the default for every per-taxon analysis option.
            default_site_mask="darlingi",
            default_phasing_analysis="darlingi",
            default_coverage_calls_analysis="darlingi",
            bokeh_output_notebook=bokeh_output_notebook,
            results_cache=results_cache,
            log=log,
            debug=debug,
            show_progress=show_progress,
            check_location=check_location,
            pre=pre,
            gcs_default_url=GCS_DEFAULT_URL,
            gcs_region_urls=GCS_REGION_URLS,
            major_version_number=MAJOR_VERSION_NUMBER,
            major_version_path=MAJOR_VERSION_PATH,
            gff_gene_type="protein_coding_gene",
            gff_gene_name_attribute="Note",
            gff_default_attributes=("ID", "Parent", "Note", "description"),
            storage_options=storage_options,
            tqdm_class=tqdm_class,
            taxon_colors=TAXON_COLORS,
            # No virtual contigs, gene name mapping or inversion tags are
            # supplied for this resource.
            virtual_contigs=None,
            gene_names=None,
            inversion_tag_path=None,
            unrestricted_use_only=unrestricted_use_only,
            surveillance_use_only=surveillance_use_only,
        )

    def __repr__(self):
        """Return a plain-text summary of the client configuration."""
        text = (
            f"<MalariaGEN Adar1 API client>\n"
            f"Storage URL : {self._url}\n"
            f"Data releases available : {', '.join(self._available_releases)}\n"
            f"Results cache : {self._results_cache}\n"
            f"Cohorts analysis : {self._cohorts_analysis}\n"
            f"Site filters analysis : {self._site_filters_analysis}\n"
            f"Software version : malariagen_data {malariagen_data.__version__}\n"
            f"Client location : {self.client_location}\n"
            f"Data filtered to unrestricted use only: {self._unrestricted_use_only}\n"
            f"Data filtered to surveillance use only: {self._surveillance_use_only}\n"
            f"Relevant data releases : {', '.join(self.releases)}\n"
            f"---\n"
            f"Please note that data are subject to terms of use,\n"
            f"for more information see https://www.malariagen.net/data\n"
            f"or contact support@malariagen.net. For API documentation see \n"
            # Link to this class's own API page (previously pointed at Adir1).
            f"https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/Adar1.html"
        )
        return text

    def _repr_html_(self):
        """Return an HTML table summary of the client configuration.

        The table carries the same fields as ``__repr__``.
        """
        # The API docs link targets this class's own page (previously Adir1).
        html = f"""
            <table class="malariagen-adar1">
                <thead>
                    <tr>
                        <th style="text-align: left" colspan="2">MalariaGEN Adar1 API client</th>
                    </tr>
                    <tr><td colspan="2" style="text-align: left">
                        Please note that data are subject to terms of use,
                        for more information see <a href="https://www.malariagen.net/data">
                        the MalariaGEN website</a> or contact support@malariagen.net.
                        See also the <a href="https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/Adar1.html">Adar1 API docs</a>.
                    </td></tr>
                </thead>
                <tbody>
                    <tr>
                        <th style="text-align: left">
                            Storage URL
                        </th>
                        <td>{self._url}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Data releases available
                        </th>
                        <td>{', '.join(self._available_releases)}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Results cache
                        </th>
                        <td>{self._results_cache}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Cohorts analysis
                        </th>
                        <td>{self._cohorts_analysis}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Site filters analysis
                        </th>
                        <td>{self._site_filters_analysis}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Software version
                        </th>
                        <td>malariagen_data {malariagen_data.__version__}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Client location
                        </th>
                        <td>{self.client_location}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Data filtered for unrestricted use only
                        </th>
                        <td>{self._unrestricted_use_only}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Data filtered for surveillance use only
                        </th>
                        <td>{self._surveillance_use_only}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Relevant data releases
                        </th>
                        <td>{', '.join(self.releases)}</td>
                    </tr>
                </tbody>
            </table>
        """
        return html

0 commit comments

Comments
 (0)