|
| 1 | +import sys |
| 2 | + |
| 3 | +import plotly.express as px # type: ignore |
| 4 | + |
| 5 | +import malariagen_data |
| 6 | +from .anopheles import AnophelesDataResource |
| 7 | + |
# Release identification, forwarded to the base class by Adar1.__init__.
MAJOR_VERSION_NUMBER = 1
MAJOR_VERSION_PATH = "v1.0"
CONFIG_PATH = "v1.0-config.json"

# Google Cloud Storage locations. Note that the default URLs carry a
# trailing slash whereas the per-region URL does not.
GCS_DEFAULT_URL = "gs://vo_adar_release_master_us_central1/"
GCS_DEFAULT_PUBLIC_URL = "gs://vo_adar_release_master_us_central1/"
GCS_REGION_URLS = {"us-central1": "gs://vo_adar_release_master_us_central1"}

# Plot colours: a single taxon, mapped to the first entry of the
# qualitative Plotly palette.
TAXON_PALETTE = px.colors.qualitative.Plotly
TAXON_COLORS = {"darlingi": TAXON_PALETTE[0]}

# Names assigned to the Adar1 class attributes `_xpehh_gwss_cache_name`,
# `_ihs_gwss_cache_name` and `_roh_hmm_cache_name` respectively.
XPEHH_GWSS_CACHE_NAME = "adar1_xpehh_gwss_v1"
IHS_GWSS_CACHE_NAME = "adar1_ihs_gwss_v1"
ROH_HMM_CACHE_NAME = "adar1_roh_hmm_v1"
| 25 | + |
| 26 | + |
class Adar1(AnophelesDataResource):
    """Provides access to data from Adar1.0 releases.

    Parameters
    ----------
    url : str, optional
        Base path to data. Defaults to use Google Cloud Storage, or can
        be a local path on your file system if data have been downloaded.
    public_url : str, optional
        Defaults to ``GCS_DEFAULT_PUBLIC_URL``. Forwarded unchanged to
        ``AnophelesDataResource``.
    bokeh_output_notebook : bool, optional
        If True (default), configure bokeh to output plots to the notebook.
    results_cache : str, optional
        Path to directory on local file system to save results.
    log : str or stream, optional
        File path or stream output for logging messages.
    debug : bool, optional
        Set to True to enable debug level logging.
    show_progress : bool, optional
        If True, show a progress bar during longer-running computations.
        The default can be overridden using an environmental variable
        named MGEN_SHOW_PROGRESS.
    check_location : bool, optional
        If True, use ipinfo to check the location of the client system.
    cohorts_analysis : str, optional
        Cohorts analysis version.
    site_filters_analysis : str, optional
        Site filters analysis version.
    discordant_read_calls_analysis : str, optional
        Discordant read calls analysis version.
    pre : bool, optional
        Forwarded unchanged to ``AnophelesDataResource``. Presumably
        controls access to pre-release data; confirm against the parent
        class.
    tqdm_class : optional
        Forwarded unchanged to ``AnophelesDataResource``. Presumably the
        progress bar implementation to use; confirm against the parent
        class.
    unrestricted_use_only : bool, optional
        Forwarded to ``AnophelesDataResource`` and reported in the object
        representation as "Data filtered to unrestricted use only".
    surveillance_use_only : bool, optional
        Forwarded to ``AnophelesDataResource`` and reported in the object
        representation as "Data filtered to surveillance use only".
    **storage_options
        Passed through to fsspec when setting up file system access.

    Examples
    --------
    Access data from Google Cloud Storage (default):

        >>> import malariagen_data
        >>> adar1 = malariagen_data.Adar1()

    Access data downloaded to a local file system:

        >>> adar1 = malariagen_data.Adar1("/local/path/to/vo_adar_release/")

    Access data from Google Cloud Storage, with caching on the local file system
    in a directory named "gcs_cache":

        >>> adar1 = malariagen_data.Adar1(
        ...     "simplecache::gs://vo_adar_release_master_us_central1",
        ...     simplecache=dict(cache_storage="gcs_cache"),
        ... )

    Set up caching of some longer-running computations on the local file system,
    in a directory named "results_cache":

        >>> adar1 = malariagen_data.Adar1(results_cache="results_cache")

    """

    # Resource-specific cache names, taken from the module-level constants.
    _xpehh_gwss_cache_name = XPEHH_GWSS_CACHE_NAME
    _ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME
    _roh_hmm_cache_name = ROH_HMM_CACHE_NAME

    def __init__(
        self,
        url=None,
        public_url=GCS_DEFAULT_PUBLIC_URL,
        bokeh_output_notebook=True,
        results_cache=None,
        log=sys.stdout,
        debug=False,
        show_progress=None,
        check_location=True,
        cohorts_analysis=None,
        site_filters_analysis=None,
        discordant_read_calls_analysis=None,
        pre=False,
        tqdm_class=None,
        unrestricted_use_only=False,
        surveillance_use_only=False,
        **storage_options,
    ):
        # Combine the caller's options with the fixed Adar1 settings and
        # hand everything to the generic Anopheles data resource.
        super().__init__(
            url=url,
            public_url=public_url,
            config_path=CONFIG_PATH,
            cohorts_analysis=cohorts_analysis,
            # All AIM-related options are passed as None for this resource.
            aim_analysis=None,
            aim_metadata_dtype=None,
            aim_ids=None,
            aim_palettes=None,
            site_filters_analysis=site_filters_analysis,
            discordant_read_calls_analysis=discordant_read_calls_analysis,
            default_site_mask="darlingi",
            default_phasing_analysis="darlingi",
            default_coverage_calls_analysis="darlingi",
            bokeh_output_notebook=bokeh_output_notebook,
            results_cache=results_cache,
            log=log,
            debug=debug,
            show_progress=show_progress,
            check_location=check_location,
            pre=pre,
            gcs_default_url=GCS_DEFAULT_URL,
            gcs_region_urls=GCS_REGION_URLS,
            major_version_number=MAJOR_VERSION_NUMBER,
            major_version_path=MAJOR_VERSION_PATH,
            gff_gene_type="gene",
            gff_gene_name_attribute="Note",
            gff_default_attributes=("ID", "Parent", "Note", "description"),
            # Extra keyword arguments are bundled into a single dict.
            storage_options=storage_options,
            tqdm_class=tqdm_class,
            taxon_colors=TAXON_COLORS,
            virtual_contigs=None,
            gene_names=None,
            inversion_tag_path=None,
            unrestricted_use_only=unrestricted_use_only,
            surveillance_use_only=surveillance_use_only,
        )

    def __repr__(self):
        """Return a plain-text summary of the client configuration."""
        text = (
            f"<MalariaGEN Adar1 API client>\n"
            f"Storage URL             : {self._url}\n"
            f"Data releases available : {', '.join(self._available_releases)}\n"
            f"Results cache           : {self._results_cache}\n"
            f"Cohorts analysis        : {self._cohorts_analysis}\n"
            f"Site filters analysis   : {self._site_filters_analysis}\n"
            f"Software version        : malariagen_data {malariagen_data.__version__}\n"
            f"Client location         : {self.client_location}\n"
            f"Data filtered to unrestricted use only: {self._unrestricted_use_only}\n"
            f"Data filtered to surveillance use only: {self._surveillance_use_only}\n"
            f"Relevant data releases  : {', '.join(self.releases)}\n"
            f"---\n"
            f"Please note that data are subject to terms of use,\n"
            f"for more information see https://www.malariagen.net/data\n"
            f"or contact support@malariagen.net. For API documentation see \n"
            # Link to this class's own docs page (previously pointed at Adir1).
            f"https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/Adar1.html"
        )
        return text

    def _repr_html_(self):
        """Return an HTML table summarising the client configuration.

        Carries the same fields as ``__repr__``.
        """
        # The API docs link targets the Adar1 page (previously Adir1).
        html = f"""
            <table class="malariagen-adar1">
                <thead>
                    <tr>
                        <th style="text-align: left" colspan="2">MalariaGEN Adar1 API client</th>
                    </tr>
                    <tr><td colspan="2" style="text-align: left">
                        Please note that data are subject to terms of use,
                        for more information see <a href="https://www.malariagen.net/data">
                        the MalariaGEN website</a> or contact support@malariagen.net.
                        See also the <a href="https://malariagen.github.io/malariagen-data-python/v{malariagen_data.__version__}/Adar1.html">Adar1 API docs</a>.
                    </td></tr>
                </thead>
                <tbody>
                    <tr>
                        <th style="text-align: left">
                            Storage URL
                        </th>
                        <td>{self._url}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Data releases available
                        </th>
                        <td>{', '.join(self._available_releases)}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Results cache
                        </th>
                        <td>{self._results_cache}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Cohorts analysis
                        </th>
                        <td>{self._cohorts_analysis}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Site filters analysis
                        </th>
                        <td>{self._site_filters_analysis}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Software version
                        </th>
                        <td>malariagen_data {malariagen_data.__version__}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Client location
                        </th>
                        <td>{self.client_location}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Data filtered for unrestricted use only
                        </th>
                        <td>{self._unrestricted_use_only}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Data filtered for surveillance use only
                        </th>
                        <td>{self._surveillance_use_only}</td>
                    </tr>
                    <tr>
                        <th style="text-align: left">
                            Relevant data releases
                        </th>
                        <td>{', '.join(self.releases)}</td>
                    </tr>
                </tbody>
            </table>
        """
        return html
0 commit comments