|
1 | 1 | { |
2 | 2 | "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "id": "f6b86889", |
| 6 | + "metadata": {}, |
| 7 | + "source": [ |
| 8 | + "# Haplotype Network Plotting Examples\n", |
| 9 | + "This notebook demonstrates the `plot_haplotype_network` function from the `malariagen_data` package, showcasing different ways to use the `color` parameter to visualize haplotype networks." |
| 10 | + ] |
| 11 | + }, |
3 | 12 | { |
4 | 13 | "cell_type": "code", |
5 | 14 | "execution_count": null, |
6 | | - "id": "9de62268", |
| 15 | + "id": "1cadfacf", |
7 | 16 | "metadata": {}, |
8 | 17 | "outputs": [], |
9 | 18 | "source": [ |
|
26 | 35 | "metadata": {}, |
27 | 36 | "outputs": [], |
28 | 37 | "source": [ |
| 38 | + "# Initialize Ag3 instance\n", |
29 | 39 | "ag3 = malariagen_data.Ag3(\n", |
30 | 40 | " \"simplecache::gs://vo_agam_release_master_us_central1\",\n", |
31 | 41 | " simplecache=dict(cache_storage=\"../gcs_cache\"),\n", |
|
40 | 50 | "id": "e3ffe116", |
41 | 51 | "metadata": {}, |
42 | 52 | "source": [ |
43 | | - "N.B., manually specifying the server_port parameter doesn't seem to be necessary on colab, but is needed when running locally via Jupyter notebook, otherwise get \"Address already in use\" error and cannot run multiple plots in same notebook. " |
| 53 | + "N.B., manually specifying the `server_port` parameter doesn't seem to be necessary on Colab, but it is needed when running locally via Jupyter notebook; otherwise you get an \"Address already in use\" error and cannot run multiple plots in the same notebook." |
| 54 | + ] |
| 55 | + }, |
| 56 | + { |
| 57 | + "cell_type": "markdown", |
| 58 | + "id": "e5687f24", |
| 59 | + "metadata": {}, |
| 60 | + "source": [ |
| 61 | + "## Example 1: Direct Column Name (String)\n", |
| 62 | + "Pass a column name directly, e.g. `\"country\"`, to color nodes by country." |
44 | 63 | ] |
45 | 64 | }, |
46 | 65 | { |
|
50 | 69 | "metadata": {}, |
51 | 70 | "outputs": [], |
52 | 71 | "source": [ |
| 72 | + "# Plot haplotype network with country coloring\n", |
53 | 73 | "ag3.plot_haplotype_network(\n", |
54 | 74 | " region=\"2L:2,358,158-2,431,617\",\n", |
55 | 75 | " analysis=\"gamb_colu\",\n", |
56 | | - " sample_query=\"taxon == 'coluzzii'\",\n", |
57 | 76 | " sample_sets=\"3.0\",\n", |
| 77 | + " sample_query=\"taxon == 'coluzzii'\",\n", |
58 | 78 | " color=\"country\",\n", |
59 | 79 | " max_dist=2,\n", |
60 | 80 | ")" |
61 | 81 | ] |
62 | 82 | }, |
| 83 | + { |
| 84 | + "cell_type": "markdown", |
| 85 | + "id": "2798b459", |
| 86 | + "metadata": {}, |
| 87 | + "source": [ |
| 88 | + "## Example 2: Cohorts Prefix (String)\n", |
| 89 | + "In this example, `\"admin1_iso\"` is used, which the function interprets as `\"cohorts_admin1_iso\"`, a column typically available in cohort-annotated metadata." |
| 90 | + ] |
| 91 | + }, |
63 | 92 | { |
64 | 93 | "cell_type": "code", |
65 | 94 | "execution_count": null, |
66 | 95 | "id": "3206fc04-1074-4f6c-8130-81dadff05c72", |
67 | 96 | "metadata": {}, |
68 | 97 | "outputs": [], |
| 98 | + "source": [ |
| 99 | + "ag3.plot_haplotype_network(\n", |
| 100 | + " region=\"2L:2,358,158-2,431,617\",\n", |
| 101 | + " analysis=\"gamb_colu\",\n", |
| 102 | + " sample_query=\"taxon == 'coluzzii'\",\n", |
| 103 | + " sample_sets=\"3.0\",\n", |
| 104 | + " color=\"admin1_iso\", # Implies \"cohorts_admin1_iso\"\n", |
| 105 | + " max_dist=2,\n", |
| 106 | + ")" |
| 107 | + ] |
| 108 | + }, |
| 109 | + { |
| 110 | + "cell_type": "markdown", |
| 111 | + "id": "44c6a40b", |
| 112 | + "metadata": {}, |
| 113 | + "source": [ |
| 114 | + "This example uses a dictionary to define custom color groups based on conditions applied to the `\"country\"` column." |
| 115 | + ] |
| 116 | + }, |
| 117 | + { |
| 118 | + "cell_type": "code", |
| 119 | + "execution_count": null, |
| 120 | + "id": "8236cd99", |
| 121 | + "metadata": {}, |
| 122 | + "outputs": [], |
| 123 | + "source": [ |
| 124 | + "color_mapping = {\n", |
| 125 | + " \"Ghana\": \"country == 'Ghana'\",\n", |
| 126 | + " \"Other\": \"country != 'Ghana'\"\n", |
| 127 | + "}\n", |
| 128 | + "ag3.plot_haplotype_network(\n", |
| 129 | + " region=\"2L:2,358,158-2,431,617\",\n", |
| 130 | + " analysis=\"gamb_colu\",\n", |
| 131 | + " sample_query=\"taxon == 'coluzzii'\",\n", |
| 132 | + " sample_sets=\"3.0\",\n", |
| 133 | + " color=color_mapping,\n", |
| 134 | + " max_dist=2,\n", |
| 135 | + ")" |
| 136 | + ] |
| 137 | + }, |
| 138 | + { |
| 139 | + "cell_type": "markdown", |
| 140 | + "id": "bd1962ba", |
| 141 | + "metadata": {}, |
| 142 | + "source": [ |
| 143 | + "Setting `color=None` applies the default coloring scheme, typically uniform across all nodes." |
| 144 | + ] |
| 145 | + }, |
| 146 | + { |
| 147 | + "cell_type": "code", |
| 148 | + "execution_count": null, |
| 149 | + "id": "eab4c6fb", |
| 150 | + "metadata": {}, |
| 151 | + "outputs": [], |
| 152 | + "source": [ |
| 153 | + "ag3.plot_haplotype_network(\n", |
| 154 | + " region=\"2L:2,358,158-2,431,617\",\n", |
| 155 | + " analysis=\"gamb_colu\",\n", |
| 156 | + " sample_query=\"taxon == 'coluzzii'\",\n", |
| 157 | + " sample_sets=\"3.0\",\n", |
| 158 | + " color=None,\n", |
| 159 | + " max_dist=2,\n", |
| 160 | + ")" |
| 161 | + ] |
| 162 | + }, |
| 163 | + { |
| 164 | + "cell_type": "markdown", |
| 165 | + "id": "20b54aa0", |
| 166 | + "metadata": {}, |
| 167 | + "source": [ |
| 168 | + "This replicates Example 1 but uses `server_mode=\"external\"`, useful for rendering plots in certain environments." |
| 169 | + ] |
| 170 | + }, |
| 171 | + { |
| 172 | + "cell_type": "code", |
| 173 | + "execution_count": null, |
| 174 | + "id": "698ab518", |
| 175 | + "metadata": {}, |
| 176 | + "outputs": [], |
69 | 177 | "source": [ |
70 | 178 | "ag3.plot_haplotype_network(\n", |
71 | 179 | " region=\"2L:2,358,158-2,431,617\",\n", |
|
94 | 202 | "metadata": {}, |
95 | 203 | "outputs": [], |
96 | 204 | "source": [ |
| 205 | + "# Initialize Af1 instance\n", |
97 | 206 | "af1 = malariagen_data.Af1(\n", |
98 | 207 | " \"simplecache::gs://vo_afun_release_master_us_central1\",\n", |
99 | 208 | " simplecache=dict(cache_storage=\"../gcs_cache\"),\n", |
|
102 | 211 | "af1" |
103 | 212 | ] |
104 | 213 | }, |
| 214 | + { |
| 215 | + "cell_type": "markdown", |
| 216 | + "id": "d8aeab21", |
| 217 | + "metadata": {}, |
| 218 | + "source": [ |
| 219 | + "Here, nodes are colored based on the `\"sample_set\"` column." |
| 220 | + ] |
| 221 | + }, |
105 | 222 | { |
106 | 223 | "cell_type": "code", |
107 | 224 | "execution_count": null, |
|
120 | 237 | ")" |
121 | 238 | ] |
122 | 239 | }, |
| 240 | + { |
| 241 | + "cell_type": "markdown", |
| 242 | + "id": "b1cde074", |
| 243 | + "metadata": {}, |
| 244 | + "source": [ |
| 245 | + "Using `\"year\"` implies the function looks for `\"cohorts_year\"` in the metadata." |
| 246 | + ] |
| 247 | + }, |
| 248 | + { |
| 249 | + "cell_type": "code", |
| 250 | + "execution_count": null, |
| 251 | + "id": "6d7fc155", |
| 252 | + "metadata": {}, |
| 253 | + "outputs": [], |
| 254 | + "source": [ |
| 255 | + "af1.plot_haplotype_network(\n", |
| 256 | + " region=\"2RL:2,358,158-2,431,617\",\n", |
| 257 | + " sample_query=\"country == 'Ghana'\",\n", |
| 258 | + " sample_sets=\"1.0\",\n", |
| 259 | + " color=\"year\", # Implies \"cohorts_year\"\n", |
| 260 | + " max_dist=2,\n", |
| 261 | + ")" |
| 262 | + ] |
| 263 | + }, |
| 264 | + { |
| 265 | + "cell_type": "markdown", |
| 266 | + "id": "e6e60160", |
| 267 | + "metadata": {}, |
| 268 | + "source": [ |
| 269 | + "A dictionary defines custom groups based on the `\"year\"` column (assuming year data is available)." |
| 270 | + ] |
| 271 | + }, |
| 272 | + { |
| 273 | + "cell_type": "code", |
| 274 | + "execution_count": null, |
| 275 | + "id": "882e6b8f", |
| 276 | + "metadata": {}, |
| 277 | + "outputs": [], |
| 278 | + "source": [ |
| 279 | + "color_mapping = {\n", |
| 280 | + " \"2012\": \"year == 2012\",\n", |
| 281 | + " \"2014\": \"year == 2014\"\n", |
| 282 | + "}\n", |
| 283 | + "af1.plot_haplotype_network(\n", |
| 284 | + " region=\"2RL:2,358,158-2,431,617\",\n", |
| 285 | + " sample_query=\"country == 'Ghana'\",\n", |
| 286 | + " sample_sets=\"1.0\",\n", |
| 287 | + " color=color_mapping,\n", |
| 288 | + " max_dist=2,\n", |
| 289 | + ")" |
| 290 | + ] |
| 291 | + }, |
| 292 | + { |
| 293 | + "cell_type": "markdown", |
| 294 | + "id": "485becad", |
| 295 | + "metadata": {}, |
| 296 | + "source": [ |
| 297 | + "With `color=None`, the default coloring is applied." |
| 298 | + ] |
| 299 | + }, |
123 | 300 | { |
124 | 301 | "cell_type": "code", |
125 | 302 | "execution_count": null, |
126 | | - "id": "42af79bc-35a6-4c96-ae5b-62bd46a30ad1", |
| 303 | + "id": "bd013c5c", |
127 | 304 | "metadata": {}, |
128 | 305 | "outputs": [], |
129 | | - "source": [] |
| 306 | + "source": [ |
| 307 | + "af1.plot_haplotype_network(\n", |
| 308 | + " region=\"2RL:2,358,158-2,431,617\",\n", |
| 309 | + " sample_query=\"country == 'Ghana'\",\n", |
| 310 | + " sample_sets=\"1.0\",\n", |
| 311 | + " color=None,\n", |
| 312 | + " max_dist=2,\n", |
| 313 | + ")" |
| 314 | + ] |
130 | 315 | } |
131 | 316 | ], |
132 | 317 | "metadata": { |
|
145 | 330 | "name": "python", |
146 | 331 | "nbconvert_exporter": "python", |
147 | 332 | "pygments_lexer": "ipython3", |
148 | | - "version": "3.10.12" |
149 | | - }, |
150 | | - "vscode": { |
151 | | - "interpreter": { |
152 | | - "hash": "3b9ddb1005cd06989fd869b9e3d566470f1be01faa610bb17d64e58e32302e8b" |
153 | | - } |
| 333 | + "version": "3.12.0" |
154 | 334 | }, |
155 | 335 | "widgets": { |
156 | 336 | "application/vnd.jupyter.widget-state+json": { |
|
0 commit comments