Skip to content

Commit 6822900

Browse files
fix(haplotype_network): fix an issue with color parameter handling (mapping case); update tests with assertions; update notebook with examples
1 parent d5b0f95 commit 6822900

4 files changed

Lines changed: 385 additions & 15 deletions

File tree

malariagen_data/anopheles.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2256,8 +2256,10 @@ def plot_haplotype_network(
22562256
)
22572257

22582258
# Now use the validated color_column for processing
2259-
df_haps["partition"] = df_haps[color_column].str.replace(
2260-
r"\W", "", regex=True
2259+
df_haps["partition"] = (
2260+
df_haps[color_column]
2261+
.astype(str)
2262+
.str.replace(r"\W", "", regex=True)
22612263
)
22622264

22632265
# extract all unique values of the color column
@@ -2337,7 +2339,7 @@ def plot_haplotype_network(
23372339
ht_distinct_mjn=ht_distinct_mjn,
23382340
ht_counts=ht_counts,
23392341
ht_color_counts=ht_color_counts,
2340-
color=color,
2342+
color="partition" if color is not None else None,
23412343
color_values=color_values,
23422344
edges=edges,
23432345
alt_edges=alt_edges,
@@ -2389,7 +2391,7 @@ def plot_haplotype_network(
23892391
debug("create figure legend")
23902392
if color is not None:
23912393
legend_fig = plotly_discrete_legend(
2392-
color=color,
2394+
color="partition", # Changed from color=color
23932395
color_values=color_values_display,
23942396
color_discrete_map=color_discrete_map_display,
23952397
category_orders=category_orders_prepped,

notebooks/plot_haplotype_networks.ipynb

Lines changed: 191 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,18 @@
11
{
22
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "f6b86889",
6+
"metadata": {},
7+
"source": [
8+
"# Haplotype Network Plotting Examples\n",
9+
"This notebook demonstrates the `plot_haplotype_network` function from the `malariagen_data` package, showcasing different ways to use the `color` parameter to visualize haplotype networks."
10+
]
11+
},
312
{
413
"cell_type": "code",
514
"execution_count": null,
6-
"id": "9de62268",
15+
"id": "1cadfacf",
716
"metadata": {},
817
"outputs": [],
918
"source": [
@@ -26,6 +35,7 @@
2635
"metadata": {},
2736
"outputs": [],
2837
"source": [
38+
"# Initialize Ag3 instance\n",
2939
"ag3 = malariagen_data.Ag3(\n",
3040
" \"simplecache::gs://vo_agam_release_master_us_central1\",\n",
3141
" simplecache=dict(cache_storage=\"../gcs_cache\"),\n",
@@ -40,7 +50,16 @@
4050
"id": "e3ffe116",
4151
"metadata": {},
4252
"source": [
43-
"N.B., manually specifying the server_port parameter doesn't seem to be necessary on colab, but is needed when running locally via Jupyter notebook, otherwise get \"Address already in use\" error and cannot run multiple plots in same notebook. "
53+
"N.B., manually specifying the server_port parameter doesn't seem to be necessary on Colab, but is needed when running locally via Jupyter notebook; otherwise you get an \"Address already in use\" error and cannot run multiple plots in the same notebook."
54+
]
55+
},
56+
{
57+
"cell_type": "markdown",
58+
"id": "e5687f24",
59+
"metadata": {},
60+
"source": [
61+
"## Example 1: Direct Column Name (String)\n",
62+
"Use a direct column name like 'country' to color nodes by country."
4463
]
4564
},
4665
{
@@ -50,22 +69,111 @@
5069
"metadata": {},
5170
"outputs": [],
5271
"source": [
72+
"# Plot haplotype network with country coloring\n",
5373
"ag3.plot_haplotype_network(\n",
5474
" region=\"2L:2,358,158-2,431,617\",\n",
5575
" analysis=\"gamb_colu\",\n",
56-
" sample_query=\"taxon == 'coluzzii'\",\n",
5776
" sample_sets=\"3.0\",\n",
77+
" sample_query=\"taxon == 'coluzzii'\",\n",
5878
" color=\"country\",\n",
5979
" max_dist=2,\n",
6080
")"
6181
]
6282
},
83+
{
84+
"cell_type": "markdown",
85+
"id": "2798b459",
86+
"metadata": {},
87+
"source": [
88+
"## Example 2: Cohorts Prefix (String)\n",
89+
"In this example, `\"admin1_iso\"` is used, which the function interprets as `\"cohorts_admin1_iso\"`, a column typically available in cohort-annotated metadata."
90+
]
91+
},
6392
{
6493
"cell_type": "code",
6594
"execution_count": null,
6695
"id": "3206fc04-1074-4f6c-8130-81dadff05c72",
6796
"metadata": {},
6897
"outputs": [],
98+
"source": [
99+
"ag3.plot_haplotype_network(\n",
100+
" region=\"2L:2,358,158-2,431,617\",\n",
101+
" analysis=\"gamb_colu\",\n",
102+
" sample_query=\"taxon == 'coluzzii'\",\n",
103+
" sample_sets=\"3.0\",\n",
104+
" color=\"admin1_iso\", # Implies \"cohorts_admin1_iso\"\n",
105+
" max_dist=2,\n",
106+
")"
107+
]
108+
},
109+
{
110+
"cell_type": "markdown",
111+
"id": "44c6a40b",
112+
"metadata": {},
113+
"source": [
114+
"This example uses a dictionary to define custom color groups based on conditions applied to the `\"country\"` column."
115+
]
116+
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": null,
120+
"id": "8236cd99",
121+
"metadata": {},
122+
"outputs": [],
123+
"source": [
124+
"color_mapping = {\n",
125+
" \"Ghana\": \"country == 'Ghana'\",\n",
126+
" \"Other\": \"country != 'Ghana'\"\n",
127+
"}\n",
128+
"ag3.plot_haplotype_network(\n",
129+
" region=\"2L:2,358,158-2,431,617\",\n",
130+
" analysis=\"gamb_colu\",\n",
131+
" sample_query=\"taxon == 'coluzzii'\",\n",
132+
" sample_sets=\"3.0\",\n",
133+
" color=color_mapping,\n",
134+
" max_dist=2,\n",
135+
")"
136+
]
137+
},
138+
{
139+
"cell_type": "markdown",
140+
"id": "bd1962ba",
141+
"metadata": {},
142+
"source": [
143+
"Setting `color=None` applies the default coloring scheme, typically uniform across all nodes."
144+
]
145+
},
146+
{
147+
"cell_type": "code",
148+
"execution_count": null,
149+
"id": "eab4c6fb",
150+
"metadata": {},
151+
"outputs": [],
152+
"source": [
153+
"ag3.plot_haplotype_network(\n",
154+
" region=\"2L:2,358,158-2,431,617\",\n",
155+
" analysis=\"gamb_colu\",\n",
156+
" sample_query=\"taxon == 'coluzzii'\",\n",
157+
" sample_sets=\"3.0\",\n",
158+
" color=None,\n",
159+
" max_dist=2,\n",
160+
")"
161+
]
162+
},
163+
{
164+
"cell_type": "markdown",
165+
"id": "20b54aa0",
166+
"metadata": {},
167+
"source": [
168+
"This replicates Example 1 but uses `server_mode=\"external\"`, useful for rendering plots in certain environments."
169+
]
170+
},
171+
{
172+
"cell_type": "code",
173+
"execution_count": null,
174+
"id": "698ab518",
175+
"metadata": {},
176+
"outputs": [],
69177
"source": [
70178
"ag3.plot_haplotype_network(\n",
71179
" region=\"2L:2,358,158-2,431,617\",\n",
@@ -94,6 +202,7 @@
94202
"metadata": {},
95203
"outputs": [],
96204
"source": [
205+
"# Initialize Af1 instance\n",
97206
"af1 = malariagen_data.Af1(\n",
98207
" \"simplecache::gs://vo_afun_release_master_us_central1\",\n",
99208
" simplecache=dict(cache_storage=\"../gcs_cache\"),\n",
@@ -102,6 +211,14 @@
102211
"af1"
103212
]
104213
},
214+
{
215+
"cell_type": "markdown",
216+
"id": "d8aeab21",
217+
"metadata": {},
218+
"source": [
219+
"Here, nodes are colored based on the `\"sample_set\"` column."
220+
]
221+
},
105222
{
106223
"cell_type": "code",
107224
"execution_count": null,
@@ -120,13 +237,81 @@
120237
")"
121238
]
122239
},
240+
{
241+
"cell_type": "markdown",
242+
"id": "b1cde074",
243+
"metadata": {},
244+
"source": [
245+
"Using `\"year\"` implies the function looks for `\"cohorts_year\"` in the metadata."
246+
]
247+
},
248+
{
249+
"cell_type": "code",
250+
"execution_count": null,
251+
"id": "6d7fc155",
252+
"metadata": {},
253+
"outputs": [],
254+
"source": [
255+
"af1.plot_haplotype_network(\n",
256+
" region=\"2RL:2,358,158-2,431,617\",\n",
257+
" sample_query=\"country == 'Ghana'\",\n",
258+
" sample_sets=\"1.0\",\n",
259+
" color=\"year\", # Implies \"cohorts_year\"\n",
260+
" max_dist=2,\n",
261+
")"
262+
]
263+
},
264+
{
265+
"cell_type": "markdown",
266+
"id": "e6e60160",
267+
"metadata": {},
268+
"source": [
269+
"A dictionary defines custom groups based on the `\"year\"` column (assuming year data is available)."
270+
]
271+
},
272+
{
273+
"cell_type": "code",
274+
"execution_count": null,
275+
"id": "882e6b8f",
276+
"metadata": {},
277+
"outputs": [],
278+
"source": [
279+
"color_mapping = {\n",
280+
" \"2012\": \"year == 2012\",\n",
281+
" \"2014\": \"year == 2014\"\n",
282+
"}\n",
283+
"af1.plot_haplotype_network(\n",
284+
" region=\"2RL:2,358,158-2,431,617\",\n",
285+
" sample_query=\"country == 'Ghana'\",\n",
286+
" sample_sets=\"1.0\",\n",
287+
" color=color_mapping,\n",
288+
" max_dist=2,\n",
289+
")"
290+
]
291+
},
292+
{
293+
"cell_type": "markdown",
294+
"id": "485becad",
295+
"metadata": {},
296+
"source": [
297+
"With `color=None`, the default coloring is applied."
298+
]
299+
},
123300
{
124301
"cell_type": "code",
125302
"execution_count": null,
126-
"id": "42af79bc-35a6-4c96-ae5b-62bd46a30ad1",
303+
"id": "bd013c5c",
127304
"metadata": {},
128305
"outputs": [],
129-
"source": []
306+
"source": [
307+
"af1.plot_haplotype_network(\n",
308+
" region=\"2RL:2,358,158-2,431,617\",\n",
309+
" sample_query=\"country == 'Ghana'\",\n",
310+
" sample_sets=\"1.0\",\n",
311+
" color=None,\n",
312+
" max_dist=2,\n",
313+
")"
314+
]
130315
}
131316
],
132317
"metadata": {
@@ -145,12 +330,7 @@
145330
"name": "python",
146331
"nbconvert_exporter": "python",
147332
"pygments_lexer": "ipython3",
148-
"version": "3.10.12"
149-
},
150-
"vscode": {
151-
"interpreter": {
152-
"hash": "3b9ddb1005cd06989fd869b9e3d566470f1be01faa610bb17d64e58e32302e8b"
153-
}
333+
"version": "3.12.0"
154334
},
155335
"widgets": {
156336
"application/vnd.jupyter.widget-state+json": {

0 commit comments

Comments
 (0)