Skip to content

Commit 791dc1c

Browse files
authored
Merge pull request #639 from malariagen/GH473_add_sample_location_scatter_maps
Add sample location scatter maps
2 parents 5343506 + 848ae22 commit 791dc1c

6 files changed

Lines changed: 292 additions & 5 deletions

File tree

docs/source/Af1.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ Sample metadata access
5353
count_samples
5454
plot_samples_bar
5555
plot_samples_interactive_map
56+
plot_sample_location_mapbox
57+
plot_sample_location_geo
5658
wgs_data_catalog
5759
cohorts
5860

docs/source/Ag3.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ Sample metadata access
5454
lookup_sample
5555
plot_samples_bar
5656
plot_samples_interactive_map
57+
plot_sample_location_mapbox
58+
plot_sample_location_geo
5759
wgs_data_catalog
5860
cohorts
5961

malariagen_data/anoph/plotly_params.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,3 +185,33 @@
185185
on the graph.
186186
""",
187187
]
188+
189+
zoom: TypeAlias = Annotated[
190+
Optional[Union[int, float]],
191+
"Initial zoom level.",
192+
]
193+
194+
hover_name: TypeAlias = Annotated[
195+
Optional[Union[str, Mapping]],
196+
"Name of variable to appear in bold in the hover tooltip.",
197+
]
198+
199+
fitbounds: TypeAlias = Annotated[
200+
Optional[Union[bool, str]],
201+
"If False, does not auto-compute view settings to fit trace data. If 'locations', only visible locations are considered in the auto-compute. If 'geojson', the provided geojson is used.",
202+
]
203+
204+
scope: TypeAlias = Annotated[
205+
Optional[
206+
Literal[
207+
"africa",
208+
"asia",
209+
"europe",
210+
"north america",
211+
"south america",
212+
"usa",
213+
"world",
214+
]
215+
],
216+
"Scope of the map.",
217+
]

malariagen_data/anoph/sample_metadata.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,6 +1183,157 @@ def cohorts(
11831183

11841184
return df_cohorts
11851185

1186+
@check_types
1187+
@doc(
1188+
summary="""
1189+
Plot markers on a map showing sample locations
1190+
as a Mapbox scatter plot.
1191+
""",
1192+
parameters=dict(
1193+
kwargs="Passed through to px.scatter_mapbox().",
1194+
),
1195+
)
1196+
def plot_sample_location_mapbox(
1197+
self,
1198+
*,
1199+
sample_sets: Optional[base_params.sample_sets],
1200+
sample_query: Optional[base_params.sample_query] = None,
1201+
sample_query_options: Optional[base_params.sample_query_options] = None,
1202+
marker_size: plotly_params.marker_size = 10,
1203+
color: plotly_params.color = "admin1_name",
1204+
color_discrete_sequence: plotly_params.color_discrete_sequence = px.colors.qualitative.Prism,
1205+
category_orders: plotly_params.category_order = None,
1206+
hover_name: plotly_params.hover_name = "location",
1207+
zoom: plotly_params.zoom = None,
1208+
width: plotly_params.fig_width = 800,
1209+
height: plotly_params.fig_height = 600,
1210+
show: plotly_params.show = True,
1211+
renderer: plotly_params.renderer = None,
1212+
**kwargs,
1213+
) -> plotly_params.figure:
1214+
# Get the sample metadata.
1215+
df_samples = self.sample_metadata(
1216+
sample_sets=sample_sets,
1217+
sample_query=sample_query,
1218+
sample_query_options=sample_query_options,
1219+
)
1220+
1221+
# Set the location columns to use from the sample metadata.
1222+
location_columns = [
1223+
"country",
1224+
"admin1_iso",
1225+
"admin1_name",
1226+
"admin2_name",
1227+
"location",
1228+
"latitude",
1229+
"longitude",
1230+
]
1231+
1232+
# Trim and dedupe the sample locations.
1233+
# Sort by `color` column by default, which can be overridden via category_orders.
1234+
df_locations = df_samples[location_columns].drop_duplicates().sort_values(color)
1235+
1236+
fig = px.scatter_mapbox(
1237+
df_locations,
1238+
lat="latitude",
1239+
lon="longitude",
1240+
mapbox_style="open-street-map",
1241+
zoom=zoom,
1242+
color=color,
1243+
category_orders=category_orders,
1244+
color_discrete_sequence=color_discrete_sequence,
1245+
hover_name=hover_name,
1246+
hover_data=location_columns,
1247+
width=width,
1248+
height=height,
1249+
**kwargs,
1250+
)
1251+
1252+
# Set the size of the markers.
1253+
fig.update_traces(marker=dict(size=marker_size))
1254+
1255+
if show: # pragma: no cover
1256+
fig.show(renderer=renderer)
1257+
return None
1258+
else:
1259+
return fig
1260+
1261+
@check_types
1262+
@doc(
1263+
summary="""
1264+
Plot markers on a map showing sample locations
1265+
as a geographic scatter plot.
1266+
""",
1267+
parameters=dict(
1268+
kwargs="Passed through to px.scatter_geo().",
1269+
),
1270+
)
1271+
def plot_sample_location_geo(
1272+
self,
1273+
*,
1274+
sample_sets: Optional[base_params.sample_sets],
1275+
sample_query: Optional[base_params.sample_query] = None,
1276+
sample_query_options: Optional[base_params.sample_query_options] = None,
1277+
marker_size: plotly_params.marker_size = 10,
1278+
color: plotly_params.color = "admin1_name",
1279+
color_discrete_sequence: plotly_params.color_discrete_sequence = px.colors.qualitative.Prism,
1280+
category_orders: plotly_params.category_order = None,
1281+
hover_name: plotly_params.hover_name = "location",
1282+
fitbounds: plotly_params.fitbounds = "locations",
1283+
scope: plotly_params.scope = "world",
1284+
width: plotly_params.fig_width = 800,
1285+
height: plotly_params.fig_height = 600,
1286+
show: plotly_params.show = True,
1287+
renderer: plotly_params.renderer = None,
1288+
**kwargs,
1289+
) -> plotly_params.figure:
1290+
# Get the sample metadata.
1291+
df_samples = self.sample_metadata(
1292+
sample_sets=sample_sets,
1293+
sample_query=sample_query,
1294+
sample_query_options=sample_query_options,
1295+
)
1296+
1297+
# Set the location columns to use from the sample metadata.
1298+
location_columns = [
1299+
"country",
1300+
"admin1_iso",
1301+
"admin1_name",
1302+
"admin2_name",
1303+
"location",
1304+
"latitude",
1305+
"longitude",
1306+
]
1307+
1308+
# Trim and dedupe the sample locations.
1309+
# Sort by `color` column by default, which can be overridden via category_orders.
1310+
df_locations = df_samples[location_columns].drop_duplicates().sort_values(color)
1311+
1312+
fig = px.scatter_geo(
1313+
df_locations,
1314+
lat="latitude",
1315+
lon="longitude",
1316+
scope=scope,
1317+
height=height,
1318+
width=width,
1319+
color=color,
1320+
hover_name=hover_name,
1321+
hover_data=location_columns,
1322+
category_orders=category_orders,
1323+
color_discrete_sequence=color_discrete_sequence,
1324+
fitbounds=fitbounds,
1325+
**kwargs,
1326+
)
1327+
1328+
# Set the size of the markers.
1329+
fig.update_traces(marker=dict(size=marker_size))
1330+
1331+
if show: # pragma: no cover
1332+
fig.show(renderer=renderer)
1333+
return None
1334+
else:
1335+
return fig
1336+
11861337

11871338
def locate_cohorts(*, cohorts, data):
11881339
# Build cohort dictionary where key=cohort_id, value=loc_coh.

notebooks/plot_samples.ipynb

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": null,
5+
"execution_count": 1,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -109,6 +109,33 @@
109109
")"
110110
]
111111
},
112+
{
113+
"cell_type": "code",
114+
"execution_count": null,
115+
"metadata": {},
116+
"outputs": [],
117+
"source": [
118+
"ag3.plot_sample_location_mapbox(\n",
119+
" color='country',\n",
120+
" sample_sets=[\"3.0\"],\n",
121+
" sample_query=\"year > 2012\",\n",
122+
" zoom=2\n",
123+
")"
124+
]
125+
},
126+
{
127+
"cell_type": "code",
128+
"execution_count": null,
129+
"metadata": {},
130+
"outputs": [],
131+
"source": [
132+
"ag3.plot_sample_location_geo(\n",
133+
" color='country',\n",
134+
" sample_sets=[\"3.0\"],\n",
135+
" sample_query=\"year > 2012\",\n",
136+
")"
137+
]
138+
},
112139
{
113140
"attachments": {},
114141
"cell_type": "markdown",
@@ -168,7 +195,52 @@
168195
"metadata": {},
169196
"outputs": [],
170197
"source": [
171-
"help(ag3.plot_samples_bar)"
198+
"af1.plot_sample_location_mapbox(\n",
199+
" color='country',\n",
200+
" sample_sets=[\"1.0\"],\n",
201+
" sample_query=\"year > 2015\",\n",
202+
" zoom=2\n",
203+
")"
204+
]
205+
},
206+
{
207+
"cell_type": "code",
208+
"execution_count": null,
209+
"metadata": {},
210+
"outputs": [],
211+
"source": [
212+
"af1.plot_sample_location_geo(\n",
213+
" color='country',\n",
214+
" sample_sets=[\"1.0\"],\n",
215+
" sample_query=\"year > 2015\",\n",
216+
")"
217+
]
218+
},
219+
{
220+
"cell_type": "code",
221+
"execution_count": null,
222+
"metadata": {},
223+
"outputs": [],
224+
"source": [
225+
"help(af1.plot_samples_bar)"
226+
]
227+
},
228+
{
229+
"cell_type": "code",
230+
"execution_count": null,
231+
"metadata": {},
232+
"outputs": [],
233+
"source": [
234+
"help(af1.plot_samples_interactive_map)"
235+
]
236+
},
237+
{
238+
"cell_type": "code",
239+
"execution_count": null,
240+
"metadata": {},
241+
"outputs": [],
242+
"source": [
243+
"help(af1.plot_sample_location_mapbox)"
172244
]
173245
},
174246
{
@@ -177,7 +249,7 @@
177249
"metadata": {},
178250
"outputs": [],
179251
"source": [
180-
"help(ag3.plot_samples_interactive_map)"
252+
"help(af1.plot_sample_location_geo)"
181253
]
182254
},
183255
{
@@ -190,7 +262,7 @@
190262
],
191263
"metadata": {
192264
"kernelspec": {
193-
"display_name": "Python 3 (ipykernel)",
265+
"display_name": "mgen_data_py3.11",
194266
"language": "python",
195267
"name": "python3"
196268
},
@@ -204,7 +276,7 @@
204276
"name": "python",
205277
"nbconvert_exporter": "python",
206278
"pygments_lexer": "ipython3",
207-
"version": "3.10.12"
279+
"version": "3.11.5"
208280
},
209281
"widgets": {
210282
"application/vnd.jupyter.widget-state+json": {

tests/anoph/test_sample_metadata.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -937,6 +937,36 @@ def test_plot_samples_bar(fixture, api):
937937
assert isinstance(fig, go.Figure)
938938

939939

940+
@parametrize_with_cases("fixture,api", cases=".")
941+
def test_plot_sample_location_mapbox(fixture, api):
942+
# Get test sample_sets.
943+
df_sample_sets = api.sample_sets().set_index("sample_set")
944+
all_sample_sets = df_sample_sets.index.to_list()
945+
sample_sets = random.sample(all_sample_sets, 2)
946+
947+
fig = api.plot_sample_location_mapbox(
948+
sample_sets=sample_sets,
949+
show=False,
950+
)
951+
952+
assert isinstance(fig, go.Figure)
953+
954+
955+
@parametrize_with_cases("fixture,api", cases=".")
956+
def test_plot_sample_location_geo(fixture, api):
957+
# Get test sample_sets.
958+
df_sample_sets = api.sample_sets().set_index("sample_set")
959+
all_sample_sets = df_sample_sets.index.to_list()
960+
sample_sets = random.sample(all_sample_sets, 2)
961+
962+
fig = api.plot_sample_location_geo(
963+
sample_sets=sample_sets,
964+
show=False,
965+
)
966+
967+
assert isinstance(fig, go.Figure)
968+
969+
940970
@parametrize_with_cases("fixture,api", cases=".")
941971
def test_lookup_sample(fixture, api):
942972
# Set up test.

0 commit comments

Comments
 (0)