Skip to content

Commit 53f5144

Browse files
authored
Merge branch 'master' into GH903-api-introspection
2 parents a07c0fa + 2d3d2f9 commit 53f5144

5 files changed

Lines changed: 57 additions & 29 deletions

File tree

malariagen_data/anoph/dipclust.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -542,10 +542,10 @@ def _dipclust_concat_subplots(
542542
if isinstance(figure, go.Figure):
543543
# This is a figure, access the traces within it.
544544
for trace in range(len(figure["data"])):
545-
fig.append_trace(figure["data"][trace], row=i + 1, col=1)
545+
fig.add_trace(figure["data"][trace], row=i + 1, col=1)
546546
else:
547547
# Assume this is a trace, add directly.
548-
fig.append_trace(figure, row=i + 1, col=1)
548+
fig.add_trace(figure, row=i + 1, col=1)
549549

550550
fig.update_xaxes(visible=False)
551551
fig.update_layout(

malariagen_data/anoph/sample_metadata.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1569,11 +1569,11 @@ def plot_sample_location_mapbox(
15691569
# Sort by `color` column by default, which can be overridden via category_orders.
15701570
df_locations = df_samples[location_columns].drop_duplicates().sort_values(color)
15711571

1572-
fig = px.scatter_mapbox(
1572+
fig = px.scatter_map(
15731573
df_locations,
15741574
lat="latitude",
15751575
lon="longitude",
1576-
mapbox_style="open-street-map",
1576+
map_style="open-street-map",
15771577
zoom=zoom,
15781578
color=color,
15791579
category_orders=category_orders,

malariagen_data/plasmodium.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import json
22

3+
import os
34
import dask.array as da
45
import pandas as pd
56
import xarray
@@ -59,7 +60,7 @@ def sample_metadata(self):
5960
One row per sample.
6061
"""
6162
if self._cache_sample_metadata is None:
62-
path = f"{self._path}/{self.CONF['metadata_path']}"
63+
path = os.path.join(self._path, self.CONF["metadata_path"])
6364
with self._fs.open(path) as f:
6465
self._cache_sample_metadata = pd.read_csv(f, sep="\t", na_values="")
6566
return self._cache_sample_metadata
@@ -74,7 +75,7 @@ def _open_variant_calls_zarr(self):
7475
7576
"""
7677
if self._cache_variant_calls_zarr is None:
77-
path = f"{self._path}/{self.CONF['variant_calls_zarr_path']}"
78+
path = os.path.join(self._path, self.CONF["variant_calls_zarr_path"])
7879
store = _init_zarr_store(fs=self._fs, path=path)
7980
self._cache_variant_calls_zarr = zarr.open_consolidated(store=store)
8081
return self._cache_variant_calls_zarr
@@ -204,7 +205,7 @@ def open_genome(self):
204205
205206
"""
206207
if self._cache_genome is None:
207-
path = f"{self._path}/{self.CONF['reference_path']}"
208+
path = os.path.join(self._path, self.CONF["reference_path"])
208209
store = _init_zarr_store(fs=self._fs, path=path)
209210
self._cache_genome = zarr.open_consolidated(store=store)
210211
return self._cache_genome
@@ -316,7 +317,7 @@ def genome_features(self, attributes=("ID", "Parent", "Name")):
316317
try:
317318
df = self._cache_genome_features[attributes]
318319
except KeyError:
319-
path = f"{self._path}/{self.CONF['annotations_path']}"
320+
path = os.path.join(self._path, self.CONF["annotations_path"])
320321
with self._fs.open(path, mode="rb") as f:
321322
df = _read_gff3(f, compression="gzip")
322323
if attributes is not None:

pyproject.toml

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,10 @@ readme = "README.md"
66
documentation = "https://malariagen.github.io/malariagen-data-python/latest/"
77
repository = "https://github.com/malariagen/malariagen-data-python"
88
authors = [
9-
"Alistair Miles <alistair.miles@sanger.ac.uk>",
10-
"Chris Clarkson <chris.clarkson@sanger.ac.uk>",
11-
"Anastasia Hernandez-Koutoucheva <ah32@sanger.ac.uk>",
12-
"Lee Hart <lee.hart@sanger.ac.uk>",
13-
"Kathryn Murie <km22@sanger.ac.uk>",
14-
"Nace Kranjc <n.kranjc@imperial.ac.uk>",
15-
"Kelly Bennett <kb25@sanger.ac.uk>",
16-
"Jon Brenas <jb52@sanger.ac.uk>",
17-
"Sanjay Nagi <sanjay.nagi@lstmed.ac.uk>",
9+
"Chris Clarkson <Chris.Clarkson@lstmed.ac.uk>",
10+
"Anastasia Hernandez-Koutoucheva <Anastasia.Hernandez-Koutoucheva@lstmed.ac.uk>",
11+
"Kelly Bennett <Kelly.Bennett@lstmed.ac.uk>",
12+
"Jon Brenas <Jon.Brenas@lstmed.ac.uk>",
1813
"Tristan Dennis <Tristan.Dennis@lstmed.ac.uk>",
1914
]
2015
license = "MIT"

tests/anoph/test_dipclust.py

Lines changed: 44 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -80,10 +80,11 @@ def case_af1_sim(af1_sim_fixture, af1_sim_api):
8080
return af1_sim_fixture, af1_sim_api
8181

8282

83+
@pytest.mark.parametrize("sample_query", [None, "sex_call == 'F'"])
8384
@pytest.mark.parametrize("distance_metric", ["cityblock", "euclidean"])
8485
@parametrize_with_cases("fixture,api", cases=".")
8586
def test_plot_diplotype_clustering(
86-
fixture, api: AnophelesDipClustAnalysis, distance_metric
87+
fixture, api: AnophelesDipClustAnalysis, distance_metric, sample_query
8788
):
8889
# Set up test parameters.
8990
all_sample_sets = api.sample_sets()["sample_set"].to_list()
@@ -96,24 +97,32 @@ def test_plot_diplotype_clustering(
9697
"median",
9798
"ward",
9899
)
99-
sample_queries = (None, "sex_call == 'F'")
100100
dipclust_params = dict(
101101
region=fixture.random_region_str(region_size=5000),
102102
sample_sets=[random.choice(all_sample_sets)],
103103
linkage_method=random.choice(linkage_methods),
104104
distance_metric=distance_metric,
105-
sample_query=random.choice(sample_queries),
105+
sample_query=sample_query,
106106
show=False,
107107
)
108108

109+
# Check if any samples match the query.
110+
if sample_query is not None:
111+
df_samples = api.sample_metadata().query(sample_query)
112+
if len(df_samples) == 0:
113+
with pytest.raises(ValueError):
114+
api.plot_diplotype_clustering(**dipclust_params)
115+
return
116+
109117
# Run checks.
110118
api.plot_diplotype_clustering(**dipclust_params)
111119

112120

121+
@pytest.mark.parametrize("sample_query", [None, "sex_call == 'F'"])
113122
@pytest.mark.parametrize("distance_metric", ["cityblock", "euclidean"])
114123
@parametrize_with_cases("fixture,api", cases=".")
115124
def test_plot_diplotype_clustering_advanced(
116-
fixture, api: AnophelesDipClustAnalysis, distance_metric
125+
fixture, api: AnophelesDipClustAnalysis, distance_metric, sample_query
117126
):
118127
# Set up test parameters.
119128
all_sample_sets = api.sample_sets()["sample_set"].to_list()
@@ -126,24 +135,32 @@ def test_plot_diplotype_clustering_advanced(
126135
"median",
127136
"ward",
128137
)
129-
sample_queries = (None, "sex_call == 'F'")
130138
dipclust_params = dict(
131139
region=fixture.random_region_str(region_size=5000),
132140
sample_sets=[random.choice(all_sample_sets)],
133141
linkage_method=random.choice(linkage_methods),
134142
distance_metric=distance_metric,
135-
sample_query=random.choice(sample_queries),
143+
sample_query=sample_query,
136144
show=False,
137145
)
138146

147+
# Check if any samples match the query.
148+
if sample_query is not None:
149+
df_samples = api.sample_metadata().query(sample_query)
150+
if len(df_samples) == 0:
151+
with pytest.raises(ValueError):
152+
api.plot_diplotype_clustering_advanced(**dipclust_params)
153+
return
154+
139155
# Run checks.
140156
api.plot_diplotype_clustering_advanced(**dipclust_params)
141157

142158

159+
@pytest.mark.parametrize("sample_query", [None, "sex_call == 'F'"])
143160
@pytest.mark.parametrize("n", [1, 2])
144161
@parametrize_with_cases("fixture,api", cases=".")
145162
def test_plot_diplotype_clustering_advanced_with_transcript(
146-
fixture, api: AnophelesDipClustAnalysis, n
163+
fixture, api: AnophelesDipClustAnalysis, n, sample_query
147164
):
148165
# Set up test parameters.
149166
contig = fixture.random_contig()
@@ -158,24 +175,32 @@ def test_plot_diplotype_clustering_advanced_with_transcript(
158175
"median",
159176
"ward",
160177
)
161-
sample_queries = (None, "sex_call == 'F'")
162178
dipclust_params = dict(
163179
region=contig,
164180
snp_transcript=transcripts,
165181
sample_sets=[random.choice(all_sample_sets)],
166182
linkage_method=random.choice(linkage_methods),
167183
distance_metric="cityblock",
168-
sample_query=random.choice(sample_queries),
184+
sample_query=sample_query,
169185
show=False,
170186
)
171187

188+
# Check if any samples match the query.
189+
if sample_query is not None:
190+
df_samples = api.sample_metadata().query(sample_query)
191+
if len(df_samples) == 0:
192+
with pytest.raises(ValueError):
193+
api.plot_diplotype_clustering_advanced(**dipclust_params)
194+
return
195+
172196
# Run checks.
173197
api.plot_diplotype_clustering_advanced(**dipclust_params)
174198

175199

200+
@pytest.mark.parametrize("sample_query", [None, "sex_call == 'F'"])
176201
@parametrize_with_cases("fixture,api", cases=".")
177202
def test_plot_diplotype_clustering_advanced_with_cnv_region(
178-
fixture, api: AnophelesDipClustAnalysis
203+
fixture, api: AnophelesDipClustAnalysis, sample_query
179204
):
180205
# Set up test parameters.
181206
region = fixture.random_region_str(region_size=5000)
@@ -189,16 +214,23 @@ def test_plot_diplotype_clustering_advanced_with_cnv_region(
189214
"median",
190215
"ward",
191216
)
192-
sample_queries = (None, "sex_call == 'F'")
193217
dipclust_params = dict(
194218
region=region,
195219
cnv_region=region,
196220
sample_sets=[random.choice(all_sample_sets)],
197221
linkage_method=random.choice(linkage_methods),
198222
distance_metric="cityblock",
199-
sample_query=random.choice(sample_queries),
223+
sample_query=sample_query,
200224
show=False,
201225
)
202226

227+
# Check if any samples match the query.
228+
if sample_query is not None:
229+
df_samples = api.sample_metadata().query(sample_query)
230+
if len(df_samples) == 0:
231+
with pytest.raises(ValueError):
232+
api.plot_diplotype_clustering_advanced(**dipclust_params)
233+
return
234+
203235
# Run checks.
204236
api.plot_diplotype_clustering_advanced(**dipclust_params)

0 commit comments

Comments
 (0)