Skip to content

Commit 57c17d4

Browse files
committed
Fix and add tests; remove duplicate assertions
1 parent eed5276 commit 57c17d4

3 files changed

Lines changed: 38 additions & 12 deletions

File tree

malariagen_data/anoph/hapclust.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,12 +262,15 @@ def haplotype_pairwise_distances(
262262
from scipy.spatial.distance import squareform
263263

264264
dist_square = squareform(dist)
265+
# Each phased sample contributes 2 haplotypes; create
266+
# haplotype-level labels to match the distance matrix.
267+
hap_labels = np.repeat(phased_samples, 2)
265268
ds = xr.Dataset(
266269
data_vars={
267270
"dist": (("sample_x", "sample_y"), dist_square),
268271
},
269272
coords={
270-
"sample_id": ("sample_x", phased_samples),
273+
"sample_id": ("sample_x", hap_labels),
271274
},
272275
attrs={"n_snps": n_snps},
273276
)

tests/anoph/test_distance.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -124,17 +124,6 @@ def check_biallelic_diplotype_pairwise_distance(*, api, data_params, metric):
124124
assert n_snps_used >= n_snps
125125
assert n_snps_used <= n_snps_available
126126

127-
assert isinstance(dist, np.ndarray)
128-
assert isinstance(samples, np.ndarray)
129-
assert isinstance(n_snps_used, int)
130-
131-
assert dist.ndim == 1 # condensed form distance matrix
132-
assert dist.shape[0] == int((n_samples * (n_samples - 1)) / 2)
133-
assert samples.ndim == 1
134-
assert samples.shape[0] == n_samples
135-
assert n_snps_used >= n_snps
136-
assert n_snps_used <= n_snps_available
137-
138127
ds = api.biallelic_diplotype_pairwise_distances(
139128
n_snps=n_snps,
140129
metric=metric,

tests/anoph/test_hapclust.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,37 @@ def test_plot_haplotype_sharing_chord(fixture, api: AnophelesHapClustAnalysis):
124124
show=False,
125125
)
126126
assert fig is not None
127+
128+
129+
@parametrize_with_cases("fixture,api", cases=".")
def test_haplotype_pairwise_distances(fixture, api: AnophelesHapClustAnalysis):
    """Check both return modes of ``api.haplotype_pairwise_distances``.

    The legacy mode returns a ``(dist, phased_samples, n_snps)`` tuple with
    ``dist`` as a condensed distance vector. With ``return_dataset=True`` an
    ``xarray.Dataset`` is returned holding the square matrix. In both modes
    the distances are haplotype-level: each phased sample contributes two
    haplotypes, so the ``sample_id`` coordinate must carry one label per
    haplotype, not one per sample.
    """
    import xarray as xr

    all_sample_sets = api.sample_sets()["sample_set"].to_list()
    region = fixture.random_region_str(region_size=5000)
    sample_sets = [str(np.random.choice(all_sample_sets))]

    # Test legacy tuple return.
    dist, phased_samples, n_snps = api.haplotype_pairwise_distances(
        region=region,
        sample_sets=sample_sets,
    )
    assert isinstance(dist, np.ndarray)
    assert isinstance(phased_samples, np.ndarray)
    assert isinstance(n_snps, int)
    assert dist.ndim == 1  # condensed form
    assert phased_samples.ndim == 1
    # Each phased sample contributes 2 haplotypes.
    n_haps = 2 * len(phased_samples)
    assert dist.shape[0] == int((n_haps * (n_haps - 1)) / 2)

    # Test dataset return mode.
    ds = api.haplotype_pairwise_distances(
        region=region,
        sample_sets=sample_sets,
        return_dataset=True,
    )
    assert isinstance(ds, xr.Dataset)
    assert "dist" in ds
    assert "sample_id" in ds.coords
    assert ds["dist"].dims == ("sample_x", "sample_y")
    assert ds["dist"].shape == (n_haps, n_haps)
    assert ds.attrs["n_snps"] == n_snps

    # The labels must be haplotype-level: every sample ID repeated twice, in
    # order, along the first dimension of the distance matrix.
    sample_id = ds.coords["sample_id"]
    assert sample_id.dims == ("sample_x",)
    assert sample_id.shape == (n_haps,)
    assert np.array_equal(sample_id.values, np.repeat(phased_samples, 2))

0 commit comments

Comments
 (0)