Skip to content

Commit 5f8e855

Browse files
committed
Last set of easy changes
1 parent a168e03 commit 5f8e855

1 file changed

Lines changed: 10 additions & 12 deletions

File tree

malariagen_data/anoph/hap_frq.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import numpy as np
55
import xarray as xr
66
import allel
7-
import dask.array as da
87
from numpydoc_decorator import doc # type: ignore
98

109
from ..util import check_types, haplotype_frequencies
@@ -95,8 +94,8 @@ def haplotypes_frequencies(
9594

9695
n_samples = np.count_nonzero(loc_coh)
9796
assert n_samples >= min_cohort_size
98-
gt_coh = allel.GenotypeDaskArray(da.compress(loc_coh, gt, axis=1))
99-
gt_hap = gt_coh.to_haplotypes().compute()
97+
gt_coh = gt.compress(loc_coh, axis=1)
98+
gt_hap = gt_coh.to_haplotypes()
10099
f, _, _ = haplotype_frequencies(gt_hap)
101100
# The frequencies of the observed haplotypes are then updated
102101
hap_dict.update(f)
@@ -171,12 +170,6 @@ def haplotypes_frequencies_advanced(
171170
min_cohort_size=min_cohort_size,
172171
)
173172

174-
# Early check for no cohorts.
175-
if len(df_cohorts) == 0:
176-
raise ValueError(
177-
"No cohorts available for the given sample selection parameters and minimum cohort size."
178-
)
179-
180173
# Access haplotypes.
181174
ds_haps = self.haplotypes(
182175
region=region,
@@ -220,9 +213,8 @@ def haplotypes_frequencies_advanced(
220213
hap_nob = {k: 2 * n_samples for k in f_all.keys()}
221214
assert n_samples >= min_cohort_size
222215
sample_indices = group_samples_by_cohort.indices[cohort_key]
223-
loc_coh = [i in sample_indices for i in range(0, gt.shape[1])]
224-
gt_coh = allel.GenotypeDaskArray(da.compress(loc_coh, gt, axis=1))
225-
gt_hap = gt_coh.to_haplotypes().compute()
216+
gt_coh = gt.take(sample_indices, axis=1)
217+
gt_hap = gt_coh.to_haplotypes()
226218
f, c, o = haplotype_frequencies(gt_hap)
227219
# The frequencies and counts of the observed haplotypes are then updated; the nobs are updated too, although their values should stay the same.
228220
hap_freq.update(f)
@@ -342,6 +334,12 @@ def _build_cohorts_from_sample_grouping(*, group_samples_by_cohort, min_cohort_s
342334
# Apply minimum cohort size.
343335
df_cohorts = df_cohorts.query(f"size >= {min_cohort_size}").reset_index(drop=True)
344336

337+
# Early check for no cohorts.
338+
if len(df_cohorts) == 0:
339+
raise ValueError(
340+
"No cohorts available for the given sample selection parameters and minimum cohort size."
341+
)
342+
345343
return df_cohorts
346344

347345

0 commit comments

Comments
 (0)