Skip to content

Commit fbf4039

Browse files
committed
Added spinners and progress bars
1 parent 758f42a commit fbf4039

1 file changed

Lines changed: 29 additions & 21 deletions

File tree

malariagen_data/anoph/hap_freq.py

Lines changed: 29 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -71,28 +71,32 @@ def haplotype_frequencies(
7171
"No cohorts available for the given sample selection parameters and minimum cohort size."
7272
)
7373

74-
# Access SNP data.
75-
ds_hap = self.haplotypes(
76-
region=region,
77-
sample_sets=sample_sets,
78-
sample_query=sample_query,
79-
chunks=chunks,
80-
inline_array=inline_array,
81-
)
74+
# Access haplotypes.
75+
with self._dask_progress(desc="Load haplotypes"):
76+
ds_hap = self.haplotypes(
77+
region=region,
78+
sample_sets=sample_sets,
79+
sample_query=sample_query,
80+
chunks=chunks,
81+
inline_array=inline_array,
82+
)
8283

8384
# Early check for no SNPs.
8485
if ds_hap.sizes["variants"] == 0: # pragma: no cover
8586
raise ValueError("No SNPs available for the given region.")
8687

8788
# Access genotypes.
8889
gt = ds_hap["call_genotype"].data
89-
gt = gt.compute()
90+
with self._dask_progress(desc="Compute haplotypes"):
91+
gt = gt.compute()
9092

9193
# Count haplotypes.
9294
count_rows: dict[str, int] = dict()
9395
freq_rows = dict()
9496
freq_cols = dict()
95-
cohorts_iterator = coh_dict.items()
97+
cohorts_iterator = self._progress(
98+
coh_dict.items(), desc="Compute allele frequencies"
99+
)
96100
for coh, loc_coh in cohorts_iterator:
97101
count_rows = {k: 0 for k in count_rows.keys()}
98102
n_samples = np.count_nonzero(loc_coh)
@@ -185,31 +189,35 @@ def haplotype_frequencies_advanced(
185189
"No cohorts available for the given sample selection parameters and minimum cohort size."
186190
)
187191

188-
# Access SNP calls.
189-
ds_haps = self.haplotypes(
190-
region=region,
191-
sample_sets=sample_sets,
192-
sample_query=sample_query,
193-
chunks=chunks,
194-
inline_array=inline_array,
195-
)
192+
# Access haplotypes.
193+
with self._dask_progress(desc="Load haplotypes"):
194+
ds_haps = self.haplotypes(
195+
region=region,
196+
sample_sets=sample_sets,
197+
sample_query=sample_query,
198+
chunks=chunks,
199+
inline_array=inline_array,
200+
)
196201

197202
# Early check for no SNPs.
198203
if ds_haps.sizes["variants"] == 0: # pragma: no cover
199204
raise ValueError("No SNPs available for the given region.")
200205

201206
# Access genotypes.
202207
gt = ds_haps["call_genotype"].data
203-
gt = gt.compute()
208+
with self._dask_progress(desc="Compute haplotypes"):
209+
gt = gt.compute()
204210

205211
# Count haplotypes.
206212
count_rows: dict[str, int] = dict()
207213
freq_rows = dict()
208214
freq_cols = dict()
209215
count_cols = dict()
210216
nobs_cols = dict()
211-
cohorts_iterator = enumerate(df_cohorts.itertuples())
212-
for coh, cohort in cohorts_iterator:
217+
cohorts_iterator = self._progress(
218+
df_cohorts.itertuples(), desc="Compute allele frequencies"
219+
)
220+
for cohort in cohorts_iterator:
213221
cohort_key = cohort.taxon, cohort.area, cohort.period
214222
cohort_key_str = cohort.taxon + "_" + cohort.area + "_" + str(cohort.period)
215223
count_rows = {k: 0 for k in count_rows.keys()}

0 commit comments

Comments
 (0)