Skip to content

Commit 12ac956

Browse files
authored
Merge branch 'master' into 367-haps-freq
2 parents 2d85d2e + e3166e8 commit 12ac956

28 files changed

Lines changed: 2848 additions & 41039 deletions

docs/source/Af1.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ Sample metadata access
5353
count_samples
5454
plot_samples_bar
5555
plot_samples_interactive_map
56+
plot_sample_location_mapbox
57+
plot_sample_location_geo
5658
wgs_data_catalog
5759
cohorts
5860

@@ -132,6 +134,7 @@ Genetic distance and neighbour-joining trees (NJT)
132134
:toctree: generated/
133135

134136
plot_njt
137+
njt
135138
biallelic_diplotype_pairwise_distances
136139

137140
Heterozygosity analysis
@@ -161,6 +164,8 @@ Genome-wide selection scans
161164
plot_h12_calibration
162165
h12_gwss
163166
plot_h12_gwss
167+
plot_h12_gwss_multi_panel
168+
plot_h12_gwss_multi_overlay
164169
h1x_gwss
165170
plot_h1x_gwss
166171
g123_calibration

docs/source/Ag3.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ Sample metadata access
5454
lookup_sample
5555
plot_samples_bar
5656
plot_samples_interactive_map
57+
plot_sample_location_mapbox
58+
plot_sample_location_geo
5759
wgs_data_catalog
5860
cohorts
5961

@@ -142,6 +144,7 @@ Genetic distance and neighbour-joining trees (NJT)
142144
:toctree: generated/
143145

144146
plot_njt
147+
njt
145148
biallelic_diplotype_pairwise_distances
146149

147150
Heterozygosity analysis
@@ -171,6 +174,8 @@ Genome-wide selection scans
171174
plot_h12_calibration
172175
h12_gwss
173176
plot_h12_gwss
177+
plot_h12_gwss_multi_panel
178+
plot_h12_gwss_multi_overlay
174179
h1x_gwss
175180
plot_h1x_gwss
176181
g123_calibration

malariagen_data/af1.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class Af1(AnophelesDataResource):
4040
debug : bool, optional
4141
Set to True to enable debug level logging.
4242
show_progress : bool, optional
43-
If True, show a progress bar during longer-running computations.
43+
If True, show a progress bar during longer-running computations. The default can be overridden using an environment variable named MGEN_SHOW_PROGRESS.
4444
check_location : bool, optional
4545
If True, use ipinfo to check the location of the client system.
4646
**kwargs
@@ -82,7 +82,7 @@ def __init__(
8282
results_cache=None,
8383
log=sys.stdout,
8484
debug=False,
85-
show_progress=True,
85+
show_progress=None,
8686
check_location=True,
8787
cohorts_analysis=None,
8888
site_filters_analysis=None,

malariagen_data/ag3.py

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ class Ag3(AnophelesDataResource):
112112
debug : bool, optional
113113
Set to True to enable debug level logging.
114114
show_progress : bool, optional
115-
If True, show a progress bar during longer-running computations.
115+
If True, show a progress bar during longer-running computations. The default can be overridden using an environment variable named MGEN_SHOW_PROGRESS.
116116
check_location : bool, optional
117117
If True, use ipinfo to check the location of the client system.
118118
**kwargs
@@ -154,7 +154,7 @@ def __init__(
154154
results_cache=None,
155155
log=sys.stdout,
156156
debug=False,
157-
show_progress=True,
157+
show_progress=None,
158158
check_location=True,
159159
cohorts_analysis=None,
160160
aim_analysis=None,
@@ -378,6 +378,7 @@ def karyotype(
378378
inversion: inversion_param,
379379
sample_sets: Optional[base_params.sample_sets] = None,
380380
sample_query: Optional[base_params.sample_query] = None,
381+
sample_query_options: Optional[base_params.sample_query_options] = None,
381382
) -> pd.DataFrame:
382383
# load tag snp data
383384
df_tagsnps = self.load_inversion_tags(inversion=inversion)
@@ -390,19 +391,25 @@ def karyotype(
390391
region = f"{contig}:{start}-{end}"
391392

392393
ds_snps = self.snp_calls(
393-
region=region, sample_sets=sample_sets, sample_query=sample_query
394+
region=region,
395+
sample_sets=sample_sets,
396+
sample_query=sample_query,
397+
sample_query_options=sample_query_options,
394398
)
395-
geno = allel.GenotypeDaskArray(ds_snps["call_genotype"].data)
396-
pos = allel.SortedIndex(ds_snps["variant_position"].values)
397-
samples = ds_snps["sample_id"].values
398-
alts = ds_snps["variant_allele"].values.astype(str)
399-
400-
# subset to position of inversion tags
401-
mask = pos.locate_intersection(inversion_pos)[0]
402-
alts = alts[mask]
403-
geno = geno.compress(mask, axis=0).compute()
404399

405400
with self._spinner("Inferring karyotype from tag SNPs"):
401+
# access variables we need
402+
geno = allel.GenotypeDaskArray(ds_snps["call_genotype"].data)
403+
pos = allel.SortedIndex(ds_snps["variant_position"].values)
404+
samples = ds_snps["sample_id"].values
405+
alts = ds_snps["variant_allele"].values.astype(str)
406+
407+
# subset to position of inversion tags
408+
mask = pos.locate_intersection(inversion_pos)[0]
409+
alts = alts[mask]
410+
geno = geno.compress(mask, axis=0).compute()
411+
412+
# infer karyotype
406413
gn_alt = _karyotype_tags_n_alt(
407414
gt=geno, alts=alts, inversion_alts=inversion_alts
408415
)
@@ -422,7 +429,8 @@ def karyotype(
422429
"total_tag_snps": total_sites,
423430
},
424431
)
425-
kt_dtype = CategoricalDtype(categories=[0, 1, 2], ordered=True)
432+
# Allow missing values to be filled with a visible "<NA>" placeholder.
433+
kt_dtype = CategoricalDtype(categories=[0, 1, 2, "<NA>"], ordered=True)
426434
df[f"karyotype_{inversion}"] = df[f"karyotype_{inversion}"].astype(kt_dtype)
427435

428436
return df

malariagen_data/anoph/base.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import os
2+
13
import json
24
from contextlib import nullcontext
35
from datetime import date
@@ -54,12 +56,24 @@ def __init__(
5456
bokeh_output_notebook: bool = False,
5557
log: Optional[Union[str, IO]] = None,
5658
debug: bool = False,
57-
show_progress: bool = False,
59+
show_progress: Optional[bool] = None,
5860
check_location: bool = False,
5961
storage_options: Optional[Mapping] = None,
6062
results_cache: Optional[str] = None,
6163
tqdm_class=None,
6264
):
65+
# If show_progress has not been specified, then determine the default.
66+
if show_progress is None:
67+
# Get the env var, if it exists.
68+
show_progress_env = os.getenv("MGEN_SHOW_PROGRESS")
69+
70+
# If the env var does not exist, then use the class default.
71+
# Otherwise, convert the env var value to a boolean and use that.
72+
if show_progress_env is None:
73+
show_progress = True
74+
else:
75+
show_progress = show_progress_env.lower() in ("true", "1", "yes", "on")
76+
6377
self._config_path = config_path
6478
self._pre = pre
6579
self._gcs_default_url = gcs_default_url

malariagen_data/anoph/dipclust_params.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Parameters for diplotype clustering functions."""
22

3-
from .diplotype_distance_params import distance_metric
3+
from .distance_params import distance_metric
44
from .clustering_params import linkage_method
55

66

0 commit comments

Comments
 (0)