Skip to content

Commit 8e84a28

Browse files
authored
Merge branch 'master' into standardise-biallelic-diplotypes-471
2 parents 2cda57b + 51bd38f commit 8e84a28

8 files changed

Lines changed: 56 additions & 6 deletions

File tree

malariagen_data/adir1.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,10 @@
1919
"dirus": TAXON_PALETTE[0],
2020
}
2121

22+
XPEHH_GWSS_CACHE_NAME = "adir1_xpehh_gwss_v1"
23+
IHS_GWSS_CACHE_NAME = "adir1_ihs_gwss_v1"
24+
ROH_HMM_CACHE_NAME = "adir1_roh_hmm_v1"
25+
2226

2327
class Adir1(AnophelesDataResource):
2428
"""Provides access to data from Adir1.0 releases.
@@ -71,6 +75,10 @@ class Adir1(AnophelesDataResource):
7175
7276
"""
7377

78+
_xpehh_gwss_cache_name = XPEHH_GWSS_CACHE_NAME
79+
_ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME
80+
_roh_hmm_cache_name = ROH_HMM_CACHE_NAME
81+
7482
def __init__(
7583
self,
7684
url=None,

malariagen_data/af1.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,10 @@
2121
"funestus": TAXON_PALETTE[0],
2222
}
2323

24+
XPEHH_GWSS_CACHE_NAME = "af1_xpehh_gwss_v1"
25+
IHS_GWSS_CACHE_NAME = "af1_ihs_gwss_v1"
26+
ROH_HMM_CACHE_NAME = "af1_roh_hmm_v1"
27+
2428

2529
class Af1(AnophelesDataResource):
2630
"""Provides access to data from Af1.x releases.
@@ -75,6 +79,7 @@ class Af1(AnophelesDataResource):
7579

7680
_xpehh_gwss_cache_name = XPEHH_GWSS_CACHE_NAME
7781
_ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME
82+
_roh_hmm_cache_name = ROH_HMM_CACHE_NAME
7883

7984
def __init__(
8085
self,

malariagen_data/ag3.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,10 @@ def _setup_aim_palettes():
9595
"aim_species": "object",
9696
}
9797

98+
XPEHH_GWSS_CACHE_NAME = "ag3_xpehh_gwss_v1"
99+
IHS_GWSS_CACHE_NAME = "ag3_ihs_gwss_v1"
100+
ROH_HMM_CACHE_NAME = "ag3_roh_hmm_v1"
101+
98102

99103
class Ag3(AnophelesDataResource):
100104
"""Provides access to data from Ag3.x releases.
@@ -153,6 +157,7 @@ class Ag3(AnophelesDataResource):
153157

154158
_xpehh_gwss_cache_name = XPEHH_GWSS_CACHE_NAME
155159
_ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME
160+
_roh_hmm_cache_name = ROH_HMM_CACHE_NAME
156161

157162
def __init__(
158163
self,

malariagen_data/amin1.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,10 @@
1919
"dirus": TAXON_PALETTE[0],
2020
}
2121

22+
XPEHH_GWSS_CACHE_NAME = "amin1_xpehh_gwss_v1"
23+
IHS_GWSS_CACHE_NAME = "amin1_ihs_gwss_v1"
24+
ROH_HMM_CACHE_NAME = "amin1_roh_hmm_v1"
25+
2226

2327
class Amin1(AnophelesDataResource):
2428
"""Provides access to data from Amin1.0 releases.
@@ -71,8 +75,9 @@ class Amin1(AnophelesDataResource):
7175
7276
"""
7377

74-
# _xpehh_gwss_cache_name = XPEHH_GWSS_CACHE_NAME
75-
# _ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME
78+
_xpehh_gwss_cache_name = XPEHH_GWSS_CACHE_NAME
79+
_ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME
80+
_roh_hmm_cache_name = ROH_HMM_CACHE_NAME
7681

7782
def __init__(
7883
self,

malariagen_data/anoph/fst.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -363,7 +363,7 @@ def average_fst(
363363
region: base_params.region,
364364
cohort1_query: base_params.sample_query,
365365
cohort2_query: base_params.sample_query,
366-
sample_query_options: Optional[base_params.sample_query] = None,
366+
sample_query_options: Optional[base_params.sample_query_options] = None,
367367
sample_sets: Optional[base_params.sample_sets] = None,
368368
cohort_size: Optional[base_params.cohort_size] = fst_params.cohort_size_default,
369369
min_cohort_size: Optional[

malariagen_data/anoph/g123.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,13 @@ def _g123_gwss(
133133
chunks=chunks,
134134
)
135135

136+
if gt.shape[0] < window_size:
137+
raise ValueError(
138+
f"Not enough sites ({gt.shape[0]}) for window size "
139+
f"({window_size}). Please reduce the window size or "
140+
f"use different site selection criteria."
141+
)
142+
136143
with self._spinner("Compute G123"):
137144
g123 = allel.moving_statistic(gt, statistic=_garud_g123, size=window_size)
138145
x = allel.moving_statistic(pos, statistic=np.mean, size=window_size)
@@ -240,6 +247,12 @@ def _g123_calibration(
240247

241248
calibration_runs: Dict[str, np.ndarray] = dict()
242249
for window_size in self._progress(window_sizes, desc="Compute G123"):
250+
if gt.shape[0] < window_size:
251+
raise ValueError(
252+
f"Not enough sites ({gt.shape[0]}) for window size "
253+
f"({window_size}). Please reduce the window size or "
254+
f"use different site selection criteria."
255+
)
243256
g123 = allel.moving_statistic(gt, statistic=_garud_g123, size=window_size)
244257
calibration_runs[str(window_size)] = g123
245258

malariagen_data/veff.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -548,7 +548,21 @@ def _get_within_intron_effect(base_effect, intron):
548548
effect = base_effect._replace(effect="INTRONIC", impact="MODIFIER")
549549

550550
else:
551-
# TODO intronic INDELs and MNPs
552-
effect = base_effect._replace(effect="TODO intronic indels and MNPs")
551+
# INDELs and MNPs — use the closest edge of the variant to the splice site
552+
if strand == "+":
553+
dist_5prime = ref_start - (intron_start - 1)
554+
dist_3prime = -(ref_stop - (intron_stop + 1))
555+
else:
556+
dist_5prime = (intron_stop + 1) - ref_stop
557+
dist_3prime = -((intron_start - 1) - ref_start)
558+
559+
indel_min_dist = min(dist_5prime, dist_3prime)
560+
561+
if indel_min_dist <= 2:
562+
effect = base_effect._replace(effect="SPLICE_CORE", impact="HIGH")
563+
elif indel_min_dist <= 7:
564+
effect = base_effect._replace(effect="SPLICE_REGION", impact="MODERATE")
565+
else:
566+
effect = base_effect._replace(effect="INTRONIC", impact="MODIFIER")
553567

554568
return effect

tests/anoph/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2260,7 +2260,7 @@ def init_hap_sites(self):
22602260
path=path,
22612261
contigs=self.contigs,
22622262
snp_sites=self.snp_sites,
2263-
p_site=np.random.random(),
2263+
p_site=np.random.uniform(0.5, 1.0),
22642264
)
22652265

22662266
def init_haplotypes(self):

0 commit comments

Comments
 (0)