Skip to content

Commit bb81ca5

Browse files
authored
Merge pull request #870 from blankirigaya/master
Fix g123 test failures on Adir1/Amin1 by defaulting `sites` to "segregating" rather than the default phasing analysis, which these datasets do not have
2 parents 71c0b23 + 8b6f4f0 commit bb81ca5

5 files changed

Lines changed: 71 additions & 10 deletions

File tree

malariagen_data/amin1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def __init__(
105105
site_filters_analysis=site_filters_analysis,
106106
discordant_read_calls_analysis=discordant_read_calls_analysis,
107107
default_site_mask="minimus",
108-
default_phasing_analysis="minimus_noneyet",
108+
default_phasing_analysis=None,
109109
default_coverage_calls_analysis="minimus_noneyet",
110110
bokeh_output_notebook=bokeh_output_notebook,
111111
results_cache=results_cache,

malariagen_data/anoph/g123.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def g123_gwss(
153153
self,
154154
contig: base_params.contig,
155155
window_size: g123_params.window_size,
156-
sites: g123_params.sites = base_params.DEFAULT,
156+
sites: g123_params.sites = g123_params.DEFAULT_SITE_PARAMETER,
157157
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
158158
sample_sets: Optional[base_params.sample_sets] = None,
159159
sample_query: Optional[base_params.sample_query] = None,
@@ -172,9 +172,6 @@ def g123_gwss(
172172
# invalidate any previously cached data.
173173
name = "g123_gwss_v1"
174174

175-
if sites == base_params.DEFAULT:
176-
assert self._default_phasing_analysis is not None
177-
sites = self._default_phasing_analysis
178175
valid_sites = self.phasing_analysis_ids + ("all", "segregating")
179176
if sites not in valid_sites:
180177
raise ValueError(
@@ -259,7 +256,7 @@ def _g123_calibration(
259256
def g123_calibration(
260257
self,
261258
contig: base_params.contig,
262-
sites: g123_params.sites = base_params.DEFAULT,
259+
sites: g123_params.sites = g123_params.DEFAULT_SITE_PARAMETER,
263260
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
264261
sample_query: Optional[base_params.sample_query] = None,
265262
sample_query_options: Optional[base_params.sample_query_options] = None,
@@ -279,6 +276,12 @@ def g123_calibration(
279276
# invalidate any previously cached data.
280277
name = "g123_calibration_v1"
281278

279+
valid_sites = self.phasing_analysis_ids + ("all", "segregating")
280+
if sites not in valid_sites:
281+
raise ValueError(
282+
f"Invalid value for `sites` parameter, must be one of {valid_sites}."
283+
)
284+
282285
params = dict(
283286
contig=contig,
284287
sites=sites,
@@ -314,7 +317,7 @@ def plot_g123_gwss_track(
314317
self,
315318
contig: base_params.contig,
316319
window_size: g123_params.window_size,
317-
sites: g123_params.sites = base_params.DEFAULT,
320+
sites: g123_params.sites = g123_params.DEFAULT_SITE_PARAMETER,
318321
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
319322
sample_sets: Optional[base_params.sample_sets] = None,
320323
sample_query: Optional[base_params.sample_query] = None,
@@ -417,7 +420,7 @@ def plot_g123_gwss(
417420
self,
418421
contig: base_params.contig,
419422
window_size: g123_params.window_size,
420-
sites: g123_params.sites = base_params.DEFAULT,
423+
sites: g123_params.sites = g123_params.DEFAULT_SITE_PARAMETER,
421424
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
422425
sample_sets: Optional[base_params.sample_sets] = None,
423426
sample_query: Optional[base_params.sample_query] = None,

malariagen_data/anoph/g123_params.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
""",
3535
]
3636

37+
DEFAULT_SITE_PARAMETER: sites = "segregating"
38+
3739
min_cohort_size_default: base_params.min_cohort_size = 20
3840

3941
max_cohort_size_default: base_params.max_cohort_size = 50

tests/anoph/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2464,7 +2464,7 @@ def init_config(self):
24642464
"DEFAULT_COHORTS_ANALYSIS": "20250710",
24652465
"DEFAULT_DISCORDANT_READ_CALLS_ANALYSIS": "",
24662466
"SITE_MASK_IDS": ["dirus"],
2467-
"PHASING_ANALYSIS_IDS": ["dirus_noneyet"],
2467+
"PHASING_ANALYSIS_IDS": [],
24682468
}
24692469
config_path = self.bucket_path / "v1.0-config.json"
24702470
with config_path.open(mode="w") as f:
@@ -2786,7 +2786,7 @@ def init_config(self):
27862786
"DEFAULT_COHORTS_ANALYSIS": "20251019",
27872787
"DEFAULT_DISCORDANT_READ_CALLS_ANALYSIS": "",
27882788
"SITE_MASK_IDS": ["minimus"],
2789-
"PHASING_ANALYSIS_IDS": ["minimus_noneyet"],
2789+
"PHASING_ANALYSIS_IDS": [],
27902790
}
27912791
config_path = self.bucket_path / "v1.0-config.json"
27922792
with config_path.open(mode="w") as f:

tests/anoph/test_g123.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
from malariagen_data import af1 as _af1
88
from malariagen_data import ag3 as _ag3
9+
from malariagen_data import adir1 as _adir1
10+
from malariagen_data import amin1 as _amin1
911
from malariagen_data.anoph.g123 import AnophelesG123Analysis
1012

1113

@@ -56,6 +58,44 @@ def af1_sim_api(af1_sim_fixture):
5658
)
5759

5860

61+
@pytest.fixture
62+
def adir1_sim_api(adir1_sim_fixture):
63+
return AnophelesG123Analysis(
64+
url=adir1_sim_fixture.url,
65+
public_url=adir1_sim_fixture.url,
66+
config_path=_adir1.CONFIG_PATH,
67+
major_version_number=_adir1.MAJOR_VERSION_NUMBER,
68+
major_version_path=_adir1.MAJOR_VERSION_PATH,
69+
pre=False,
70+
gff_gene_type="protein_coding_gene",
71+
gff_gene_name_attribute="Note",
72+
gff_default_attributes=("ID", "Parent", "Note", "description"),
73+
default_site_mask="dirus",
74+
results_cache=adir1_sim_fixture.results_cache_path.as_posix(),
75+
taxon_colors=_adir1.TAXON_COLORS,
76+
default_phasing_analysis=None,
77+
)
78+
79+
80+
@pytest.fixture
81+
def amin1_sim_api(amin1_sim_fixture):
82+
return AnophelesG123Analysis(
83+
url=amin1_sim_fixture.url,
84+
public_url=amin1_sim_fixture.url,
85+
config_path=_amin1.CONFIG_PATH,
86+
major_version_number=_amin1.MAJOR_VERSION_NUMBER,
87+
major_version_path=_amin1.MAJOR_VERSION_PATH,
88+
pre=False,
89+
gff_gene_type="protein_coding_gene",
90+
gff_gene_name_attribute="Note",
91+
gff_default_attributes=("ID", "Parent", "Note", "description"),
92+
default_site_mask="minimus",
93+
results_cache=amin1_sim_fixture.results_cache_path.as_posix(),
94+
taxon_colors=_amin1.TAXON_COLORS,
95+
default_phasing_analysis=None,
96+
)
97+
98+
5999
# N.B., here we use pytest_cases to parametrize tests. Each
60100
# function whose name begins with "case_" defines a set of
61101
# inputs to the test functions. See the documentation for
@@ -76,6 +116,14 @@ def case_af1_sim(af1_sim_fixture, af1_sim_api):
76116
return af1_sim_fixture, af1_sim_api
77117

78118

119+
def case_adir1_sim(adir1_sim_fixture, adir1_sim_api):
120+
return adir1_sim_fixture, adir1_sim_api
121+
122+
123+
def case_amin1_sim(amin1_sim_fixture, amin1_sim_api):
124+
return amin1_sim_fixture, amin1_sim_api
125+
126+
79127
def check_g123_gwss(*, api, g123_params):
80128
# Run main gwss function under test.
81129
x, g123 = api.g123_gwss(**g123_params)
@@ -115,6 +163,10 @@ def test_g123_gwss_with_default_sites(fixture, api: AnophelesG123Analysis):
115163

116164
@parametrize_with_cases("fixture,api", cases=".")
117165
def test_g123_gwss_with_phased_sites(fixture, api: AnophelesG123Analysis):
166+
# Skip if this dataset has no phasing analyses (e.g., Adir1, Amin1).
167+
if not api.phasing_analysis_ids:
168+
pytest.skip("No phasing analyses available for this dataset.")
169+
118170
# Set up test parameters.
119171
all_sample_sets = api.sample_sets()["sample_set"].to_list()
120172
g123_params = dict(
@@ -182,6 +234,10 @@ def test_g123_gwss_with_bad_sites(fixture, api: AnophelesG123Analysis):
182234

183235
@parametrize_with_cases("fixture,api", cases=".")
184236
def test_g123_calibration(fixture, api: AnophelesG123Analysis):
237+
# Skip if this dataset has no phasing analyses (e.g., Adir1, Amin1).
238+
if not api.phasing_analysis_ids:
239+
pytest.skip("No phasing analyses available for this dataset.")
240+
185241
# Set up test parameters.
186242
all_sample_sets = api.sample_sets()["sample_set"].to_list()
187243
window_sizes = np.random.randint(100, 500, size=random.randint(2, 5)).tolist()

0 commit comments

Comments (0)