Skip to content

Commit 03b9c86

Browse files
committed
use dynamic window_size
1 parent 7f879d2 commit 03b9c86

1 file changed

Lines changed: 43 additions & 45 deletions

File tree

tests/anoph/test_xpehh.py

Lines changed: 43 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -61,30 +61,56 @@ def case_af1_sim(af1_sim_fixture, af1_sim_api):
6161
return af1_sim_fixture, af1_sim_api
6262

6363

64-
@parametrize_with_cases("fixture,api", cases=".")
65-
def test_xpehh_gwss(fixture, api: AnophelesXpehhAnalysis):
66-
# Set up test parameters.
64+
def _setup_cohorts(api, min_cohort_size=2):
    """Pick a random contig and sample set and split its samples in two.

    The sample IDs of the chosen sample set are split at the midpoint of the
    list: the first half becomes cohort 1, the remainder cohort 2.

    Parameters
    ----------
    api
        Object under test. Must provide ``contigs``, ``sample_sets()`` and
        ``sample_metadata(sample_sets=...)``.
    min_cohort_size : int, default 2
        Minimum number of samples each cohort must contain. The default
        matches the ``min_cohort_size=2`` that the tests in this module pass
        to the functions under test.

    Returns
    -------
    tuple
        ``(contig, sample_set, cohort1_query, cohort2_query)``, where the two
        queries are ``"sample_id in [...]"`` strings.

    Notes
    -----
    Calls ``pytest.skip`` when the sample set cannot be split into two
    cohorts of at least ``min_cohort_size`` samples each.
    """
    # Keep the order of the two random draws stable (contig first, then
    # sample set) so seeded runs stay reproducible.
    contig = str(np.random.choice(api.contigs))
    all_sample_sets = api.sample_sets()["sample_set"].to_list()
    sample_set = str(np.random.choice(all_sample_sets))
    df_samples = api.sample_metadata(sample_sets=sample_set)

    # Cohort 1 is the smaller half (floor division), so it decides whether
    # both cohorts can reach the required size. A cohort must never be empty,
    # hence the floor of 1 on the requirement.
    mid = len(df_samples) // 2
    if mid < max(1, min_cohort_size):
        pytest.skip("Not enough samples for two cohorts")

    sample_ids = df_samples["sample_id"].to_list()
    cohort1_query = f"sample_id in {sample_ids[:mid]}"
    cohort2_query = f"sample_id in {sample_ids[mid:]}"
    return contig, sample_set, cohort1_query, cohort2_query
8079

81-
# Run function under test.
80+
81+
def _safe_window_size(api, contig, sample_set, cohort1_query, cohort2_query):
    """Derive a window size that is guaranteed to fit the available data.

    Runs ``api.xpehh_gwss`` once with ``window_size=None`` and takes the
    length of the first returned array as the number of available variants.

    Returns half that number (integer division), but never less than 2.
    Calls ``pytest.skip`` when fewer than 2 variants are available.
    """
    unwindowed_params = dict(
        contig=contig,
        sample_sets=sample_set,
        cohort1_query=cohort1_query,
        cohort2_query=cohort2_query,
        window_size=None,
        min_cohort_size=2,
    )
    positions, _scores = api.xpehh_gwss(**unwindowed_params)

    n_variants = len(positions)
    if n_variants < 2:
        pytest.skip(f"Only {n_variants} variants available, need at least 2")

    # With n_variants >= 2 the halved value never exceeds n_variants, so the
    # lower bound of 2 is the only clamp that is needed.
    half = n_variants // 2
    return half if half > 2 else 2
96+
97+
98+
@parametrize_with_cases("fixture,api", cases=".")
99+
def test_xpehh_gwss(fixture, api: AnophelesXpehhAnalysis):
100+
contig, sample_set, cohort1_query, cohort2_query = _setup_cohorts(api)
101+
102+
# Determine a safe window size from the actual data.
103+
window_size = _safe_window_size(
104+
api, contig, sample_set, cohort1_query, cohort2_query
105+
)
106+
107+
# Run function under test with windowing.
82108
x, xpehh = api.xpehh_gwss(
83109
contig=contig,
84110
sample_sets=sample_set,
85111
cohort1_query=cohort1_query,
86112
cohort2_query=cohort2_query,
87-
window_size=5,
113+
window_size=window_size,
88114
min_cohort_size=2,
89115
)
90116

@@ -98,19 +124,7 @@ def test_xpehh_gwss(fixture, api: AnophelesXpehhAnalysis):
98124

99125
@parametrize_with_cases("fixture,api", cases=".")
100126
def test_xpehh_gwss_no_window(fixture, api: AnophelesXpehhAnalysis):
101-
# Set up test parameters.
102-
contig = str(np.random.choice(api.contigs))
103-
all_sample_sets = api.sample_sets()["sample_set"].to_list()
104-
sample_set = str(np.random.choice(all_sample_sets))
105-
df_samples = api.sample_metadata(sample_sets=sample_set)
106-
107-
if len(df_samples) < 2:
108-
pytest.skip("Not enough samples for two cohorts")
109-
110-
sample_ids = df_samples["sample_id"].to_list()
111-
mid = len(sample_ids) // 2
112-
cohort1_query = f"sample_id in {sample_ids[:mid]}"
113-
cohort2_query = f"sample_id in {sample_ids[mid:]}"
127+
contig, sample_set, cohort1_query, cohort2_query = _setup_cohorts(api)
114128

115129
# Run function under test with no windowing.
116130
x, xpehh = api.xpehh_gwss(
@@ -132,27 +146,19 @@ def test_xpehh_gwss_no_window(fixture, api: AnophelesXpehhAnalysis):
132146

133147
@parametrize_with_cases("fixture,api", cases=".")
134148
def test_plot_xpehh_gwss_track(fixture, api: AnophelesXpehhAnalysis):
135-
# Set up test parameters.
136-
contig = str(np.random.choice(api.contigs))
137-
all_sample_sets = api.sample_sets()["sample_set"].to_list()
138-
sample_set = str(np.random.choice(all_sample_sets))
139-
df_samples = api.sample_metadata(sample_sets=sample_set)
149+
contig, sample_set, cohort1_query, cohort2_query = _setup_cohorts(api)
140150

141-
if len(df_samples) < 2:
142-
pytest.skip("Not enough samples for two cohorts")
143-
144-
sample_ids = df_samples["sample_id"].to_list()
145-
mid = len(sample_ids) // 2
146-
cohort1_query = f"sample_id in {sample_ids[:mid]}"
147-
cohort2_query = f"sample_id in {sample_ids[mid:]}"
151+
window_size = _safe_window_size(
152+
api, contig, sample_set, cohort1_query, cohort2_query
153+
)
148154

149155
# Run function under test.
150156
fig = api.plot_xpehh_gwss_track(
151157
contig=contig,
152158
sample_sets=sample_set,
153159
cohort1_query=cohort1_query,
154160
cohort2_query=cohort2_query,
155-
window_size=5,
161+
window_size=window_size,
156162
min_cohort_size=2,
157163
show=False,
158164
)
@@ -163,27 +169,19 @@ def test_plot_xpehh_gwss_track(fixture, api: AnophelesXpehhAnalysis):
163169

164170
@parametrize_with_cases("fixture,api", cases=".")
165171
def test_plot_xpehh_gwss(fixture, api: AnophelesXpehhAnalysis):
166-
# Set up test parameters.
167-
contig = str(np.random.choice(api.contigs))
168-
all_sample_sets = api.sample_sets()["sample_set"].to_list()
169-
sample_set = str(np.random.choice(all_sample_sets))
170-
df_samples = api.sample_metadata(sample_sets=sample_set)
172+
contig, sample_set, cohort1_query, cohort2_query = _setup_cohorts(api)
171173

172-
if len(df_samples) < 2:
173-
pytest.skip("Not enough samples for two cohorts")
174-
175-
sample_ids = df_samples["sample_id"].to_list()
176-
mid = len(sample_ids) // 2
177-
cohort1_query = f"sample_id in {sample_ids[:mid]}"
178-
cohort2_query = f"sample_id in {sample_ids[mid:]}"
174+
window_size = _safe_window_size(
175+
api, contig, sample_set, cohort1_query, cohort2_query
176+
)
179177

180178
# Run function under test.
181179
fig = api.plot_xpehh_gwss(
182180
contig=contig,
183181
sample_sets=sample_set,
184182
cohort1_query=cohort1_query,
185183
cohort2_query=cohort2_query,
186-
window_size=5,
184+
window_size=window_size,
187185
min_cohort_size=2,
188186
show=False,
189187
)

0 commit comments

Comments
 (0)