@@ -61,30 +61,56 @@ def case_af1_sim(af1_sim_fixture, af1_sim_api):
6161 return af1_sim_fixture , af1_sim_api
6262
6363
64- @parametrize_with_cases ("fixture,api" , cases = "." )
65- def test_xpehh_gwss (fixture , api : AnophelesXpehhAnalysis ):
66- # Set up test parameters.
def _setup_cohorts(api, min_cohort_size=2):
    """Pick a random contig and sample set, and split its samples in two.

    Parameters
    ----------
    api
        Object under test. It must expose ``contigs``, ``sample_sets()`` and
        ``sample_metadata(sample_sets=...)``.
    min_cohort_size : int, optional
        Smallest acceptable number of samples per cohort. Defaults to 2,
        which is the ``min_cohort_size`` the tests in this module pass to
        the functions under test.

    Returns
    -------
    tuple
        ``(contig, sample_set, cohort1_query, cohort2_query)`` where the two
        queries select the first and second half of the sample IDs.

    Notes
    -----
    Calls ``pytest.skip`` when the chosen sample set cannot supply two
    cohorts of at least ``min_cohort_size`` samples each.
    """
    contig = str(np.random.choice(api.contigs))
    all_sample_sets = api.sample_sets()["sample_set"].to_list()
    sample_set = str(np.random.choice(all_sample_sets))
    df_samples = api.sample_metadata(sample_sets=sample_set)

    sample_ids = df_samples["sample_id"].to_list()
    mid = len(sample_ids) // 2

    # The first half is never larger than the second, so checking it alone
    # covers both cohorts. Merely requiring two samples overall would allow
    # single-sample cohorts, which are presumably rejected downstream when
    # min_cohort_size=2 is requested.
    if mid < min_cohort_size:
        pytest.skip("Not enough samples for two cohorts")

    cohort1_query = f"sample_id in {sample_ids[:mid]}"
    cohort2_query = f"sample_id in {sample_ids[mid:]}"
    return contig, sample_set, cohort1_query, cohort2_query
8079
81- # Run function under test.
80+
def _safe_window_size(api, contig, sample_set, cohort1_query, cohort2_query):
    """Derive a window size that fits the data by probing an unwindowed run.

    Calls ``api.xpehh_gwss`` once with ``window_size=None`` and counts the
    entries in the first element of the result. Returns half that count,
    but never less than 2. Calls ``pytest.skip`` when fewer than two
    entries are available.
    """
    probe_kwargs = dict(
        contig=contig,
        sample_sets=sample_set,
        cohort1_query=cohort1_query,
        cohort2_query=cohort2_query,
        window_size=None,
        min_cohort_size=2,
    )
    positions, _ = api.xpehh_gwss(**probe_kwargs)
    available = len(positions)

    if available >= 2:
        # Half of what is available, with a floor of 2; for any count >= 2
        # this cannot exceed the count itself.
        return max(2, available // 2)

    pytest.skip(f"Only {available} variants available, need at least 2")
96+
97+
98+ @parametrize_with_cases ("fixture,api" , cases = "." )
99+ def test_xpehh_gwss (fixture , api : AnophelesXpehhAnalysis ):
100+ contig , sample_set , cohort1_query , cohort2_query = _setup_cohorts (api )
101+
102+ # Determine a safe window size from the actual data.
103+ window_size = _safe_window_size (
104+ api , contig , sample_set , cohort1_query , cohort2_query
105+ )
106+
107+ # Run function under test with windowing.
82108 x , xpehh = api .xpehh_gwss (
83109 contig = contig ,
84110 sample_sets = sample_set ,
85111 cohort1_query = cohort1_query ,
86112 cohort2_query = cohort2_query ,
87- window_size = 5 ,
113+ window_size = window_size ,
88114 min_cohort_size = 2 ,
89115 )
90116
@@ -98,19 +124,7 @@ def test_xpehh_gwss(fixture, api: AnophelesXpehhAnalysis):
98124
99125@parametrize_with_cases ("fixture,api" , cases = "." )
100126def test_xpehh_gwss_no_window (fixture , api : AnophelesXpehhAnalysis ):
101- # Set up test parameters.
102- contig = str (np .random .choice (api .contigs ))
103- all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
104- sample_set = str (np .random .choice (all_sample_sets ))
105- df_samples = api .sample_metadata (sample_sets = sample_set )
106-
107- if len (df_samples ) < 2 :
108- pytest .skip ("Not enough samples for two cohorts" )
109-
110- sample_ids = df_samples ["sample_id" ].to_list ()
111- mid = len (sample_ids ) // 2
112- cohort1_query = f"sample_id in { sample_ids [:mid ]} "
113- cohort2_query = f"sample_id in { sample_ids [mid :]} "
127+ contig , sample_set , cohort1_query , cohort2_query = _setup_cohorts (api )
114128
115129 # Run function under test with no windowing.
116130 x , xpehh = api .xpehh_gwss (
@@ -132,27 +146,19 @@ def test_xpehh_gwss_no_window(fixture, api: AnophelesXpehhAnalysis):
132146
133147@parametrize_with_cases ("fixture,api" , cases = "." )
134148def test_plot_xpehh_gwss_track (fixture , api : AnophelesXpehhAnalysis ):
135- # Set up test parameters.
136- contig = str (np .random .choice (api .contigs ))
137- all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
138- sample_set = str (np .random .choice (all_sample_sets ))
139- df_samples = api .sample_metadata (sample_sets = sample_set )
149+ contig , sample_set , cohort1_query , cohort2_query = _setup_cohorts (api )
140150
141- if len (df_samples ) < 2 :
142- pytest .skip ("Not enough samples for two cohorts" )
143-
144- sample_ids = df_samples ["sample_id" ].to_list ()
145- mid = len (sample_ids ) // 2
146- cohort1_query = f"sample_id in { sample_ids [:mid ]} "
147- cohort2_query = f"sample_id in { sample_ids [mid :]} "
151+ window_size = _safe_window_size (
152+ api , contig , sample_set , cohort1_query , cohort2_query
153+ )
148154
149155 # Run function under test.
150156 fig = api .plot_xpehh_gwss_track (
151157 contig = contig ,
152158 sample_sets = sample_set ,
153159 cohort1_query = cohort1_query ,
154160 cohort2_query = cohort2_query ,
155- window_size = 5 ,
161+ window_size = window_size ,
156162 min_cohort_size = 2 ,
157163 show = False ,
158164 )
@@ -163,27 +169,19 @@ def test_plot_xpehh_gwss_track(fixture, api: AnophelesXpehhAnalysis):
163169
164170@parametrize_with_cases ("fixture,api" , cases = "." )
165171def test_plot_xpehh_gwss (fixture , api : AnophelesXpehhAnalysis ):
166- # Set up test parameters.
167- contig = str (np .random .choice (api .contigs ))
168- all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
169- sample_set = str (np .random .choice (all_sample_sets ))
170- df_samples = api .sample_metadata (sample_sets = sample_set )
172+ contig , sample_set , cohort1_query , cohort2_query = _setup_cohorts (api )
171173
172- if len (df_samples ) < 2 :
173- pytest .skip ("Not enough samples for two cohorts" )
174-
175- sample_ids = df_samples ["sample_id" ].to_list ()
176- mid = len (sample_ids ) // 2
177- cohort1_query = f"sample_id in { sample_ids [:mid ]} "
178- cohort2_query = f"sample_id in { sample_ids [mid :]} "
174+ window_size = _safe_window_size (
175+ api , contig , sample_set , cohort1_query , cohort2_query
176+ )
179177
180178 # Run function under test.
181179 fig = api .plot_xpehh_gwss (
182180 contig = contig ,
183181 sample_sets = sample_set ,
184182 cohort1_query = cohort1_query ,
185183 cohort2_query = cohort2_query ,
186- window_size = 5 ,
184+ window_size = window_size ,
187185 min_cohort_size = 2 ,
188186 show = False ,
189187 )
0 commit comments