1- import random
2-
31import numpy as np
42import pandas as pd
53import xarray as xr
@@ -95,10 +93,10 @@ def test_gene_cnv_frequencies_with_str_cohorts(
9593 api : AnophelesCnvFrequencyAnalysis ,
9694 cohorts ,
9795):
98- region = random .choice (api .contigs )
96+ region = str ( np . random .choice (api .contigs ) )
9997 all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
100- sample_sets = random .choice (all_sample_sets )
101- min_cohort_size = random .randint (0 , 2 )
98+ sample_sets = str ( np . random .choice (all_sample_sets ) )
99+ min_cohort_size = int ( np . random .randint (0 , 3 ) )
102100
103101 # Set up call params.
104102 params = dict (
@@ -148,8 +146,8 @@ def test_gene_cnv_frequencies_with_min_cohort_size(
148146):
149147 # Pick test parameters at random.
150148 all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
151- sample_sets = random .choice (all_sample_sets )
152- region = random .choice (api .contigs )
149+ sample_sets = str ( np . random .choice (all_sample_sets ) )
150+ region = str ( np . random .choice (api .contigs ) )
153151 cohorts = "admin1_year"
154152
155153 # Set up call params.
@@ -199,13 +197,13 @@ def test_gene_cnv_frequencies_with_str_cohorts_and_sample_query(
199197 # Pick test parameters at random.
200198 sample_sets = None
201199 min_cohort_size = 0
202- region = random .choice (api .contigs )
203- cohorts = random . choice (
204- ["admin1_year" , "admin1_month" , "admin2_year" , "admin2_month" ]
200+ region = str ( np . random .choice (api .contigs ) )
201+ cohorts = str (
202+ np . random . choice ( ["admin1_year" , "admin1_month" , "admin2_year" , "admin2_month" ])
205203 )
206204 df_samples = api .sample_metadata (sample_sets = sample_sets )
207205 countries = df_samples ["country" ].unique ()
208- country = random .choice (countries )
206+ country = str ( np . random .choice (countries ) )
209207 sample_query = f"country == '{ country } '"
210208
211209 # Figure out expected cohort labels.
@@ -247,13 +245,13 @@ def test_gene_cnv_frequencies_with_str_cohorts_and_sample_query_options(
247245 # Pick test parameters at random.
248246 sample_sets = None
249247 min_cohort_size = 0
250- region = random .choice (api .contigs )
251- cohorts = random . choice (
252- ["admin1_year" , "admin1_month" , "admin2_year" , "admin2_month" ]
248+ region = str ( np . random .choice (api .contigs ) )
249+ cohorts = str (
250+ np . random . choice ( ["admin1_year" , "admin1_month" , "admin2_year" , "admin2_month" ])
253251 )
254252 df_samples = api .sample_metadata (sample_sets = sample_sets )
255253 countries = df_samples ["country" ].unique ().tolist ()
256- countries_list = random .sample (countries , 2 )
254+ countries_list = np . random .choice (countries , size = 2 , replace = False ). tolist ( )
257255 sample_query_options = {
258256 "local_dict" : {
259257 "countries_list" : countries_list ,
@@ -303,8 +301,8 @@ def test_gene_cnv_frequencies_with_dict_cohorts(
303301):
304302 # Pick test parameters at random.
305303 sample_sets = None # all sample sets
306- min_cohort_size = random .randint (0 , 2 )
307- region = random .choice (api .contigs )
304+ min_cohort_size = int ( np . random .randint (0 , 3 ) )
305+ region = str ( np . random .choice (api .contigs ) )
308306
309307 # Create cohorts by country.
310308 df_samples = api .sample_metadata (sample_sets = sample_sets )
@@ -343,10 +341,10 @@ def test_gene_cnv_frequencies_without_drop_invariant(
343341):
344342 # Pick test parameters at random.
345343 all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
346- sample_sets = random .choice (all_sample_sets )
347- min_cohort_size = random .randint (0 , 2 )
348- region = random .choice (api .contigs )
349- cohorts = random .choice (["admin1_year" , "admin2_month" , "country" ])
344+ sample_sets = str ( np . random .choice (all_sample_sets ) )
345+ min_cohort_size = int ( np . random .randint (0 , 3 ) )
346+ region = str ( np . random .choice (api .contigs ) )
347+ cohorts = str ( np . random .choice (["admin1_year" , "admin2_month" , "country" ]) )
350348
351349 # Figure out expected cohort labels.
352350 df_samples = api .sample_metadata (sample_sets = sample_sets )
@@ -398,9 +396,9 @@ def test_gene_cnv_frequencies_with_bad_region(
398396):
399397 # Pick test parameters at random.
400398 all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
401- sample_sets = random .choice (all_sample_sets )
402- min_cohort_size = random .randint (0 , 2 )
403- cohorts = random .choice (["admin1_year" , "admin2_month" , "country" ])
399+ sample_sets = str ( np . random .choice (all_sample_sets ) )
400+ min_cohort_size = int ( np . random .randint (0 , 3 ) )
401+ cohorts = str ( np . random .choice (["admin1_year" , "admin2_month" , "country" ]) )
404402
405403 # Set up call params.
406404 params = dict (
@@ -424,9 +422,9 @@ def test_gene_cnv_frequencies_with_max_coverage_variance(
424422 max_coverage_variance ,
425423):
426424 all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
427- sample_sets = random .choice (all_sample_sets )
428- cohorts = random .choice (["admin1_year" , "admin2_month" , "country" ])
429- region = random .choice (api .contigs )
425+ sample_sets = str ( np . random .choice (all_sample_sets ) )
426+ cohorts = str ( np . random .choice (["admin1_year" , "admin2_month" , "country" ]) )
427+ region = str ( np . random .choice (api .contigs ) )
430428
431429 params = dict (
432430 region = region ,
@@ -503,7 +501,7 @@ def test_gene_cnv_frequencies_advanced_with_sample_query(
503501 all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
504502 df_samples = api .sample_metadata (sample_sets = all_sample_sets )
505503 countries = df_samples ["country" ].unique ()
506- country = random .choice (countries )
504+ country = str ( np . random .choice (countries ) )
507505 sample_query = f"country == '{ country } '"
508506
509507 check_gene_cnv_frequencies_advanced (
@@ -522,7 +520,7 @@ def test_gene_cnv_frequencies_advanced_with_sample_query_options(
522520 all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
523521 df_samples = api .sample_metadata (sample_sets = all_sample_sets )
524522 countries = df_samples ["country" ].unique ().tolist ()
525- countries_list = random .sample (countries , 2 )
523+ countries_list = np . random .choice (countries , size = 2 , replace = False ). tolist ( )
526524 sample_query_options = {
527525 "local_dict" : {
528526 "countries_list" : countries_list ,
@@ -549,7 +547,7 @@ def test_gene_cnv_frequencies_advanced_with_min_cohort_size(
549547 all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
550548 area_by = "admin1_iso"
551549 period_by = "year"
552- region = random .choice (api .contigs )
550+ region = str ( np . random .choice (api .contigs ) )
553551
554552 if min_cohort_size <= 10 :
555553 # Expect this to find at least one cohort, so go ahead with full
@@ -585,7 +583,7 @@ def test_gene_cnv_frequencies_advanced_with_max_coverage_variance(
585583 all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
586584 area_by = "admin1_iso"
587585 period_by = "year"
588- region = random .choice (api .contigs )
586+ region = str ( np . random .choice (api .contigs ) )
589587
590588 if max_coverage_variance >= 0.4 :
591589 # Expect this to find at least one cohort, so go ahead with full
@@ -620,7 +618,7 @@ def test_gene_cnv_frequencies_advanced_with_nobs_mode(
620618 all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
621619 area_by = "admin1_iso"
622620 period_by = "year"
623- region = random .choice (api .contigs )
621+ region = str ( np . random .choice (api .contigs ) )
624622
625623 check_gene_cnv_frequencies_advanced (
626624 api = api ,
@@ -642,7 +640,7 @@ def test_gene_cnv_frequencies_advanced_with_variant_query(
642640 all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
643641 area_by = "admin1_iso"
644642 period_by = "year"
645- region = random .choice (api .contigs )
643+ region = str ( np . random .choice (api .contigs ) )
646644 variant_query = f"cnv_type == '{ variant_query_option } '"
647645
648646 check_gene_cnv_frequencies_advanced (
@@ -710,16 +708,16 @@ def check_gene_cnv_frequencies_advanced(
710708):
711709 # Pick test parameters at random.
712710 if region is None :
713- region = random .choice (api .contigs )
711+ region = str ( np . random .choice (api .contigs ) )
714712 if area_by is None :
715- area_by = random .choice (["country" , "admin1_iso" , "admin2_name" ])
713+ area_by = str ( np . random .choice (["country" , "admin1_iso" , "admin2_name" ]) )
716714 if period_by is None :
717- period_by = random .choice (["year" , "quarter" , "month" , "random_year" ])
715+ period_by = str ( np . random .choice (["year" , "quarter" , "month" , "random_year" ]) )
718716 if sample_sets is None :
719717 all_sample_sets = api .sample_sets ()["sample_set" ].to_list ()
720- sample_sets = random .choice (all_sample_sets )
718+ sample_sets = str ( np . random .choice (all_sample_sets ) )
721719 if min_cohort_size is None :
722- min_cohort_size = random .randint (0 , 2 )
720+ min_cohort_size = int ( np . random .randint (0 , 3 ) )
723721
724722 if period_by == "random_year" :
725723 # Add a random_year column to the sample metadata, if there isn't already.
0 commit comments