1+ import warnings
12from typing import Tuple , Optional
23
34import numpy as np
@@ -43,6 +44,8 @@ def _fst_gwss(
4344 inline_array ,
4445 chunks ,
4546 clip_min ,
47+ min_snps_threshold ,
48+ window_adjustment_factor ,
4649 ):
4750 # Compute allele counts.
4851 ac1 = self .snp_allele_counts (
@@ -81,6 +84,24 @@ def _fst_gwss(
8184 chunks = chunks ,
8285 ).compute ()
8386
87+ n_snps = len (pos )
88+ if n_snps < min_snps_threshold :
89+ raise ValueError (
90+ f"Too few SNP sites ({ n_snps } ) available for Fst GWSS. "
91+ f"At least { min_snps_threshold } sites are required. "
92+ "Try a larger genomic region or different site selection criteria."
93+ )
94+ if window_size >= n_snps :
95+ adjusted_window_size = max (1 , n_snps // window_adjustment_factor )
96+ warnings .warn (
97+ f"window_size ({ window_size } ) is >= the number of SNP sites "
98+ f"available ({ n_snps } ); automatically adjusting window_size to "
99+ f"{ adjusted_window_size } (= { n_snps } // { window_adjustment_factor } )." ,
100+ UserWarning ,
101+ stacklevel = 2 ,
102+ )
103+ window_size = adjusted_window_size
104+
84105 with self ._spinner (desc = "Compute Fst" ):
85106 with np .errstate (divide = "ignore" , invalid = "ignore" ):
86107 fst = allel .moving_hudson_fst (ac1 , ac2 , size = window_size )
@@ -96,8 +117,23 @@ def _fst_gwss(
96117 @doc (
97118 summary = """
98119 Run a Fst genome-wide scan to investigate genetic differentiation
99- between two cohorts.
120+ between two cohorts. If window_size is >= the number of available
121+ SNP sites, a UserWarning is issued and window_size is automatically
122+ adjusted to number_of_snps // window_adjustment_factor. A ValueError
123+ is raised if the number of available SNP sites is below
124+ min_snps_threshold.
100125 """ ,
126+ parameters = dict (
127+ min_snps_threshold = """
128+ Minimum number of SNP sites required. If fewer sites are
129+ available a ValueError is raised.
130+ """ ,
131+ window_adjustment_factor = """
132+ If window_size is >= the number of available SNP sites,
133+ window_size is automatically set to
134+ number_of_snps // window_adjustment_factor.
135+ """ ,
136+ ),
101137 returns = dict (
102138 x = "An array containing the window centre point genomic positions" ,
103139 fst = "An array with Fst statistic values for each window." ,
@@ -123,6 +159,8 @@ def fst_gwss(
123159 inline_array : base_params .inline_array = base_params .inline_array_default ,
124160 chunks : base_params .chunks = base_params .native_chunks ,
125161 clip_min : fst_params .clip_min = 0.0 ,
162+ min_snps_threshold : fst_params .min_snps_threshold = 1000 ,
163+ window_adjustment_factor : fst_params .window_adjustment_factor = 10 ,
126164 ) -> Tuple [np .ndarray , np .ndarray ]:
127165 # Change this name if you ever change the behaviour of this function, to
128166 # invalidate any previously cached data.
@@ -147,7 +185,13 @@ def fst_gwss(
147185 results = self .results_cache_get (name = name , params = params )
148186
149187 except CacheMiss :
150- results = self ._fst_gwss (** params , inline_array = inline_array , chunks = chunks )
188+ results = self ._fst_gwss (
189+ ** params ,
190+ inline_array = inline_array ,
191+ chunks = chunks ,
192+ min_snps_threshold = min_snps_threshold ,
193+ window_adjustment_factor = window_adjustment_factor ,
194+ )
151195 self .results_cache_set (name = name , params = params , results = results )
152196
153197 x = results ["x" ]
0 commit comments