Skip to content

Commit d9ba5eb

Browse files
committed
Call validate_sample_selection_params in functions that accept both sample_query and sample_indices. Add the sample_query_options parameter to biallelic_snps_to_plink.
1 parent 1177e63 commit d9ba5eb

4 files changed

Lines changed: 44 additions & 2 deletions

File tree

malariagen_data/anoph/distance.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@ def biallelic_diplotype_pairwise_distances(
115115
# invalidate any previously cached data.
116116
name = "biallelic_diplotype_pairwise_distances"
117117

118+
# Check that sample_query and sample_indices are not both provided.
119+
base_params.validate_sample_selection_params(
120+
sample_query=sample_query, sample_indices=sample_indices
121+
)
122+
118123
## Normalize params for consistent hash value.
119124

120125
# Note: `_prep_sample_selection_cache_params` converts `sample_query` and `sample_query_options` into `sample_indices`.
@@ -272,6 +277,11 @@ def njt(
272277
# invalidate any previously cached data.
273278
name = "njt_v1"
274279

280+
# Check that sample_query and sample_indices are not both provided.
281+
base_params.validate_sample_selection_params(
282+
sample_query=sample_query, sample_indices=sample_indices
283+
)
284+
275285
## Normalize params for consistent hash value.
276286

277287
# Note: `_prep_sample_selection_cache_params` converts `sample_query` and `sample_query_options` into `sample_indices`.
@@ -458,6 +468,11 @@ def plot_njt(
458468
inline_array: base_params.inline_array = base_params.inline_array_default,
459469
chunks: base_params.chunks = base_params.native_chunks,
460470
) -> plotly_params.figure:
471+
# Check that sample_query and sample_indices are not both provided.
472+
base_params.validate_sample_selection_params(
473+
sample_query=sample_query, sample_indices=sample_indices
474+
)
475+
461476
# Only import anjl if needed, as it requires a couple of seconds to compile
462477
# functions.
463478
import anjl # type: ignore

malariagen_data/anoph/pca.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ def pca(
8282
# invalidate any previously cached data.
8383
name = "pca_v5"
8484

85+
# Check that sample_query and sample_indices are not both provided.
86+
base_params.validate_sample_selection_params(
87+
sample_query=sample_query, sample_indices=sample_indices
88+
)
89+
8590
## Normalize params for consistent hash value.
8691

8792
# Note: `_prep_sample_selection_cache_params` converts `sample_query` and `sample_query_options` into `sample_indices`.

malariagen_data/anoph/snp_data.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@ def snp_genotypes(
456456
inline_array: base_params.inline_array = base_params.inline_array_default,
457457
chunks: base_params.chunks = base_params.native_chunks,
458458
) -> da.Array:
459-
# Additional parameter checks.
459+
# Check that sample_query and sample_indices are not both provided.
460460
base_params.validate_sample_selection_params(
461461
sample_query=sample_query, sample_indices=sample_indices
462462
)
@@ -1001,7 +1001,7 @@ def snp_calls(
10011001
max_cohort_size: Optional[base_params.max_cohort_size] = None,
10021002
random_seed: base_params.random_seed = 42,
10031003
) -> xr.Dataset:
1004-
# Additional parameter checks.
1004+
# Check that sample_query and sample_indices are not both provided.
10051005
base_params.validate_sample_selection_params(
10061006
sample_query=sample_query, sample_indices=sample_indices
10071007
)
@@ -1302,6 +1302,11 @@ def snp_allele_counts(
13021302
# to invalidate any previously cached data.
13031303
name = "snp_allele_counts_v2"
13041304

1305+
# Check that sample_query and sample_indices are not both provided.
1306+
base_params.validate_sample_selection_params(
1307+
sample_query=sample_query, sample_indices=sample_indices
1308+
)
1309+
13051310
## Normalize params for consistent hash value.
13061311

13071312
# Note: `_prep_sample_selection_cache_params` converts `sample_query` and `sample_query_options` into `sample_indices`.
@@ -1688,6 +1693,11 @@ def biallelic_snp_calls(
16881693
n_snps: Optional[base_params.n_snps] = None,
16891694
thin_offset: base_params.thin_offset = 0,
16901695
) -> xr.Dataset:
1696+
# Check that sample_query and sample_indices are not both provided.
1697+
base_params.validate_sample_selection_params(
1698+
sample_query=sample_query, sample_indices=sample_indices
1699+
)
1700+
16911701
# Perform an allele count.
16921702
ac = self.snp_allele_counts(
16931703
region=region,
@@ -1849,6 +1859,11 @@ def biallelic_diplotypes(
18491859
# invalidate any previously cached data.
18501860
name = "biallelic_diplotypes"
18511861

1862+
# Check that sample_query and sample_indices are not both provided.
1863+
base_params.validate_sample_selection_params(
1864+
sample_query=sample_query, sample_indices=sample_indices
1865+
)
1866+
18521867
## Normalize params for consistent hash value.
18531868

18541869
# Note: `_prep_sample_selection_cache_params` converts `sample_query` and `sample_query_options` into `sample_indices`.

malariagen_data/anoph/to_plink.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def biallelic_snps_to_plink(
5757
thin_offset: base_params.thin_offset = 0,
5858
sample_sets: Optional[base_params.sample_sets] = None,
5959
sample_query: Optional[base_params.sample_query] = None,
60+
sample_query_options: Optional[base_params.sample_query_options] = None,
6061
sample_indices: Optional[base_params.sample_indices] = None,
6162
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
6263
min_minor_ac: Optional[
@@ -69,6 +70,11 @@ def biallelic_snps_to_plink(
6970
inline_array: base_params.inline_array = base_params.inline_array_default,
7071
chunks: base_params.chunks = base_params.native_chunks,
7172
):
73+
# Check that sample_query and sample_indices are not both provided.
74+
base_params.validate_sample_selection_params(
75+
sample_query=sample_query, sample_indices=sample_indices
76+
)
77+
7278
# Define output files
7379
plink_file_path = f"{output_dir}/{region}.{n_snps}.{min_minor_ac}.{max_missing_an}.{thin_offset}"
7480

@@ -84,6 +90,7 @@ def biallelic_snps_to_plink(
8490
region=region,
8591
sample_sets=sample_sets,
8692
sample_query=sample_query,
93+
sample_query_options=sample_query_options,
8794
sample_indices=sample_indices,
8895
site_mask=site_mask,
8996
min_minor_ac=min_minor_ac,

0 commit comments

Comments
 (0)