Skip to content

Commit a168e03

Browse files
committed
Making progress through alimanfoo's comments
1 parent 304eff0 commit a168e03

4 files changed

Lines changed: 35 additions & 37 deletions

File tree

malariagen_data/anoph/hap_frq.py

Lines changed: 13 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,6 @@ def __init__(
2525
# to the superclass constructor.
2626
super().__init__(**kwargs)
2727

28-
# Set up cache variables.
29-
self._cache_annotator = None
30-
3128
@check_types
3229
@doc(
3330
summary="""
@@ -46,37 +43,30 @@ def haplotypes_frequencies(
4643
region: base_params.region,
4744
cohorts: base_params.cohorts,
4845
sample_query: Optional[base_params.sample_query] = None,
46+
sample_query_options: Optional[base_params.sample_query_options] = None,
4947
min_cohort_size: base_params.min_cohort_size = 10,
5048
sample_sets: Optional[base_params.sample_sets] = None,
5149
chunks: base_params.chunks = base_params.native_chunks,
5250
inline_array: base_params.inline_array = base_params.inline_array_default,
5351
) -> pd.DataFrame:
5452
# Access sample metadata.
5553
df_samples = self.sample_metadata(
56-
sample_sets=sample_sets, sample_query=sample_query
54+
sample_sets=sample_sets,
55+
sample_query=sample_query,
56+
sample_query_options=sample_query_options,
5757
)
5858

5959
# Build cohort dictionary, maps cohort labels to boolean indexers.
60-
coh_dict = locate_cohorts(cohorts=cohorts, data=df_samples)
61-
62-
# Remove cohorts below minimum cohort size.
63-
coh_dict = {
64-
coh: loc_coh
65-
for coh, loc_coh in coh_dict.items()
66-
if np.count_nonzero(loc_coh) >= min_cohort_size
67-
}
68-
69-
# Early check for no cohorts.
70-
if len(coh_dict) == 0:
71-
raise ValueError(
72-
"No cohorts available for the given sample selection parameters and minimum cohort size."
73-
)
60+
coh_dict = locate_cohorts(
61+
cohorts=cohorts, data=df_samples, min_cohort_size=min_cohort_size
62+
)
7463

7564
# Access haplotypes.
7665
ds_haps = self.haplotypes(
7766
region=region,
7867
sample_sets=sample_sets,
7968
sample_query=sample_query,
69+
sample_query_options=sample_query_options,
8070
chunks=chunks,
8171
inline_array=inline_array,
8272
)
@@ -152,14 +142,17 @@ def haplotypes_frequencies_advanced(
152142
period_by: frq_params.period_by,
153143
sample_sets: Optional[base_params.sample_sets] = None,
154144
sample_query: Optional[base_params.sample_query] = None,
145+
sample_query_options: Optional[base_params.sample_query_options] = None,
155146
min_cohort_size: base_params.min_cohort_size = 10,
156147
ci_method: Optional[frq_params.ci_method] = frq_params.ci_method_default,
157148
chunks: base_params.chunks = base_params.native_chunks,
158149
inline_array: base_params.inline_array = base_params.inline_array_default,
159150
) -> xr.Dataset:
160151
# Load sample metadata.
161152
df_samples = self.sample_metadata(
162-
sample_sets=sample_sets, sample_query=sample_query
153+
sample_sets=sample_sets,
154+
sample_query=sample_query,
155+
sample_query_options=sample_query_options,
163156
)
164157

165158
# Prepare sample metadata for cohort grouping.
@@ -189,6 +182,7 @@ def haplotypes_frequencies_advanced(
189182
region=region,
190183
sample_sets=sample_sets,
191184
sample_query=sample_query,
185+
sample_query_options=sample_query_options,
192186
chunks=chunks,
193187
inline_array=inline_array,
194188
)

malariagen_data/anoph/sample_metadata.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1343,7 +1343,7 @@ def plot_sample_location_geo(
13431343
return fig
13441344

13451345

1346-
def locate_cohorts(*, cohorts, data):
1346+
def locate_cohorts(*, cohorts, data, min_cohort_size):
13471347
# Build cohort dictionary where key=cohort_id, value=loc_coh.
13481348
coh_dict = {}
13491349

@@ -1374,4 +1374,17 @@ def locate_cohorts(*, cohorts, data):
13741374
loc_coh = data[cohorts] == coh
13751375
coh_dict[coh] = loc_coh.values
13761376

1377+
# Remove cohorts below minimum cohort size.
1378+
coh_dict = {
1379+
coh: loc_coh
1380+
for coh, loc_coh in coh_dict.items()
1381+
if np.count_nonzero(loc_coh) >= min_cohort_size
1382+
}
1383+
1384+
# Early check for no cohorts.
1385+
if len(coh_dict) == 0:
1386+
raise ValueError(
1387+
"No cohorts available for the given sample selection parameters and minimum cohort size."
1388+
)
1389+
13771390
return coh_dict

malariagen_data/anoph/snp_frq.py

Lines changed: 3 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -149,20 +149,9 @@ def snp_allele_frequencies(
149149
)
150150

151151
# Build cohort dictionary, maps cohort labels to boolean indexers.
152-
coh_dict = locate_cohorts(cohorts=cohorts, data=df_samples)
153-
154-
# Remove cohorts below minimum cohort size.
155-
coh_dict = {
156-
coh: loc_coh
157-
for coh, loc_coh in coh_dict.items()
158-
if np.count_nonzero(loc_coh) >= min_cohort_size
159-
}
160-
161-
# Early check for no cohorts.
162-
if len(coh_dict) == 0:
163-
raise ValueError(
164-
"No cohorts available for the given sample selection parameters and minimum cohort size."
165-
)
152+
coh_dict = locate_cohorts(
153+
cohorts=cohorts, data=df_samples, min_cohort_size=min_cohort_size
154+
)
166155

167156
# Access SNP data.
168157
ds_snp = self.snp_calls(

malariagen_data/anopheles.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
from .anoph.cnv_data import AnophelesCnvData
4141
from .anoph.genome_features import AnophelesGenomeFeaturesData
4242
from .anoph.genome_sequence import AnophelesGenomeSequenceData
43-
from .anoph.hap_data import AnophelesHapData, hap_params
43+
from .anoph.hap_data import hap_params
4444
from .anoph.hap_frq import AnophelesHapFrequencyAnalysis
4545
from .anoph.igv import AnophelesIgv
4646
from .anoph.pca import AnophelesPca
@@ -106,7 +106,7 @@ class AnophelesDataResource(
106106
AnophelesAimData,
107107
AnophelesSnpData,
108108
AnophelesCnvData,
109-
AnophelesHapData,
109+
# AnophelesHapData,
110110
AnophelesSampleMetadata,
111111
AnophelesGenomeFeaturesData,
112112
AnophelesGenomeSequenceData,
@@ -1152,7 +1152,9 @@ def _gene_cnv_frequencies(
11521152
is_called = cn >= 0
11531153

11541154
debug("set up cohort dict")
1155-
coh_dict = locate_cohorts(cohorts=cohorts, data=df_samples)
1155+
coh_dict = locate_cohorts(
1156+
cohorts=cohorts, data=df_samples, min_cohort_size=min_cohort_size
1157+
)
11561158

11571159
debug("compute cohort frequencies")
11581160
freq_cols = dict()

0 commit comments

Comments
 (0)