Skip to content

Commit 303b9f4

Browse files
Merge branch 'master' into test-veff-1138
2 parents 1e4a40a + 97a16e1 commit 303b9f4

9 files changed

Lines changed: 44 additions & 59 deletions

File tree

malariagen_data/adar1.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -105,8 +105,6 @@ def __init__(
105105
cohorts_analysis=cohorts_analysis,
106106
aim_analysis=None,
107107
aim_metadata_dtype=None,
108-
aim_ids=None,
109-
aim_palettes=None,
110108
site_filters_analysis=site_filters_analysis,
111109
discordant_read_calls_analysis=discordant_read_calls_analysis,
112110
default_site_mask="darlingi",
@@ -123,14 +121,10 @@ def __init__(
123121
gcs_region_urls=GCS_REGION_URLS,
124122
major_version_number=MAJOR_VERSION_NUMBER,
125123
major_version_path=MAJOR_VERSION_PATH,
126-
gff_gene_type="gene",
127-
gff_gene_name_attribute="Note",
128-
gff_default_attributes=("ID", "Parent", "Note", "description"),
129124
storage_options=storage_options,
130125
tqdm_class=tqdm_class,
131126
taxon_colors=TAXON_COLORS,
132127
virtual_contigs=None,
133-
gene_names=None,
134128
inversion_tag_path=None,
135129
unrestricted_use_only=unrestricted_use_only,
136130
surveillance_use_only=surveillance_use_only,

malariagen_data/adir1.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -105,8 +105,6 @@ def __init__(
105105
cohorts_analysis=cohorts_analysis,
106106
aim_analysis=None,
107107
aim_metadata_dtype=None,
108-
aim_ids=None,
109-
aim_palettes=None,
110108
site_filters_analysis=site_filters_analysis,
111109
discordant_read_calls_analysis=discordant_read_calls_analysis,
112110
default_site_mask="dirus",
@@ -123,14 +121,10 @@ def __init__(
123121
gcs_region_urls=GCS_REGION_URLS,
124122
major_version_number=MAJOR_VERSION_NUMBER,
125123
major_version_path=MAJOR_VERSION_PATH,
126-
gff_gene_type="protein_coding_gene",
127-
gff_gene_name_attribute="Note",
128-
gff_default_attributes=("ID", "Parent", "Note", "description"),
129124
storage_options=storage_options,
130125
tqdm_class=tqdm_class,
131126
taxon_colors=TAXON_COLORS,
132127
virtual_contigs=None,
133-
gene_names=None,
134128
inversion_tag_path=None,
135129
unrestricted_use_only=unrestricted_use_only,
136130
surveillance_use_only=surveillance_use_only,

malariagen_data/af1.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -107,8 +107,6 @@ def __init__(
107107
cohorts_analysis=cohorts_analysis,
108108
aim_analysis=None,
109109
aim_metadata_dtype=None,
110-
aim_ids=None,
111-
aim_palettes=None,
112110
site_filters_analysis=site_filters_analysis,
113111
discordant_read_calls_analysis=discordant_read_calls_analysis,
114112
default_site_mask="funestus",
@@ -125,14 +123,10 @@ def __init__(
125123
gcs_region_urls=GCS_REGION_URLS,
126124
major_version_number=MAJOR_VERSION_NUMBER,
127125
major_version_path=MAJOR_VERSION_PATH,
128-
gff_gene_type="protein_coding_gene",
129-
gff_gene_name_attribute="Note",
130-
gff_default_attributes=("ID", "Parent", "Note", "description"),
131126
storage_options=storage_options,
132127
tqdm_class=tqdm_class,
133128
taxon_colors=TAXON_COLORS,
134129
virtual_contigs=None,
135-
gene_names=None,
136130
inversion_tag_path=None,
137131
unrestricted_use_only=unrestricted_use_only,
138132
surveillance_use_only=surveillance_use_only,

malariagen_data/ag3.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -186,8 +186,6 @@ def __init__(
186186
cohorts_analysis=cohorts_analysis,
187187
aim_analysis=aim_analysis,
188188
aim_metadata_dtype=AIM_METADATA_DTYPE,
189-
aim_ids=("gambcolu_vs_arab", "gamb_vs_colu"),
190-
aim_palettes=AIM_PALETTES,
191189
site_filters_analysis=site_filters_analysis,
192190
discordant_read_calls_analysis=discordant_read_calls_analysis,
193191
default_site_mask="gamb_colu_arab",
@@ -204,15 +202,11 @@ def __init__(
204202
gcs_region_urls=GCS_REGION_URLS,
205203
major_version_number=MAJOR_VERSION_NUMBER,
206204
major_version_path=MAJOR_VERSION_PATH,
207-
gff_gene_type="gene",
208-
gff_gene_name_attribute="Name",
209-
gff_default_attributes=("ID", "Parent", "Name", "description"),
210205
storage_options=storage_options,
211206
tqdm_class=tqdm_class,
212207
taxon_colors=TAXON_COLORS,
213208
aim_species_colors=AIM_SPECIES_COLORS,
214209
virtual_contigs=VIRTUAL_CONTIGS,
215-
gene_names=GENE_NAMES,
216210
inversion_tag_path=INVERSION_TAG_PATH,
217211
unrestricted_use_only=unrestricted_use_only,
218212
surveillance_use_only=surveillance_use_only,

malariagen_data/amin1.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -105,8 +105,6 @@ def __init__(
105105
cohorts_analysis=cohorts_analysis,
106106
aim_analysis=None,
107107
aim_metadata_dtype=None,
108-
aim_ids=None,
109-
aim_palettes=None,
110108
site_filters_analysis=site_filters_analysis,
111109
discordant_read_calls_analysis=discordant_read_calls_analysis,
112110
default_site_mask="minimus",
@@ -123,14 +121,10 @@ def __init__(
123121
gcs_region_urls=GCS_REGION_URLS,
124122
major_version_number=MAJOR_VERSION_NUMBER,
125123
major_version_path=MAJOR_VERSION_PATH,
126-
gff_gene_type="protein_coding_gene",
127-
gff_gene_name_attribute="Note",
128-
gff_default_attributes=("ID", "Parent", "Note", "description"),
129124
storage_options=storage_options, # used by fsspec via init_filesystem()
130125
tqdm_class=tqdm_class,
131126
taxon_colors=TAXON_COLORS,
132127
virtual_contigs=None,
133-
gene_names=None,
134128
inversion_tag_path=None,
135129
unrestricted_use_only=unrestricted_use_only,
136130
surveillance_use_only=surveillance_use_only,

malariagen_data/anoph/aim_data.py

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -40,10 +40,24 @@ def __init__(
4040
# to the superclass constructor.
4141
super().__init__(**kwargs)
4242

43-
# Store possible values for the `aims` parameter.
44-
# TODO Consider moving this to data resource configuration.
45-
self._aim_ids = aim_ids
46-
self._aim_palettes = aim_palettes
43+
# Read AIM parameters from the JSON config, falling back to
44+
# constructor args for backward compatibility.
45+
config = self.config
46+
_aim_ids = config.get("AIM_IDS", None)
47+
if _aim_ids is not None:
48+
self._aim_ids: Optional[aim_params.aim_ids] = tuple(_aim_ids)
49+
else:
50+
self._aim_ids = aim_ids
51+
52+
_aim_palettes = config.get("AIM_PALETTES", None)
53+
if _aim_palettes is not None:
54+
# Convert lists to tuples for each palette entry.
55+
self._aim_palettes: Optional[aim_params.aim_palettes] = {
56+
k: tuple(v)
57+
for k, v in _aim_palettes.items() # type: ignore
58+
}
59+
else:
60+
self._aim_palettes = aim_palettes
4761

4862
# Set up caches.
4963
self._cache_aim_variants: Dict[str, xr.Dataset] = dict()

malariagen_data/anoph/genome_features.py

Lines changed: 25 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,9 @@ class AnophelesGenomeFeaturesData(AnophelesGenomeSequenceData):
2323
def __init__(
2424
self,
2525
*,
26-
gff_gene_type: str,
27-
gff_gene_name_attribute: str,
28-
gff_default_attributes: Tuple[str, ...],
26+
gff_gene_type: Optional[str] = None,
27+
gff_gene_name_attribute: Optional[str] = None,
28+
gff_default_attributes: Optional[Tuple[str, ...]] = None,
2929
gene_names: Optional[Mapping[str, str]] = None,
3030
**kwargs,
3131
):
@@ -34,16 +34,30 @@ def __init__(
3434
# to the superclass constructor.
3535
super().__init__(**kwargs)
3636

37-
# TODO Consider moving these parameters to configuration, as they could
38-
# change if the GFF ever changed.
39-
self._gff_gene_type = gff_gene_type
40-
self._gff_gene_name_attribute = gff_gene_name_attribute
41-
self._gff_default_attributes = gff_default_attributes
37+
# Read GFF parameters from the JSON config, falling back to
38+
# constructor args for backward compatibility.
39+
config = self.config
40+
self._gff_gene_type = config.get("GFF_GENE_TYPE", gff_gene_type or "gene")
41+
self._gff_gene_name_attribute = config.get(
42+
"GFF_GENE_NAME_ATTRIBUTE", gff_gene_name_attribute or "Name"
43+
)
44+
_default_attrs = config.get("GFF_DEFAULT_ATTRIBUTES", None)
45+
if _default_attrs is not None:
46+
self._gff_default_attributes = tuple(_default_attrs)
47+
elif gff_default_attributes is not None:
48+
self._gff_default_attributes = gff_default_attributes
49+
else:
50+
self._gff_default_attributes = ("ID", "Parent", "Name", "description")
4251

4352
# Allow manual override of gene names.
44-
if gene_names is None:
45-
gene_names = dict()
46-
self._gene_name_overrides = gene_names
53+
# Read from config if available, falling back to constructor arg.
54+
_gene_names = config.get("GENE_NAMES", None)
55+
if _gene_names is not None:
56+
self._gene_name_overrides = _gene_names
57+
elif gene_names is not None:
58+
self._gene_name_overrides = gene_names
59+
else:
60+
self._gene_name_overrides = dict()
4761

4862
# Setup caches.
4963
self._cache_genome_features: Dict[Tuple[str, ...], pd.DataFrame] = dict()

malariagen_data/anopheles.py

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414

1515

1616
from .anoph import (
17-
aim_params,
1817
base_params,
1918
dash_params,
2019
gplt_params,
@@ -110,8 +109,6 @@ def __init__(
110109
cohorts_analysis: Optional[str],
111110
aim_analysis: Optional[str],
112111
aim_metadata_dtype: Optional[Mapping[str, Any]],
113-
aim_ids: Optional[aim_params.aim_ids],
114-
aim_palettes: Optional[aim_params.aim_palettes],
115112
site_filters_analysis: Optional[str],
116113
discordant_read_calls_analysis: Optional[str],
117114
default_site_mask: Optional[str],
@@ -128,15 +125,11 @@ def __init__(
128125
gcs_region_urls: Mapping[str, str],
129126
major_version_number: int,
130127
major_version_path: str,
131-
gff_gene_type: str,
132-
gff_gene_name_attribute: str,
133-
gff_default_attributes: Tuple[str, ...],
134128
tqdm_class,
135129
storage_options: Mapping,
136130
taxon_colors: Optional[Mapping[str, str]] = None,
137131
aim_species_colors: Optional[Mapping[str, str]] = None,
138132
virtual_contigs: Optional[Mapping[str, Sequence[str]]] = None,
139-
gene_names: Optional[Mapping[str, str]] = None,
140133
inversion_tag_path: Optional[str] = None,
141134
unrestricted_use_only: Optional[bool] = None,
142135
surveillance_use_only: Optional[bool] = None,
@@ -156,14 +149,9 @@ def __init__(
156149
major_version_number=major_version_number,
157150
major_version_path=major_version_path,
158151
storage_options=storage_options,
159-
gff_gene_type=gff_gene_type,
160-
gff_gene_name_attribute=gff_gene_name_attribute,
161-
gff_default_attributes=gff_default_attributes,
162152
cohorts_analysis=cohorts_analysis,
163153
aim_analysis=aim_analysis,
164154
aim_metadata_dtype=aim_metadata_dtype,
165-
aim_ids=aim_ids,
166-
aim_palettes=aim_palettes,
167155
site_filters_analysis=site_filters_analysis,
168156
discordant_read_calls_analysis=discordant_read_calls_analysis,
169157
default_site_mask=default_site_mask,
@@ -174,7 +162,6 @@ def __init__(
174162
taxon_colors=taxon_colors,
175163
aim_species_colors=aim_species_colors,
176164
virtual_contigs=virtual_contigs,
177-
gene_names=gene_names,
178165
inversion_tag_path=inversion_tag_path,
179166
unrestricted_use_only=unrestricted_use_only,
180167
surveillance_use_only=surveillance_use_only,

malariagen_data/util.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -361,7 +361,7 @@ def _dask_compress_dataarray(a, indexer, indexer_computed, dim):
361361

362362

363363
def _da_compress(
364-
indexer: da.Array | np.ndarray,
364+
indexer: Union[da.Array, np.ndarray],
365365
data: da.Array,
366366
axis: int,
367367
indexer_computed: Optional[np.ndarray] = None,

0 commit comments

Comments (0)