Skip to content

Commit 94d4232

Browse files
authored
Merge branch 'master' into fix/mjn-duplicate-nodes
2 parents 947468f + d316582 commit 94d4232

6 files changed

Lines changed: 36 additions & 6 deletions

File tree

malariagen_data/ag3.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,14 @@ def _setup_aim_palettes():
7474
"gcx4": TAXON_PALETTE[10],
7575
"unassigned": "black",
7676
}
77+
# Colors for aim_species column, matching the AIM palettes.
78+
AIM_SPECIES_COLORS = {
79+
"gambiae": AIM_PALETTES["gamb_vs_colu"][1],
80+
"coluzzii": AIM_PALETTES["gamb_vs_colu"][3],
81+
"arabiensis": AIM_PALETTES["gambcolu_vs_arab"][3],
82+
"gambcolu": AIM_PALETTES["gambcolu_vs_arab"][1],
83+
"unassigned": "black",
84+
}
7785

7886
# Note: These column names will be treated as case-insensitive,
7987
# because these column names and the column names from the CSV
@@ -197,6 +205,7 @@ def __init__(
197205
storage_options=storage_options,
198206
tqdm_class=tqdm_class,
199207
taxon_colors=TAXON_COLORS,
208+
aim_species_colors=AIM_SPECIES_COLORS,
200209
virtual_contigs=VIRTUAL_CONTIGS,
201210
gene_names=GENE_NAMES,
202211
inversion_tag_path=INVERSION_TAG_PATH,

malariagen_data/anoph/aim_data.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ def plot_aim_heatmap(
208208
show: plotly_params.show = True,
209209
renderer: plotly_params.renderer = None,
210210
) -> plotly_params.figure:
211+
aims = self._prep_aims_param(aims=aims)
211212
# Load AIM calls.
212213
ds = self.aim_calls(
213214
aims=aims,

malariagen_data/anoph/sample_metadata.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def __init__(
3333
aim_analysis: Optional[str] = None,
3434
aim_metadata_dtype: Optional[Mapping[str, Any]] = None,
3535
taxon_colors: Optional[Mapping[str, str]] = None,
36+
aim_species_colors: Optional[Mapping[str, str]] = None,
3637
**kwargs,
3738
):
3839
# N.B., this class is designed to work cooperatively, and
@@ -73,6 +74,8 @@ def __init__(
7374
# Set up taxon colors.
7475
self._taxon_colors = taxon_colors
7576

77+
self._aim_species_colors = aim_species_colors
78+
7679
# Set up extra metadata.
7780
self._extra_metadata: List = []
7881

@@ -1304,6 +1307,11 @@ def _setup_sample_colors_plotly(
13041307
# Special case, default taxon colors and order.
13051308
color_discrete_map = self._taxon_colors
13061309

1310+
# Special handling for aim_species colors.
1311+
if color == "aim_species" and color_discrete_map is None:
1312+
# Special case, default aim_species colors and order.
1313+
color_discrete_map = self._aim_species_colors
1314+
13071315
if isinstance(color, str):
13081316
if "cohort_" + color in data.columns:
13091317
# Convenience to allow things like "admin1_year" instead of "cohort_admin1_year".

malariagen_data/anoph/snp_data.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import warnings
12
from functools import lru_cache
23
from typing import Any, Dict, List, Optional, Tuple, Union
34

@@ -1253,6 +1254,12 @@ def _snp_calls(
12531254
if max_cohort_size is not None:
12541255
n_samples = ds.sizes["samples"]
12551256
if n_samples > max_cohort_size:
1257+
warnings.warn(
1258+
f"Cohort downsampled from {n_samples} to {max_cohort_size} "
1259+
"samples. Set max_cohort_size=None to disable downsampling.",
1260+
UserWarning,
1261+
stacklevel=2,
1262+
)
12561263
rng = np.random.default_rng(seed=random_seed)
12571264
loc_downsample = rng.choice(
12581265
n_samples, size=max_cohort_size, replace=False

malariagen_data/anopheles.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,12 +133,13 @@ def __init__(
133133
gff_default_attributes: Tuple[str, ...],
134134
tqdm_class,
135135
storage_options: Mapping,
136-
taxon_colors: Optional[Mapping[str, str]],
137-
virtual_contigs: Optional[Mapping[str, Sequence[str]]],
138-
gene_names: Optional[Mapping[str, str]],
139-
inversion_tag_path: Optional[str],
140-
unrestricted_use_only: Optional[bool],
141-
surveillance_use_only: Optional[bool],
136+
taxon_colors: Optional[Mapping[str, str]] = None,
137+
aim_species_colors: Optional[Mapping[str, str]] = None,
138+
virtual_contigs: Optional[Mapping[str, Sequence[str]]] = None,
139+
gene_names: Optional[Mapping[str, str]] = None,
140+
inversion_tag_path: Optional[str] = None,
141+
unrestricted_use_only: Optional[bool] = None,
142+
surveillance_use_only: Optional[bool] = None,
142143
):
143144
super().__init__(
144145
url=url,
@@ -171,6 +172,7 @@ def __init__(
171172
results_cache=results_cache,
172173
tqdm_class=tqdm_class,
173174
taxon_colors=taxon_colors,
175+
aim_species_colors=aim_species_colors,
174176
virtual_contigs=virtual_contigs,
175177
gene_names=gene_names,
176178
inversion_tag_path=inversion_tag_path,

tests/anoph/test_dipclust.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ def random_transcripts_contig(*, api, contig, n):
1111
df_gff = api.genome_features(attributes=["ID", "Parent"])
1212
df_transcripts = df_gff.query(f"type == 'mRNA' and contig == '{contig}'")
1313
transcript_ids = df_transcripts["ID"].dropna().to_list()
14+
n = min(n, len(transcript_ids))
15+
if n == 0:
16+
pytest.skip(f"No mRNA transcripts found for contig '{contig}'")
1417
transcripts = random.sample(transcript_ids, n)
1518
return transcripts
1619

0 commit comments

Comments
 (0)