Skip to content

Commit 4044a2f

Browse files
authored
Merge branch 'master' into fix/cnv-discordant-read-calls-error
2 parents 88b4850 + 73fdafe commit 4044a2f

14 files changed

Lines changed: 331 additions & 86 deletions

File tree

.github/workflows/latest_docs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
run: poetry run sphinx-build -b html docs/source docs/build/html
3131

3232
- name: Deploy HTML to GitHub Pages 🚀
33-
uses: peaceiris/actions-gh-pages@v3.9.3
33+
uses: peaceiris/actions-gh-pages@v4
3434
with:
3535
publish_branch: gh-pages
3636
github_token: ${{ secrets.GITHUB_TOKEN }}

CONTRIBUTING.md

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -200,15 +200,6 @@ poetry run pytest -v tests --typeguard-packages=malariagen_data,malariagen_data.
200200
- Address review feedback by pushing new commits to your branch
201201
- Once approved, a maintainer will merge your PR
202202

203-
## AI-assisted contributions
204-
205-
We welcome contributions that involve AI tools (like GitHub Copilot, ChatGPT, or similar). If you use AI assistance:
206-
207-
- Review and understand any AI-generated code before submitting
208-
- Ensure the code follows project conventions and passes all tests
209-
- You remain responsible for the quality and correctness of the contribution
210-
- Disclosure of AI usage is optional. Regardless of tools used, contributors remain responsible for the quality and correctness of their submissions.
211-
212203
## Communication
213204

214205
- **Issues**: Use [GitHub Issues](https://github.com/malariagen/malariagen-data-python/issues) for bug reports and feature requests

malariagen_data/ag3.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,14 @@ def _setup_aim_palettes():
7474
"gcx4": TAXON_PALETTE[10],
7575
"unassigned": "black",
7676
}
77+
# Colors for aim_species column, matching the AIM palettes.
78+
AIM_SPECIES_COLORS = {
79+
"gambiae": AIM_PALETTES["gamb_vs_colu"][1],
80+
"coluzzii": AIM_PALETTES["gamb_vs_colu"][3],
81+
"arabiensis": AIM_PALETTES["gambcolu_vs_arab"][3],
82+
"gambcolu": AIM_PALETTES["gambcolu_vs_arab"][1],
83+
"unassigned": "black",
84+
}
7785

7886
# Note: These column names will be treated as case-insensitive,
7987
# because these column names and the column names from the CSV
@@ -197,6 +205,7 @@ def __init__(
197205
storage_options=storage_options,
198206
tqdm_class=tqdm_class,
199207
taxon_colors=TAXON_COLORS,
208+
aim_species_colors=AIM_SPECIES_COLORS,
200209
virtual_contigs=VIRTUAL_CONTIGS,
201210
gene_names=GENE_NAMES,
202211
inversion_tag_path=INVERSION_TAG_PATH,

malariagen_data/anoph/aim_data.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ def plot_aim_heatmap(
208208
show: plotly_params.show = True,
209209
renderer: plotly_params.renderer = None,
210210
) -> plotly_params.figure:
211+
aims = self._prep_aims_param(aims=aims)
211212
# Load AIM calls.
212213
ds = self.aim_calls(
213214
aims=aims,

malariagen_data/anoph/describe.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import inspect
2+
from typing import Optional
3+
4+
import pandas as pd
5+
from numpydoc_decorator import doc # type: ignore
6+
7+
from .base import AnophelesBase
8+
9+
10+
class AnophelesDescribe(AnophelesBase):
    """Mixin class providing API introspection and discovery functionality.

    The public entry point is ``describe_api``, which tabulates the public
    callables defined on the instance's class together with the first line
    of their docstring and a name-based category.
    """

    @doc(
        summary="""
            List all available public API methods with their descriptions.
        """,
        returns="""
            A dataframe with one row per public method, containing the method
            name, a short summary description, and its category (data access,
            analysis, or plotting).
        """,
        parameters=dict(
            category="""
                Optional filter to show only methods of a given category.
                Supported values are "data", "analysis", "plot", or None to
                show all methods.
            """,
        ),
    )
    def describe_api(
        self,
        category: Optional[str] = None,
    ) -> pd.DataFrame:
        # Validate the filter up front so that an invalid value fails fast,
        # before any introspection work is done. Raises ValueError.
        valid_categories = ("data", "analysis", "plot")
        if category is not None and category not in valid_categories:
            raise ValueError(
                f"Invalid category: {category!r}. "
                f"Must be one of {sorted(valid_categories)}."
            )

        methods_info = []

        # Walk through all public names visible on this instance.
        for name in sorted(dir(self)):
            # Skip private/dunder names.
            if name.startswith("_"):
                continue

            # Look the name up on the class rather than the instance, so that
            # properties are seen as descriptors and are not evaluated.
            attr = getattr(type(self), name, None)
            if attr is None:
                continue

            # Keep only callables; properties and plain data attributes
            # are excluded.
            if isinstance(attr, property) or not callable(attr):
                continue

            methods_info.append(
                {
                    "method": name,
                    "summary": self._extract_summary(attr),
                    "category": self._categorize_method(name),
                }
            )

        # Fix the column set explicitly so that the frame has the expected
        # schema (and the filter below works) even if nothing was collected.
        df = pd.DataFrame(methods_info, columns=["method", "summary", "category"])

        # Apply category filter if specified.
        if category is not None:
            df = df[df["category"] == category].reset_index(drop=True)

        return df

    @staticmethod
    def _extract_summary(method) -> str:
        """Return the first non-empty docstring line of `method`, or "" if
        there is no docstring."""
        docstring = inspect.getdoc(method)
        if not docstring:
            return ""
        # Take the first non-empty line as the summary.
        for line in docstring.strip().splitlines():
            line = line.strip()
            if line:
                return line
        return ""

    @staticmethod
    def _categorize_method(name: str) -> str:
        """Categorize a method based on its name.

        Returns "plot" for names starting with "plot_", "data" for names
        starting with one of the data-access prefixes below, and "analysis"
        for everything else.
        """
        if name.startswith("plot_"):
            return "plot"
        data_prefixes = (
            "sample_",
            "snp_",
            "hap_",
            "cnv_",
            "genome_",
            "open_",
            "lookup_",
            "read_",
            "general_",
            "sequence_",
            "cohorts_",
            "aim_",
            "gene_",
        )
        # str.startswith accepts a tuple, so this is a single prefix test.
        if name.startswith(data_prefixes):
            return "data"
        return "analysis"

malariagen_data/anoph/fst.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def plot_fst_gwss_track(
235235
height=height,
236236
toolbar_location="above",
237237
x_range=x_range,
238-
y_range=(0, 1),
238+
y_range=(clip_min, 1),
239239
output_backend=output_backend,
240240
)
241241

@@ -252,7 +252,7 @@ def plot_fst_gwss_track(
252252

253253
# tidy up the plot
254254
fig.yaxis.axis_label = "Fst"
255-
fig.yaxis.ticker = [0, 1]
255+
fig.yaxis.ticker = sorted(set([clip_min, 0, 1]))
256256
self._bokeh_style_genome_xaxis(fig, contig)
257257

258258
if show: # pragma: no cover
@@ -539,11 +539,10 @@ def plot_pairwise_average_fst(
539539
if annotation == "standard error":
540540
fig_df.loc[cohort1, cohort2] = se
541541
elif annotation == "Z score":
542-
try:
543-
zs = fst / se
544-
fig_df.loc[cohort1, cohort2] = zs
545-
except ZeroDivisionError:
542+
if se == 0:
546543
fig_df.loc[cohort1, cohort2] = np.nan
544+
else:
545+
fig_df.loc[cohort1, cohort2] = fst / se
547546
else:
548547
fig_df.loc[cohort1, cohort2] = fst
549548

malariagen_data/anoph/sample_metadata.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def __init__(
3333
aim_analysis: Optional[str] = None,
3434
aim_metadata_dtype: Optional[Mapping[str, Any]] = None,
3535
taxon_colors: Optional[Mapping[str, str]] = None,
36+
aim_species_colors: Optional[Mapping[str, str]] = None,
3637
**kwargs,
3738
):
3839
# N.B., this class is designed to work cooperatively, and
@@ -73,6 +74,8 @@ def __init__(
7374
# Set up taxon colors.
7475
self._taxon_colors = taxon_colors
7576

77+
self._aim_species_colors = aim_species_colors
78+
7679
# Set up extra metadata.
7780
self._extra_metadata: List = []
7881

@@ -1304,6 +1307,11 @@ def _setup_sample_colors_plotly(
13041307
# Special case, default taxon colors and order.
13051308
color_discrete_map = self._taxon_colors
13061309

1310+
# Special handling for aim_species colors.
1311+
if color == "aim_species" and color_discrete_map is None:
1312+
# Special case, default aim_species colors and order.
1313+
color_discrete_map = self._aim_species_colors
1314+
13071315
if isinstance(color, str):
13081316
if "cohort_" + color in data.columns:
13091317
# Convenience to allow things like "admin1_year" instead of "cohort_admin1_year".

malariagen_data/anoph/snp_data.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import warnings
12
from functools import lru_cache
23
from typing import Any, Dict, List, Optional, Tuple, Union
34

@@ -1253,6 +1254,12 @@ def _snp_calls(
12531254
if max_cohort_size is not None:
12541255
n_samples = ds.sizes["samples"]
12551256
if n_samples > max_cohort_size:
1257+
warnings.warn(
1258+
f"Cohort downsampled from {n_samples} to {max_cohort_size} "
1259+
"samples. Set max_cohort_size=None to disable downsampling.",
1260+
UserWarning,
1261+
stacklevel=2,
1262+
)
12561263
rng = np.random.default_rng(seed=random_seed)
12571264
loc_downsample = rng.choice(
12581265
n_samples, size=max_cohort_size, replace=False

malariagen_data/anopheles.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from .anoph.phenotypes import AnophelesPhenotypeData
4545
from .mjn import _median_joining_network, _mjn_graph
4646
from .anoph.hapclust import AnophelesHapClustAnalysis
47+
from .anoph.describe import AnophelesDescribe
4748
from .anoph.dipclust import AnophelesDipClustAnalysis
4849
from .util import (
4950
CacheMiss,
@@ -95,6 +96,7 @@ class AnophelesDataResource(
9596
AnophelesSampleMetadata,
9697
AnophelesGenomeFeaturesData,
9798
AnophelesGenomeSequenceData,
99+
AnophelesDescribe,
98100
AnophelesBase,
99101
AnophelesPhenotypeData,
100102
):
@@ -131,12 +133,13 @@ def __init__(
131133
gff_default_attributes: Tuple[str, ...],
132134
tqdm_class,
133135
storage_options: Mapping,
134-
taxon_colors: Optional[Mapping[str, str]],
135-
virtual_contigs: Optional[Mapping[str, Sequence[str]]],
136-
gene_names: Optional[Mapping[str, str]],
137-
inversion_tag_path: Optional[str],
138-
unrestricted_use_only: Optional[bool],
139-
surveillance_use_only: Optional[bool],
136+
taxon_colors: Optional[Mapping[str, str]] = None,
137+
aim_species_colors: Optional[Mapping[str, str]] = None,
138+
virtual_contigs: Optional[Mapping[str, Sequence[str]]] = None,
139+
gene_names: Optional[Mapping[str, str]] = None,
140+
inversion_tag_path: Optional[str] = None,
141+
unrestricted_use_only: Optional[bool] = None,
142+
surveillance_use_only: Optional[bool] = None,
140143
):
141144
super().__init__(
142145
url=url,
@@ -169,6 +172,7 @@ def __init__(
169172
results_cache=results_cache,
170173
tqdm_class=tqdm_class,
171174
taxon_colors=taxon_colors,
175+
aim_species_colors=aim_species_colors,
172176
virtual_contigs=virtual_contigs,
173177
gene_names=gene_names,
174178
inversion_tag_path=inversion_tag_path,

malariagen_data/mjn.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -281,12 +281,6 @@ def _mjn_graph_edges(
281281
# add edge from final intermediate node to node j
282282
source = f"anon_{i}_{j}_{sep-2}"
283283
target = j
284-
graph_node = {
285-
"id": source,
286-
"count": 0,
287-
"width": anon_width,
288-
}
289-
graph_nodes.append(graph_node)
290284
graph_edge = {
291285
"id": f"edge_{i}_{j}_{sep-1}",
292286
"source": source,

0 commit comments

Comments
 (0)