Skip to content

Commit 7354522

Browse files
authored
Merge branch 'master' into refactor/het-analysis
2 parents 0afc432 + 147329a commit 7354522

10 files changed

Lines changed: 347 additions & 88 deletions

File tree

malariagen_data/anoph/describe.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import inspect
2+
from typing import Optional
3+
4+
import pandas as pd
5+
from numpydoc_decorator import doc # type: ignore
6+
7+
from .base import AnophelesBase
8+
9+
10+
class AnophelesDescribe(AnophelesBase):
    """Mixin class providing API introspection and discovery functionality.

    The public entry point is ``describe_api``, which tabulates the public
    callables found on the concrete class together with the first line of
    their docstring and a category derived from the method name.
    """

    @doc(
        summary="""
            List all available public API methods with their descriptions.
        """,
        returns="""
            A dataframe with one row per public method, containing the method
            name ("method"), a short summary description ("summary"), and its
            category ("category"), which is one of "data", "analysis" or
            "plot".
        """,
        parameters=dict(
            category="""
                Optional filter to show only methods of a given category.
                Supported values are "data", "analysis", "plot", or None to
                show all methods. Any other value raises a ValueError.
            """,
        ),
    )
    def describe_api(
        self,
        category: Optional[str] = None,
    ) -> pd.DataFrame:
        # Validate the filter up front so that an invalid value fails fast,
        # before any introspection is done. A tuple (rather than a set) keeps
        # the order of the options in the error message deterministic and
        # lets unhashable values reach the ValueError below.
        valid_categories = ("data", "analysis", "plot")
        if category is not None and category not in valid_categories:
            options = ", ".join(repr(c) for c in valid_categories)
            raise ValueError(
                f"Invalid category: {category!r}. Must be one of {options}."
            )

        cls = type(self)
        methods_info = []

        # Walk through all public names visible on this instance.
        for name in sorted(dir(self)):
            # Skip private/dunder names.
            if name.startswith("_"):
                continue

            # Resolve the name on the class rather than on the instance, so
            # that properties come back as `property` objects instead of
            # being evaluated. Names that only exist on the instance resolve
            # to None here and are skipped.
            attr = getattr(cls, name, None)
            if attr is None or isinstance(attr, property) or not callable(attr):
                continue

            methods_info.append(
                {
                    "method": name,
                    "summary": self._extract_summary(attr),
                    "category": self._categorize_method(name),
                }
            )

        # Name the columns explicitly so that the frame keeps its schema
        # (and the filter below keeps working) even if no method was found.
        df = pd.DataFrame(methods_info, columns=["method", "summary", "category"])

        # Apply category filter if specified.
        if category is not None:
            df = df[df["category"] == category].reset_index(drop=True)

        return df

    @staticmethod
    def _extract_summary(method) -> str:
        """Return the first non-empty docstring line of `method`.

        An empty string is returned when `inspect.getdoc` finds no docstring
        or the docstring contains only whitespace.
        """
        docstring = inspect.getdoc(method)
        if not docstring:
            return ""
        stripped_lines = (line.strip() for line in docstring.strip().splitlines())
        return next((line for line in stripped_lines if line), "")

    @staticmethod
    def _categorize_method(name: str) -> str:
        """Categorize a method based on its name.

        Returns "plot" for names starting with "plot_", "data" for names
        starting with one of the data-access prefixes listed below, and
        "analysis" for everything else.
        """
        if name.startswith("plot_"):
            return "plot"
        data_prefixes = (
            "sample_",
            "snp_",
            "hap_",
            "cnv_",
            "genome_",
            "open_",
            "lookup_",
            "read_",
            "general_",
            "sequence_",
            "cohorts_",
            "aim_",
            "gene_",
        )
        # `str.startswith` accepts a tuple, so this is a single call.
        if name.startswith(data_prefixes):
            return "data"
        return "analysis"

malariagen_data/anoph/dipclust.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -542,10 +542,10 @@ def _dipclust_concat_subplots(
542542
if isinstance(figure, go.Figure):
543543
# This is a figure, access the traces within it.
544544
for trace in range(len(figure["data"])):
545-
fig.append_trace(figure["data"][trace], row=i + 1, col=1)
545+
fig.add_trace(figure["data"][trace], row=i + 1, col=1)
546546
else:
547547
# Assume this is a trace, add directly.
548-
fig.append_trace(figure, row=i + 1, col=1)
548+
fig.add_trace(figure, row=i + 1, col=1)
549549

550550
fig.update_xaxes(visible=False)
551551
fig.update_layout(

malariagen_data/anoph/frq_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ def plot_frequencies_heatmap(
276276
heatmap_df.set_index(index_col, inplace=True)
277277

278278
# Clean column names.
279-
heatmap_df.columns = heatmap_df.columns.str.lstrip("frq_")
279+
heatmap_df.columns = heatmap_df.columns.str.removeprefix("frq_")
280280

281281
# Deal with width and height.
282282
if width is None:

malariagen_data/anoph/sample_metadata.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,11 +1569,11 @@ def plot_sample_location_mapbox(
15691569
# Sort by `color` column by default, which can be overridden via category_orders.
15701570
df_locations = df_samples[location_columns].drop_duplicates().sort_values(color)
15711571

1572-
fig = px.scatter_mapbox(
1572+
fig = px.scatter_map(
15731573
df_locations,
15741574
lat="latitude",
15751575
lon="longitude",
1576-
mapbox_style="open-street-map",
1576+
map_style="open-street-map",
15771577
zoom=zoom,
15781578
color=color,
15791579
category_orders=category_orders,

malariagen_data/anopheles.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from .anoph.phenotypes import AnophelesPhenotypeData
4545
from .mjn import _median_joining_network, _mjn_graph
4646
from .anoph.hapclust import AnophelesHapClustAnalysis
47+
from .anoph.describe import AnophelesDescribe
4748
from .anoph.dipclust import AnophelesDipClustAnalysis
4849
from .anoph.heterozygosity import AnophelesHetAnalysis
4950
from .util import (
@@ -97,6 +98,7 @@ class AnophelesDataResource(
9798
AnophelesSampleMetadata,
9899
AnophelesGenomeFeaturesData,
99100
AnophelesGenomeSequenceData,
101+
AnophelesDescribe,
100102
AnophelesBase,
101103
AnophelesPhenotypeData,
102104
):

malariagen_data/plasmodium.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22

3+
import os
34
import dask.array as da
45
import pandas as pd
56
import xarray
@@ -59,7 +60,7 @@ def sample_metadata(self):
5960
One row per sample.
6061
"""
6162
if self._cache_sample_metadata is None:
62-
path = f"{self._path}/{self.CONF['metadata_path']}"
63+
path = os.path.join(self._path, self.CONF["metadata_path"])
6364
with self._fs.open(path) as f:
6465
self._cache_sample_metadata = pd.read_csv(f, sep="\t", na_values="")
6566
return self._cache_sample_metadata
@@ -74,7 +75,7 @@ def _open_variant_calls_zarr(self):
7475
7576
"""
7677
if self._cache_variant_calls_zarr is None:
77-
path = f"{self._path}/{self.CONF['variant_calls_zarr_path']}"
78+
path = os.path.join(self._path, self.CONF["variant_calls_zarr_path"])
7879
store = _init_zarr_store(fs=self._fs, path=path)
7980
self._cache_variant_calls_zarr = zarr.open_consolidated(store=store)
8081
return self._cache_variant_calls_zarr
@@ -204,7 +205,7 @@ def open_genome(self):
204205
205206
"""
206207
if self._cache_genome is None:
207-
path = f"{self._path}/{self.CONF['reference_path']}"
208+
path = os.path.join(self._path, self.CONF["reference_path"])
208209
store = _init_zarr_store(fs=self._fs, path=path)
209210
self._cache_genome = zarr.open_consolidated(store=store)
210211
return self._cache_genome
@@ -316,7 +317,7 @@ def genome_features(self, attributes=("ID", "Parent", "Name")):
316317
try:
317318
df = self._cache_genome_features[attributes]
318319
except KeyError:
319-
path = f"{self._path}/{self.CONF['annotations_path']}"
320+
path = os.path.join(self._path, self.CONF["annotations_path"])
320321
with self._fs.open(path, mode="rb") as f:
321322
df = _read_gff3(f, compression="gzip")
322323
if attributes is not None:

0 commit comments

Comments
 (0)