Skip to content

Commit da95a59

Browse files
authored
Merge branch 'master' into pin-pandas
2 parents e6bfc86 + d542fc6 commit da95a59

3 files changed

Lines changed: 244 additions & 0 deletions

File tree

malariagen_data/anoph/describe.py

Lines changed: 115 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,115 @@
1+
import inspect
2+
from typing import Optional
3+
4+
import pandas as pd
5+
from numpydoc_decorator import doc # type: ignore
6+
7+
from .base import AnophelesBase
8+
9+
10+
class AnophelesDescribe(AnophelesBase):
    """Mixin class providing API introspection and discovery functionality.

    The single public entry point is ``describe_api``, which tabulates the
    public callables defined on the concrete class of the instance together
    with a one-line summary and a coarse, name-based category.
    """

    # Category labels produced by ``_categorize_method`` and accepted by the
    # ``category`` filter of ``describe_api``. A tuple (rather than a set)
    # keeps the rendering in error messages stable between runs.
    _API_CATEGORIES = ("data", "analysis", "plot")

    # Method-name prefixes that mark a method as belonging to the "data"
    # category. ``str.startswith`` accepts a tuple, so one call checks all.
    _DATA_METHOD_PREFIXES = (
        "sample_",
        "snp_",
        "hap_",
        "cnv_",
        "genome_",
        "open_",
        "lookup_",
        "read_",
        "general_",
        "sequence_",
        "cohorts_",
        "aim_",
        "gene_",
    )

    @doc(
        summary="""
            List all available public API methods with their descriptions.
        """,
        returns="""
            A dataframe with one row per public method, containing the method
            name ("method"), a short summary description ("summary"), and its
            category ("category"), which is one of "data", "analysis" or
            "plot".
        """,
        parameters=dict(
            category="""
                Optional filter to show only methods of a given category.
                Supported values are "data", "analysis", "plot", or None to
                show all methods.
            """,
        ),
    )
    def describe_api(
        self,
        category: Optional[str] = None,
    ) -> pd.DataFrame:
        # Validate the filter first so an invalid argument fails fast,
        # before any introspection work is done.
        if category is not None and category not in self._API_CATEGORIES:
            raise ValueError(
                f"Invalid category: {category!r}. "
                f"Must be one of {self._API_CATEGORIES}."
            )

        cls = type(self)
        methods_info = []

        # Walk through all public names visible on this instance, in
        # alphabetical order so the output is deterministic.
        for name in sorted(dir(self)):
            # Skip private/dunder names.
            if name.startswith("_"):
                continue

            # Look the name up on the class, not the instance: this avoids
            # evaluating properties and ignores instance-only attributes
            # (for which the lookup yields None).
            attr = getattr(cls, name, None)
            if attr is None or isinstance(attr, property) or not callable(attr):
                continue

            methods_info.append(
                {
                    "method": name,
                    "summary": self._extract_summary(attr),
                    "category": self._categorize_method(name),
                }
            )

        # Name the columns explicitly so the schema is fixed and does not
        # depend on which rows happened to be collected.
        df = pd.DataFrame(methods_info, columns=["method", "summary", "category"])

        # Apply category filter if specified.
        if category is not None:
            df = df[df["category"] == category].reset_index(drop=True)

        return df

    @staticmethod
    def _extract_summary(method) -> str:
        """Return the first non-blank line of ``method``'s docstring.

        The docstring is obtained via ``inspect.getdoc``. An empty string is
        returned when there is no docstring or it contains only whitespace.
        """
        docstring = inspect.getdoc(method)
        if not docstring:
            return ""
        stripped_lines = (line.strip() for line in docstring.splitlines())
        return next((line for line in stripped_lines if line), "")

    @staticmethod
    def _categorize_method(name: str) -> str:
        """Categorize a method purely from its name.

        Returns "plot" for names starting with ``plot_``, "data" for names
        starting with one of the known data prefixes, and "analysis" for
        everything else.
        """
        if name.startswith("plot_"):
            return "plot"
        if name.startswith(AnophelesDescribe._DATA_METHOD_PREFIXES):
            return "data"
        return "analysis"

malariagen_data/anopheles.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@
4444
from .anoph.phenotypes import AnophelesPhenotypeData
4545
from .mjn import _median_joining_network, _mjn_graph
4646
from .anoph.hapclust import AnophelesHapClustAnalysis
47+
from .anoph.describe import AnophelesDescribe
4748
from .anoph.dipclust import AnophelesDipClustAnalysis
4849
from .util import (
4950
CacheMiss,
@@ -95,6 +96,7 @@ class AnophelesDataResource(
9596
AnophelesSampleMetadata,
9697
AnophelesGenomeFeaturesData,
9798
AnophelesGenomeSequenceData,
99+
AnophelesDescribe,
98100
AnophelesBase,
99101
AnophelesPhenotypeData,
100102
):

tests/anoph/test_describe.py

Lines changed: 127 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,127 @@
1+
import pandas as pd
2+
import pytest
3+
from pytest_cases import parametrize_with_cases
4+
5+
from malariagen_data import af1 as _af1
6+
from malariagen_data import ag3 as _ag3
7+
from malariagen_data.anoph.describe import AnophelesDescribe
8+
9+
10+
@pytest.fixture
def ag3_sim_api(ag3_sim_fixture):
    """Build an ``AnophelesDescribe`` pointed at the URL of ``ag3_sim_fixture``.

    The configuration path and version settings are taken from the ``ag3``
    module; ``pre`` is enabled.
    """
    sim_url = ag3_sim_fixture.url
    init_kwargs = dict(
        url=sim_url,
        public_url=sim_url,
        config_path=_ag3.CONFIG_PATH,
        major_version_number=_ag3.MAJOR_VERSION_NUMBER,
        major_version_path=_ag3.MAJOR_VERSION_PATH,
        pre=True,
    )
    return AnophelesDescribe(**init_kwargs)
20+
21+
22+
@pytest.fixture
def af1_sim_api(af1_sim_fixture):
    """Build an ``AnophelesDescribe`` pointed at the URL of ``af1_sim_fixture``.

    The configuration path and version settings are taken from the ``af1``
    module; ``pre`` is disabled.
    """
    sim_url = af1_sim_fixture.url
    init_kwargs = dict(
        url=sim_url,
        public_url=sim_url,
        config_path=_af1.CONFIG_PATH,
        major_version_number=_af1.MAJOR_VERSION_NUMBER,
        major_version_path=_af1.MAJOR_VERSION_PATH,
        pre=False,
    )
    return AnophelesDescribe(**init_kwargs)
32+
33+
34+
def case_ag3_sim(ag3_sim_fixture, ag3_sim_api):
    """Test case pairing ``ag3_sim_fixture`` with the API object built on it."""
    case = (ag3_sim_fixture, ag3_sim_api)
    return case
36+
37+
38+
def case_af1_sim(af1_sim_fixture, af1_sim_api):
    """Test case pairing ``af1_sim_fixture`` with the API object built on it."""
    case = (af1_sim_fixture, af1_sim_api)
    return case
40+
41+
42+
@parametrize_with_cases("fixture,api", cases=".")
def test_describe_api_returns_dataframe(fixture, api):
    """Check that describe_api yields a non-empty DataFrame with the expected columns."""
    result = api.describe_api()
    assert isinstance(result, pd.DataFrame)
    # Each required column is asserted separately so that a failure
    # pinpoints the missing one.
    for column in ("method", "summary", "category"):
        assert column in result.columns
    assert len(result) > 0
51+
52+
53+
@parametrize_with_cases("fixture,api", cases=".")
def test_describe_api_no_private_methods(fixture, api):
    """Check that no listed method name starts with an underscore."""
    listed_names = api.describe_api()["method"]
    for method_name in listed_names:
        is_private = method_name.startswith("_")
        assert (
            not is_private
        ), f"Private method {method_name!r} should not appear in describe_api output"
61+
62+
63+
@parametrize_with_cases("fixture,api", cases=".")
def test_describe_api_category_filter(fixture, api):
    """Check that filtering by each supported category returns only that category."""
    for wanted in ("data", "analysis", "plot"):
        filtered = api.describe_api(category=wanted)
        assert isinstance(filtered, pd.DataFrame)
        # An empty result is acceptable; there is nothing further to check.
        if len(filtered) == 0:
            continue
        assert all(filtered["category"] == wanted)
71+
72+
73+
@parametrize_with_cases("fixture,api", cases=".")
def test_describe_api_invalid_category(fixture, api):
    """Check that an unsupported category is rejected with a ValueError."""
    expect_rejection = pytest.raises(ValueError, match="Invalid category")
    with expect_rejection:
        api.describe_api(category="invalid")
78+
79+
80+
@parametrize_with_cases("fixture,api", cases=".")
def test_describe_api_known_methods(fixture, api):
    """Check that a couple of known public methods are listed."""
    listed = set(api.describe_api()["method"])
    # Both names are expected on AnophelesDescribe; "sample_sets" is
    # presumably inherited from AnophelesBase (not verifiable from here).
    for expected_name in ("describe_api", "sample_sets"):
        assert expected_name in listed
88+
89+
90+
@parametrize_with_cases("fixture,api", cases=".")
def test_describe_api_summaries_not_empty(fixture, api):
    """Check that at least one listed method carries a non-empty summary."""
    table = api.describe_api()
    has_summary = table["summary"] != ""
    documented = table[has_summary]
    assert len(documented) > 0, "Expected at least some methods to have summaries"
96+
97+
98+
def test_categorize_method():
    """Check the name-based categorisation done by ``_categorize_method``."""
    # Method name -> category the helper is expected to assign.
    expected_categories = {
        "plot_pca": "plot",
        "plot_heterozygosity": "plot",
        "sample_metadata": "data",
        "snp_calls": "data",
        "genome_sequence": "data",
        "lookup_release": "data",
        "diversity_stats": "analysis",
        "cohort_diversity_stats": "analysis",
    }
    for method_name, expected in expected_categories.items():
        assert AnophelesDescribe._categorize_method(method_name) == expected
108+
109+
110+
def test_extract_summary():
    """Check ``_extract_summary`` on a documented and an undocumented function."""

    def documented():
        """This is a test summary.

        More details here.
        """
        pass

    def undocumented():
        pass

    extract = AnophelesDescribe._extract_summary

    # Only the first line of the docstring is reported.
    assert extract(documented) == "This is a test summary."

    # A function without a docstring yields an empty summary.
    assert extract(undocumented) == ""

0 commit comments

Comments
 (0)