Skip to content

Commit f591dba

Browse files
authored
Merge pull request #810 from malariagen/docs/phenotype-functions
docs: remove usage examples and register phenotype methods in Ag3.rst
2 parents 054c05d + 4a050dd commit f591dba

2 files changed

Lines changed: 55 additions & 65 deletions

File tree

docs/source/Ag3.rst

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -224,3 +224,14 @@ Inversion karyotypes
224224
:toctree: generated/
225225

226226
karyotype
227+
228+
Phenotype data access
229+
---------------------
230+
.. autosummary::
231+
:toctree: generated/
232+
233+
phenotype_data
234+
phenotypes_with_snps
235+
phenotypes_with_haplotypes
236+
phenotype_sample_sets
237+
phenotype_binary

malariagen_data/anoph/phenotypes.py

Lines changed: 44 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,9 @@
33
from typing import Callable, Optional, List, Any
44
import warnings
55
import fsspec
6+
from numpydoc_decorator import doc # type: ignore
7+
8+
from ..util import check_types
69
from malariagen_data.anoph import base_params, phenotype_params
710

811

@@ -308,6 +311,13 @@ def _create_phenotype_dataset(
308311

309312
return ds
310313

314+
@check_types
315+
@doc(
316+
summary="Load phenotypic data from insecticide resistance bioassays.",
317+
returns=dict(
318+
df="DataFrame containing phenotype data merged with sample metadata. Includes sample identifiers, phenotypic measurements, and experimental conditions."
319+
),
320+
)
311321
def phenotype_data(
312322
self,
313323
sample_sets: Optional[base_params.sample_sets] = None,
@@ -318,55 +328,9 @@ def phenotype_data(
318328
max_cohort_size: Optional[base_params.max_cohort_size] = None,
319329
) -> pd.DataFrame:
320330
"""
321-
Load phenotypic data from insecticide resistance bioassays.
322-
323-
Parameters
324-
----------
325-
sample_sets : Optional[base_params.sample_sets]
326-
Sample sets to load data for.
327-
sample_query : Optional[base_params.sample_query]
328-
Query string to filter samples. Can include phenotype-specific columns like:
329-
- insecticide: e.g., "insecticide == 'Deltamethrin'"
330-
- dose: e.g., "dose in [0.5, 2.0]"
331-
- phenotype: e.g., "phenotype == 'alive'"
332-
- location: e.g., "location == 'Cotonou'"
333-
- Any other metadata columns
334-
sample_query_options : Optional[base_params.sample_query_options]
335-
Options for the sample query.
336-
cohort_size : Optional[base_params.cohort_size]
337-
Exact cohort size for sampling.
338-
min_cohort_size : Optional[base_params.min_cohort_size]
339-
Minimum cohort size to include.
340-
max_cohort_size : Optional[base_params.max_cohort_size]
341-
Maximum cohort size (will be randomly sampled if exceeded).
342-
343-
Returns
344-
-------
345-
pd.DataFrame
346-
DataFrame containing phenotype data merged with sample metadata.
347-
348-
Examples
349-
--------
350-
# Load all phenotype data
351-
df = ag3.phenotype_data(sample_sets=['1237-VO-BJ-DJOGBENOU-VMF00050'])
352-
353-
# Filter by insecticide
354-
df = ag3.phenotype_data(
355-
sample_sets=['1237-VO-BJ-DJOGBENOU-VMF00050'],
356-
sample_query="insecticide == 'Deltamethrin'"
357-
)
358-
359-
# Filter by multiple criteria
360-
df = ag3.phenotype_data(
361-
sample_sets=['1237-VO-BJ-DJOGBENOU-VMF00050'],
362-
sample_query="insecticide == 'Deltamethrin' and dose >= 1.0 and phenotype == 'alive'"
363-
)
364-
365-
# Filter by location and insecticide
366-
df = ag3.phenotype_data(
367-
sample_query="location == 'Cotonou' and insecticide in ['Deltamethrin', 'Bendiocarb']"
368-
)
331+
Retrieve and merge phenotype data with sample metadata for bioassay analysis.
369332
"""
333+
370334
# 1. Normalize sample_sets
371335
sample_sets_norm = self._prep_sample_sets_param(sample_sets=sample_sets)
372336

@@ -416,6 +380,13 @@ def phenotype_data(
416380

417381
return df_final
418382

383+
@check_types
384+
@doc(
385+
summary="Combine phenotypic traits with SNP genotype data for GWAS analysis.",
386+
returns=dict(
387+
ds="xarray Dataset containing phenotype data and SNP genotype calls for the specified region."
388+
),
389+
)
419390
def phenotypes_with_snps(
420391
self,
421392
region: base_params.region,
@@ -426,9 +397,8 @@ def phenotypes_with_snps(
426397
min_cohort_size: Optional[base_params.min_cohort_size] = None,
427398
max_cohort_size: Optional[base_params.max_cohort_size] = None,
428399
) -> xr.Dataset:
429-
"""
430-
Load phenotypic data and merge with SNP calls.
431-
"""
400+
"""Merge phenotypes with SNP calls in a given region for association testing."""
401+
432402
df_phenotypes = self.phenotype_data(
433403
sample_sets=sample_sets,
434404
sample_query=sample_query,
@@ -455,6 +425,13 @@ def phenotypes_with_snps(
455425

456426
return ds
457427

428+
@check_types
429+
@doc(
430+
summary="Combine phenotypic traits with haplotype data for extended association analysis.",
431+
returns=dict(
432+
ds="xarray Dataset with phenotype and haplotype data for the specified region."
433+
),
434+
)
458435
def phenotypes_with_haplotypes(
459436
self,
460437
region: base_params.region,
@@ -465,9 +442,8 @@ def phenotypes_with_haplotypes(
465442
min_cohort_size: Optional[base_params.min_cohort_size] = None,
466443
max_cohort_size: Optional[base_params.max_cohort_size] = None,
467444
) -> xr.Dataset:
468-
"""
469-
Load phenotypic data and merge with haplotype data.
470-
"""
445+
"""Merge phenotypes with haplotype data in a given region for association testing."""
446+
471447
df_phenotypes = self.phenotype_data(
472448
sample_sets=sample_sets,
473449
sample_query=sample_query,
@@ -494,15 +470,14 @@ def phenotypes_with_haplotypes(
494470

495471
return ds
496472

473+
@check_types
474+
@doc(
475+
summary="List sample sets that contain phenotypic data.",
476+
returns=dict(sample_sets="List of sample set identifiers with phenotype data."),
477+
)
497478
def phenotype_sample_sets(self) -> List[str]:
498-
"""
499-
Get list of sample sets that have phenotypic data available.
479+
"""Identify sample sets containing phenotype data."""
500480

501-
Returns
502-
-------
503-
List[str]
504-
List of sample set identifiers with available phenotype data.
505-
"""
506481
all_sample_sets = self.sample_sets()["sample_set"].tolist() # type: ignore[operator]
507482
phenotype_sample_sets = []
508483
base_phenotype_path = f"{self._url}v3.2/phenotypes/all"
@@ -517,6 +492,12 @@ def phenotype_sample_sets(self) -> List[str]:
517492

518493
return phenotype_sample_sets
519494

495+
@doc(
496+
summary="Convert phenotype data into binary format for statistical analysis.",
497+
returns=dict(
498+
binary="Pandas Series indexed by sample_id with binary classification: 1 for resistant, 0 for susceptible, NaN for unknown."
499+
),
500+
)
520501
def phenotype_binary(
521502
self,
522503
sample_sets: Optional[base_params.sample_sets] = None,
@@ -531,10 +512,8 @@ def phenotype_binary(
531512
min_cohort_size: Optional[base_params.min_cohort_size] = None,
532513
max_cohort_size: Optional[base_params.max_cohort_size] = None,
533514
) -> pd.Series:
534-
"""
535-
Load phenotypic data as binary outcomes (1=alive/resistant, 0=dead/susceptible, NaN=unknown).
536-
Returns a pandas Series indexed by sample_id.
537-
"""
515+
"""Generate binary phenotypic labels from raw phenotype data."""
516+
538517
# Build the sample_query string from individual parameters
539518
query_parts = []
540519
if insecticide is not None:

0 commit comments

Comments
 (0)