Skip to content

Commit 9f801a1

Browse files
authored
Merge branch 'master' into issue-919-cnv-variant-query
2 parents 51c4dce + 64ff049 commit 9f801a1

13 files changed

Lines changed: 1954 additions & 702 deletions

File tree

malariagen_data/anoph/base_params.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@
8181
""",
8282
]
8383

84+
# Parameter type for a row filter on a cohort-group metadata table; the string
# is passed to `DataFrame.query` by `cohort_group_metadata`.
cohort_group_query: TypeAlias = Annotated[
85+
str,
86+
"""
87+
A pandas query string to be evaluated against cohort-group metadata.
88+
""",
89+
]
90+
8491
sample_indices: TypeAlias = Annotated[
8592
List[int],
8693
"""

malariagen_data/anoph/cnv_frq.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -586,7 +586,7 @@ def _gene_cnv_frequencies_advanced(
586586
nobs[:, cohort_index] = np.repeat(cohort_n_called, 2)
587587
else:
588588
assert nobs_mode == "fixed"
589-
nobs[:, cohort_index] = cohort.size * 2
589+
nobs[:, cohort_index] = cohort.size
590590

591591
debug("compute frequency")
592592
with np.errstate(divide="ignore", invalid="ignore"):
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
from typing import Optional
2+
3+
import pandas as pd
4+
from numpydoc_decorator import doc
5+
6+
from ..util import _check_types
7+
from . import base_params
8+
from .base import AnophelesBase
9+
10+
11+
class AnophelesCohortGroupMetadata(AnophelesBase):
    """Cooperative class exposing `cohort_group_metadata`, which loads a
    cohort-group CSV through `open_file` and returns it as a dataframe."""

    def __init__(self, **kwargs):
        # This class takes part in cooperative inheritance: it consumes no
        # keyword arguments itself, so everything it receives is forwarded
        # unchanged to the next constructor in the chain.
        super().__init__(**kwargs)

    @_check_types
    @doc(
        summary="""
            Read metadata for a specific cohort group, including cohort size,
            country code, taxon, administrative units name, ISO code, geoBoundaries
            shape ID and representative latitude and longitude points.
        """,
        parameters=dict(
            cohort_group="""
                A cohort group name. Accepted values are:
                "admin1_month", "admin1_quarter", "admin1_year",
                "admin2_month", "admin2_quarter", "admin2_year".
            """
        ),
        returns="A dataframe of cohort metadata, one row per cohort.",
    )
    def cohort_group_metadata(
        self,
        cohort_group: base_params.cohorts,
        cohort_group_query: Optional[base_params.cohort_group_query] = None,
    ) -> pd.DataFrame:
        version_path = self._major_version_path
        analysis = self.config.get("DEFAULT_COHORTS_ANALYSIS")

        # Only the first two characters of the version path are used as the
        # directory prefix of the cohorts folder.
        prefix = version_path[:2]
        csv_path = f"{prefix}_cohorts/cohorts_{analysis}/cohorts_{cohort_group}.csv"

        # Load the cohort-group CSV; empty fields are read as missing values.
        with self.open_file(csv_path) as handle:
            table = pd.read_csv(handle, sep=",", na_values="")

        # Normalise column names to lower case.
        table.columns = [name.lower() for name in table.columns]

        # No selection requested: hand back a copy of the full table.
        if cohort_group_query is None:
            return table.copy()

        # Otherwise treat the selection as a pandas query string and renumber
        # the surviving rows from zero.
        selected = table.query(cohort_group_query).reset_index(drop=True)
        return selected.copy()

0 commit comments

Comments
 (0)