Skip to content

Commit daa6a95

Browse files
ahernank authored and Sharon-codes committed
add new cohort group metadata class
1 parent c43de7f commit daa6a95

1 file changed

Lines changed: 60 additions & 0 deletions

File tree

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,60 @@
1+
from typing import Optional
2+
3+
import pandas as pd
4+
from numpydoc_decorator import doc
5+
6+
from ..util import check_types
7+
from . import base_params
8+
from .base import AnophelesBase
9+
10+
11+
class AnophelesCohortGroupMetadata(AnophelesBase):
    """Adds `cohort_group_metadata()`, which loads the metadata CSV for a
    named cohort group and optionally filters it with a pandas query string.
    """

    def __init__(self, **kwargs):
        # N.B., this class takes part in cooperative inheritance, so every
        # keyword argument it receives must be handed on to the superclass
        # constructor.
        super().__init__(**kwargs)

    @check_types
    @doc(
        summary="""
            Read metadata for a specific cohort group, including cohort size,
            country code, taxon, administrative units name, ISO code, geoBoundaries
            shape ID and representative latitude and longitude points.
        """,
        parameters=dict(
            cohort_group="""
                A cohort group name. Accepted values are:
                "admin1_month", "admin1_quarter", "admin1_year",
                "admin2_month", "admin2_quarter", "admin2_year".
            """
        ),
        returns="A dataframe of cohort metadata, one row per cohort.",
    )
    def cohort_group_metadata(
        self,
        cohort_group: base_params.cohorts,
        cohort_group_query: Optional[base_params.cohort_group_query] = None,
    ) -> pd.DataFrame:
        # Gather the two pieces of release information the file path needs.
        release_path = self._major_version_path
        analysis_version = self.config.get("DEFAULT_COHORTS_ANALYSIS")

        # Only the first two characters of the release path are used as the
        # directory prefix.
        csv_path = (
            f"{release_path[:2]}_cohorts/"
            f"cohorts_{analysis_version}/"
            f"cohorts_{cohort_group}.csv"
        )

        # Load the CSV into a dataframe; empty strings are read as missing.
        with self.open_file(csv_path) as handle:
            frame = pd.read_csv(handle, sep=",", na_values="")

        # Normalise every column name to lower case.
        frame.columns = [name.lower() for name in frame.columns]

        # No selection requested: hand back the full table.
        if cohort_group_query is None:
            return frame.copy()

        # Otherwise treat the selection as a pandas query string and
        # renumber the surviving rows from zero.
        selected = frame.query(cohort_group_query)
        selected = selected.reset_index(drop=True)
        return selected.copy()

0 commit comments

Comments
 (0)