@@ -1628,45 +1628,6 @@ def prep_samples_for_cohort_grouping(
16281628 return df_samples
16291629
16301630
def build_cohorts_from_sample_grouping(
    *, group_samples_by_cohort, min_cohort_size: int, taxon_by: str = "taxon"
):
    """Summarise grouped samples into a dataframe with one row per cohort.

    Parameters
    ----------
    group_samples_by_cohort
        Grouped samples (expected to be a pandas group-by object). The
        grouped data must provide "sample_id", "latitude" and "longitude"
        columns, and the grouping keys must include "area", "period" and
        the column named by `taxon_by`, because those are read back as
        columns after the index is reset.
    min_cohort_size : int
        Cohorts with fewer samples than this are dropped.
    taxon_by : str, optional
        Name of the taxon column; its first four characters are used in
        the cohort label.

    Returns
    -------
    df_cohorts
        One row per retained cohort, with the grouping keys, the cohort
        size, latitude/longitude mean, max and min, "period_start",
        "period_end" and a "label" column. The index is a fresh 0..n-1
        range.

    Raises
    ------
    ValueError
        If no cohort meets the minimum size.

    """
    # Build cohorts dataframe.
    df_cohorts = group_samples_by_cohort.agg(
        size=("sample_id", len),
        lat_mean=("latitude", "mean"),
        lat_max=("latitude", "max"),
        lat_min=("latitude", "min"),
        lon_mean=("longitude", "mean"),
        lon_max=("longitude", "max"),
        lon_min=("longitude", "min"),
    )
    # Reset index so that the index fields are included as columns.
    df_cohorts = df_cohorts.reset_index()

    # Apply minimum cohort size before deriving any further columns, so that
    # no work is done for cohorts which are about to be discarded.
    is_large_enough = df_cohorts["size"] >= min_cohort_size
    df_cohorts = df_cohorts[is_large_enough].reset_index(drop=True)

    # Early check for no cohorts. Doing this before the derived columns are
    # built means an empty selection always surfaces as this error rather
    # than as a failure while constructing labels on an empty frame.
    if df_cohorts.empty:
        raise ValueError(
            "No cohorts available for the given sample selection parameters and minimum cohort size."
        )

    # Add cohort helper variables.
    df_cohorts["period_start"] = df_cohorts["period"].apply(lambda v: v.start_time)
    df_cohorts["period_end"] = df_cohorts["period"].apply(lambda v: v.end_time)

    # Create a label that is similar to the cohort metadata,
    # although this won't be perfect.
    df_cohorts["label"] = [
        f"{area}_{taxon[:4]}_{period}"
        for area, taxon, period in zip(
            df_cohorts["area"], df_cohorts[taxon_by], df_cohorts["period"]
        )
    ]

    return df_cohorts
1668-
1669-
16701631def add_frequency_ci (* , ds , ci_method ):
16711632 from statsmodels .stats .proportion import proportion_confint # type: ignore
16721633
0 commit comments