Skip to content

Commit 4357f9f

Browse files
committed
In build_cohorts_from_sample_grouping, replace non-alphanumeric characters in the taxon with underscores when a non-default taxon_by column is used
1 parent fa8d2a7 commit 4357f9f

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

malariagen_data/anoph/frq_base.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import numpy as np
22
import pandas as pd
3+
import re
34
import xarray as xr
45
import plotly.express as px
56
from textwrap import dedent
@@ -95,9 +96,16 @@ def build_cohorts_from_sample_grouping(
9596
df_cohorts["period_end"] = cohort_period_end
9697
# Create a label that is similar to the cohort metadata,
9798
# although this won't be perfect.
98-
df_cohorts["label"] = df_cohorts.apply(
99-
lambda v: f"{v.area}_{v[taxon_by][:4]}_{v.period}", axis="columns"
100-
)
99+
if taxon_by == frq_params.taxon_by_default:
100+
df_cohorts["label"] = df_cohorts.apply(
101+
lambda v: f"{v.area}_{v[taxon_by][:4]}_{v.period}", axis="columns"
102+
)
103+
else:
104+
# Replace non-alphanumeric characters in the taxon with underscores.
105+
df_cohorts["label"] = df_cohorts.apply(
106+
lambda v: f"{v.area}_{re.sub(r'[^A-Za-z0-9]+', '_', str(v[taxon_by]))}_{v.period}",
107+
axis="columns",
108+
)
101109

102110
# Apply minimum cohort size.
103111
df_cohorts = df_cohorts.query(f"size >= {min_cohort_size}").reset_index(drop=True)

0 commit comments

Comments
 (0)