Skip to content

Commit 01111ab

Browse files
committed
fix: align karyotype implementation with config-driven patterns and repo conventions
1 parent 3e7859e commit 01111ab

2 files changed

Lines changed: 12 additions & 5 deletions

File tree

malariagen_data/anoph/karyotype.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,9 +70,12 @@ def _require_karyotype_analysis(self):
7070
def load_inversion_tags(self, inversion: inversion_param) -> pd.DataFrame:
7171
self._require_karyotype_analysis()
7272

73+
filename = self.config.get(
74+
"KARYOTYPE_TAG_SNPS_FILENAME", "karyotype_tag_snps.csv"
75+
)
7376
path = (
7477
f"{self._base_path}/{self._major_version_path}"
75-
f"/karyotype/{self._karyotype_analysis}/karyotype_tag_snps.csv"
78+
f"/snp_karyotype/{self._karyotype_analysis}/{filename}"
7679
)
7780
with self._fs.open(path) as f:
7881
df_tag_snps = pd.read_csv(f, sep=",")

tests/anoph/conftest.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1154,7 +1154,10 @@ def init_config(self):
11541154
"SITE_ANNOTATIONS_ZARR_PATH": "reference/genome/agamp4/Anopheles-gambiae-PEST_SEQANNOTATION_AgamP4.12.zarr",
11551155
"DEFAULT_AIM_ANALYSIS": "20220528",
11561156
"DEFAULT_SITE_FILTERS_ANALYSIS": "dt_20200416",
1157+
# Simulated placeholder; real value will be set in production config.
11571158
"DEFAULT_KARYOTYPE_ANALYSIS": "20231213",
1159+
"KARYOTYPE_INVERSIONS": ["2La", "2Rb"],
1160+
"KARYOTYPE_TAG_SNPS_FILENAME": "karyotype_tag_snps.csv",
11581161
"DEFAULT_COHORTS_ANALYSIS": "20230516",
11591162
"SITE_MASK_IDS": ["gamb_colu_arab", "gamb_colu", "arab"],
11601163
"PHASING_ANALYSIS_IDS": ["gamb_colu_arab", "gamb_colu", "arab"],
@@ -1526,10 +1529,13 @@ def init_snp_sites(self):
15261529

15271530
def init_karyotype_tags(self):
15281531
analysis = self.config["DEFAULT_KARYOTYPE_ANALYSIS"]
1532+
inversions = self.config["KARYOTYPE_INVERSIONS"]
1533+
filename = self.config["KARYOTYPE_TAG_SNPS_FILENAME"]
15291534

15301535
# Generate tag SNP data using positions from simulated SNP sites.
15311536
tags = []
1532-
for contig, inversion in [("2R", "2Rb"), ("2L", "2La")]:
1537+
for inversion in inversions:
1538+
contig = inversion[:2]
15331539
snp_pos = self.snp_sites[contig]["variants"]["POS"][:]
15341540
snp_alt = self.snp_sites[contig]["variants"]["ALT"][:]
15351541
n_tags = min(20, len(snp_pos))
@@ -1546,9 +1552,7 @@ def init_karyotype_tags(self):
15461552
)
15471553

15481554
df = pd.DataFrame(tags)
1549-
path = (
1550-
self.bucket_path / "v3" / "karyotype" / analysis / "karyotype_tag_snps.csv"
1551-
)
1555+
path = self.bucket_path / "v3" / "snp_karyotype" / analysis / filename
15521556
path.parent.mkdir(parents=True, exist_ok=True)
15531557
df.to_csv(path, index=False)
15541558

0 commit comments

Comments
 (0)