@@ -1154,7 +1154,10 @@ def init_config(self):
11541154 "SITE_ANNOTATIONS_ZARR_PATH" : "reference/genome/agamp4/Anopheles-gambiae-PEST_SEQANNOTATION_AgamP4.12.zarr" ,
11551155 "DEFAULT_AIM_ANALYSIS" : "20220528" ,
11561156 "DEFAULT_SITE_FILTERS_ANALYSIS" : "dt_20200416" ,
1157+ # Simulated placeholder; real value will be set in production config.
11571158 "DEFAULT_KARYOTYPE_ANALYSIS" : "20231213" ,
1159+ "KARYOTYPE_INVERSIONS" : ["2La" , "2Rb" ],
1160+ "KARYOTYPE_TAG_SNPS_FILENAME" : "karyotype_tag_snps.csv" ,
11581161 "DEFAULT_COHORTS_ANALYSIS" : "20230516" ,
11591162 "SITE_MASK_IDS" : ["gamb_colu_arab" , "gamb_colu" , "arab" ],
11601163 "PHASING_ANALYSIS_IDS" : ["gamb_colu_arab" , "gamb_colu" , "arab" ],
@@ -1526,10 +1529,13 @@ def init_snp_sites(self):
15261529
15271530 def init_karyotype_tags (self ):
15281531 analysis = self .config ["DEFAULT_KARYOTYPE_ANALYSIS" ]
1532+ inversions = self .config ["KARYOTYPE_INVERSIONS" ]
1533+ filename = self .config ["KARYOTYPE_TAG_SNPS_FILENAME" ]
15291534
15301535 # Generate tag SNP data using positions from simulated SNP sites.
15311536 tags = []
1532- for contig , inversion in [("2R" , "2Rb" ), ("2L" , "2La" )]:
1537+ for inversion in inversions :
1538+ contig = inversion [:2 ]
15331539 snp_pos = self .snp_sites [contig ]["variants" ]["POS" ][:]
15341540 snp_alt = self .snp_sites [contig ]["variants" ]["ALT" ][:]
15351541 n_tags = min (20 , len (snp_pos ))
@@ -1546,9 +1552,7 @@ def init_karyotype_tags(self):
15461552 )
15471553
15481554 df = pd .DataFrame (tags )
1549- path = (
1550- self .bucket_path / "v3" / "karyotype" / analysis / "karyotype_tag_snps.csv"
1551- )
1555+ path = self .bucket_path / "v3" / "snp_karyotype" / analysis / filename
15521556 path .parent .mkdir (parents = True , exist_ok = True )
15531557 df .to_csv (path , index = False )
15541558
0 commit comments