@@ -36,37 +36,59 @@ def _karyotype_tags_n_alt(gt, alts, inversion_alts):
3636class AnophelesKaryotypeAnalysis (AnophelesSnpData ):
3737 def __init__ (
3838 self ,
39- inversion_tag_path : Optional [str ] = None ,
39+ karyotype_analysis : Optional [str ] = None ,
4040 ** kwargs ,
4141 ):
4242 # N.B., this class is designed to work cooperatively, and
4343 # so it's important that any remaining parameters are passed
4444 # to the superclass constructor.
4545 super ().__init__ (** kwargs )
4646
47- self ._inversion_tag_path = inversion_tag_path
47+ # If provided, this analysis version will override the
48+ # default value provided in the release configuration.
49+ self ._karyotype_analysis_override = karyotype_analysis
50+
51+ @property
52+ def _karyotype_analysis (self ) -> Optional [str ]:
53+ if self ._karyotype_analysis_override :
54+ return self ._karyotype_analysis_override
55+ else :
56+ # N.B., this will return None if the key is not present in the
57+ # config.
58+ return self .config .get ("DEFAULT_KARYOTYPE_ANALYSIS" )
59+
60+ def _require_karyotype_analysis (self ):
61+ if not self ._karyotype_analysis :
62+ raise NotImplementedError (
63+ "Inversion karyotype analysis is not available for this data resource."
64+ )
4865
4966 @_check_types
5067 @doc (
5168 summary = "Load tag SNPs for a given inversion." ,
5269 )
5370 def load_inversion_tags (self , inversion : inversion_param ) -> pd .DataFrame :
54- # needs to be modified depending on where we are hosting
55- import importlib .resources
56- from .. import resources
71+ self ._require_karyotype_analysis ()
5772
58- if self ._inversion_tag_path is None :
59- raise NotImplementedError (
60- "No inversion tags are available for this data resource."
73+ path = (
74+ f"{ self ._base_path } /{ self ._major_version_path } "
75+ f"/snp_karyotype/{ self ._karyotype_analysis } /karyotype_tag_snps.csv"
76+ )
77+ with self ._fs .open (path ) as f :
78+ df_tag_snps = pd .read_csv (f , sep = "," )
79+
80+ # Validate inversion name.
81+ available = sorted (df_tag_snps ["inversion" ].unique ())
82+ if inversion not in available :
83+ raise ValueError (
84+ f"Unknown inversion '{ inversion } '. Available inversions: { available } "
6185 )
62- else :
63- with importlib .resources .path (resources , self ._inversion_tag_path ) as path :
64- df_tag_snps = pd .read_csv (path , sep = "," )
65- return df_tag_snps .loc [df_tag_snps ["inversion" ] == inversion ].reset_index ()
86+
87+ return df_tag_snps .query (f"inversion == '{ inversion } '" ).reset_index (drop = True )
6688
6789 @_check_types
6890 @doc (
69- summary = "Infer karyotype from tag SNPs for a given inversion in Ag ." ,
91+ summary = "Infer karyotype from tag SNPs for a given inversion." ,
7092 )
7193 def karyotype (
7294 self ,
@@ -79,7 +101,7 @@ def karyotype(
79101 df_tagsnps = self .load_inversion_tags (inversion = inversion )
80102 inversion_pos = df_tagsnps ["position" ]
81103 inversion_alts = df_tagsnps ["alt_allele" ]
82- contig = inversion [ 0 : 2 ]
104+ contig = df_tagsnps [ "contig" ]. iloc [ 0 ]
83105
84106 # get snp calls for inversion region
85107 start , end = np .min (inversion_pos ), np .max (inversion_pos )
0 commit comments