diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 1a1e7fdde..000000000 --- a/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -include malariagen_data/resources/* diff --git a/docs/source/Ag3.rst b/docs/source/Ag3.rst index d4bb77003..c7d4486b0 100644 --- a/docs/source/Ag3.rst +++ b/docs/source/Ag3.rst @@ -224,6 +224,7 @@ Inversion karyotypes :toctree: generated/ karyotype + load_inversion_tags Phenotype data access --------------------- diff --git a/malariagen_data/adar1.py b/malariagen_data/adar1.py index 1340a9554..fd0b29180 100644 --- a/malariagen_data/adar1.py +++ b/malariagen_data/adar1.py @@ -130,7 +130,6 @@ def __init__( tqdm_class=tqdm_class, taxon_colors=TAXON_COLORS, virtual_contigs=None, - inversion_tag_path=None, unrestricted_use_only=unrestricted_use_only, surveillance_use_only=surveillance_use_only, ) diff --git a/malariagen_data/adir1.py b/malariagen_data/adir1.py index f8c07c61d..be93877af 100644 --- a/malariagen_data/adir1.py +++ b/malariagen_data/adir1.py @@ -130,7 +130,6 @@ def __init__( tqdm_class=tqdm_class, taxon_colors=TAXON_COLORS, virtual_contigs=None, - inversion_tag_path=None, unrestricted_use_only=unrestricted_use_only, surveillance_use_only=surveillance_use_only, ) diff --git a/malariagen_data/af1.py b/malariagen_data/af1.py index 94d425f2a..c66cd8dee 100644 --- a/malariagen_data/af1.py +++ b/malariagen_data/af1.py @@ -132,7 +132,6 @@ def __init__( tqdm_class=tqdm_class, taxon_colors=TAXON_COLORS, virtual_contigs=None, - inversion_tag_path=None, unrestricted_use_only=unrestricted_use_only, surveillance_use_only=surveillance_use_only, ) diff --git a/malariagen_data/ag3.py b/malariagen_data/ag3.py index ae425a79f..dec66e30b 100644 --- a/malariagen_data/ag3.py +++ b/malariagen_data/ag3.py @@ -27,7 +27,6 @@ GENE_NAMES = { "AGAP004707": "Vgsc/para", } -INVERSION_TAG_PATH = "karyotype_tag_snps.csv" def _setup_aim_palettes(): @@ -213,7 +212,6 @@ def __init__( aim_species_colors=AIM_SPECIES_COLORS, virtual_contigs=VIRTUAL_CONTIGS, gene_names=GENE_NAMES, - inversion_tag_path=INVERSION_TAG_PATH, unrestricted_use_only=unrestricted_use_only, surveillance_use_only=surveillance_use_only, ) diff --git a/malariagen_data/amin1.py b/malariagen_data/amin1.py index ae93c6d3a..2923f3465 100644 --- a/malariagen_data/amin1.py +++ b/malariagen_data/amin1.py @@ -130,7 +130,6 @@ def __init__( tqdm_class=tqdm_class, taxon_colors=TAXON_COLORS, virtual_contigs=None, - inversion_tag_path=None, unrestricted_use_only=unrestricted_use_only, surveillance_use_only=surveillance_use_only, ) diff --git a/malariagen_data/anoph/karyotype.py b/malariagen_data/anoph/karyotype.py index fcfa23f14..b33ea67d8 100644 --- a/malariagen_data/anoph/karyotype.py +++ b/malariagen_data/anoph/karyotype.py @@ -36,7 +36,7 @@ def _karyotype_tags_n_alt(gt, alts, inversion_alts): class AnophelesKaryotypeAnalysis(AnophelesSnpData): def __init__( self, - inversion_tag_path: Optional[str] = None, + karyotype_analysis: Optional[str] = None, **kwargs, ): # N.B., this class is designed to work cooperatively, and @@ -44,29 +44,51 @@ def __init__( # to the superclass constructor. super().__init__(**kwargs) - self._inversion_tag_path = inversion_tag_path + # If provided, this analysis version will override the + # default value provided in the release configuration. + self._karyotype_analysis_override = karyotype_analysis + + @property + def _karyotype_analysis(self) -> Optional[str]: + if self._karyotype_analysis_override: + return self._karyotype_analysis_override + else: + # N.B., this will return None if the key is not present in the + # config. + return self.config.get("DEFAULT_KARYOTYPE_ANALYSIS") + + def _require_karyotype_analysis(self): + if not self._karyotype_analysis: + raise NotImplementedError( + "Inversion karyotype analysis is not available for this data resource." + ) @_check_types @doc( summary="Load tag SNPs for a given inversion.", ) def load_inversion_tags(self, inversion: inversion_param) -> pd.DataFrame: - # needs to be modified depending on where we are hosting - import importlib.resources - from .. import resources + self._require_karyotype_analysis() - if self._inversion_tag_path is None: - raise NotImplementedError( - "No inversion tags are available for this data resource." + path = ( + f"{self._base_path}/{self._major_version_path}" + f"/snp_karyotype/{self._karyotype_analysis}/karyotype_tag_snps.csv" + ) + with self._fs.open(path) as f: + df_tag_snps = pd.read_csv(f, sep=",") + + # Validate inversion name. + available = sorted(df_tag_snps["inversion"].unique()) + if inversion not in available: + raise ValueError( + f"Unknown inversion '{inversion}'. Available inversions: {available}" ) - else: - with importlib.resources.path(resources, self._inversion_tag_path) as path: - df_tag_snps = pd.read_csv(path, sep=",") - return df_tag_snps.loc[df_tag_snps["inversion"] == inversion].reset_index() + + return df_tag_snps.query(f"inversion == '{inversion}'").reset_index(drop=True) @_check_types @doc( - summary="Infer karyotype from tag SNPs for a given inversion in Ag.", + summary="Infer karyotype from tag SNPs for a given inversion.", ) def karyotype( self, @@ -79,7 +101,7 @@ def karyotype( df_tagsnps = self.load_inversion_tags(inversion=inversion) inversion_pos = df_tagsnps["position"] inversion_alts = df_tagsnps["alt_allele"] - contig = inversion[0:2] + contig = df_tagsnps["contig"].iloc[0] # get snp calls for inversion region start, end = np.min(inversion_pos), np.max(inversion_pos) diff --git a/malariagen_data/anopheles.py b/malariagen_data/anopheles.py index 8342dbb88..c1bec8658 100644 --- a/malariagen_data/anopheles.py +++ b/malariagen_data/anopheles.py @@ -142,7 +142,7 @@ def __init__( aim_species_colors: Optional[Mapping[str, str]] = None, virtual_contigs: Optional[Mapping[str, Sequence[str]]] = None, gene_names: Optional[Mapping[str, str]] = None, - inversion_tag_path: Optional[str] = None, + karyotype_analysis: Optional[str] = None, unrestricted_use_only: Optional[bool] = None, surveillance_use_only: Optional[bool] = None, ): @@ -180,7 +180,7 @@ def __init__( aim_species_colors=aim_species_colors, virtual_contigs=virtual_contigs, gene_names=gene_names, - inversion_tag_path=inversion_tag_path, + karyotype_analysis=karyotype_analysis, unrestricted_use_only=unrestricted_use_only, surveillance_use_only=surveillance_use_only, ) diff --git a/malariagen_data/resources/karyotype_tag_snps.csv b/malariagen_data/resources/karyotype_tag_snps.csv deleted file mode 100644 index b39a264d3..000000000 --- a/malariagen_data/resources/karyotype_tag_snps.csv +++ /dev/null @@ -1,1081 +0,0 @@ -inversion,contig,position,alt_allele -2La,2L,20528221,T -2La,2L,20542311,A -2La,2L,20542372,T -2La,2L,20542904,T -2La,2L,20546192,A -2La,2L,20571941,T -2La,2L,20578023,T -2La,2L,20581351,A -2La,2L,20583670,A -2La,2L,20629290,G -2La,2L,20630701,T -2La,2L,20674783,C -2La,2L,20694485,A -2La,2L,20698730,T -2La,2L,20726654,T -2La,2L,20726658,A -2La,2L,20729960,G -2La,2L,20731296,T -2La,2L,20732483,G -2La,2L,20742895,A -2La,2L,20749485,T -2La,2L,20754096,C -2La,2L,20754760,T -2La,2L,20755104,C -2La,2L,20758474,A -2La,2L,20762714,G -2La,2L,20769277,A -2La,2L,20769367,A -2La,2L,20769388,C -2La,2L,20769644,A -2La,2L,20771498,A -2La,2L,20813877,T -2La,2L,20816241,T -2La,2L,20971238,A -2La,2L,20982789,C -2La,2L,20983841,T -2La,2L,20985959,A -2La,2L,21014123,G -2La,2L,21014130,C -2La,2L,21078564,A -2La,2L,21078840,C -2La,2L,21092863,A -2La,2L,21099293,A -2La,2L,21099782,T -2La,2L,21100634,T -2La,2L,21112810,C -2La,2L,21119743,T -2La,2L,21120275,T -2La,2L,21120654,A -2La,2L,21127124,A -2La,2L,21145451,G -2La,2L,21148755,G -2La,2L,21148772,T -2La,2L,21149451,T -2La,2L,21152079,T -2La,2L,21152623,C -2La,2L,21303515,G -2La,2L,21305940,G -2La,2L,21315321,G -2La,2L,21327897,A -2La,2L,21350986,A -2La,2L,21352486,G -2La,2L,21359135,C -2La,2L,21375737,T -2La,2L,21375838,A -2La,2L,21375860,C -2La,2L,21382484,G -2La,2L,21419140,C -2La,2L,21476016,A -2La,2L,21478217,C -2La,2L,21526072,A -2La,2L,21552485,A -2La,2L,21552520,T -2La,2L,21565787,A -2La,2L,21577177,T -2La,2L,21578369,G -2La,2L,21599122,A -2La,2L,21692785,T -2La,2L,21694393,T -2La,2L,21702927,A -2La,2L,21708908,T -2La,2L,21751957,A -2La,2L,21752004,G -2La,2L,21761878,T -2La,2L,21779164,A -2La,2L,21779347,A -2La,2L,21786480,T -2La,2L,21822032,G -2La,2L,21866367,A -2La,2L,21869803,A -2La,2L,21876024,G -2La,2L,21878989,G -2La,2L,21885505,A -2La,2L,21901650,T -2La,2L,21908488,A -2La,2L,21925603,A -2La,2L,21933942,A -2La,2L,21933997,T -2La,2L,21938042,G -2La,2L,21948174,C -2La,2L,21949028,A -2La,2L,21963352,C -2La,2L,21984681,A -2La,2L,22005825,A -2La,2L,22005827,G -2La,2L,22195491,G -2La,2L,22196370,A -2La,2L,22197252,T -2La,2L,22257010,G -2La,2L,22279501,T -2La,2L,22279837,C -2La,2L,35979157,T -2La,2L,35979171,C -2La,2L,36107557,A -2La,2L,37473230,C -2La,2L,37537115,A -2La,2L,37545776,A -2La,2L,37577859,T -2La,2L,37801352,A -2La,2L,37815881,A -2La,2L,37816172,G -2La,2L,37827293,G -2La,2L,37828222,A -2La,2L,37937450,T -2La,2L,37957665,G -2La,2L,37973332,C -2La,2L,38135950,C -2La,2L,38136568,G -2La,2L,38137300,G -2La,2L,38143348,G -2La,2L,38246226,T -2La,2L,38906159,T -2La,2L,38986269,T -2La,2L,39263512,T -2La,2L,39508010,T -2La,2L,39633726,T -2La,2L,39786784,C -2La,2L,39797418,G -2La,2L,39800681,A -2La,2L,39806126,C -2La,2L,39931654,A -2La,2L,39934491,T -2La,2L,40665067,T -2La,2L,40677811,C -2La,2L,40679699,T -2La,2L,40756267,G -2La,2L,40803280,T -2La,2L,40818175,A -2La,2L,40835341,G -2La,2L,40842010,A -2La,2L,40854796,A -2La,2L,40856464,A -2La,2L,40858445,C -2La,2L,40862541,T -2La,2L,41077843,T -2La,2L,41077847,T -2La,2L,41097011,T -2La,2L,41112598,T -2La,2L,41116522,T -2La,2L,41122465,C -2La,2L,41123840,A -2La,2L,41134510,A -2La,2L,41140630,C -2La,2L,41162163,C -2La,2L,41196590,A -2La,2L,41198153,T -2La,2L,41236014,T -2La,2L,41304603,A -2La,2L,41304604,A -2La,2L,41305252,A -2La,2L,41307689,C -2La,2L,41323736,A -2La,2L,41344216,T -2La,2L,41344408,A -2La,2L,41499892,A -2La,2L,41516340,A -2La,2L,41519257,A -2La,2L,41593694,T -2La,2L,41593915,C -2La,2L,41619107,G -2La,2L,41620797,G -2La,2L,41623007,C -2La,2L,41623090,A -2La,2L,41705676,G -2La,2L,41705933,A -2La,2L,41730045,T -2La,2L,41739509,T -2La,2L,41785301,A -2La,2L,41788579,G -2La,2L,41807288,G -2La,2L,41807310,T -2La,2L,41822800,C -2La,2L,41895651,A -2La,2L,41938176,A -2La,2L,41966114,T -2La,2L,42019144,G -2La,2L,42039639,T -2La,2L,42059094,T -2La,2L,42068130,G -2La,2L,42069422,A -2La,2L,42070451,C -2La,2L,42072537,A -2Rb,2R,19041572,T -2Rb,2R,19053536,A -2Rb,2R,19056207,T -2Rb,2R,19056210,T -2Rb,2R,19059676,T -2Rb,2R,19073606,A -2Rb,2R,19073763,C -2Rb,2R,19073983,G -2Rb,2R,19074656,G -2Rb,2R,19074736,A -2Rb,2R,19076050,T -2Rb,2R,19076091,C -2Rb,2R,19076097,C -2Rb,2R,19076502,C -2Rb,2R,19076685,C -2Rb,2R,19081915,C -2Rb,2R,19088508,C -2Rb,2R,19089940,C -2Rb,2R,19102478,A -2Rb,2R,19120135,T -2Rb,2R,19121693,T -2Rb,2R,19122005,G -2Rb,2R,19122006,C -2Rb,2R,19122292,A -2Rb,2R,19154641,G -2Rb,2R,19164690,T -2Rb,2R,19164723,G -2Rb,2R,19192322,A -2Rb,2R,19226880,T -2Rb,2R,19279177,A -2Rb,2R,19283533,A -2Rb,2R,19303092,T -2Rb,2R,19313407,T -2Rb,2R,19346197,T -2Rb,2R,19354574,T -2Rb,2R,19395562,T -2Rb,2R,19428947,G -2Rb,2R,19430360,A -2Rb,2R,19441998,C -2Rb,2R,19500901,G -2Rb,2R,19510197,G -2Rb,2R,19512215,A -2Rb,2R,19546906,C -2Rb,2R,19557970,G -2Rb,2R,19600137,A -2Rb,2R,19630965,A -2Rb,2R,19642808,T -2Rb,2R,19675681,T -2Rb,2R,19678673,A -2Rb,2R,19680573,T -2Rb,2R,19695497,A -2Rb,2R,19708903,G -2Rb,2R,19708907,A -2Rb,2R,19709069,A -2Rb,2R,19716810,A -2Rb,2R,19718899,T -2Rb,2R,19733454,C -2Rb,2R,19748299,C -2Rb,2R,19748315,C -2Rb,2R,19761636,G -2Rb,2R,19773853,A -2Rb,2R,19787732,C -2Rb,2R,19788580,A -2Rb,2R,19806440,C -2Rb,2R,19859913,T -2Rb,2R,19891330,G -2Rb,2R,19897475,C -2Rb,2R,19928374,C -2Rb,2R,19928406,G -2Rb,2R,19931841,T -2Rb,2R,20000276,T -2Rb,2R,20000282,A -2Rb,2R,20009251,A -2Rb,2R,20014607,T -2Rb,2R,20050807,T -2Rb,2R,20051494,T -2Rb,2R,20051798,G -2Rb,2R,20053165,C -2Rb,2R,20054544,T -2Rb,2R,20068232,T -2Rb,2R,20095881,T -2Rb,2R,20096045,G -2Rb,2R,20096047,T -2Rb,2R,20123780,T -2Rb,2R,20134074,C -2Rb,2R,20136036,C -2Rb,2R,20136063,T -2Rb,2R,20136707,T -2Rb,2R,20147084,G -2Rb,2R,20161426,T -2Rb,2R,20175599,C -2Rb,2R,20185020,A -2Rb,2R,20185144,G -2Rb,2R,20185302,T -2Rb,2R,20185328,T -2Rb,2R,20201354,C -2Rb,2R,20221655,T -2Rb,2R,20229928,A -2Rb,2R,20247651,A -2Rb,2R,20250313,C -2Rb,2R,20252458,G -2Rb,2R,20297673,T -2Rb,2R,20300958,T -2Rb,2R,20301292,A -2Rb,2R,20304196,A -2Rb,2R,20304198,G -2Rb,2R,20314957,A -2Rb,2R,20318594,A -2Rb,2R,20320344,A -2Rb,2R,20320419,T -2Rb,2R,20344667,T -2Rb,2R,20347477,C -2Rb,2R,20410944,G -2Rb,2R,20413335,A -2Rb,2R,20414164,T -2Rb,2R,20414414,A -2Rb,2R,20443158,T -2Rb,2R,20475100,T -2Rb,2R,20525569,C -2Rb,2R,20529675,A -2Rb,2R,20687083,G -2Rb,2R,20713750,G -2Rb,2R,20722346,T -2Rb,2R,20744840,T -2Rb,2R,20785356,T -2Rb,2R,20785392,C -2Rb,2R,20786209,C -2Rb,2R,20789693,G -2Rb,2R,20789801,C -2Rb,2R,20789875,G -2Rb,2R,20789897,A -2Rb,2R,20790398,T -2Rb,2R,20791160,A -2Rb,2R,20830388,A -2Rb,2R,20839693,T -2Rb,2R,20848467,T -2Rb,2R,21080875,G -2Rb,2R,21098199,A -2Rb,2R,21294773,G -2Rb,2R,21429408,A -2Rb,2R,21430669,T -2Rb,2R,21444128,G -2Rb,2R,21449656,G -2Rb,2R,21449883,T -2Rb,2R,21450385,C -2Rb,2R,21453637,A -2Rb,2R,21461524,C -2Rb,2R,21466797,C -2Rb,2R,21469908,T -2Rb,2R,21471059,C -2Rb,2R,21475316,T -2Rb,2R,21541904,T -2Rb,2R,21595055,T -2Rb,2R,21618832,T -2Rb,2R,21628301,A -2Rb,2R,21696926,T -2Rb,2R,21734230,C -2Rb,2R,21773847,C -2Rb,2R,21793906,A -2Rb,2R,21818599,A -2Rb,2R,21897929,T -2Rb,2R,21978986,C -2Rb,2R,22057950,G -2Rb,2R,22088446,A -2Rb,2R,22108731,G -2Rb,2R,22188409,T -2Rb,2R,22203733,T -2Rb,2R,22206425,C -2Rb,2R,22209028,T -2Rb,2R,22219701,A -2Rb,2R,22257522,A -2Rb,2R,22261646,A -2Rb,2R,22276262,A -2Rb,2R,22276276,G -2Rb,2R,22292829,T -2Rb,2R,22293586,C -2Rb,2R,22293627,G -2Rb,2R,22293683,T -2Rb,2R,22293684,T -2Rb,2R,22294679,G -2Rb,2R,22312989,G -2Rb,2R,22313000,G -2Rb,2R,22313024,T -2Rb,2R,22385665,T -2Rb,2R,22738212,T -2Rb,2R,22866119,T -2Rb,2R,22866571,T -2Rb,2R,22919684,G -2Rb,2R,22924518,A -2Rb,2R,22924520,G -2Rb,2R,22941194,T -2Rb,2R,22942292,C -2Rb,2R,22945861,G -2Rb,2R,22946578,T -2Rb,2R,22946606,A -2Rb,2R,22948653,G -2Rb,2R,22950446,G -2Rb,2R,22985791,C -2Rb,2R,23068941,C -2Rb,2R,23072690,T -2Rb,2R,23160832,C -2Rb,2R,23162089,T -2Rb,2R,23168078,T -2Rb,2R,23168254,T -2Rb,2R,23168258,A -2Rb,2R,23170542,T -2Rb,2R,23170605,G -2Rb,2R,23170622,A -2Rb,2R,23174492,C -2Rb,2R,23181995,T -2Rb,2R,23187985,A -2Rb,2R,23204059,A -2Rb,2R,23222571,A -2Rb,2R,23248539,A -2Rb,2R,23308853,C -2Rb,2R,23324343,T -2Rb,2R,23342730,A -2Rb,2R,23354421,C -2Rb,2R,23366786,T -2Rb,2R,23427644,T -2Rb,2R,23445468,A -2Rb,2R,23455044,A -2Rb,2R,23456111,T -2Rb,2R,23549539,G -2Rb,2R,23657633,C -2Rb,2R,23675792,C -2Rb,2R,23678193,A -2Rb,2R,23725576,T -2Rb,2R,23726725,C -2Rb,2R,23761051,C -2Rb,2R,23798430,C -2Rb,2R,23867863,T -2Rb,2R,23997303,T -2Rb,2R,24046055,G -2Rb,2R,24057482,A -2Rb,2R,24173177,G -2Rb,2R,24209686,T -2Rb,2R,24273901,T -2Rb,2R,24346823,A -2Rb,2R,24352369,T -2Rb,2R,24353184,A -2Rb,2R,24361160,T -2Rb,2R,24404091,C -2Rb,2R,24404180,C -2Rb,2R,24428390,G -2Rb,2R,24439238,C -2Rb,2R,24464134,C -2Rb,2R,24473402,A -2Rb,2R,24482453,G -2Rb,2R,24515932,A -2Rb,2R,24516400,C -2Rb,2R,24516401,A -2Rb,2R,24520143,C -2Rb,2R,24523993,T -2Rb,2R,24535885,G -2Rb,2R,24540615,A -2Rb,2R,24544647,T -2Rb,2R,24544657,T -2Rb,2R,24574829,A -2Rb,2R,24574834,C -2Rb,2R,24574838,A -2Rb,2R,24578547,A -2Rb,2R,24629593,C -2Rb,2R,24638126,G -2Rb,2R,24640954,G -2Rb,2R,24692714,T -2Rb,2R,24750302,A -2Rb,2R,24758778,T -2Rb,2R,24771588,T -2Rb,2R,24792625,G -2Rb,2R,24819958,G -2Rb,2R,24859207,G -2Rb,2R,25033805,G -2Rb,2R,25052588,G -2Rb,2R,25091537,G -2Rb,2R,25107397,T -2Rb,2R,25107398,C -2Rb,2R,25107416,T -2Rb,2R,25108615,T -2Rb,2R,25130459,T -2Rb,2R,25259589,T -2Rb,2R,25423714,A -2Rb,2R,25423784,A -2Rb,2R,25424046,T -2Rb,2R,25456112,A -2Rb,2R,25464269,G -2Rb,2R,25520570,A -2Rb,2R,25542367,G -2Rb,2R,25559972,C -2Rb,2R,25562481,A -2Rb,2R,25566401,T -2Rb,2R,25575980,C -2Rb,2R,25581009,T -2Rb,2R,25708219,C -2Rb,2R,25718301,C -2Rb,2R,25719127,G -2Rb,2R,25737080,G -2Rb,2R,25741671,T -2Rb,2R,25790826,T -2Rb,2R,25856474,C -2Rb,2R,25860400,T -2Rb,2R,25876512,C -2Rb,2R,25890942,T -2Rb,2R,25890945,T -2Rb,2R,25890947,T -2Rb,2R,25897207,G -2Rb,2R,25903671,G -2Rb,2R,25911312,G -2Rb,2R,25911315,A -2Rb,2R,25924943,G -2Rb,2R,25945146,G -2Rb,2R,25945188,T -2Rb,2R,25961467,G -2Rb,2R,25962183,C -2Rb,2R,25964441,T -2Rb,2R,25964666,G -2Rb,2R,25964790,A -2Rb,2R,25965182,A -2Rb,2R,25967231,T -2Rb,2R,25982702,G -2Rb,2R,25984234,A -2Rb,2R,25984350,T -2Rb,2R,26008997,G -2Rb,2R,26178750,C -2Rb,2R,26193996,G -2Rb,2R,26194005,T -2Rb,2R,26228513,C -2Rb,2R,26229650,G -2Rb,2R,26229905,T -2Rb,2R,26248614,G -2Rb,2R,26254834,G -2Rb,2R,26259109,A -2Rb,2R,26259123,A -2Rb,2R,26371581,A -2Rb,2R,26380901,T -2Rb,2R,26414602,G -2Rb,2R,26427790,G -2Rb,2R,26446866,A -2Rb,2R,26458861,T -2Rb,2R,26472378,T -2Rb,2R,26658466,T -2Rb,2R,26660389,G -2Rb,2R,26694130,C -2Rb,2R,26743467,A -2Rb,2R,26746643,C -2Rb,2R,26746644,A -2Rb,2R,26746649,T -2Rb,2R,26746735,C -2Rb,2R,26746778,C -2Rd,2R,31523562,T -2Rd,2R,31610485,C -2Rd,2R,31614850,G -2Rd,2R,31614874,T -2Rd,2R,31615285,T -2Rd,2R,31616748,A -2Rd,2R,31629231,G -2Rd,2R,31629262,T -2Rd,2R,31639025,T -2Rd,2R,31653553,T -2Rd,2R,31684997,T -2Rd,2R,32860895,C -2Rd,2R,32862480,C -2Rd,2R,32882145,T -2Rd,2R,33993155,G -2Rd,2R,35034708,C -2Rd,2R,35093874,C -2Rd,2R,35094016,T -2Rd,2R,35095675,C -2Rd,2R,35095821,C -2Rd,2R,35095993,C -2Rd,2R,35097594,T -2Rd,2R,35098106,G -2Rd,2R,35098247,C -2Rd,2R,35098298,T -2Rd,2R,35098532,T -2Rd,2R,35098795,T -2Rd,2R,35102397,G -2Rd,2R,35109422,T -2Rd,2R,35680047,T -2Rd,2R,35715586,G -2Rd,2R,35715596,A -2Rd,2R,35716380,T -2Rd,2R,35716630,A -2Rd,2R,35718113,T -2Rd,2R,35718615,C -2Rd,2R,35718894,C -2Rd,2R,35729889,T -2Rd,2R,35768318,A -2Rd,2R,35769869,G -2Rd,2R,35812652,A -2Rd,2R,35926470,G -2Rd,2R,35926622,T -2Rd,2R,35927871,T -2Rd,2R,35931331,A -2Rd,2R,36194326,T -2Rd,2R,36195407,C -2Rd,2R,36196095,T -2Rd,2R,36196296,A -2Rd,2R,36200109,C -2Rd,2R,36201603,A -2Rd,2R,36207148,T -2Rd,2R,36219896,T -2Rd,2R,36219925,T -2Rd,2R,36220196,T -2Rd,2R,36220209,A -2Rd,2R,36220633,T -2Rd,2R,36221088,C -2Rd,2R,36222995,C -2Rd,2R,36225222,T -2Rd,2R,36235006,G -2Rd,2R,36235759,A -2Rd,2R,36275039,T -2Rd,2R,36275710,T -2Rd,2R,36275729,T -2Rd,2R,36275752,T -2Rd,2R,36280260,G -2Rd,2R,36280542,T -2Rd,2R,36281224,T -2Rd,2R,36317242,G -2Rd,2R,36451715,T -2Rd,2R,36460867,T -2Rd,2R,36460940,A -2Rd,2R,36462391,T -2Rd,2R,36462456,A -2Rd,2R,36463427,T -2Rd,2R,36463593,C -2Rd,2R,36467028,G -2Rd,2R,36571526,A -2Rd,2R,36593441,A -2Rd,2R,36637286,T -2Rd,2R,36656217,C -2Rd,2R,36675489,A -2Rd,2R,36676536,A -2Rd,2R,36682047,T -2Rd,2R,36682204,G -2Rd,2R,36682492,T -2Rd,2R,36682513,T -2Rd,2R,36682566,C -2Rd,2R,36684127,T -2Rd,2R,36716395,T -2Rd,2R,36716481,A -2Rd,2R,36717184,C -2Rd,2R,36717266,A -2Rd,2R,36717274,T -2Rd,2R,36717402,T -2Rd,2R,36717438,C -2Rd,2R,36717441,A -2Rd,2R,36718158,G -2Rd,2R,36718492,A -2Rd,2R,36719706,A -2Rd,2R,36720512,T -2Rd,2R,36721258,A -2Rd,2R,36916171,G -2Rd,2R,36926852,T -2Rd,2R,36926889,T -2Rd,2R,36928881,T -2Rd,2R,36928948,T -2Rd,2R,36948912,T -2Rd,2R,36957796,T -2Rd,2R,36993412,A -2Rd,2R,37347155,T -2Rd,2R,37348929,A -2Rd,2R,37350350,T -2Rd,2R,37350654,G -2Rd,2R,37351250,A -2Rd,2R,37686680,C -2Rd,2R,37697668,T -2Rd,2R,37894422,G -2Rd,2R,38275741,A -2Rd,2R,38612980,T -2Rd,2R,38812337,A -2Rd,2R,38812537,C -2Rd,2R,38812664,G -2Rd,2R,38813556,A -2Rd,2R,38813559,C -2Rd,2R,39528753,C -2Rd,2R,39588534,C -2Rd,2R,39799497,G -2Rd,2R,39968896,C -2Rd,2R,39971868,A -2Rd,2R,39981770,C -2Rd,2R,41349565,A -2Rd,2R,41451632,A -2Rd,2R,42164430,T -2Rd,2R,42168396,A -2Rd,2R,42204119,A -2Rd,2R,42220746,C -2Rd,2R,42225360,A -2Rd,2R,42230996,C -2Rd,2R,42235046,C -2Rd,2R,42252030,C -2Rd,2R,42253040,A -2Rd,2R,42278696,T -2Rd,2R,42289566,T -2Rd,2R,42299927,A -2Rd,2R,42373269,T -2Rc_gam,2R,28454105,G -2Rc_gam,2R,28978689,G -2Rc_gam,2R,28978698,T -2Rc_gam,2R,29305153,T -2Rc_gam,2R,29305276,T -2Rc_gam,2R,29305383,T -2Rc_gam,2R,29306134,T -2Rc_gam,2R,29306197,T -2Rc_gam,2R,29307769,T -2Rc_gam,2R,29307919,C -2Rc_gam,2R,29309864,T -2Rc_gam,2R,29310166,C -2Rc_gam,2R,29310191,G -2Rc_gam,2R,29310201,C -2Rc_gam,2R,29310253,C -2Rc_gam,2R,29310296,A -2Rc_gam,2R,29310336,T -2Rc_gam,2R,29314785,T -2Rc_gam,2R,29384371,G -2Rc_gam,2R,30328642,T -2Rc_gam,2R,30476017,A -2Rc_gam,2R,30481345,C -2Rc_gam,2R,30714057,A -2Rc_gam,2R,30714060,G -2Rc_gam,2R,30724210,A -2Rc_gam,2R,30725625,A -2Rc_gam,2R,30897767,G -2Rc_gam,2R,30897771,A -2Rc_gam,2R,30897774,A -2Rc_gam,2R,30897775,G -2Rc_gam,2R,30967698,T -2Rc_gam,2R,31074641,T -2Rc_gam,2R,31091052,C -2Rc_gam,2R,31092050,A -2Rc_gam,2R,31093974,T -2Rc_gam,2R,31093987,T -2Rc_gam,2R,31121606,C -2Rc_gam,2R,31126113,A -2Rc_gam,2R,31126453,C -2Rc_gam,2R,31127706,T -2Rc_gam,2R,31147129,C -2Rc_gam,2R,31147132,A -2Rc_gam,2R,31150175,T -2Rc_gam,2R,31152120,G -2Rc_gam,2R,31152351,C -2Rc_gam,2R,31172511,C -2Rc_gam,2R,31202061,T -2Rc_gam,2R,31202064,T -2Rc_gam,2R,31202076,T -2Rc_col,2R,27007651,T -2Rc_col,2R,27225808,T -2Rc_col,2R,27250183,T -2Rc_col,2R,27254753,A -2Rc_col,2R,27280531,A -2Rc_col,2R,27280581,C -2Rc_col,2R,27280752,T -2Rc_col,2R,27281084,C -2Rc_col,2R,27282099,A -2Rc_col,2R,27282330,T -2Rc_col,2R,27283425,A -2Rc_col,2R,27725280,G -2Rc_col,2R,27740327,T -2Rc_col,2R,27740592,A -2Rc_col,2R,28017129,A -2Rc_col,2R,28017133,A -2Rc_col,2R,28104728,C -2Rc_col,2R,28104970,C -2Rc_col,2R,28113792,G -2Rc_col,2R,28121178,T -2Rc_col,2R,28122009,C -2Rc_col,2R,28171113,C -2Rc_col,2R,28434972,T -2Rc_col,2R,28468246,A -2Rc_col,2R,28484341,T -2Rc_col,2R,28484350,C -2Rc_col,2R,28776715,T -2Rc_col,2R,28805094,T -2Rc_col,2R,28814945,A -2Rc_col,2R,29026558,C -2Rc_col,2R,29084286,A -2Rc_col,2R,29087279,C -2Rc_col,2R,29134005,G -2Rc_col,2R,29175698,C -2Rc_col,2R,29194375,A -2Rc_col,2R,29268921,A -2Rc_col,2R,29269103,A -2Rc_col,2R,29269324,G -2Rc_col,2R,29269352,C -2Rc_col,2R,29322912,G -2Rc_col,2R,29393543,G -2Rc_col,2R,29393579,G -2Rc_col,2R,29393593,A -2Rc_col,2R,29409159,A -2Rc_col,2R,29415920,G -2Rc_col,2R,29417833,C -2Rc_col,2R,29517224,G -2Rc_col,2R,29517240,C -2Rc_col,2R,29519609,G -2Rc_col,2R,29787632,C -2Rc_col,2R,30355430,G -2Rc_col,2R,30386671,A -2Rc_col,2R,30392408,G -2Rc_col,2R,30523061,T -2Rc_col,2R,31231129,A -2Rc_col,2R,31431356,T -2Rc_col,2R,31463934,T -2Rj,2R,3264090,A -2Rj,2R,3264102,A -2Rj,2R,3264504,A -2Rj,2R,3264955,A -2Rj,2R,3265032,T -2Rj,2R,3265382,G -2Rj,2R,3265629,G -2Rj,2R,3265668,T -2Rj,2R,3267404,C -2Rj,2R,3267929,G -2Rj,2R,3268001,A -2Rj,2R,3268739,T -2Rj,2R,3269237,A -2Rj,2R,3270801,A -2Rj,2R,3274227,T -2Rj,2R,3274229,G -2Rj,2R,3274806,C -2Rj,2R,3275342,G -2Rj,2R,3275588,T -2Rj,2R,3276108,A -2Rj,2R,3276813,C -2Rj,2R,3277024,T -2Rj,2R,3277356,A -2Rj,2R,3277374,A -2Rj,2R,3278069,T -2Rj,2R,3278288,A -2Rj,2R,3278637,A -2Rj,2R,3280216,T -2Rj,2R,3280413,T -2Rj,2R,3281412,A -2Rj,2R,3282767,A -2Rj,2R,3283741,C -2Rj,2R,3283744,T -2Rj,2R,3288528,T -2Rj,2R,3288561,C -2Rj,2R,3299675,A -2Rj,2R,3300002,T -2Rj,2R,3300788,T -2Rj,2R,3301197,A -2Rj,2R,3301739,A -2Rj,2R,3302459,A -2Rj,2R,3302603,A -2Rj,2R,3302698,G -2Rj,2R,3303615,T -2Rj,2R,3303715,T -2Rj,2R,3304231,T -2Rj,2R,3304469,C -2Rj,2R,3306289,C -2Rj,2R,3306290,A -2Rj,2R,3307638,A -2Rj,2R,3308920,T -2Rj,2R,3325788,A -2Rj,2R,3341654,C -2Rj,2R,10916292,C -2Rj,2R,11234277,T -2Rj,2R,11234287,C -2Rj,2R,11234497,C -2Rj,2R,13585215,T -2Rj,2R,13598254,A -2Rj,2R,15555216,A -2Rj,2R,15556093,G -2Rj,2R,15705549,T -2Rj,2R,15706857,T -2Rj,2R,15708973,T -2Rj,2R,15710203,T -2Rj,2R,15711499,T -2Rj,2R,15711596,T -2Rj,2R,15712475,T -2Rj,2R,15712693,A -2Rj,2R,15712957,T -2Rj,2R,15713100,A -2Rj,2R,15714321,A -2Rj,2R,15716132,T -2Rj,2R,15716428,G -2Rj,2R,15718542,T -2Rj,2R,15719084,A -2Rj,2R,15720433,T -2Rj,2R,15732334,C -2Rj,2R,15732338,T -2Rj,2R,15740463,T -2Rj,2R,15740657,A -2Rj,2R,15740931,C -2Rj,2R,15740978,T -2Rj,2R,15741314,G -2Rj,2R,15742834,T -2Rj,2R,15743161,A -2Rj,2R,15743203,A -2Rj,2R,15743636,G -2Rj,2R,15745595,A -2Rj,2R,15746439,A -2Rj,2R,15747295,G -2Rj,2R,15747386,C -2Rj,2R,15747519,G -2Rj,2R,15747695,T -2Rj,2R,15748347,G -2Rj,2R,15748971,A -2Rj,2R,15749248,A -2Rj,2R,15749285,T -2Rj,2R,15750449,T -2Ru,2R,31516240,T -2Ru,2R,31516340,C -2Ru,2R,31516795,C -2Ru,2R,31517127,A -2Ru,2R,31517528,T -2Ru,2R,31517733,G -2Ru,2R,31517858,G -2Ru,2R,31518159,C -2Ru,2R,31518839,A -2Ru,2R,31523460,G -2Ru,2R,31553046,A -2Ru,2R,31557927,C -2Ru,2R,31558044,T -2Ru,2R,31558111,T -2Ru,2R,31558469,T -2Ru,2R,31560220,A -2Ru,2R,31560735,T -2Ru,2R,31563611,C -2Ru,2R,31567669,T -2Ru,2R,31568447,T -2Ru,2R,31694267,A -2Ru,2R,31703855,G -2Ru,2R,31705832,G -2Ru,2R,31706650,T -2Ru,2R,31710083,C -2Ru,2R,31710302,C -2Ru,2R,31710303,T -2Ru,2R,31710305,A -2Ru,2R,31710369,G -2Ru,2R,31710915,T -2Ru,2R,31711195,C -2Ru,2R,31713353,T -2Ru,2R,31715733,T -2Ru,2R,31719403,T -2Ru,2R,31756006,T -2Ru,2R,31757751,T -2Ru,2R,31761812,T -2Ru,2R,31761967,G -2Ru,2R,31762081,T -2Ru,2R,31881417,A -2Ru,2R,31903706,G -2Ru,2R,31914348,A -2Ru,2R,31936521,C -2Ru,2R,31936715,T -2Ru,2R,31937094,A -2Ru,2R,31941065,A -2Ru,2R,31942143,C -2Ru,2R,32021333,G -2Ru,2R,32021855,A -2Ru,2R,32022565,A -2Ru,2R,32022571,C -2Ru,2R,32023835,C -2Ru,2R,32024114,T -2Ru,2R,32035375,T -2Ru,2R,32036381,G -2Ru,2R,32069754,G -2Ru,2R,32069796,C -2Ru,2R,32085587,G -2Ru,2R,32186487,G -2Ru,2R,32186515,C -2Ru,2R,32187926,A -2Ru,2R,32259877,A -2Ru,2R,32334045,A -2Ru,2R,32345688,T -2Ru,2R,32389180,T -2Ru,2R,32389955,T -2Ru,2R,32391064,A -2Ru,2R,32391115,A -2Ru,2R,32454642,A -2Ru,2R,32455088,C -2Ru,2R,32455416,C -2Ru,2R,32455477,A -2Ru,2R,32456078,C -2Ru,2R,32456084,C -2Ru,2R,32456100,G -2Ru,2R,32457600,A -2Ru,2R,32578054,G -2Ru,2R,32667533,T -2Ru,2R,32683852,T -2Ru,2R,32783950,T -2Ru,2R,32837417,G -2Ru,2R,32864046,A -2Ru,2R,32867333,C -2Ru,2R,32868032,A -2Ru,2R,32874840,G -2Ru,2R,32875403,T -2Ru,2R,32875417,A -2Ru,2R,32969453,A -2Ru,2R,32970067,T -2Ru,2R,32970693,C -2Ru,2R,32970938,C -2Ru,2R,32992157,G -2Ru,2R,33219359,T -2Ru,2R,33244517,C -2Ru,2R,33263900,C -2Ru,2R,33265687,A -2Ru,2R,33265726,G -2Ru,2R,33266311,G -2Ru,2R,33318408,C -2Ru,2R,33322045,C -2Ru,2R,33340915,A -2Ru,2R,33430169,T -2Ru,2R,33430227,A -2Ru,2R,33432734,T -2Ru,2R,33432741,C -2Ru,2R,33530545,A -2Ru,2R,33530587,A -2Ru,2R,33531272,A -2Ru,2R,33531440,C -2Ru,2R,33531465,C -2Ru,2R,33531510,G -2Ru,2R,33531524,A -2Ru,2R,33605881,G -2Ru,2R,33613826,A -2Ru,2R,33640613,A -2Ru,2R,33697365,C -2Ru,2R,33776367,A -2Ru,2R,33834025,A -2Ru,2R,34072431,A -2Ru,2R,34123399,C -2Ru,2R,34123484,T -2Ru,2R,34124219,T -2Ru,2R,34124287,T -2Ru,2R,34125748,A -2Ru,2R,34128662,T -2Ru,2R,34132663,A -2Ru,2R,34138577,T -2Ru,2R,34138591,T -2Ru,2R,34142017,A -2Ru,2R,34147633,T -2Ru,2R,34175319,T -2Ru,2R,34332338,A -2Ru,2R,34343157,C -2Ru,2R,34343989,T -2Ru,2R,34347394,T -2Ru,2R,34350693,T -2Ru,2R,34352080,G -2Ru,2R,34358302,T -2Ru,2R,34397401,C -2Ru,2R,34420116,A -2Ru,2R,34420126,A -2Ru,2R,34422482,C -2Ru,2R,34424106,C -2Ru,2R,34429402,G -2Ru,2R,34432021,A -2Ru,2R,34432086,C -2Ru,2R,34433444,A -2Ru,2R,34434289,A -2Ru,2R,34451733,T -2Ru,2R,34697268,T -2Ru,2R,34702912,T -2Ru,2R,34702931,T -2Ru,2R,34739085,A -2Ru,2R,34739416,C -2Ru,2R,34739767,G -2Ru,2R,34740162,T -2Ru,2R,34740307,A -2Ru,2R,34741288,A -2Ru,2R,34870982,T -2Ru,2R,34888110,A -2Ru,2R,34907054,A -2Ru,2R,35236293,A -2Ru,2R,35271668,G -2Ru,2R,35297836,A -2Ru,2R,35305761,G -2Ru,2R,35355667,C -2Ru,2R,35355964,G -2Ru,2R,35373216,C -2Ru,2R,35373702,T -2Ru,2R,35373966,T -2Ru,2R,35404397,C -2Ru,2R,35408302,T -2Ru,2R,35488437,G -2Ru,2R,35488795,A -2Ru,2R,35489077,G -2Ru,2R,35496370,A -2Ru,2R,35498331,G diff --git a/tests/anoph/conftest.py b/tests/anoph/conftest.py index f94768ad3..165c2755c 100644 --- a/tests/anoph/conftest.py +++ b/tests/anoph/conftest.py @@ -1030,6 +1030,7 @@ def __init__( self.init_genome_features() self.init_metadata() self.init_snp_sites() + self.init_karyotype_tags() self.init_site_filters() self.init_snp_genotypes() self.init_site_annotations() @@ -1094,6 +1095,9 @@ def init_metadata(self): def init_snp_sites(self): pass + def init_karyotype_tags(self): + pass + def init_site_filters(self): pass @@ -1150,6 +1154,7 @@ def init_config(self): "SITE_ANNOTATIONS_ZARR_PATH": "reference/genome/agamp4/Anopheles-gambiae-PEST_SEQANNOTATION_AgamP4.12.zarr", "DEFAULT_AIM_ANALYSIS": "20220528", "DEFAULT_SITE_FILTERS_ANALYSIS": "dt_20200416", + "DEFAULT_KARYOTYPE_ANALYSIS": "simtest", "DEFAULT_COHORTS_ANALYSIS": "20230516", "SITE_MASK_IDS": ["gamb_colu_arab", "gamb_colu", "arab"], "PHASING_ANALYSIS_IDS": ["gamb_colu_arab", "gamb_colu", "arab"], @@ -1519,6 +1524,40 @@ def init_snp_sites(self): path=path, contigs=self.contigs, genome=self.genome ) + def init_karyotype_tags(self): + analysis = self.config["DEFAULT_KARYOTYPE_ANALYSIS"] + + # Generate tag SNP data using positions from simulated SNP sites. + # N.B., inversions are defined here with their contigs explicitly + # rather than derived via string slicing, for robustness. + tags = [] + for contig, inversion in [("2L", "2La"), ("2R", "2Rb")]: + snp_pos = self.snp_sites[contig]["variants"]["POS"][:] + snp_alt = self.snp_sites[contig]["variants"]["ALT"][:] + n_tags = min(20, len(snp_pos)) + indices = self.rng.choice(len(snp_pos), size=n_tags, replace=False) + indices.sort() + for idx in indices: + tags.append( + { + "inversion": inversion, + "contig": contig, + "position": int(snp_pos[idx]), + "alt_allele": snp_alt[idx][0].decode(), + } + ) + + df = pd.DataFrame(tags) + path = ( + self.bucket_path + / "v3" + / "snp_karyotype" + / analysis + / "karyotype_tag_snps.csv" + ) + path.parent.mkdir(parents=True, exist_ok=True) + df.to_csv(path, index=False) + def init_site_filters(self): analysis = self.config["DEFAULT_SITE_FILTERS_ANALYSIS"] diff --git a/tests/anoph/test_karyotype.py b/tests/anoph/test_karyotype.py new file mode 100644 index 000000000..a3b9e44a9 --- /dev/null +++ b/tests/anoph/test_karyotype.py @@ -0,0 +1,89 @@ +import pandas as pd +import pytest + +from malariagen_data import ag3 as _ag3 +from malariagen_data import af1 as _af1 +from malariagen_data.anoph.karyotype import AnophelesKaryotypeAnalysis + + +@pytest.fixture +def ag3_sim_api(ag3_sim_fixture): + return AnophelesKaryotypeAnalysis( + url=ag3_sim_fixture.url, + public_url=ag3_sim_fixture.url, + config_path=_ag3.CONFIG_PATH, + major_version_number=_ag3.MAJOR_VERSION_NUMBER, + major_version_path=_ag3.MAJOR_VERSION_PATH, + pre=True, + gff_gene_type="gene", + gff_gene_name_attribute="Name", + gff_default_attributes=("ID", "Parent", "Name", "description"), + ) + + +@pytest.fixture +def af1_sim_api(af1_sim_fixture): + return AnophelesKaryotypeAnalysis( + url=af1_sim_fixture.url, + public_url=af1_sim_fixture.url, + config_path=_af1.CONFIG_PATH, + major_version_number=_af1.MAJOR_VERSION_NUMBER, + major_version_path=_af1.MAJOR_VERSION_PATH, + pre=True, + gff_gene_type="protein_coding_gene", + gff_gene_name_attribute="Note", + gff_default_attributes=("ID", "Parent", "Note", "description"), + ) + + +def test_load_inversion_tags(ag3_sim_api): + df = ag3_sim_api.load_inversion_tags(inversion="2Rb") + assert isinstance(df, pd.DataFrame) + assert set(df.columns) >= {"inversion", "contig", "position", "alt_allele"} + assert (df["inversion"] == "2Rb").all() + assert (df["contig"] == "2R").all() + assert len(df) > 0 + + +def test_load_inversion_tags_2la(ag3_sim_api): + df = ag3_sim_api.load_inversion_tags(inversion="2La") + assert isinstance(df, pd.DataFrame) + assert (df["inversion"] == "2La").all() + assert (df["contig"] == "2L").all() + assert len(df) > 0 + + +def test_load_inversion_tags_invalid(ag3_sim_api): + with pytest.raises(ValueError, match="Unknown inversion"): + ag3_sim_api.load_inversion_tags(inversion="X_x") + + +def test_load_inversion_tags_not_implemented(af1_sim_api): + with pytest.raises(NotImplementedError): + af1_sim_api.load_inversion_tags(inversion="2La") + + +def test_karyotype(ag3_sim_api): + df = ag3_sim_api.karyotype(inversion="2Rb") + assert isinstance(df, pd.DataFrame) + expected_cols = { + "sample_id", + "inversion", + "karyotype_2Rb_mean", + "karyotype_2Rb", + "total_tag_snps", + } + assert set(df.columns) == expected_cols + assert (df["inversion"] == "2Rb").all() + assert all(df["karyotype_2Rb"].isin([0, 1, 2])) + assert all(df["karyotype_2Rb_mean"].between(0, 2)) + + +def test_karyotype_invalid_inversion(ag3_sim_api): + with pytest.raises(ValueError, match="Unknown inversion"): + ag3_sim_api.karyotype(inversion="X_x") + + +def test_karyotype_not_implemented(af1_sim_api): + with pytest.raises(NotImplementedError): + af1_sim_api.karyotype(inversion="2La")