11import pandas as pd
22import xarray as xr
3- from typing import Callable , Optional , List , Any
3+ from typing import Callable , Optional , List , Any , TYPE_CHECKING
44import warnings
55import fsspec
66from numpydoc_decorator import doc # type: ignore
@@ -15,14 +15,20 @@ class AnophelesPhenotypeData:
1515 Inherited by AnophelesDataResource subclasses (e.g., Ag3).
1616 """
1717
18- # Type annotations for MyPy
19- _url : str
20- _fs : fsspec .AbstractFileSystem
21- sample_metadata : Callable [..., pd .DataFrame ]
22- sample_sets : list [str ]
23- _prep_sample_sets_param : Callable [..., Any ]
24- snp_calls : Callable [..., Any ]
25- haplotypes : Callable [..., Any ]
18+ if TYPE_CHECKING :
19+ # Type annotations for MyPy
20+ _url : str
21+ _fs : fsspec .AbstractFileSystem
22+ sample_metadata : Callable [..., pd .DataFrame ]
23+ _base_path : str
24+ _major_version_path : str
25+ _release_to_path : Callable [[str ], str ]
26+ lookup_release : Callable [..., str ]
27+ _prep_sample_sets_param : Callable [..., Any ]
28+
29+ sample_sets : Callable [..., pd .DataFrame ]
30+ snp_calls : Callable [..., Any ]
31+ haplotypes : Callable [..., Any ]
2632
2733 def __init__ (self , ** kwargs ):
2834 super ().__init__ (** kwargs )
@@ -35,11 +41,14 @@ def _load_phenotype_data(
3541 Load raw phenotypic data from GCS for given sample sets.
3642 """
3743 phenotype_dfs = []
38- base_phenotype_path = f"{ self ._url } v3.2/phenotypes/all"
3944
4045 for sample_set in sample_sets :
41- phenotype_path = f"{ base_phenotype_path } /{ sample_set } /phenotypes.csv"
4246 try :
47+ release = self .lookup_release (sample_set = sample_set )
48+ release_path = self ._release_to_path (release )
49+
50+ phenotype_path = f"{ self ._base_path } /{ release_path } /phenotypes/all/{ sample_set } /phenotypes.csv"
51+
4352 if not self ._fs .exists (phenotype_path ):
4453 warnings .warn (
4554 f"Phenotype data file not found for { sample_set } at { phenotype_path } "
@@ -61,14 +70,9 @@ def _load_phenotype_data(
6170 df_pheno ["sample_set" ] = sample_set
6271 phenotype_dfs .append (df_pheno )
6372
64- except FileNotFoundError :
65- warnings .warn (
66- f"Phenotype data file not found for { sample_set } at { phenotype_path } "
67- )
68- continue
6973 except Exception as e :
7074 warnings .warn (
71- f"Unexpected error loading phenotype data for { sample_set } from { phenotype_path } : { e } "
75+ f"Unexpected error loading phenotype data for { sample_set } : { e } "
7276 )
7377 continue
7478
@@ -480,11 +484,14 @@ def phenotype_sample_sets(self) -> List[str]:
480484
481485 all_sample_sets = self .sample_sets ()["sample_set" ].tolist () # type: ignore[operator]
482486 phenotype_sample_sets = []
483- base_phenotype_path = f"{ self ._url } v3.2/phenotypes/all"
484487
485488 for sample_set in all_sample_sets :
486489 try :
487- phenotype_path = f"{ base_phenotype_path } /{ sample_set } /phenotypes.csv"
490+ release = self .lookup_release (sample_set = sample_set )
491+ release_path = self ._release_to_path (release )
492+
493+ phenotype_path = f"{ self ._base_path } /{ release_path } /phenotypes/all/{ sample_set } /phenotypes.csv"
494+
488495 if self ._fs .exists (phenotype_path ):
489496 phenotype_sample_sets .append (sample_set )
490497 except Exception :
0 commit comments