Skip to content

Commit c659b69

Browse files
authored
Merge pull request #816 from malariagen/fix/phenotype-path-construction
fix(phenotypes): Correct GCS path construction
2 parents f591dba + 6dfba05 commit c659b69

1 file changed

Lines changed: 26 additions & 19 deletions

File tree

malariagen_data/anoph/phenotypes.py

Lines changed: 26 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import pandas as pd
22
import xarray as xr
3-
from typing import Callable, Optional, List, Any
3+
from typing import Callable, Optional, List, Any, TYPE_CHECKING
44
import warnings
55
import fsspec
66
from numpydoc_decorator import doc # type: ignore
@@ -15,14 +15,20 @@ class AnophelesPhenotypeData:
1515
Inherited by AnophelesDataResource subclasses (e.g., Ag3).
1616
"""
1717

18-
# Type annotations for MyPy
19-
_url: str
20-
_fs: fsspec.AbstractFileSystem
21-
sample_metadata: Callable[..., pd.DataFrame]
22-
sample_sets: list[str]
23-
_prep_sample_sets_param: Callable[..., Any]
24-
snp_calls: Callable[..., Any]
25-
haplotypes: Callable[..., Any]
18+
if TYPE_CHECKING:
19+
# Type annotations for MyPy
20+
_url: str
21+
_fs: fsspec.AbstractFileSystem
22+
sample_metadata: Callable[..., pd.DataFrame]
23+
_base_path: str
24+
_major_version_path: str
25+
_release_to_path: Callable[[str], str]
26+
lookup_release: Callable[..., str]
27+
_prep_sample_sets_param: Callable[..., Any]
28+
29+
sample_sets: Callable[..., pd.DataFrame]
30+
snp_calls: Callable[..., Any]
31+
haplotypes: Callable[..., Any]
2632

2733
def __init__(self, **kwargs):
2834
super().__init__(**kwargs)
@@ -35,11 +41,14 @@ def _load_phenotype_data(
3541
Load raw phenotypic data from GCS for given sample sets.
3642
"""
3743
phenotype_dfs = []
38-
base_phenotype_path = f"{self._url}v3.2/phenotypes/all"
3944

4045
for sample_set in sample_sets:
41-
phenotype_path = f"{base_phenotype_path}/{sample_set}/phenotypes.csv"
4246
try:
47+
release = self.lookup_release(sample_set=sample_set)
48+
release_path = self._release_to_path(release)
49+
50+
phenotype_path = f"{self._base_path}/{release_path}/phenotypes/all/{sample_set}/phenotypes.csv"
51+
4352
if not self._fs.exists(phenotype_path):
4453
warnings.warn(
4554
f"Phenotype data file not found for {sample_set} at {phenotype_path}"
@@ -61,14 +70,9 @@ def _load_phenotype_data(
6170
df_pheno["sample_set"] = sample_set
6271
phenotype_dfs.append(df_pheno)
6372

64-
except FileNotFoundError:
65-
warnings.warn(
66-
f"Phenotype data file not found for {sample_set} at {phenotype_path}"
67-
)
68-
continue
6973
except Exception as e:
7074
warnings.warn(
71-
f"Unexpected error loading phenotype data for {sample_set} from {phenotype_path}: {e}"
75+
f"Unexpected error loading phenotype data for {sample_set}: {e}"
7276
)
7377
continue
7478

@@ -480,11 +484,14 @@ def phenotype_sample_sets(self) -> List[str]:
480484

481485
all_sample_sets = self.sample_sets()["sample_set"].tolist() # type: ignore[operator]
482486
phenotype_sample_sets = []
483-
base_phenotype_path = f"{self._url}v3.2/phenotypes/all"
484487

485488
for sample_set in all_sample_sets:
486489
try:
487-
phenotype_path = f"{base_phenotype_path}/{sample_set}/phenotypes.csv"
490+
release = self.lookup_release(sample_set=sample_set)
491+
release_path = self._release_to_path(release)
492+
493+
phenotype_path = f"{self._base_path}/{release_path}/phenotypes/all/{sample_set}/phenotypes.csv"
494+
488495
if self._fs.exists(phenotype_path):
489496
phenotype_sample_sets.append(sample_set)
490497
except Exception:

0 commit comments

Comments
 (0)