Skip to content

Commit d125707

Browse files
committed
WIP: amend data types
1 parent 62a848e commit d125707

2 files changed

Lines changed: 13 additions & 11 deletions

File tree

malariagen_data/anoph/base.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -405,14 +405,14 @@ def _relevant_releases(self) -> Tuple[str, ...]:
405405

406406
if self._cache_releases is None:
407407
# Start a list of the relevant releases.
408-
relevant_releases = []
408+
relevant_releases = [] # type: List[str]
409409

410410
# Get the available releases, which depends on the `pre` setting.
411411
available_releases = self._available_releases
412412

413413
# If there are no criteria, then all available releases are relevant.
414414
if not self._unrestricted_use_only and not self._surveillance_use_only:
415-
relevant_releases = available_releases
415+
relevant_releases = list(available_releases)
416416

417417
elif self._unrestricted_use_only and not self._surveillance_use_only:
418418
# Get the releases with unrestricted data.
@@ -494,6 +494,9 @@ def client_location(self) -> str:
494494
location = "unknown"
495495
return location
496496

497+
def _surveillance_flags(self, sample_sets: List[str]):
498+
raise NotImplementedError("Subclasses must implement `_surveillance_flags`.")
499+
497500
def _release_has_unrestricted_data(self, *, release: str):
498501
"""Return `True` if the specified release has any unrestricted data. Otherwise return `False`."""
499502

@@ -526,7 +529,6 @@ def _release_has_surveillance_data(self, *, release: str):
526529
sample_sets = sample_sets_manifest_df["sample_set"].to_list()
527530

528531
# Determine whether any of the sample sets have surveillance data.
529-
# Note: rather than using `_surveillance_flags`, to avoid unnecessary processing, we only need to find one sample set.
530532
release_has_surveillance_data = False
531533
for sample_set in sample_sets:
532534
if self._sample_set_has_surveillance_data(sample_set=sample_set):

malariagen_data/anoph/sample_metadata.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,13 @@ def _parse_metadata_paths(
9090
self,
9191
path_template: str,
9292
parse_metadata_func: Callable[[str, Union[bytes, Exception]], pd.DataFrame],
93-
sample_sets: Optional[base_params.sample_sets] = None,
93+
sample_sets: List[str],
9494
aim_analysis: Optional[str] = None,
9595
cohorts_analysis: Optional[str] = None,
9696
) -> pd.DataFrame:
97-
# Warning: don't use `_prep_sample_sets_param` in this function because that can cause a circular dependency, eventually raising a RecursionError.
98-
# For instance, `_prep_sample_sets_param` uses `_relevant_sample_sets`, which uses `_surveillance_flags, which uses `_parse_metadata_paths`.
99-
# Instead, use `_prep_sample_sets_param` to prepare `sample_sets` before passing it to this function.
97+
# Note: we don't use `_prep_sample_sets_param` in this function because that can cause a circular dependency, eventually raising a `RecursionError`.
98+
# For instance, `_prep_sample_sets_param` uses `_relevant_sample_sets`, which uses `_surveillance_flags`, which uses `_parse_metadata_paths`.
99+
# Instead, use `_prep_sample_sets_param` to prepare `sample_sets` as a `List[str]` before passing it to this function.
100100

101101
# Obtain paths for all files we need to fetch.
102102
file_paths: Mapping[str, str] = self._metadata_paths(
@@ -410,10 +410,10 @@ def _parse_surveillance_flags(
410410
`is_surveillance` indicates whether the sample can be used for surveillance,
411411
""",
412412
)
413-
def _surveillance_flags(self, sample_sets: base_params.sample_sets) -> pd.DataFrame:
414-
# Warning: don't use `_prep_sample_sets_param` here, because `_prep_sample_sets_param` uses `_relevant_sample_sets`,
415-
# which uses this function, which would cause a RecursionError due to cyclic dependency.
416-
# Instead, prepare the `sample_sets` parameter before calling this function.
413+
def _surveillance_flags(self, sample_sets: List[str]) -> pd.DataFrame:
414+
# Note: we don't use `_prep_sample_sets_param` in this function because that can cause a circular dependency, eventually raising a `RecursionError`.
415+
# For instance, `_prep_sample_sets_param` uses `_relevant_sample_sets`, which in turn calls this function (`_surveillance_flags`).
416+
# Instead, use `_prep_sample_sets_param` to prepare `sample_sets` as a `List[str]` before passing it to this function.
417417

418418
return self._parse_metadata_paths(
419419
path_template="{release_path}/metadata/general/{sample_set}/surveillance.flags.csv",

0 commit comments

Comments
 (0)