Skip to content

Commit 01e083a

Browse files
authored
Merge branch 'master' into issue-554-codecov-patch-threshold
2 parents b24f76e + 0e43cc0 commit 01e083a

3 files changed

Lines changed: 344 additions & 120 deletions

File tree

malariagen_data/anoph/base.py

Lines changed: 19 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -562,6 +562,13 @@ def _sample_set_has_unrestricted_use(self, *, sample_set: str):
562562
release_manifest_df = self._read_sample_sets_manifest(
563563
single_release=sample_set_release
564564
)
565+
566+
if "unrestricted_use" not in release_manifest_df.columns:
567+
raise ValueError(
568+
f"Column 'unrestricted_use' missing from manifest for sample set '{sample_set}'. "
569+
"This indicates a data integrity issue in the release manifest."
570+
)
571+
565572
sample_set_records_srs = release_manifest_df.loc[
566573
release_manifest_df["sample_set"] == sample_set, "unrestricted_use"
567574
]
@@ -824,12 +831,19 @@ def lookup_study_info(self, sample_set: base_params.sample_set) -> dict:
824831
def lookup_terms_of_use_info(self, sample_set: base_params.sample_set) -> dict:
825832
if self._cache_sample_set_to_terms_of_use_info is None:
826833
df_sample_sets = self._available_sample_sets().set_index("sample_set")
834+
expected_cols = [
835+
"terms_of_use_expiry_date",
836+
"terms_of_use_url",
837+
"unrestricted_use",
838+
]
839+
missing_cols = [c for c in expected_cols if c not in df_sample_sets.columns]
840+
if missing_cols:
841+
raise ValueError(
842+
f"Terms-of-use columns missing from manifest: {missing_cols}. "
843+
"This indicates a data integrity issue in the release manifest."
844+
)
827845
self._cache_sample_set_to_terms_of_use_info = df_sample_sets[
828-
[
829-
"terms_of_use_expiry_date",
830-
"terms_of_use_url",
831-
"unrestricted_use",
832-
]
846+
expected_cols
833847
].to_dict(orient="index")
834848
try:
835849
return self._cache_sample_set_to_terms_of_use_info[sample_set]

0 commit comments

Comments
 (0)