Skip to content

Commit e1a37a1

Browse files
authored
Merge branch 'master' into fix/h12-multi-panel-param-forwarding
2 parents d53e3cd + 0e43cc0 commit e1a37a1

7 files changed

Lines changed: 352 additions & 126 deletions

File tree

malariagen_data/anoph/base.py

Lines changed: 19 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -562,6 +562,13 @@ def _sample_set_has_unrestricted_use(self, *, sample_set: str):
562562
release_manifest_df = self._read_sample_sets_manifest(
563563
single_release=sample_set_release
564564
)
565+
566+
if "unrestricted_use" not in release_manifest_df.columns:
567+
raise ValueError(
568+
f"Column 'unrestricted_use' missing from manifest for sample set '{sample_set}'. "
569+
"This indicates a data integrity issue in the release manifest."
570+
)
571+
565572
sample_set_records_srs = release_manifest_df.loc[
566573
release_manifest_df["sample_set"] == sample_set, "unrestricted_use"
567574
]
@@ -824,12 +831,19 @@ def lookup_study_info(self, sample_set: base_params.sample_set) -> dict:
824831
def lookup_terms_of_use_info(self, sample_set: base_params.sample_set) -> dict:
825832
if self._cache_sample_set_to_terms_of_use_info is None:
826833
df_sample_sets = self._available_sample_sets().set_index("sample_set")
834+
expected_cols = [
835+
"terms_of_use_expiry_date",
836+
"terms_of_use_url",
837+
"unrestricted_use",
838+
]
839+
missing_cols = [c for c in expected_cols if c not in df_sample_sets.columns]
840+
if missing_cols:
841+
raise ValueError(
842+
f"Terms-of-use columns missing from manifest: {missing_cols}. "
843+
"This indicates a data integrity issue in the release manifest."
844+
)
827845
self._cache_sample_set_to_terms_of_use_info = df_sample_sets[
828-
[
829-
"terms_of_use_expiry_date",
830-
"terms_of_use_url",
831-
"unrestricted_use",
832-
]
846+
expected_cols
833847
].to_dict(orient="index")
834848
try:
835849
return self._cache_sample_set_to_terms_of_use_info[sample_set]

malariagen_data/anoph/h12.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -630,7 +630,7 @@ def plot_h12_gwss_multi_overlay_track(
630630
)
631631

632632
# Plot H12.
633-
for i, (cohort_label, (x, h12, contig)) in enumerate(res.items()):
633+
for i, (cohort_label, (x, h12, contig_idx)) in enumerate(res.items()):
634634
fig.scatter(
635635
x=x,
636636
y=h12,

malariagen_data/anoph/snp_data.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1872,9 +1872,10 @@ def biallelic_snp_calls(
18721872

18731873
# Apply missingness condition.
18741874
if max_missing_an is not None:
1875-
an_missing = (ds_out.sizes["samples"] * ds_out.sizes["ploidy"]) - an
1875+
an_total = ds_out.sizes["samples"] * ds_out.sizes["ploidy"]
1876+
an_missing = an_total - an
18761877
if isinstance(max_missing_an, float):
1877-
an_missing_frac = an_missing / an
1878+
an_missing_frac = an_missing / an_total
18781879
loc_missing = an_missing_frac <= max_missing_an
18791880
else:
18801881
loc_missing = an_missing <= max_missing_an

malariagen_data/anopheles.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1066,7 +1066,7 @@ def cohort_diversity_stats(
10661066
cohort_label, cohort_query = cohort
10671067

10681068
else:
1069-
raise TypeError(r"invalid cohort parameter: {cohort!r}")
1069+
raise TypeError(f"invalid cohort parameter: {cohort!r}")
10701070

10711071
debug("access allele counts")
10721072
ac = self.snp_allele_counts(

0 commit comments

Comments
 (0)