Skip to content

Commit bb93545

Browse files
authored
Merge pull request #1260 from khushthecoder/fix/replace-asserts-with-runtime-validation
Fix/replace asserts with runtime validation
2 parents eb5bdaf + 57df6e3 commit bb93545

File tree

14 files changed: 276 additions and 60 deletions.

malariagen_data/anoph/aim_data.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,14 +254,24 @@ def plot_aim_heatmap(
254254
gn = np.take(gn, ix_sorted, axis=1)
255255
samples = np.take(samples, ix_sorted, axis=0)
256256

257+
species = aims.split("_vs_")
258+
257259
# Set up colors for genotypes
258260
if palette is None:
259-
assert self._aim_palettes is not None
261+
if self._aim_palettes is None:
262+
raise RuntimeError(
263+
"AIM palettes are not available for this data resource. "
264+
"Please provide the 'palette' parameter explicitly (4 colors)."
265+
)
260266
palette = self._aim_palettes[aims]
261-
assert len(palette) == 4
267+
if len(palette) != 4:
268+
raise RuntimeError(
269+
"Expected AIM palette to have 4 colors "
270+
f"(missing, {species[0]}/{species[0]}, {species[0]}/{species[1]}, {species[1]}/{species[1]}), "
271+
f"got {len(palette)}"
272+
)
262273
# Expect 4 colors, in the order:
263274
# missing, hom taxon 1, het, hom taxon 2
264-
species = aims.split("_vs_")
265275

266276
# Create subplots.
267277
fig = go_make_subplots(

malariagen_data/anoph/cnv_frq.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -597,7 +597,11 @@ def _gene_cnv_frequencies_advanced(
597597
if nobs_mode == "called":
598598
nobs[:, cohort_index] = np.repeat(cohort_n_called, 2)
599599
else:
600-
assert nobs_mode == "fixed"
600+
if nobs_mode != "fixed":
601+
raise RuntimeError(
602+
f"Internal error: expected nobs_mode='fixed', got {nobs_mode!r}. "
603+
"This should not happen; please open a GitHub issue."
604+
)
601605
nobs[:, cohort_index] = cohort.size
602606

603607
debug("compute frequency")

malariagen_data/anoph/frq_base.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -417,14 +417,6 @@ def plot_frequencies_heatmap(
417417
`aa_allele_frequencies_advanced()` or
418418
`gene_cnv_frequencies_advanced()`.
419419
""",
420-
taxa="""
421-
Taxon or list of taxa to include in the plot. If None,
422-
all taxa are shown.
423-
""",
424-
areas="""
425-
Area or list of areas to include in the plot. If None,
426-
all areas are shown.
427-
""",
428420
kwargs="Passed through to `px.line()`.",
429421
),
430422
returns="""

malariagen_data/anoph/genome_features.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,11 @@ def _genome_features_for_contig(self, *, contig: str, attributes: Tuple[str, ...
110110

111111
# Handle normal contigs in the reference genome.
112112
else:
113-
assert contig in self.contigs
113+
if contig not in self.contigs:
114+
raise ValueError(
115+
f"Contig {contig!r} not found. "
116+
f"Available contigs: {self.contigs}"
117+
)
114118
df = self._genome_features(attributes=attributes)
115119

116120
# Apply contig query.
@@ -561,7 +565,8 @@ def plot_genes(
561565

562566
# Increase the figure height by a certain factor, to accommodate labels.
563567
height_increase_factor = 1.3
564-
assert fig.height is not None
568+
if fig.height is None:
569+
raise RuntimeError("Figure height is unexpectedly None")
565570
fig.height = int(fig.height * height_increase_factor)
566571

567572
# Get the original y_range.

malariagen_data/anoph/genome_sequence.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,11 @@ def _genome_sequence_for_contig(self, *, contig, inline_array, chunks):
8686

8787
# Handle normal contigs in the reference genome.
8888
else:
89-
assert contig in self.contigs
89+
if contig not in self.contigs:
90+
raise ValueError(
91+
f"Contig {contig!r} not found. "
92+
f"Available contigs: {self.contigs}"
93+
)
9094
root = self.open_genome()
9195
z = root[contig]
9296
d = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)

malariagen_data/anoph/h1x.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -403,8 +403,17 @@ def _moving_h1x(ha, hb, size, start=0, stop=None, step=None):
403403
H1X values (sum of squares of joint haplotype frequencies).
404404
"""
405405

406-
assert ha.ndim == hb.ndim == 2
407-
assert ha.shape[0] == hb.shape[0]
406+
if ha.ndim != 2 or hb.ndim != 2:
407+
raise ValueError(
408+
"Expected both haplotype arrays to be 2-dimensional "
409+
"(n_variants, n_haplotypes), "
410+
f"got ndim=({ha.ndim}, {hb.ndim})"
411+
)
412+
if ha.shape[0] != hb.shape[0]:
413+
raise ValueError(
414+
"Expected both haplotype arrays to have the same number of variants "
415+
f"(axis 0), got ({ha.shape[0]}, {hb.shape[0]})"
416+
)
408417

409418
# Construct moving windows.
410419
windows = allel.index_windows(ha, size, start, stop, step)

malariagen_data/anoph/hap_data.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,11 @@ def phasing_analysis_ids(self) -> Tuple[str, ...]:
5858
def _prep_phasing_analysis_param(self, *, analysis: hap_params.analysis) -> str:
5959
if analysis == base_params.DEFAULT:
6060
# Use whatever is the default phasing analysis for this data resource.
61-
assert self._default_phasing_analysis is not None
61+
if self._default_phasing_analysis is None:
62+
raise RuntimeError(
63+
"No default phasing analysis configured. "
64+
"Please specify the 'analysis' parameter explicitly."
65+
)
6266
return self._default_phasing_analysis
6367
elif analysis in self.phasing_analysis_ids:
6468
return analysis
@@ -118,7 +122,11 @@ def _haplotype_sites_for_contig(
118122

119123
# Handle contig in the reference genome.
120124
else:
121-
assert contig in self.contigs
125+
if contig not in self.contigs:
126+
raise ValueError(
127+
f"Contig {contig!r} not found. "
128+
f"Available contigs: {self.contigs}"
129+
)
122130
root = self.open_haplotype_sites(analysis=analysis)
123131
z = root[f"{contig}/variants/{field}"]
124132
ret = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)
@@ -251,7 +259,11 @@ def _haplotypes_for_contig(
251259

252260
# Handle contig in the reference genome.
253261
else:
254-
assert contig in self.contigs
262+
if contig not in self.contigs:
263+
raise ValueError(
264+
f"Contig {contig!r} not found. "
265+
f"Available contigs: {self.contigs}"
266+
)
255267

256268
# Open haplotypes zarr.
257269
root = self.open_haplotypes(sample_set=sample_set, analysis=analysis)

malariagen_data/anoph/hap_frq.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,11 @@ def haplotypes_frequencies(
100100
hap_dict = {k: 0 for k in f_all.keys()}
101101

102102
n_samples = np.count_nonzero(loc_coh)
103-
assert n_samples >= min_cohort_size
103+
if n_samples < min_cohort_size:
104+
raise ValueError(
105+
f"Not enough samples ({n_samples}) for minimum "
106+
f"cohort size ({min_cohort_size})"
107+
)
104108
gt_coh = gt.compress(loc_coh, axis=1)
105109
gt_hap = gt_coh.to_haplotypes()
106110
f, _, _ = _haplotype_frequencies(gt_hap)
@@ -224,7 +228,11 @@ def haplotypes_frequencies_advanced(
224228
hap_freq = {k: 0 for k in f_all.keys()}
225229
hap_count = {k: 0 for k in f_all.keys()}
226230
hap_nob = {k: 2 * n_samples for k in f_all.keys()}
227-
assert n_samples >= min_cohort_size
231+
if n_samples < min_cohort_size:
232+
raise ValueError(
233+
f"Not enough samples ({n_samples}) for minimum "
234+
f"cohort size ({min_cohort_size})"
235+
)
228236
sample_indices = group_samples_by_cohort.indices[cohort_key]
229237
gt_coh = gt.take(sample_indices, axis=1)
230238
gt_hap = gt_coh.to_haplotypes()

malariagen_data/anoph/sample_metadata.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -594,8 +594,16 @@ def _aim_analysis(self):
594594
def _parse_aim_metadata(
595595
self, sample_set: str, data: Union[bytes, Exception]
596596
) -> pd.DataFrame:
597-
assert self._aim_metadata_columns is not None
598-
assert self._aim_metadata_dtype is not None
597+
if self._aim_metadata_columns is None:
598+
raise RuntimeError(
599+
"Internal error: AIM metadata columns are not configured. "
600+
"This should not happen; please open a GitHub issue."
601+
)
602+
if self._aim_metadata_dtype is None:
603+
raise RuntimeError(
604+
"Internal error: AIM metadata dtypes are not configured. "
605+
"This should not happen; please open a GitHub issue."
606+
)
599607
if isinstance(data, bytes):
600608
# Parse CSV data but don't apply the dtype yet.
601609
df = pd.read_csv(io.BytesIO(data), na_values="")

malariagen_data/anoph/snp_data.py

Lines changed: 97 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,14 @@ def site_mask_ids(self) -> Tuple[str, ...]:
103103
"""
104104
return tuple(self.config.get("SITE_MASK_IDS", ())) # ensure tuple
105105

106+
def site_mask_def(self) -> str:
107+
"""Return the default site mask identifier for this data resource."""
108+
if self._default_site_mask is None:
109+
raise RuntimeError(
110+
"No default site mask configured. Please specify the 'site_mask' parameter explicitly."
111+
)
112+
return self._default_site_mask
113+
106114
@property
107115
def _site_annotations_zarr_path(self) -> str:
108116
return self.config["SITE_ANNOTATIONS_ZARR_PATH"]
@@ -114,7 +122,11 @@ def _prep_site_mask_param(
114122
) -> base_params.site_mask:
115123
if site_mask == base_params.DEFAULT:
116124
# Use whatever is the default site mask for this data resource.
117-
assert self._default_site_mask is not None
125+
if self._default_site_mask is None:
126+
raise RuntimeError(
127+
"No default site mask configured. "
128+
"Please specify the 'site_mask' parameter explicitly."
129+
)
118130
return self._default_site_mask
119131
elif site_mask in self.site_mask_ids:
120132
return site_mask
@@ -214,7 +226,9 @@ def _site_filters_for_contig(
214226
*,
215227
contig: str,
216228
mask: base_params.site_mask,
217-
field: base_params.field,
229+
# Field identifies which per-variant filter array to read (e.g. "filter_pass").
230+
# Default kept for backwards compatibility with internal callers/tests.
231+
field: base_params.field = "filter_pass",
218232
inline_array: base_params.inline_array,
219233
chunks: base_params.chunks,
220234
) -> da.Array:
@@ -234,7 +248,11 @@ def _site_filters_for_contig(
234248
return d
235249

236250
else:
237-
assert contig in self.contigs
251+
if contig not in self.contigs:
252+
raise ValueError(
253+
f"Contig {contig!r} not found. "
254+
f"Available contigs: {self.contigs}"
255+
)
238256
root = self.open_site_filters(mask=mask)
239257
z = root[f"{contig}/variants/{field}"]
240258
d = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)
@@ -336,12 +354,32 @@ def _snp_sites_for_contig(
336354

337355
# Handle contig in the reference genome.
338356
else:
339-
assert contig in self.contigs
357+
if contig not in self.contigs:
358+
raise ValueError(
359+
f"Contig {contig!r} not found. "
360+
f"Available contigs: {self.contigs}"
361+
)
340362
root = self.open_snp_sites()
341363
z = root[f"{contig}/variants/{field}"]
342364
ret = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)
343365
return ret
344366

367+
# Backwards compatible alias for internal callers/tests.
368+
def snp_sites_for_contig(
369+
self,
370+
*,
371+
contig: base_params.contig,
372+
field: base_params.field,
373+
inline_array: base_params.inline_array,
374+
chunks: base_params.chunks,
375+
) -> da.Array:
376+
return self._snp_sites_for_contig(
377+
contig=contig,
378+
field=field,
379+
inline_array=inline_array,
380+
chunks=chunks,
381+
)
382+
345383
def _snp_sites_for_region(
346384
self,
347385
*,
@@ -445,7 +483,11 @@ def _snp_genotypes_for_contig(
445483
return da.concatenate(arrs)
446484

447485
else:
448-
assert contig in self.contigs
486+
if contig not in self.contigs:
487+
raise ValueError(
488+
f"Contig {contig!r} not found. "
489+
f"Available contigs: {self.contigs}"
490+
)
449491
root = self.open_snp_genotypes(sample_set=sample_set)
450492
z = root[f"{contig}/calldata/{field}"]
451493
d = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)
@@ -601,7 +643,11 @@ def _snp_variants_for_contig(
601643
return ret
602644

603645
else:
604-
assert contig in self.contigs
646+
if contig not in self.contigs:
647+
raise ValueError(
648+
f"Contig {contig!r} not found. "
649+
f"Available contigs: {self.contigs}"
650+
)
605651
coords = dict()
606652
data_vars = dict()
607653
sites_root = self.open_snp_sites()
@@ -721,6 +767,40 @@ def _site_annotations_raw(
721767

722768
return ds
723769

770+
def _site_annotations_for_contig(
771+
self,
772+
*,
773+
contig,
774+
inline_array: base_params.inline_array,
775+
chunks: base_params.chunks,
776+
) -> xr.Dataset:
777+
"""
778+
Backwards compatible internal helper.
779+
780+
Raises a ValueError with a consistent message when the contig is unknown,
781+
matching expectations in tests and existing error-handling behavior.
782+
"""
783+
if contig in getattr(self, "virtual_contigs", {}):
784+
contigs = self.virtual_contigs[contig]
785+
ds_parts = [
786+
self._site_annotations_raw(
787+
contig=c,
788+
inline_array=inline_array,
789+
chunks=chunks,
790+
)
791+
for c in contigs
792+
]
793+
return _simple_xarray_concat(ds_parts, dim=DIM_VARIANT)
794+
795+
if contig not in self.contigs:
796+
raise ValueError(
797+
f"Contig {contig!r} not found. Available contigs: {self.contigs}"
798+
)
799+
800+
return self._site_annotations_raw(
801+
contig=contig, inline_array=inline_array, chunks=chunks
802+
)
803+
724804
@_check_types
725805
@doc(
726806
summary="Load site annotations.",
@@ -977,7 +1057,11 @@ def _snp_calls_for_contig(
9771057

9781058
# Handle contig in the reference genome.
9791059
else:
980-
assert contig in self.contigs
1060+
if contig not in self.contigs:
1061+
raise ValueError(
1062+
f"Contig {contig!r} not found. "
1063+
f"Available contigs: {self.contigs}"
1064+
)
9811065

9821066
coords = dict()
9831067
data_vars = dict()
@@ -1159,7 +1243,12 @@ def _raw_snp_calls(
11591243
inline_array=inline_array,
11601244
chunks=chunks,
11611245
)
1162-
assert x.sizes["variants"] == loc_ann.shape[0]
1246+
if x.sizes["variants"] != loc_ann.shape[0]:
1247+
raise RuntimeError(
1248+
f"Variants dimension mismatch: dataset has "
1249+
f"{x.sizes['variants']} variants but annotation "
1250+
f"mask has {loc_ann.shape[0]}"
1251+
)
11631252
x = x.isel(variants=loc_ann)
11641253

11651254
lx.append(x)

0 commit comments

Comments
 (0)