Skip to content

Commit eaf3122

Browse files
committed
fix: replace assert statements with proper runtime validation in production code
Resolves #1259. Replace 40+ assert statements across 13 production modules with explicit if/raise checks that raise ValueError, TypeError, or RuntimeError with descriptive error messages. This ensures validation is enforced even when Python runs with the -O (optimize) flag, which silently strips assert statements.

Exception mapping:
- Invalid parameter values (metric, contig) → ValueError
- Wrong array shape/ndim/dtype → ValueError
- Internal state unexpectedly None → RuntimeError
- Internal consistency violations → RuntimeError

Three assert statements inside @numba.njit functions in util.py are intentionally preserved, as numba's nopython mode has only limited support for raising Python exceptions.

Files modified:
- malariagen_data/util.py
- malariagen_data/mjn.py
- malariagen_data/anopheles.py
- malariagen_data/anoph/snp_data.py
- malariagen_data/anoph/hap_data.py
- malariagen_data/anoph/hap_frq.py
- malariagen_data/anoph/snp_frq.py
- malariagen_data/anoph/cnv_frq.py
- malariagen_data/anoph/genome_features.py
- malariagen_data/anoph/genome_sequence.py
- malariagen_data/anoph/h1x.py
- malariagen_data/anoph/sample_metadata.py
- malariagen_data/anoph/aim_data.py
1 parent 3c2ee64 commit eaf3122

14 files changed

Lines changed: 189 additions & 54 deletions

malariagen_data/anoph/aim_data.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,9 +256,16 @@ def plot_aim_heatmap(
256256

257257
# Set up colors for genotypes
258258
if palette is None:
259-
assert self._aim_palettes is not None
259+
if self._aim_palettes is None:
260+
raise RuntimeError(
261+
"AIM palettes have not been configured. "
262+
"Please provide a 'palette' parameter or configure AIM_PALETTES."
263+
)
260264
palette = self._aim_palettes[aims]
261-
assert len(palette) == 4
265+
if len(palette) != 4:
266+
raise RuntimeError(
267+
f"Expected AIM palette to have 4 colors, got {len(palette)}"
268+
)
262269
# Expect 4 colors, in the order:
263270
# missing, hom taxon 1, het, hom taxon 2
264271
species = aims.split("_vs_")

malariagen_data/anoph/cnv_frq.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -597,7 +597,11 @@ def _gene_cnv_frequencies_advanced(
597597
if nobs_mode == "called":
598598
nobs[:, cohort_index] = np.repeat(cohort_n_called, 2)
599599
else:
600-
assert nobs_mode == "fixed"
600+
if nobs_mode != "fixed":
601+
raise RuntimeError(
602+
f"Internal error: expected nobs_mode='fixed', "
603+
f"got {nobs_mode!r}"
604+
)
601605
nobs[:, cohort_index] = cohort.size
602606

603607
debug("compute frequency")

malariagen_data/anoph/frq_base.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -417,14 +417,6 @@ def plot_frequencies_heatmap(
417417
`aa_allele_frequencies_advanced()` or
418418
`gene_cnv_frequencies_advanced()`.
419419
""",
420-
taxa="""
421-
Taxon or list of taxa to include in the plot. If None,
422-
all taxa are shown.
423-
""",
424-
areas="""
425-
Area or list of areas to include in the plot. If None,
426-
all areas are shown.
427-
""",
428420
kwargs="Passed through to `px.line()`.",
429421
),
430422
returns="""

malariagen_data/anoph/genome_features.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,11 @@ def _genome_features_for_contig(self, *, contig: str, attributes: Tuple[str, ...
110110

111111
# Handle normal contigs in the reference genome.
112112
else:
113-
assert contig in self.contigs
113+
if contig not in self.contigs:
114+
raise ValueError(
115+
f"Contig {contig!r} not found. "
116+
f"Available contigs: {self.contigs}"
117+
)
114118
df = self._genome_features(attributes=attributes)
115119

116120
# Apply contig query.
@@ -561,7 +565,8 @@ def plot_genes(
561565

562566
# Increase the figure height by a certain factor, to accommodate labels.
563567
height_increase_factor = 1.3
564-
assert fig.height is not None
568+
if fig.height is None:
569+
raise RuntimeError("Figure height is unexpectedly None")
565570
fig.height = int(fig.height * height_increase_factor)
566571

567572
# Get the original y_range.

malariagen_data/anoph/genome_sequence.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,11 @@ def _genome_sequence_for_contig(self, *, contig, inline_array, chunks):
8686

8787
# Handle normal contigs in the reference genome.
8888
else:
89-
assert contig in self.contigs
89+
if contig not in self.contigs:
90+
raise ValueError(
91+
f"Contig {contig!r} not found. "
92+
f"Available contigs: {self.contigs}"
93+
)
9094
root = self.open_genome()
9195
z = root[contig]
9296
d = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)

malariagen_data/anoph/h1x.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -403,8 +403,16 @@ def _moving_h1x(ha, hb, size, start=0, stop=None, step=None):
403403
H1X values (sum of squares of joint haplotype frequencies).
404404
"""
405405

406-
assert ha.ndim == hb.ndim == 2
407-
assert ha.shape[0] == hb.shape[0]
406+
if ha.ndim != 2 or hb.ndim != 2:
407+
raise ValueError(
408+
f"Expected ha and hb to be 2-dimensional, "
409+
f"got ha.ndim={ha.ndim} and hb.ndim={hb.ndim}"
410+
)
411+
if ha.shape[0] != hb.shape[0]:
412+
raise ValueError(
413+
f"ha and hb must have the same number of variants, "
414+
f"got ha.shape[0]={ha.shape[0]} and hb.shape[0]={hb.shape[0]}"
415+
)
408416

409417
# Construct moving windows.
410418
windows = allel.index_windows(ha, size, start, stop, step)

malariagen_data/anoph/hap_data.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,11 @@ def phasing_analysis_ids(self) -> Tuple[str, ...]:
5858
def _prep_phasing_analysis_param(self, *, analysis: hap_params.analysis) -> str:
5959
if analysis == base_params.DEFAULT:
6060
# Use whatever is the default phasing analysis for this data resource.
61-
assert self._default_phasing_analysis is not None
61+
if self._default_phasing_analysis is None:
62+
raise RuntimeError(
63+
"No default phasing analysis configured. "
64+
"Please specify the 'analysis' parameter explicitly."
65+
)
6266
return self._default_phasing_analysis
6367
elif analysis in self.phasing_analysis_ids:
6468
return analysis
@@ -118,7 +122,11 @@ def _haplotype_sites_for_contig(
118122

119123
# Handle contig in the reference genome.
120124
else:
121-
assert contig in self.contigs
125+
if contig not in self.contigs:
126+
raise ValueError(
127+
f"Contig {contig!r} not found. "
128+
f"Available contigs: {self.contigs}"
129+
)
122130
root = self.open_haplotype_sites(analysis=analysis)
123131
z = root[f"{contig}/variants/{field}"]
124132
ret = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)
@@ -251,7 +259,11 @@ def _haplotypes_for_contig(
251259

252260
# Handle contig in the reference genome.
253261
else:
254-
assert contig in self.contigs
262+
if contig not in self.contigs:
263+
raise ValueError(
264+
f"Contig {contig!r} not found. "
265+
f"Available contigs: {self.contigs}"
266+
)
255267

256268
# Open haplotypes zarr.
257269
root = self.open_haplotypes(sample_set=sample_set, analysis=analysis)

malariagen_data/anoph/hap_frq.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,11 @@ def haplotypes_frequencies(
100100
hap_dict = {k: 0 for k in f_all.keys()}
101101

102102
n_samples = np.count_nonzero(loc_coh)
103-
assert n_samples >= min_cohort_size
103+
if n_samples < min_cohort_size:
104+
raise ValueError(
105+
f"Not enough samples ({n_samples}) for minimum "
106+
f"cohort size ({min_cohort_size})"
107+
)
104108
gt_coh = gt.compress(loc_coh, axis=1)
105109
gt_hap = gt_coh.to_haplotypes()
106110
f, _, _ = _haplotype_frequencies(gt_hap)
@@ -224,7 +228,11 @@ def haplotypes_frequencies_advanced(
224228
hap_freq = {k: 0 for k in f_all.keys()}
225229
hap_count = {k: 0 for k in f_all.keys()}
226230
hap_nob = {k: 2 * n_samples for k in f_all.keys()}
227-
assert n_samples >= min_cohort_size
231+
if n_samples < min_cohort_size:
232+
raise ValueError(
233+
f"Not enough samples ({n_samples}) for minimum "
234+
f"cohort size ({min_cohort_size})"
235+
)
228236
sample_indices = group_samples_by_cohort.indices[cohort_key]
229237
gt_coh = gt.take(sample_indices, axis=1)
230238
gt_hap = gt_coh.to_haplotypes()

malariagen_data/anoph/sample_metadata.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -594,8 +594,10 @@ def _aim_analysis(self):
594594
def _parse_aim_metadata(
595595
self, sample_set: str, data: Union[bytes, Exception]
596596
) -> pd.DataFrame:
597-
assert self._aim_metadata_columns is not None
598-
assert self._aim_metadata_dtype is not None
597+
if self._aim_metadata_columns is None:
598+
raise RuntimeError("AIM metadata columns have not been configured.")
599+
if self._aim_metadata_dtype is None:
600+
raise RuntimeError("AIM metadata dtype has not been configured.")
599601
if isinstance(data, bytes):
600602
# Parse CSV data but don't apply the dtype yet.
601603
df = pd.read_csv(io.BytesIO(data), na_values="")

malariagen_data/anoph/snp_data.py

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,11 @@ def _prep_site_mask_param(
114114
) -> base_params.site_mask:
115115
if site_mask == base_params.DEFAULT:
116116
# Use whatever is the default site mask for this data resource.
117-
assert self._default_site_mask is not None
117+
if self._default_site_mask is None:
118+
raise RuntimeError(
119+
"No default site mask configured. "
120+
"Please specify the 'site_mask' parameter explicitly."
121+
)
118122
return self._default_site_mask
119123
elif site_mask in self.site_mask_ids:
120124
return site_mask
@@ -234,7 +238,11 @@ def _site_filters_for_contig(
234238
return d
235239

236240
else:
237-
assert contig in self.contigs
241+
if contig not in self.contigs:
242+
raise ValueError(
243+
f"Contig {contig!r} not found. "
244+
f"Available contigs: {self.contigs}"
245+
)
238246
root = self.open_site_filters(mask=mask)
239247
z = root[f"{contig}/variants/{field}"]
240248
d = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)
@@ -336,7 +344,11 @@ def _snp_sites_for_contig(
336344

337345
# Handle contig in the reference genome.
338346
else:
339-
assert contig in self.contigs
347+
if contig not in self.contigs:
348+
raise ValueError(
349+
f"Contig {contig!r} not found. "
350+
f"Available contigs: {self.contigs}"
351+
)
340352
root = self.open_snp_sites()
341353
z = root[f"{contig}/variants/{field}"]
342354
ret = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)
@@ -445,7 +457,11 @@ def _snp_genotypes_for_contig(
445457
return da.concatenate(arrs)
446458

447459
else:
448-
assert contig in self.contigs
460+
if contig not in self.contigs:
461+
raise ValueError(
462+
f"Contig {contig!r} not found. "
463+
f"Available contigs: {self.contigs}"
464+
)
449465
root = self.open_snp_genotypes(sample_set=sample_set)
450466
z = root[f"{contig}/calldata/{field}"]
451467
d = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)
@@ -601,7 +617,11 @@ def _snp_variants_for_contig(
601617
return ret
602618

603619
else:
604-
assert contig in self.contigs
620+
if contig not in self.contigs:
621+
raise ValueError(
622+
f"Contig {contig!r} not found. "
623+
f"Available contigs: {self.contigs}"
624+
)
605625
coords = dict()
606626
data_vars = dict()
607627
sites_root = self.open_snp_sites()
@@ -977,7 +997,11 @@ def _snp_calls_for_contig(
977997

978998
# Handle contig in the reference genome.
979999
else:
980-
assert contig in self.contigs
1000+
if contig not in self.contigs:
1001+
raise ValueError(
1002+
f"Contig {contig!r} not found. "
1003+
f"Available contigs: {self.contigs}"
1004+
)
9811005

9821006
coords = dict()
9831007
data_vars = dict()
@@ -1159,7 +1183,12 @@ def _raw_snp_calls(
11591183
inline_array=inline_array,
11601184
chunks=chunks,
11611185
)
1162-
assert x.sizes["variants"] == loc_ann.shape[0]
1186+
if x.sizes["variants"] != loc_ann.shape[0]:
1187+
raise RuntimeError(
1188+
f"Variants dimension mismatch: dataset has "
1189+
f"{x.sizes['variants']} variants but annotation "
1190+
f"mask has {loc_ann.shape[0]}"
1191+
)
11631192
x = x.isel(variants=loc_ann)
11641193

11651194
lx.append(x)

0 commit comments

Comments
 (0)