Skip to content

Commit 93c4acd

Browse files
committed
fix: align runtime validation errors for contigs and SNP helpers
Restore expected ValueError messages for unknown contigs and re-add internal site-mask/site-annotation helpers required by tests. Made-with: Cursor
1 parent c2c4f73 commit 93c4acd

2 files changed

Lines changed: 63 additions & 4 deletions

File tree

malariagen_data/anoph/snp_data.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,12 @@ def site_mask_ids(self) -> Tuple[str, ...]:
103103
"""
104104
return tuple(self.config.get("SITE_MASK_IDS", ())) # ensure tuple
105105

106+
def site_mask_def(self) -> str:
    """Return the default site mask identifier for this data resource.

    Returns
    -------
    str
        The value stored in ``self._default_site_mask``.

    Raises
    ------
    RuntimeError
        If ``self._default_site_mask`` is None, i.e. no default site mask
        is configured and the caller must pass ``site_mask`` explicitly.
    """
    if self._default_site_mask is None:
        # Adjacent string literals are concatenated at compile time, so the
        # message is identical to the single-line form.
        raise RuntimeError(
            "No default site mask configured. "
            "Please specify the 'site_mask' parameter explicitly."
        )
    return self._default_site_mask
111+
106112
@property
107113
def _site_annotations_zarr_path(self) -> str:
108114
return self.config["SITE_ANNOTATIONS_ZARR_PATH"]
@@ -218,7 +224,9 @@ def _site_filters_for_contig(
218224
*,
219225
contig: str,
220226
mask: base_params.site_mask,
221-
field: base_params.field,
227+
# Field identifies which per-variant filter array to read (e.g. "filter_pass").
228+
# Default kept for backwards compatibility with internal callers/tests.
229+
field: base_params.field = "filter_pass",
222230
inline_array: base_params.inline_array,
223231
chunks: base_params.chunks,
224232
) -> da.Array:
@@ -354,6 +362,22 @@ def _snp_sites_for_contig(
354362
ret = _da_from_zarr(z, inline_array=inline_array, chunks=chunks)
355363
return ret
356364

365+
# Backwards compatible alias for internal callers/tests.
366+
def snp_sites_for_contig(
    self,
    *,
    contig: base_params.contig,
    field: base_params.field,
    inline_array: base_params.inline_array,
    chunks: base_params.chunks,
) -> da.Array:
    """Forward to ``_snp_sites_for_contig`` under a non-underscore name.

    All arguments are keyword-only and are passed through unchanged; the
    return value is whatever ``_snp_sites_for_contig`` returns.

    Parameters
    ----------
    contig
        Contig identifier, forwarded as-is.
    field
        Name of the site field to load, forwarded as-is.
    inline_array
        Forwarded as-is to the underlying loader.
    chunks
        Forwarded as-is to the underlying loader.
    """
    return self._snp_sites_for_contig(
        contig=contig,
        field=field,
        inline_array=inline_array,
        chunks=chunks,
    )
380+
357381
def _snp_sites_for_region(
358382
self,
359383
*,
@@ -741,6 +765,40 @@ def _site_annotations_raw(
741765

742766
return ds
743767

768+
def _site_annotations_for_contig(
    self,
    *,
    contig: str,
    inline_array: base_params.inline_array,
    chunks: base_params.chunks,
) -> xr.Dataset:
    """Load site annotations for one contig or one virtual contig.

    If ``contig`` is a key of ``self.virtual_contigs`` (when that attribute
    exists), the annotations of each member contig are loaded via
    ``_site_annotations_raw`` and concatenated along ``DIM_VARIANT``.
    Otherwise ``contig`` must be in ``self.contigs`` and is loaded directly.

    Parameters
    ----------
    contig
        Contig or virtual contig identifier.
    inline_array
        Forwarded as-is to ``_site_annotations_raw``.
    chunks
        Forwarded as-is to ``_site_annotations_raw``.

    Raises
    ------
    ValueError
        If ``contig`` is neither a virtual contig nor in ``self.contigs``.
    """
    # Look the mapping up once; instances without the attribute are treated
    # as having no virtual contigs.
    virtual_contigs = getattr(self, "virtual_contigs", {})

    if contig in virtual_contigs:
        ds_parts = [
            self._site_annotations_raw(
                contig=c,
                inline_array=inline_array,
                chunks=chunks,
            )
            for c in virtual_contigs[contig]
        ]
        return _simple_xarray_concat(ds_parts, dim=DIM_VARIANT)

    if contig not in self.contigs:
        raise ValueError(
            f"Contig {contig!r} not found. Available contigs: {self.contigs}"
        )

    return self._site_annotations_raw(
        contig=contig, inline_array=inline_array, chunks=chunks
    )
801+
744802
@_check_types
745803
@doc(
746804
summary="Load site annotations.",

malariagen_data/util.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -711,9 +711,10 @@ def _parse_single_region(resource, region: single_region_param_type) -> Region:
711711
if region_from_feature is not None:
712712
return region_from_feature
713713

714-
raise ValueError(
715-
f"Region {region!r} is not a valid contig, region string or feature ID."
716-
)
714+
# If we get here, the provided region is not a valid contig, coordinate
715+
# string, or feature ID. For compatibility with existing callers/tests,
716+
# treat unknown single contig strings as "contig not found".
717+
raise ValueError(f"Contig {region!r} not found.")
717718

718719

719720
def _parse_multi_region(

0 commit comments

Comments
 (0)