From 0d0a33dbbc51d55015aa0ab1483ad7e68ccd3e94 Mon Sep 17 00:00:00 2001 From: 31puneet Date: Wed, 22 Apr 2026 13:30:31 +0000 Subject: [PATCH 1/2] add field constraints --- malariagen_data/anoph/base_params.py | 9 ++ malariagen_data/anoph/cnv_params.py | 2 + malariagen_data/anoph/dash_params.py | 4 +- malariagen_data/anoph/fst_params.py | 2 + malariagen_data/anoph/g123_params.py | 2 + malariagen_data/anoph/gplt_params.py | 5 + malariagen_data/anoph/h12_params.py | 2 + malariagen_data/anoph/hapnet_params.py | 3 + malariagen_data/anoph/het_params.py | 5 + malariagen_data/anoph/ihs_params.py | 9 +- malariagen_data/anoph/pca_params.py | 2 + malariagen_data/anoph/plotly_params.py | 3 + malariagen_data/anoph/xpehh_params.py | 6 +- tests/anoph/test_field_constraints.py | 212 +++++++++++++++++++++++++ 14 files changed, 263 insertions(+), 3 deletions(-) create mode 100644 tests/anoph/test_field_constraints.py diff --git a/malariagen_data/anoph/base_params.py b/malariagen_data/anoph/base_params.py index bd5e780c6..808381814 100644 --- a/malariagen_data/anoph/base_params.py +++ b/malariagen_data/anoph/base_params.py @@ -2,6 +2,7 @@ from typing import Final, List, Mapping, Optional, Sequence, Tuple, Union +from pydantic import Field from typing_extensions import Annotated, TypeAlias from ..util import ( @@ -168,6 +169,7 @@ def _validate_sample_selection_params( cohort_size: TypeAlias = Annotated[ int, + Field(ge=1), """ Randomly down-sample to this value if the number of samples in the cohort is greater. Raise an error if the number of samples is less @@ -177,6 +179,7 @@ def _validate_sample_selection_params( min_cohort_size: TypeAlias = Annotated[ int, + Field(ge=1), """ Minimum cohort size. Raise an error if the number of samples is less than this value. @@ -185,6 +188,7 @@ def _validate_sample_selection_params( max_cohort_size: TypeAlias = Annotated[ int, + Field(ge=1), """ Randomly down-sample to this value if the number of samples in the cohort is greater. 
@@ -193,6 +197,7 @@ def _validate_sample_selection_params( random_seed: TypeAlias = Annotated[ int, + Field(ge=0), "Random seed used for reproducible down-sampling.", ] @@ -228,6 +233,7 @@ def _validate_sample_selection_params( n_jack: TypeAlias = Annotated[ int, + Field(ge=1), """ Number of blocks to divide the data into for the block jackknife estimation of confidence intervals. N.B., larger is not necessarily @@ -237,6 +243,7 @@ def _validate_sample_selection_params( confidence_level: TypeAlias = Annotated[ float, + Field(gt=0, lt=1), """ Confidence level to use for confidence interval calculation. E.g., 0.95 means 95% confidence interval. @@ -286,6 +293,7 @@ def _validate_sample_selection_params( n_snps: TypeAlias = Annotated[ int, + Field(ge=1), """ The desired number of SNPs to use when running the analysis. SNPs will be evenly thinned to approximately this number. @@ -294,6 +302,7 @@ def _validate_sample_selection_params( thin_offset: TypeAlias = Annotated[ int, + Field(ge=0), """ Starting index for SNP thinning. Change this to repeat the analysis using a different set of SNPs. diff --git a/malariagen_data/anoph/cnv_params.py b/malariagen_data/anoph/cnv_params.py index 0c1a451bc..946beabae 100644 --- a/malariagen_data/anoph/cnv_params.py +++ b/malariagen_data/anoph/cnv_params.py @@ -2,10 +2,12 @@ from typing import Optional +from pydantic import Field from typing_extensions import Annotated, TypeAlias max_coverage_variance: TypeAlias = Annotated[ Optional[float], + Field(gt=0), """ Remove samples if coverage variance exceeds this value. 
""", diff --git a/malariagen_data/anoph/dash_params.py b/malariagen_data/anoph/dash_params.py index 361458dac..4c493e461 100644 --- a/malariagen_data/anoph/dash_params.py +++ b/malariagen_data/anoph/dash_params.py @@ -2,9 +2,10 @@ from typing import Literal, Union +from pydantic import Field from typing_extensions import Annotated, TypeAlias -height: TypeAlias = Annotated[int, "Height of the Dash app in pixels (px)."] +height: TypeAlias = Annotated[int, Field(gt=0), "Height of the Dash app in pixels (px)."] width: TypeAlias = Annotated[Union[int, str], "Width of the Dash app."] @@ -21,6 +22,7 @@ server_port: TypeAlias = Annotated[ int, + Field(gt=0), "Manually override the port on which the Dash app will run.", ] diff --git a/malariagen_data/anoph/fst_params.py b/malariagen_data/anoph/fst_params.py index 969997869..92476252d 100644 --- a/malariagen_data/anoph/fst_params.py +++ b/malariagen_data/anoph/fst_params.py @@ -3,6 +3,7 @@ from typing import Optional, Literal import pandas as pd +from pydantic import Field from typing_extensions import Annotated, TypeAlias from . import base_params @@ -10,6 +11,7 @@ # N.B., window size can mean different things for different functions window_size: TypeAlias = Annotated[ int, + Field(ge=1), "The size of windows (number of sites) used to calculate statistics within.", ] cohort_size_default: Optional[base_params.cohort_size] = None diff --git a/malariagen_data/anoph/g123_params.py b/malariagen_data/anoph/g123_params.py index 3a815be24..2133f6e95 100644 --- a/malariagen_data/anoph/g123_params.py +++ b/malariagen_data/anoph/g123_params.py @@ -2,6 +2,7 @@ from typing import Sequence +from pydantic import Field from typing_extensions import Annotated, TypeAlias from . import base_params @@ -29,6 +30,7 @@ window_size: TypeAlias = Annotated[ int, + Field(ge=1), """ The size of windows (number of sites) used to calculate statistics within. 
""", diff --git a/malariagen_data/anoph/gplt_params.py b/malariagen_data/anoph/gplt_params.py index cf1857d4f..ba3b1d30b 100644 --- a/malariagen_data/anoph/gplt_params.py +++ b/malariagen_data/anoph/gplt_params.py @@ -4,6 +4,7 @@ from typing import Literal, Mapping, Optional, Union, Final, Sequence import bokeh.models +from pydantic import Field from typing_extensions import Annotated, TypeAlias sizing_mode: TypeAlias = Annotated[ @@ -33,21 +34,25 @@ height: TypeAlias = Annotated[ int, + Field(gt=0), "Plot height in pixels (px).", ] row_height: TypeAlias = Annotated[ int, + Field(gt=0), "Plot height per row (sample) in pixels (px).", ] track_height: TypeAlias = Annotated[ int, + Field(gt=0), "Main track height in pixels (px).", ] genes_height: TypeAlias = Annotated[ int, + Field(gt=0), "Genes track height in pixels (px).", ] diff --git a/malariagen_data/anoph/h12_params.py b/malariagen_data/anoph/h12_params.py index 0116bad23..51fa0a906 100644 --- a/malariagen_data/anoph/h12_params.py +++ b/malariagen_data/anoph/h12_params.py @@ -2,6 +2,7 @@ from typing import Optional, Sequence, Union +from pydantic import Field from typing_extensions import Annotated, TypeAlias from . import base_params @@ -17,6 +18,7 @@ window_size: TypeAlias = Annotated[ int, + Field(ge=1), """ The size of windows (number of SNPs) used to calculate statistics within. 
""", diff --git a/malariagen_data/anoph/hapnet_params.py b/malariagen_data/anoph/hapnet_params.py index 86ce5776a..bacd7a3fb 100644 --- a/malariagen_data/anoph/hapnet_params.py +++ b/malariagen_data/anoph/hapnet_params.py @@ -2,10 +2,12 @@ from typing import Mapping +from pydantic import Field from typing_extensions import Annotated, TypeAlias max_dist: TypeAlias = Annotated[ int, + Field(ge=0), "Join network components up to a maximum distance of 2 SNP differences.", ] @@ -13,6 +15,7 @@ node_size_factor: TypeAlias = Annotated[ int, + Field(gt=0), "Control the sizing of nodes.", ] diff --git a/malariagen_data/anoph/het_params.py b/malariagen_data/anoph/het_params.py index 621260b77..306f313db 100644 --- a/malariagen_data/anoph/het_params.py +++ b/malariagen_data/anoph/het_params.py @@ -3,10 +3,12 @@ from typing import Tuple import pandas as pd +from pydantic import Field from typing_extensions import Annotated, TypeAlias window_size: TypeAlias = Annotated[ int, + Field(ge=1), "Number of sites per window.", ] @@ -14,6 +16,7 @@ phet_roh: TypeAlias = Annotated[ float, + Field(ge=0, le=1), "Probability of observing a heterozygote in a ROH.", ] @@ -28,6 +31,7 @@ transition: TypeAlias = Annotated[ float, + Field(ge=0, le=1), """ Probability of moving between states. A larger window size may call for a larger transitional probability. @@ -38,6 +42,7 @@ y_max: TypeAlias = Annotated[ float, + Field(gt=0), "Y axis limit.", ] diff --git a/malariagen_data/anoph/ihs_params.py b/malariagen_data/anoph/ihs_params.py index f9eb1f928..0f108e56f 100644 --- a/malariagen_data/anoph/ihs_params.py +++ b/malariagen_data/anoph/ihs_params.py @@ -2,12 +2,14 @@ from typing import Tuple, Union +from pydantic import Field from typing_extensions import Annotated, TypeAlias from . import base_params window_size: TypeAlias = Annotated[ int, + Field(ge=1), """ The size of window in number of SNPs used to summarise iHS over. If None, per-variant iHS values are returned. 
@@ -41,6 +43,7 @@ standardization_n_bins: TypeAlias = Annotated[ int, + Field(ge=1), """ Number of allele count bins to use for standardization. Overrides standardization_bins. @@ -55,6 +58,7 @@ filter_min_maf: TypeAlias = Annotated[ float, + Field(ge=0, le=0.5), """ Minimum minor allele frequency to use for filtering prior to passing haplotypes to allel.ihs function @@ -65,6 +69,7 @@ compute_min_maf: TypeAlias = Annotated[ float, + Field(ge=0, le=0.5), """ Do not compute integrated haplotype homozygosity for variants with minor allele frequency below this threshold. @@ -75,6 +80,7 @@ min_ehh: TypeAlias = Annotated[ float, + Field(ge=0, le=1), """ Minimum EHH beyond which to truncate integrated haplotype homozygosity calculation. @@ -85,6 +91,7 @@ max_gap: TypeAlias = Annotated[ int, + Field(ge=1), """ Do not report scores if EHH spans a gap larger than this number of base pairs. @@ -94,7 +101,7 @@ max_gap_default: max_gap = 200_000 gap_scale: TypeAlias = Annotated[ - int, "Rescale distance between variants if gap is larger than this value." + int, Field(ge=1), "Rescale distance between variants if gap is larger than this value." ] gap_scale_default: gap_scale = 20_000 diff --git a/malariagen_data/anoph/pca_params.py b/malariagen_data/anoph/pca_params.py index deda3a060..fa3bad45e 100644 --- a/malariagen_data/anoph/pca_params.py +++ b/malariagen_data/anoph/pca_params.py @@ -2,11 +2,13 @@ import numpy as np import pandas as pd +from pydantic import Field from typing_extensions import Annotated, TypeAlias from . 
import base_params n_components: TypeAlias = Annotated[ int, + Field(ge=1), "Number of components to return.", ] diff --git a/malariagen_data/anoph/plotly_params.py b/malariagen_data/anoph/plotly_params.py index 5741494d4..b363552f6 100644 --- a/malariagen_data/anoph/plotly_params.py +++ b/malariagen_data/anoph/plotly_params.py @@ -7,6 +7,7 @@ from typing import List, Literal, Mapping, Optional, Union import plotly.graph_objects as go # type: ignore +from pydantic import Field from typing_extensions import Annotated, TypeAlias x_label: TypeAlias = Annotated[ @@ -118,11 +119,13 @@ jitter_frac: TypeAlias = Annotated[ Optional[float], + Field(ge=0, le=1), "Randomly jitter points by this fraction of their range.", ] marker_size: TypeAlias = Annotated[ Union[int, float], + Field(gt=0), "Marker size.", ] diff --git a/malariagen_data/anoph/xpehh_params.py b/malariagen_data/anoph/xpehh_params.py index e1223b34b..31255e765 100644 --- a/malariagen_data/anoph/xpehh_params.py +++ b/malariagen_data/anoph/xpehh_params.py @@ -3,6 +3,7 @@ from typing import Tuple, Union, Optional import numpy as np +from pydantic import Field from typing_extensions import Annotated, TypeAlias from . import base_params @@ -27,6 +28,7 @@ percentiles_default: percentiles = (50, 75, 100) filter_min_maf: TypeAlias = Annotated[ float, + Field(ge=0, le=0.5), """ Minimum minor allele frequency to use for filtering prior to passing haplotypes to allel.xpehh function @@ -41,6 +43,7 @@ ] min_ehh: TypeAlias = Annotated[ float, + Field(ge=0, le=1), """ Minimum EHH beyond which to truncate integrated haplotype homozygosity calculation. @@ -49,6 +52,7 @@ min_ehh_default: min_ehh = 0.05 max_gap: TypeAlias = Annotated[ int, + Field(ge=1), """ Do not report scores if EHH spans a gap larger than this number of base pairs. @@ -56,7 +60,7 @@ ] max_gap_default: max_gap = 200_000 gap_scale: TypeAlias = Annotated[ - int, "Rescale distance between variants if gap is larger than this value." 
+ int, Field(ge=1), "Rescale distance between variants if gap is larger than this value." ] gap_scale_default: gap_scale = 20_000 include_edges: TypeAlias = Annotated[ diff --git a/tests/anoph/test_field_constraints.py b/tests/anoph/test_field_constraints.py new file mode 100644 index 000000000..a62c35105 --- /dev/null +++ b/tests/anoph/test_field_constraints.py @@ -0,0 +1,212 @@ +"""Tests for pydantic Field() numeric constraints on parameter type aliases. + +These tests verify that the Field() constraints added to *_params.py files +are enforced at runtime by the _check_types decorator (which uses pydantic +validate_call under the hood). +""" + +import pytest + +from malariagen_data.util import _check_types + + +# --------------------------------------------------------------------------- +# Helper: a minimal function decorated with @_check_types whose parameter is +# annotated with bare `int`; it is NOT constrained and no test below calls it. +# --------------------------------------------------------------------------- + + +@_check_types +def _fn_cohort_size( + cohort_size: int, +) -> int: + """Unconstrained: annotated with bare `int`, so Field(ge=1) is not applied.""" + # NOTE: this local import does not attach the alias metadata to the signature. + from malariagen_data.anoph.base_params import cohort_size as cohort_size_type # noqa: F401 + + return cohort_size + + +# Because _check_types reads the type hints from the function signature at +# decoration time, and base_params types are TypeAliases (not actual +# Annotated types when used bare as `int`), we need to use the TypeAlias +# directly in the signature. Let's create properly-typed helper functions. 
+ + +from malariagen_data.anoph import base_params, het_params, ihs_params # noqa: E402 + + +@_check_types +def _fn_with_confidence_level( + confidence_level: base_params.confidence_level, +) -> float: + return confidence_level + + +@_check_types +def _fn_with_n_snps( + n_snps: base_params.n_snps, +) -> int: + return n_snps + + +@_check_types +def _fn_with_cohort_size( + cohort_size: base_params.cohort_size, +) -> int: + return cohort_size + + +@_check_types +def _fn_with_min_cohort_size( + min_cohort_size: base_params.min_cohort_size, +) -> int: + return min_cohort_size + + +@_check_types +def _fn_with_random_seed( + random_seed: base_params.random_seed, +) -> int: + return random_seed + + +@_check_types +def _fn_with_thin_offset( + thin_offset: base_params.thin_offset, +) -> int: + return thin_offset + + +@_check_types +def _fn_with_n_jack( + n_jack: base_params.n_jack, +) -> int: + return n_jack + + +@_check_types +def _fn_with_phet_roh( + phet_roh: het_params.phet_roh, +) -> float: + return phet_roh + + +@_check_types +def _fn_with_filter_min_maf( + filter_min_maf: ihs_params.filter_min_maf, +) -> float: + return filter_min_maf + + +# --------------------------------------------------------------------------- +# Tests: valid values should pass +# --------------------------------------------------------------------------- + + +class TestFieldConstraintsValidValues: + """Verify that valid values are accepted without error.""" + + def test_confidence_level_valid(self): + assert _fn_with_confidence_level(confidence_level=0.95) == 0.95 + assert _fn_with_confidence_level(confidence_level=0.5) == 0.5 + assert _fn_with_confidence_level(confidence_level=0.01) == 0.01 + + def test_n_snps_valid(self): + assert _fn_with_n_snps(n_snps=1) == 1 + assert _fn_with_n_snps(n_snps=1000) == 1000 + + def test_cohort_size_valid(self): + assert _fn_with_cohort_size(cohort_size=1) == 1 + assert _fn_with_cohort_size(cohort_size=50) == 50 + + def test_random_seed_valid(self): + assert 
_fn_with_random_seed(random_seed=0) == 0 + assert _fn_with_random_seed(random_seed=42) == 42 + + def test_thin_offset_valid(self): + assert _fn_with_thin_offset(thin_offset=0) == 0 + assert _fn_with_thin_offset(thin_offset=10) == 10 + + def test_phet_roh_valid(self): + assert _fn_with_phet_roh(phet_roh=0.0) == 0.0 + assert _fn_with_phet_roh(phet_roh=0.5) == 0.5 + assert _fn_with_phet_roh(phet_roh=1.0) == 1.0 + + def test_filter_min_maf_valid(self): + assert _fn_with_filter_min_maf(filter_min_maf=0.0) == 0.0 + assert _fn_with_filter_min_maf(filter_min_maf=0.05) == 0.05 + assert _fn_with_filter_min_maf(filter_min_maf=0.5) == 0.5 + + +# --------------------------------------------------------------------------- +# Tests: invalid values should raise TypeError (from _check_types wrapper) +# --------------------------------------------------------------------------- + + +class TestFieldConstraintsInvalidValues: + """Verify that out-of-range values are rejected.""" + + def test_confidence_level_zero(self): + with pytest.raises(TypeError): + _fn_with_confidence_level(confidence_level=0.0) + + def test_confidence_level_one(self): + with pytest.raises(TypeError): + _fn_with_confidence_level(confidence_level=1.0) + + def test_confidence_level_negative(self): + with pytest.raises(TypeError): + _fn_with_confidence_level(confidence_level=-0.5) + + def test_confidence_level_above_one(self): + with pytest.raises(TypeError): + _fn_with_confidence_level(confidence_level=1.5) + + def test_n_snps_zero(self): + with pytest.raises(TypeError): + _fn_with_n_snps(n_snps=0) + + def test_n_snps_negative(self): + with pytest.raises(TypeError): + _fn_with_n_snps(n_snps=-1) + + def test_cohort_size_zero(self): + with pytest.raises(TypeError): + _fn_with_cohort_size(cohort_size=0) + + def test_cohort_size_negative(self): + with pytest.raises(TypeError): + _fn_with_cohort_size(cohort_size=-5) + + def test_min_cohort_size_zero(self): + with pytest.raises(TypeError): + 
_fn_with_min_cohort_size(min_cohort_size=0) + + def test_random_seed_negative(self): + with pytest.raises(TypeError): + _fn_with_random_seed(random_seed=-1) + + def test_thin_offset_negative(self): + with pytest.raises(TypeError): + _fn_with_thin_offset(thin_offset=-1) + + def test_n_jack_zero(self): + with pytest.raises(TypeError): + _fn_with_n_jack(n_jack=0) + + def test_phet_roh_negative(self): + with pytest.raises(TypeError): + _fn_with_phet_roh(phet_roh=-0.1) + + def test_phet_roh_above_one(self): + with pytest.raises(TypeError): + _fn_with_phet_roh(phet_roh=1.1) + + def test_filter_min_maf_negative(self): + with pytest.raises(TypeError): + _fn_with_filter_min_maf(filter_min_maf=-0.01) + + def test_filter_min_maf_above_half(self): + with pytest.raises(TypeError): + _fn_with_filter_min_maf(filter_min_maf=0.6) From 6d66ca1703d1e8c141890fc42fa95ea6f88e2cf0 Mon Sep 17 00:00:00 2001 From: 31puneet Date: Wed, 22 Apr 2026 13:39:11 +0000 Subject: [PATCH 2/2] fixing lint --- malariagen_data/anoph/dash_params.py | 4 +++- malariagen_data/anoph/ihs_params.py | 4 +++- malariagen_data/anoph/xpehh_params.py | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/malariagen_data/anoph/dash_params.py b/malariagen_data/anoph/dash_params.py index 4c493e461..e99f49262 100644 --- a/malariagen_data/anoph/dash_params.py +++ b/malariagen_data/anoph/dash_params.py @@ -5,7 +5,9 @@ from pydantic import Field from typing_extensions import Annotated, TypeAlias -height: TypeAlias = Annotated[int, Field(gt=0), "Height of the Dash app in pixels (px)."] +height: TypeAlias = Annotated[ + int, Field(gt=0), "Height of the Dash app in pixels (px)." 
+] width: TypeAlias = Annotated[Union[int, str], "Width of the Dash app."] diff --git a/malariagen_data/anoph/ihs_params.py b/malariagen_data/anoph/ihs_params.py index 0f108e56f..4e0903c4d 100644 --- a/malariagen_data/anoph/ihs_params.py +++ b/malariagen_data/anoph/ihs_params.py @@ -101,7 +101,9 @@ max_gap_default: max_gap = 200_000 gap_scale: TypeAlias = Annotated[ - int, Field(ge=1), "Rescale distance between variants if gap is larger than this value." + int, + Field(ge=1), + "Rescale distance between variants if gap is larger than this value.", ] gap_scale_default: gap_scale = 20_000 diff --git a/malariagen_data/anoph/xpehh_params.py b/malariagen_data/anoph/xpehh_params.py index 31255e765..0131bd244 100644 --- a/malariagen_data/anoph/xpehh_params.py +++ b/malariagen_data/anoph/xpehh_params.py @@ -60,7 +60,9 @@ ] max_gap_default: max_gap = 200_000 gap_scale: TypeAlias = Annotated[ - int, Field(ge=1), "Rescale distance between variants if gap is larger than this value." + int, + Field(ge=1), + "Rescale distance between variants if gap is larger than this value.", ] gap_scale_default: gap_scale = 20_000 include_edges: TypeAlias = Annotated[