Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions malariagen_data/anoph/base_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import Final, List, Mapping, Optional, Sequence, Tuple, Union

from pydantic import Field
from typing_extensions import Annotated, TypeAlias

from ..util import (
Expand Down Expand Up @@ -168,6 +169,7 @@ def _validate_sample_selection_params(

cohort_size: TypeAlias = Annotated[
int,
Field(ge=1),
"""
Randomly down-sample to this value if the number of samples in the
cohort is greater. Raise an error if the number of samples is less
Expand All @@ -177,6 +179,7 @@ def _validate_sample_selection_params(

min_cohort_size: TypeAlias = Annotated[
int,
Field(ge=1),
"""
Minimum cohort size. Raise an error if the number of samples is
less than this value.
Expand All @@ -185,6 +188,7 @@ def _validate_sample_selection_params(

max_cohort_size: TypeAlias = Annotated[
int,
Field(ge=1),
"""
Randomly down-sample to this value if the number of samples in the
cohort is greater.
Expand All @@ -193,6 +197,7 @@ def _validate_sample_selection_params(

# Seed for down-sampling; the Field metadata declares it non-negative (>= 0).
random_seed: TypeAlias = Annotated[
    int, Field(ge=0), "Random seed used for reproducible down-sampling."
]

Expand Down Expand Up @@ -228,6 +233,7 @@ def _validate_sample_selection_params(

n_jack: TypeAlias = Annotated[
int,
Field(ge=1),
"""
Number of blocks to divide the data into for the block jackknife
estimation of confidence intervals. N.B., larger is not necessarily
Expand All @@ -237,6 +243,7 @@ def _validate_sample_selection_params(

confidence_level: TypeAlias = Annotated[
float,
Field(gt=0, lt=1),
"""
Confidence level to use for confidence interval calculation. E.g., 0.95
means 95% confidence interval.
Expand Down Expand Up @@ -286,6 +293,7 @@ def _validate_sample_selection_params(

n_snps: TypeAlias = Annotated[
int,
Field(ge=1),
"""
The desired number of SNPs to use when running the analysis.
SNPs will be evenly thinned to approximately this number.
Expand All @@ -294,6 +302,7 @@ def _validate_sample_selection_params(

thin_offset: TypeAlias = Annotated[
int,
Field(ge=0),
"""
Starting index for SNP thinning. Change this to repeat the analysis
using a different set of SNPs.
Expand Down
2 changes: 2 additions & 0 deletions malariagen_data/anoph/cnv_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

from typing import Optional

from pydantic import Field
from typing_extensions import Annotated, TypeAlias

max_coverage_variance: TypeAlias = Annotated[
Optional[float],
Field(gt=0),
"""
Remove samples if coverage variance exceeds this value.
""",
Expand Down
6 changes: 5 additions & 1 deletion malariagen_data/anoph/dash_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@

from typing import Literal, Union

from pydantic import Field
from typing_extensions import Annotated, TypeAlias

# Pixel height of the Dash app. Defined exactly once; the Field metadata
# declares the value strictly positive (> 0).
height: TypeAlias = Annotated[
    int, Field(gt=0), "Height of the Dash app in pixels (px)."
]

width: TypeAlias = Annotated[Union[int, str], "Width of the Dash app."]

Expand All @@ -21,6 +24,7 @@

# Port numbers are 16-bit, so the usable range is 1..65535. Declaring the
# upper bound alongside the lower one lets a validator reject out-of-range
# ports up front rather than failing later when the server tries to bind.
server_port: TypeAlias = Annotated[
    int,
    Field(gt=0, le=65535),
    "Manually override the port on which the Dash app will run.",
]

Expand Down
2 changes: 2 additions & 0 deletions malariagen_data/anoph/fst_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
from typing import Optional, Literal

import pandas as pd
from pydantic import Field
from typing_extensions import Annotated, TypeAlias

from . import base_params

# N.B., window size can mean different things for different functions.
# In this module it is a count of sites, declared to be at least 1.
window_size: TypeAlias = Annotated[
    int,
    Field(ge=1),
    "The size of windows (number of sites) used to calculate statistics within.",
]
# Default for the cohort_size parameter: None, i.e. no cohort size is set.
cohort_size_default: Optional[base_params.cohort_size] = None
Expand Down
2 changes: 2 additions & 0 deletions malariagen_data/anoph/g123_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import Sequence

from pydantic import Field
from typing_extensions import Annotated, TypeAlias

from . import base_params
Expand Down Expand Up @@ -29,6 +30,7 @@

window_size: TypeAlias = Annotated[
int,
Field(ge=1),
"""
The size of windows (number of sites) used to calculate statistics within.
""",
Expand Down
5 changes: 5 additions & 0 deletions malariagen_data/anoph/gplt_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Literal, Mapping, Optional, Union, Final, Sequence

import bokeh.models
from pydantic import Field
from typing_extensions import Annotated, TypeAlias

sizing_mode: TypeAlias = Annotated[
Expand Down Expand Up @@ -33,21 +34,25 @@

# Overall plot height; the Field metadata declares it strictly positive.
height: TypeAlias = Annotated[int, Field(gt=0), "Plot height in pixels (px)."]

# Per-row (per-sample) height; the Field metadata declares it strictly positive.
row_height: TypeAlias = Annotated[
    int, Field(gt=0), "Plot height per row (sample) in pixels (px)."
]

# Height of the main track; the Field metadata declares it strictly positive.
track_height: TypeAlias = Annotated[
    int, Field(gt=0), "Main track height in pixels (px)."
]

# Height of the genes track; the Field metadata declares it strictly positive.
genes_height: TypeAlias = Annotated[
    int, Field(gt=0), "Genes track height in pixels (px)."
]

Expand Down
2 changes: 2 additions & 0 deletions malariagen_data/anoph/h12_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import Optional, Sequence, Union

from pydantic import Field
from typing_extensions import Annotated, TypeAlias

from . import base_params
Expand All @@ -17,6 +18,7 @@

window_size: TypeAlias = Annotated[
int,
Field(ge=1),
"""
The size of windows (number of SNPs) used to calculate statistics within.
""",
Expand Down
3 changes: 3 additions & 0 deletions malariagen_data/anoph/hapnet_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@

from typing import Mapping

from pydantic import Field
from typing_extensions import Annotated, TypeAlias

# The description deliberately avoids naming a specific distance: any value
# >= 0 may be supplied, and 2 is only the default defined just below.
max_dist: TypeAlias = Annotated[
    int,
    Field(ge=0),
    "Join network components up to this maximum distance in SNP differences.",
]

max_dist_default: max_dist = 2

# Scaling factor for node sizes; the Field metadata declares it strictly
# positive.
node_size_factor: TypeAlias = Annotated[
    int, Field(gt=0), "Control the sizing of nodes."
]

Expand Down
5 changes: 5 additions & 0 deletions malariagen_data/anoph/het_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,20 @@
from typing import Tuple

import pandas as pd
from pydantic import Field
from typing_extensions import Annotated, TypeAlias

# Window size counted in sites; the Field metadata declares it at least 1.
window_size: TypeAlias = Annotated[int, Field(ge=1), "Number of sites per window."]

# Default window of 20,000 sites.
window_size_default: window_size = 20_000

# A probability, so the Field metadata bounds it to the closed interval [0, 1].
phet_roh: TypeAlias = Annotated[
    float, Field(ge=0, le=1), "Probability of observing a heterozygote in a ROH."
]

Expand All @@ -28,6 +31,7 @@

transition: TypeAlias = Annotated[
float,
Field(ge=0, le=1),
"""
Probability of moving between states. A larger window size may call
for a larger transitional probability.
Expand All @@ -38,6 +42,7 @@

# Upper limit of the Y axis; the Field metadata declares it strictly positive.
y_max: TypeAlias = Annotated[float, Field(gt=0), "Y axis limit."]

Expand Down
11 changes: 10 additions & 1 deletion malariagen_data/anoph/ihs_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

from typing import Tuple, Union

from pydantic import Field
from typing_extensions import Annotated, TypeAlias

from . import base_params

window_size: TypeAlias = Annotated[
int,
Field(ge=1),
"""
The size of window in number of SNPs used to summarise iHS over.
If None, per-variant iHS values are returned.
Expand Down Expand Up @@ -41,6 +43,7 @@

standardization_n_bins: TypeAlias = Annotated[
int,
Field(ge=1),
"""
Number of allele count bins to use for standardization.
Overrides standardization_bins.
Expand All @@ -55,6 +58,7 @@

filter_min_maf: TypeAlias = Annotated[
float,
Field(ge=0, le=0.5),
"""
Minimum minor allele frequency to use for filtering prior to passing
haplotypes to allel.ihs function
Expand All @@ -65,6 +69,7 @@

compute_min_maf: TypeAlias = Annotated[
float,
Field(ge=0, le=0.5),
"""
Do not compute integrated haplotype homozygosity for variants with
minor allele frequency below this threshold.
Expand All @@ -75,6 +80,7 @@

min_ehh: TypeAlias = Annotated[
float,
Field(ge=0, le=1),
"""
Minimum EHH beyond which to truncate integrated haplotype homozygosity
calculation.
Expand All @@ -85,6 +91,7 @@

max_gap: TypeAlias = Annotated[
int,
Field(ge=1),
"""
Do not report scores if EHH spans a gap larger than this number of
base pairs.
Expand All @@ -94,7 +101,9 @@
# Default max_gap: 200,000 (base pairs, per the max_gap description).
max_gap_default: max_gap = 200_000

# Gap threshold above which distances between variants are rescaled.
# Written as one well-formed Annotated[...] with a single type, a single
# Field constraint (>= 1) and a single description string.
gap_scale: TypeAlias = Annotated[
    int,
    Field(ge=1),
    "Rescale distance between variants if gap is larger than this value.",
]

gap_scale_default: gap_scale = 20_000
Expand Down
2 changes: 2 additions & 0 deletions malariagen_data/anoph/pca_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

import numpy as np
import pandas as pd
from pydantic import Field
from typing_extensions import Annotated, TypeAlias
from . import base_params

# Number of components requested; the Field metadata declares it at least 1.
n_components: TypeAlias = Annotated[
    int, Field(ge=1), "Number of components to return."
]

Expand Down
3 changes: 3 additions & 0 deletions malariagen_data/anoph/plotly_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import List, Literal, Mapping, Optional, Union

import plotly.graph_objects as go # type: ignore
from pydantic import Field
from typing_extensions import Annotated, TypeAlias

x_label: TypeAlias = Annotated[
Expand Down Expand Up @@ -118,11 +119,13 @@

# Fraction of the data range used for jitter, declared within [0, 1].
# NOTE(review): the bounds are attached to an Optional type; confirm the
# validator in use applies them only to non-None values.
jitter_frac: TypeAlias = Annotated[
    Optional[float],
    Field(ge=0, le=1),
    "Randomly jitter points by this fraction of their range.",
]

# Integer or float marker size; the Field metadata declares it strictly
# positive.
marker_size: TypeAlias = Annotated[Union[int, float], Field(gt=0), "Marker size."]

Expand Down
8 changes: 7 additions & 1 deletion malariagen_data/anoph/xpehh_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Tuple, Union, Optional

import numpy as np
from pydantic import Field
from typing_extensions import Annotated, TypeAlias

from . import base_params
Expand All @@ -27,6 +28,7 @@
# Default value for the `percentiles` parameter.
percentiles_default: percentiles = (50, 75, 100)
filter_min_maf: TypeAlias = Annotated[
float,
Field(ge=0, le=0.5),
"""
Minimum minor allele frequency to use for filtering prior to passing
haplotypes to allel.xpehh function
Expand All @@ -41,6 +43,7 @@
]
min_ehh: TypeAlias = Annotated[
float,
Field(ge=0, le=1),
"""
Minimum EHH beyond which to truncate integrated haplotype homozygosity
calculation.
Expand All @@ -49,14 +52,17 @@
# Default min_ehh threshold; lies inside the [0, 1] range declared on min_ehh.
min_ehh_default: min_ehh = 0.05
# Largest EHH gap, in base pairs, for which scores are still reported.
# The Field metadata declares the value to be at least 1.
max_gap: TypeAlias = Annotated[
int,
Field(ge=1),
"""
Do not report scores if EHH spans a gap larger than this number of
base pairs.
""",
]
# Default max_gap of 200,000 base pairs.
max_gap_default: max_gap = 200_000
# Gap threshold above which distances between variants are rescaled.
# Written as one well-formed Annotated[...] with a single type, a single
# Field constraint (>= 1) and a single description string.
gap_scale: TypeAlias = Annotated[
    int,
    Field(ge=1),
    "Rescale distance between variants if gap is larger than this value.",
]
gap_scale_default: gap_scale = 20_000
include_edges: TypeAlias = Annotated[
Expand Down
Loading
Loading