Skip to content

Commit 3de4f22

Browse files
Merge branch 'master' into optimize/issue-926-vectorize-apply
2 parents 3118c0a + b1ab7fb commit 3de4f22

26 files changed

Lines changed: 1442 additions & 331 deletions

.github/actions/setup-python/action.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,4 +19,4 @@ runs:
1919
shell: bash
2020
run: |
2121
poetry env use ${{ inputs.python-version }}
22-
poetry install --extras dev
22+
poetry install --with dev,test,docs

CONTRIBUTING.md

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ This package provides Python tools for accessing and analyzing genomic data from
1212

1313
You'll need:
1414

15-
- [pipx](https://python-poetry.org/) for installing Python tools
15+
- [pipx](https://pipx.pypa.io/) for installing Python tools
1616
- [git](https://git-scm.com/) for version control
1717

1818
Both of these can be installed using your distribution's package manager or [Homebrew](https://brew.sh/) on Mac.
@@ -52,9 +52,13 @@ Both of these can be installed using your distribution's package manager or [Hom
5252

5353
```bash
5454
poetry env use 3.12
55-
poetry install --extras dev
55+
poetry install --with dev,test,docs
5656
```
5757

58+
This installs the runtime dependencies along with the `dev`, `test`, and `docs`
59+
[dependency groups](https://python-poetry.org/docs/managing-dependencies/#dependency-groups).
60+
If you only need to run tests, `poetry install --with test` is sufficient.
61+
5862
**Recommended**: Use `poetry run` to run commands inside the virtual environment:
5963

6064
```bash

malariagen_data/anoph/base_params.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,10 @@
6969
str,
7070
"""
7171
A pandas query string to be evaluated against the sample metadata, to
72-
select samples to be included in the returned data.
72+
select samples to be included in the returned data. E.g.,
73+
"country == 'Uganda'". If the query returns zero results, a warning
74+
will be emitted with fuzzy-match suggestions for possible typos or
75+
case mismatches.
7376
""",
7477
]
7578

malariagen_data/anoph/dipclust.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import warnings
12
from typing import Optional, Tuple
23

34
import allel # type: ignore
@@ -540,8 +541,9 @@ def _insert_dipclust_snp_trace(
540541
figures.append(snp_trace)
541542
subplot_heights.append(snp_row_height * n_snps_transcript)
542543
else:
543-
print(
544-
f"No SNPs were found below {snp_filter_min_maf} allele frequency. Omitting SNP genotype plot."
544+
warnings.warn(
545+
f"No SNPs were found below {snp_filter_min_maf} allele frequency. Omitting SNP genotype plot.",
546+
stacklevel=2,
545547
)
546548
return figures, subplot_heights, n_snps_transcript
547549

@@ -607,8 +609,9 @@ def plot_diplotype_clustering_advanced(
607609
cnv_colorscale = cnv_params.colorscale_default
608610
if cohort_size and snp_transcript:
609611
cohort_size = None
610-
print(
611-
"Cohort size is not supported with amino acid heatmap. Overriding cohort size to None."
612+
warnings.warn(
613+
"Cohort size is not supported with amino acid heatmap. Overriding cohort size to None.",
614+
stacklevel=2,
612615
)
613616

614617
res = self.plot_diplotype_clustering(

malariagen_data/anoph/frq_base.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,13 @@ def _prep_samples_for_cohort_grouping(
2828
# Users can explicitly override with True/False.
2929
filter_unassigned = taxon_by == "taxon"
3030

31+
# Validate taxon_by.
32+
if taxon_by not in df_samples.columns:
33+
raise ValueError(
34+
f"Invalid value for `taxon_by`: {taxon_by!r}. "
35+
f"Must be the name of an existing column in the sample metadata."
36+
)
37+
3138
if filter_unassigned:
3239
# Remove samples with "intermediate" or "unassigned" taxon values,
3340
# as we only want cohorts with clean taxon calls.
@@ -76,6 +83,13 @@ def _prep_samples_for_cohort_grouping(
7683
# Use the vectorized period creation function.
7784
df_samples["period"] = period_by_func_vectorized(df_samples)
7885

86+
# Validate area_by.
87+
if area_by not in df_samples.columns:
88+
raise ValueError(
89+
f"Invalid value for `area_by`: {area_by!r}. "
90+
f"Must be the name of an existing column in the sample metadata."
91+
)
92+
7993
# Copy the specified area_by column to a new "area" column.
8094
df_samples["area"] = df_samples[area_by]
8195

0 commit comments

Comments
 (0)