malariagen
diff --git a/.github/actions/setup-python/action.yaml b/.github/actions/setup-python/action.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
Lines changed: 6 additions & 2 deletions
diff --git a/malariagen_data/anoph/base_params.py b/malariagen_data/anoph/base_params.py
Lines changed: 4 additions & 1 deletion
diff --git a/malariagen_data/anoph/dipclust.py b/malariagen_data/anoph/dipclust.py
Lines changed: 7 additions & 4 deletions
diff --git a/malariagen_data/anoph/frq_base.py b/malariagen_data/anoph/frq_base.py
Lines changed: 14 additions & 0 deletions
@@ -19,4 +19,4 @@ runs:
       shell: bash
       run: |
         poetry env use ${{ inputs.python-version }}
-        poetry install --extras dev
+        poetry install --with dev,test,docs
@@ -12,7 +12,7 @@ This package provides Python tools for accessing and analyzing genomic data from
 
 You'll need:
 
-- [pipx](https://python-poetry.org/) for installing Python tools
+- [pipx](https://pipx.pypa.io/) for installing Python tools
 - [git](https://git-scm.com/) for version control
 
 Both of these can be installed using your distribution's package manager or [Homebrew](https://brew.sh/) on Mac.
@@ -52,9 +52,13 @@ Both of these can be installed using your distribution's package manager or [Hom
 
    ```bash
    poetry env use 3.12
-   poetry install --extras dev
+   poetry install --with dev,test,docs
    ```
 
+   This installs the runtime dependencies along with the `dev`, `test`, and `docs`
+   [dependency groups](https://python-poetry.org/docs/managing-dependencies/#dependency-groups).
+   If you only need to run tests, `poetry install --with test` is sufficient.
+
    **Recommended**: Use `poetry run` to run commands inside the virtual environment:
 
    ```bash
 
@@ -69,7 +69,10 @@
     str,
     """
     A pandas query string to be evaluated against the sample metadata, to
-    select samples to be included in the returned data.
+    select samples to be included in the returned data. E.g.,
+    "country == 'Uganda'". If the query returns zero results, a warning
+    will be emitted with fuzzy-match suggestions for possible typos or
+    case mismatches.
     """,
 ]
 
 
@@ -1,3 +1,4 @@
+import warnings
 from typing import Optional, Tuple
 
 import allel  # type: ignore
@@ -540,8 +541,9 @@ def _insert_dipclust_snp_trace(
             figures.append(snp_trace)
             subplot_heights.append(snp_row_height * n_snps_transcript)
         else:
-            print(
-                f"No SNPs were found below {snp_filter_min_maf} allele frequency. Omitting SNP genotype plot."
+            warnings.warn(
+                f"No SNPs were found below {snp_filter_min_maf} allele frequency. Omitting SNP genotype plot.",
+                stacklevel=2,
             )
         return figures, subplot_heights, n_snps_transcript
 
@@ -607,8 +609,9 @@ def plot_diplotype_clustering_advanced(
             cnv_colorscale = cnv_params.colorscale_default
         if cohort_size and snp_transcript:
             cohort_size = None
-            print(
-                "Cohort size is not supported with amino acid heatmap. Overriding cohort size to None."
+            warnings.warn(
+                "Cohort size is not supported with amino acid heatmap. Overriding cohort size to None.",
+                stacklevel=2,
             )
 
         res = self.plot_diplotype_clustering(
 
@@ -28,6 +28,13 @@ def _prep_samples_for_cohort_grouping(
         # Users can explicitly override with True/False.
         filter_unassigned = taxon_by == "taxon"
 
+    # Validate taxon_by.
+    if taxon_by not in df_samples.columns:
+        raise ValueError(
+            f"Invalid value for `taxon_by`: {taxon_by!r}. "
+            f"Must be the name of an existing column in the sample metadata."
+        )
+
     if filter_unassigned:
         # Remove samples with "intermediate" or "unassigned" taxon values,
         # as we only want cohorts with clean taxon calls.
@@ -76,6 +83,13 @@ def _prep_samples_for_cohort_grouping(
         # Use the vectorized period creation function.
         df_samples["period"] = period_by_func_vectorized(df_samples)
 
+    # Validate area_by.
+    if area_by not in df_samples.columns:
+        raise ValueError(
+            f"Invalid value for `area_by`: {area_by!r}. "
+            f"Must be the name of an existing column in the sample metadata."
+        )
+
     # Copy the specified area_by column to a new "area" column.
     df_samples["area"] = df_samples[area_by]