diff --git a/malariagen_data/anoph/distance.py b/malariagen_data/anoph/distance.py
index 84d4f4820..2049929d1 100644
--- a/malariagen_data/anoph/distance.py
+++ b/malariagen_data/anoph/distance.py
@@ -527,6 +527,20 @@ def plot_njt(
             count_sort = True
             distance_sort = False
 
+        # Ensure we have enough samples to build a tree.
+        # Zero samples are not handled here: `biallelic_snp_calls` or `snp_calls` is expected to raise "No samples found".
+        # A single sample, however, would otherwise get as far as njt and fail there, so reject it explicitly.
+        df_samples = self.sample_metadata(
+            sample_sets=sample_sets,
+            sample_query=sample_query,
+            sample_query_options=sample_query_options,
+            sample_indices=sample_indices,
+        )
+        if 0 < len(df_samples) < 2:
+            raise ValueError(
+                f"Not enough samples for neighbour-joining tree. Found {len(df_samples)}, needed at least 2."
+            )
+
         # Compute neighbour-joining tree.
         Z, samples, n_snps_used = self.njt(
             region=region,
diff --git a/malariagen_data/anoph/karyotype_params.py b/malariagen_data/anoph/karyotype_params.py
index e13eaffc9..930597dec 100644
--- a/malariagen_data/anoph/karyotype_params.py
+++ b/malariagen_data/anoph/karyotype_params.py
@@ -1,6 +1,5 @@
 """Parameter definitions for karyotype analysis functions."""
 
-
 from typing_extensions import Annotated, TypeAlias
 
 inversion_param: TypeAlias = Annotated[
diff --git a/malariagen_data/mjn.py b/malariagen_data/mjn.py
index 8b3f5bc11..520a51777 100644
--- a/malariagen_data/mjn.py
+++ b/malariagen_data/mjn.py
@@ -264,7 +264,7 @@ def _mjn_graph_edges(
 
                 # add further intermediate nodes as necessary
                 for k in range(1, sep - 1):
-                    source = f"anon_{i}_{j}_{k-1}"
+                    source = f"anon_{i}_{j}_{k - 1}"
                     target = f"anon_{i}_{j}_{k}"
                     graph_node = {
                         "id": target,
@@ -280,10 +280,10 @@ def _mjn_graph_edges(
                     graph_edges.append(graph_edge)
 
                 # add edge from final intermediate node to node j
-                source = f"anon_{i}_{j}_{sep-2}"
+                source = f"anon_{i}_{j}_{sep - 2}"
                 target = j
                 graph_edge = {
-                    "id": f"edge_{i}_{j}_{sep-1}",
+                    "id": f"edge_{i}_{j}_{sep - 1}",
                     "source": source,
                     "target": target,
                 }
diff --git a/notebooks/auto_chunks.ipynb b/notebooks/auto_chunks.ipynb
index eb637b87d..291f7318b 100644
--- a/notebooks/auto_chunks.ipynb
+++ b/notebooks/auto_chunks.ipynb
@@ -39,9 +39,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds = ag3.snp_calls(\n",
-    "    region=\"3R\", sample_sets=\"AG1000G-BF-A\"\n",
-    ")"
+    "ds = ag3.snp_calls(region=\"3R\", sample_sets=\"AG1000G-BF-A\")"
    ]
   },
   {
@@ -80,7 +78,9 @@
    "outputs": [],
    "source": [
     "ds = ag3.snp_calls(\n",
-    "    region=\"3R\", sample_sets=\"AG1000G-BF-A\", chunks=\"300MB\",\n",
+    "    region=\"3R\",\n",
+    "    sample_sets=\"AG1000G-BF-A\",\n",
+    "    chunks=\"300MB\",\n",
     ")"
    ]
   },
@@ -119,9 +119,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds = ag3.snp_calls(\n",
-    "    region=\"3R\", sample_sets=\"AG1000G-BF-A\", chunks=\"auto\"\n",
-    ")"
+    "ds = ag3.snp_calls(region=\"3R\", sample_sets=\"AG1000G-BF-A\", chunks=\"auto\")"
    ]
   },
   {
@@ -159,9 +157,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds = ag3.snp_calls(\n",
-    "    region=\"3R\", sample_sets=\"AG1000G-BF-A\", chunks=\"ndauto\"\n",
-    ")"
+    "ds = ag3.snp_calls(region=\"3R\", sample_sets=\"AG1000G-BF-A\", chunks=\"ndauto\")"
    ]
   },
   {
diff --git a/notebooks/extra_metadata.ipynb b/notebooks/extra_metadata.ipynb
index 466d74ac6..c75b619f0 100644
--- a/notebooks/extra_metadata.ipynb
+++ b/notebooks/extra_metadata.ipynb
@@ -112,7 +112,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ag3.haplotypes(region=\"3R\", analysis=\"gamb_colu_arab\", sample_query=sample_query, sample_sets=\"3.0\")"
+    "ag3.haplotypes(\n",
+    "    region=\"3R\", analysis=\"gamb_colu_arab\", sample_query=sample_query, sample_sets=\"3.0\"\n",
+    ")"
    ]
   },
   {
diff --git a/notebooks/karyotype.ipynb b/notebooks/karyotype.ipynb
index bd26b8a8d..857d560ca 100644
--- a/notebooks/karyotype.ipynb
+++ b/notebooks/karyotype.ipynb
@@ -120,7 +120,9 @@
    },
    "outputs": [],
    "source": [
-    "ag3.plot_pca_coords(pca_df_2la, color=\"karyotype_2La\", symbol=\"taxon\", width=600, height=500)"
+    "ag3.plot_pca_coords(\n",
+    "    pca_df_2la, color=\"karyotype_2La\", symbol=\"taxon\", width=600, height=500\n",
+    ")"
    ]
   },
   {
@@ -195,7 +197,9 @@
    },
    "outputs": [],
    "source": [
-    "ag3.plot_pca_coords(pca_df_2rb, color=\"karyotype_2Rb\", symbol=\"taxon\", width=600, height=500)"
+    "ag3.plot_pca_coords(\n",
+    "    pca_df_2rb, color=\"karyotype_2Rb\", symbol=\"taxon\", width=600, height=500\n",
+    ")"
    ]
   },
   {
@@ -228,7 +232,7 @@
    "outputs": [],
    "source": [
     "kt_df_2rc_gam = ag3.karyotype(\n",
-    "    inversion=\"2Rc_gam\", \n",
+    "    inversion=\"2Rc_gam\",\n",
     "    sample_sets=sample_sets,\n",
     "    sample_query=\"taxon == 'gambiae'\",\n",
     ")\n",
@@ -275,7 +279,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ag3.plot_pca_coords(pca_df_2rc_gam, color=\"karyotype_2Rc_gam\", symbol=\"taxon\", width=600, height=500)"
+    "ag3.plot_pca_coords(\n",
+    "    pca_df_2rc_gam, color=\"karyotype_2Rc_gam\", symbol=\"taxon\", width=600, height=500\n",
+    ")"
    ]
   },
   {
@@ -287,7 +293,9 @@
    },
    "outputs": [],
    "source": [
-    "ag3.plot_pca_coords(pca_df_2rc_gam, color=\"country\", symbol=\"taxon\", width=600, height=500)"
+    "ag3.plot_pca_coords(\n",
+    "    pca_df_2rc_gam, color=\"country\", symbol=\"taxon\", width=600, height=500\n",
+    ")"
    ]
   },
   {
@@ -308,7 +316,7 @@
    "outputs": [],
    "source": [
     "kt_df_2rc_col = ag3.karyotype(\n",
-    "    inversion=\"2Rc_col\", \n",
+    "    inversion=\"2Rc_col\",\n",
     "    sample_sets=sample_sets,\n",
     "    sample_query=\"taxon == 'coluzzii'\",\n",
     ")\n",
@@ -357,7 +365,9 @@
    },
    "outputs": [],
    "source": [
-    "ag3.plot_pca_coords(pca_df_2rc_col, color=\"karyotype_2Rc_col\", symbol=\"country\", width=600, height=500)"
+    "ag3.plot_pca_coords(\n",
+    "    pca_df_2rc_col, color=\"karyotype_2Rc_col\", symbol=\"country\", width=600, height=500\n",
+    ")"
    ]
   }
  ],
diff --git a/notebooks/local_cluster.ipynb b/notebooks/local_cluster.ipynb
index 0d77b5159..91d56dc41 100644
--- a/notebooks/local_cluster.ipynb
+++ b/notebooks/local_cluster.ipynb
@@ -44,6 +44,7 @@
    "outputs": [],
    "source": [
     "from distributed import LocalCluster, Client\n",
+    "\n",
     "cluster = LocalCluster()\n",
     "cluster"
    ]
diff --git a/notebooks/phenotype_data_demo.ipynb b/notebooks/phenotype_data_demo.ipynb
index 4db6d0306..ac1e224f8 100644
--- a/notebooks/phenotype_data_demo.ipynb
+++ b/notebooks/phenotype_data_demo.ipynb
@@ -382,7 +382,7 @@
     "ag3 = Ag3(pre=True)\n",
     "\n",
     "print(\"MalariaGEN Ag3 API client initialized.\")\n",
-    "print(ag3)\n"
+    "print(ag3)"
    ]
   },
   {
@@ -416,12 +416,14 @@
     "\n",
     "# We'll pick one sample set for demonstration, preferably one known to have data\n",
     "# For this example, we'll use '1237-VO-BJ-DJOGBENOU-VMF00050'\n",
-    "demo_sample_set = '1237-VO-BJ-DJOGBENOU-VMF00050'\n",
+    "demo_sample_set = \"1237-VO-BJ-DJOGBENOU-VMF00050\"\n",
     "if demo_sample_set not in phenotype_sample_sets:\n",
-    "    print(f\"Warning: '{demo_sample_set}' not found. Using the first available: {phenotype_sample_sets}\")\n",
+    "    print(\n",
+    "        f\"Warning: '{demo_sample_set}' not found. Using the first available: {phenotype_sample_sets}\"\n",
+    "    )\n",
     "    demo_sample_set = phenotype_sample_sets\n",
     "\n",
-    "print(f\"\\nUsing sample set for demonstration: {demo_sample_set}\")\n"
+    "print(f\"\\nUsing sample set for demonstration: {demo_sample_set}\")"
    ]
   },
   {
@@ -531,18 +533,21 @@
     }
    ],
    "source": [
-    "print(f\"\\n--- Loading phenotype data for '{demo_sample_set}' filtered by Deltamethrin ---\")\n",
+    "print(\n",
+    "    f\"\\n--- Loading phenotype data for '{demo_sample_set}' filtered by Deltamethrin ---\"\n",
+    ")\n",
     "df_deltamethrin = ag3.phenotype_data(\n",
-    "    sample_sets=[demo_sample_set],\n",
-    "    sample_query=\"insecticide == 'Deltamethrin'\"\n",
+    "    sample_sets=[demo_sample_set], sample_query=\"insecticide == 'Deltamethrin'\"\n",
     ")\n",
     "\n",
     "print(f\"Shape of DataFrame: {df_deltamethrin.shape}\")\n",
     "print(\"\\nFirst 5 rows of the filtered DataFrame:\")\n",
     "df_deltamethrin.head()\n",
-    "print(f\"\\nUnique insecticides in filtered data: {df_deltamethrin['insecticide'].unique()}\")\n",
+    "print(\n",
+    "    f\"\\nUnique insecticides in filtered data: {df_deltamethrin['insecticide'].unique()}\"\n",
+    ")\n",
     "print(\"\\nDataFrame Info:\")\n",
-    "df_deltamethrin.info() "
+    "df_deltamethrin.info()"
    ]
   },
   {
@@ -647,7 +652,7 @@
     "print(f\"\\n--- Loading phenotype data filtered by Deltamethrin and dose >= 1.0 ---\")\n",
     "df_filtered_multi = ag3.phenotype_data(\n",
     "    sample_sets=[demo_sample_set],\n",
-    "    sample_query=\"insecticide == 'Deltamethrin' and dose >= 1.0\"\n",
+    "    sample_query=\"insecticide == 'Deltamethrin' and dose >= 1.0\",\n",
     ")\n",
     "\n",
     "print(f\"Shape of DataFrame: {df_filtered_multi.shape}\")\n",
@@ -657,7 +662,7 @@
     "print(f\"\\nUnique insecticides: {df_filtered_multi['insecticide'].unique()}\")\n",
     "print(f\"Unique doses: {df_filtered_multi['dose'].unique()}\")\n",
     "print(\"\\nDataFrame Info:\")\n",
-    "df_filtered_multi.info()\n"
+    "df_filtered_multi.info()"
    ]
   },
   {
@@ -761,14 +766,14 @@
     "df_cohort_filtered = ag3.phenotype_data(\n",
     "    sample_sets=[demo_sample_set],\n",
     "    sample_query=\"insecticide == 'Deltamethrin'\",\n",
-    "    min_cohort_size=10\n",
+    "    min_cohort_size=10,\n",
     ")\n",
     "\n",
     "print(f\"Shape of DataFrame: {df_cohort_filtered.shape}\")\n",
     "print(\"\\nFirst 5 rows of the cohort-filtered DataFrame:\")\n",
-    "df_cohort_filtered.head() \n",
+    "df_cohort_filtered.head()\n",
     "print(\"\\nDataFrame Info:\")\n",
-    "df_cohort_filtered.info() \n",
+    "df_cohort_filtered.info()\n",
     "# Verify cohort sizes (optional, for internal testing)\n",
     "# if not df_cohort_filtered.empty:\n",
     "#     cohort_keys = [\"insecticide\", \"dose\", \"location\", \"country\", \"sample_set\"]\n",
@@ -777,7 +782,7 @@
     "#         cohort_sizes = df_cohort_filtered.groupby(available_keys).size()\n",
     "#         print(\"\\nCohort sizes after filtering:\")\n",
     "#         print(cohort_sizes)\n",
-    "#         print(f\"All cohorts meet min_cohort_size (>=10): {all(cohort_sizes >= 10)}\")\n"
+    "#         print(f\"All cohorts meet min_cohort_size (>=10): {all(cohort_sizes >= 10)}\")"
    ]
   },
   {
@@ -832,8 +837,7 @@
     "\n",
     "# Example 1: Binary outcomes for all Deltamethrin samples\n",
     "binary_deltamethrin = ag3.phenotype_binary(\n",
-    "    sample_sets=[demo_sample_set],\n",
-    "    sample_query=\"insecticide == 'Deltamethrin'\"\n",
+    "    sample_sets=[demo_sample_set], sample_query=\"insecticide == 'Deltamethrin'\"\n",
     ")\n",
     "\n",
     "print(f\"Shape of binary series: {binary_deltamethrin.shape}\")\n",
@@ -845,18 +849,19 @@
     "# Example 2: Binary outcomes for samples that were 'alive' with Deltamethrin\n",
     "binary_alive_deltamethrin = ag3.phenotype_binary(\n",
     "    sample_sets=[demo_sample_set],\n",
-    "    sample_query=\"insecticide == 'Deltamethrin' and phenotype == 'alive'\"\n",
+    "    sample_query=\"insecticide == 'Deltamethrin' and phenotype == 'alive'\",\n",
     ")\n",
     "\n",
-    "print(f\"\\nShape of binary series (alive Deltamethrin): {binary_alive_deltamethrin.shape}\")\n",
+    "print(\n",
+    "    f\"\\nShape of binary series (alive Deltamethrin): {binary_alive_deltamethrin.shape}\"\n",
+    ")\n",
     "print(\"First 5 entries:\")\n",
     "print(binary_alive_deltamethrin.head())\n",
     "print(f\"Unique values in series: {binary_alive_deltamethrin.unique()}\")\n",
     "\n",
     "# Example 3: Binary outcomes for samples with dose 0.5\n",
     "binary_dose_0_5 = ag3.phenotype_binary(\n",
-    "    sample_sets=[demo_sample_set],\n",
-    "    sample_query=\"dose == 0.5\"\n",
+    "    sample_sets=[demo_sample_set], sample_query=\"dose == 0.5\"\n",
     ")\n",
     "\n",
     "print(f\"\\nShape of binary series (dose 0.5): {binary_dose_0_5.shape}\")\n",
@@ -927,8 +932,7 @@
     "\n",
     "# Example 1: Binary outcomes for all Deltamethrin samples\n",
     "binary_deltamethrin = ag3.phenotype_binary(\n",
-    "    sample_sets=[demo_sample_set],\n",
-    "    sample_query=\"insecticide == 'Deltamethrin'\"\n",
+    "    sample_sets=[demo_sample_set], sample_query=\"insecticide == 'Deltamethrin'\"\n",
     ")\n",
     "\n",
     "print(f\"Shape of binary series: {binary_deltamethrin.shape}\")\n",
@@ -940,18 +944,19 @@
     "# Example 2: Binary outcomes for samples that were 'alive' with Deltamethrin\n",
     "binary_alive_deltamethrin = ag3.phenotype_binary(\n",
     "    sample_sets=[demo_sample_set],\n",
-    "    sample_query=\"insecticide == 'Deltamethrin' and phenotype == 'alive'\"\n",
+    "    sample_query=\"insecticide == 'Deltamethrin' and phenotype == 'alive'\",\n",
     ")\n",
     "\n",
-    "print(f\"\\nShape of binary series (alive Deltamethrin): {binary_alive_deltamethrin.shape}\")\n",
+    "print(\n",
+    "    f\"\\nShape of binary series (alive Deltamethrin): {binary_alive_deltamethrin.shape}\"\n",
+    ")\n",
     "print(\"First 5 entries:\")\n",
     "print(binary_alive_deltamethrin.head())\n",
     "print(f\"Unique values in series: {binary_alive_deltamethrin.unique()}\")\n",
     "\n",
     "# Example 3: Binary outcomes for samples with dose 0.5\n",
     "binary_dose_0_5 = ag3.phenotype_binary(\n",
-    "    sample_sets=[demo_sample_set],\n",
-    "    sample_query=\"dose == 0.5\"\n",
+    "    sample_sets=[demo_sample_set], sample_query=\"dose == 0.5\"\n",
     ")\n",
     "\n",
     "print(f\"\\nShape of binary series (dose 0.5): {binary_dose_0_5.shape}\")\n",
@@ -1038,7 +1043,7 @@
     "ds_snps = ag3.phenotypes_with_snps(\n",
     "    sample_sets=[demo_sample_set],\n",
     "    sample_query=\"insecticide == 'Deltamethrin' and phenotype == 'alive'\",\n",
-    "    region=demo_region_snps\n",
+    "    region=demo_region_snps,\n",
     ")\n",
     "\n",
     "print(f\"Dataset dimensions: {ds_snps.dims}\")\n",
@@ -1052,7 +1057,7 @@
     "print(\"\\nFirst 5 variant positions:\")\n",
     "print(ds_snps[\"variant_position\"].head(5).values)\n",
     "print(\"\\nDataset Info:\")\n",
-    "ds_snps.info()\n"
+    "ds_snps.info()"
    ]
   },
   {
@@ -1126,7 +1131,7 @@
     "ds_haps = ag3.phenotypes_with_haplotypes(\n",
     "    sample_sets=[demo_sample_set],\n",
     "    sample_query=\"insecticide == 'Deltamethrin' and phenotype == 'dead'\",\n",
-    "    region=demo_region_haps\n",
+    "    region=demo_region_haps,\n",
     ")\n",
     "\n",
     "print(f\"Dataset dimensions: {ds_haps.dims}\")\n",
@@ -1140,7 +1145,7 @@
     "print(\"\\nFirst 5 variant positions:\")\n",
     "print(ds_haps[\"variant_position\"].head(5).values)\n",
     "print(\"\\nDataset Info:\")\n",
-    "ds_haps.info()\n"
+    "ds_haps.info()"
    ]
   },
   {
diff --git a/notebooks/plink_convert.ipynb b/notebooks/plink_convert.ipynb
index deb9b984b..a82710ef2 100644
--- a/notebooks/plink_convert.ipynb
+++ b/notebooks/plink_convert.ipynb
@@ -7,7 +7,7 @@
    "outputs": [],
    "source": [
     "import malariagen_data\n",
-    "import os \n",
+    "import os\n",
     "\n",
     "ag3 = malariagen_data.Ag3(pre=True)"
    ]
@@ -18,11 +18,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ag3.biallelic_snps_to_plink(output_dir=os.getcwd(),\n",
-    "                            region='2L:100000-2000000',\n",
-    "                            n_snps=2000,\n",
-    "                            sample_sets='AG1000G-AO',\n",
-    "                            )"
+    "ag3.biallelic_snps_to_plink(\n",
+    "    output_dir=os.getcwd(),\n",
+    "    region=\"2L:100000-2000000\",\n",
+    "    n_snps=2000,\n",
+    "    sample_sets=\"AG1000G-AO\",\n",
+    ")"
    ]
   }
  ],
diff --git a/notebooks/plot_diplotype_clustering.ipynb b/notebooks/plot_diplotype_clustering.ipynb
index 83d9c5053..83a7df3ea 100644
--- a/notebooks/plot_diplotype_clustering.ipynb
+++ b/notebooks/plot_diplotype_clustering.ipynb
@@ -43,7 +43,7 @@
     "fig = ag3.plot_diplotype_clustering_advanced(\n",
     "    region=\"2R:28,480,000-28,500,000\",\n",
     "    cnv_region=\"2R:28,480,000-28,500,000\",\n",
-    "    snp_transcript='AGAP002862-RA',\n",
+    "    snp_transcript=\"AGAP002862-RA\",\n",
     "    snp_filter_min_maf=0.05,\n",
     "    sample_sets=\"AG1000G-GH\",\n",
     "    site_mask=\"gamb_colu\",\n",
@@ -65,8 +65,8 @@
    "outputs": [],
    "source": [
     "ag3.plot_diplotype_clustering_advanced(\n",
-    "    region='2R:28,480,000-28,490,000',\n",
-    "    snp_transcript=['AGAP002862-RA', 'AGAP002864-RA'],\n",
+    "    region=\"2R:28,480,000-28,490,000\",\n",
+    "    snp_transcript=[\"AGAP002862-RA\", \"AGAP002864-RA\"],\n",
     "    snp_query=\"effect == 'NON_SYNONYMOUS_CODING'\",\n",
     "    snp_filter_min_maf=0.1,\n",
     "    sample_sets=\"AG1000G-GH\",\n",
@@ -88,9 +88,9 @@
    "source": [
     "ag3.plot_diplotype_clustering_advanced(\n",
     "    region=\"2R:28,480,000-28,500,000\",\n",
-    "    cnv_region = \"2R:28,480,000-28,500,000\",\n",
+    "    cnv_region=\"2R:28,480,000-28,500,000\",\n",
     "    snp_transcript=None,\n",
-    "    sample_sets=[\"AG1000G-GH\", 'AG1000G-BF-A'],\n",
+    "    sample_sets=[\"AG1000G-GH\", \"AG1000G-BF-A\"],\n",
     "    snp_filter_min_maf=0.05,\n",
     "    site_mask=\"gamb_colu\",\n",
     "    color=\"taxon\",\n",
@@ -411,11 +411,15 @@
    "outputs": [],
    "source": [
     "af1.plot_diplotype_clustering_advanced(\n",
-    "    region = \"X:8,438,477-8,460,887\",\n",
+    "    region=\"X:8,438,477-8,460,887\",\n",
     "    snp_transcript=[\"LOC125764232_t1\"],\n",
     "    cnv_region=\"X:8,418,477-8,480,887\",\n",
-    "    sample_sets=[\"1232-VO-KE-OCHOMO-VMF00044\", \"1231-VO-MULTI-WONDJI-VMF00043\", \"1236-VO-TZ-OKUMU-VMF00090\"],\n",
-    "    sample_query=\"country in ['Kenya', 'Uganda', 'Tanzania'] and taxon == 'funestus'\"\n",
+    "    sample_sets=[\n",
+    "        \"1232-VO-KE-OCHOMO-VMF00044\",\n",
+    "        \"1231-VO-MULTI-WONDJI-VMF00043\",\n",
+    "        \"1236-VO-TZ-OKUMU-VMF00090\",\n",
+    "    ],\n",
+    "    sample_query=\"country in ['Kenya', 'Uganda', 'Tanzania'] and taxon == 'funestus'\",\n",
     ")"
    ]
   },
diff --git a/notebooks/plot_frequencies_heatmap.ipynb b/notebooks/plot_frequencies_heatmap.ipynb
index 56414912a..daab3576e 100644
--- a/notebooks/plot_frequencies_heatmap.ipynb
+++ b/notebooks/plot_frequencies_heatmap.ipynb
@@ -388,7 +388,7 @@
     "    \"AGAP002865\",  # Cyp6p3\n",
     "    \"AGAP000818\",  # Cyp9k1\n",
     "    \"AGAP008212\",  # Cyp6m2\n",
-    "    \"AGAP008218\",  # Cyp6z2    \n",
+    "    \"AGAP008218\",  # Cyp6z2\n",
     "]\n",
     "\n",
     "cyp_cnv_freqs_df = ag3.gene_cnv_frequencies(\n",
diff --git a/notebooks/plot_frequencies_space_time.ipynb b/notebooks/plot_frequencies_space_time.ipynb
index 273ba679b..231d86aaa 100644
--- a/notebooks/plot_frequencies_space_time.ipynb
+++ b/notebooks/plot_frequencies_space_time.ipynb
@@ -92,7 +92,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ag3.plot_frequencies_time_series(ds, taxa=(\"gambiae\", \"arabiensis\"), height=500, width=1000)"
+    "ag3.plot_frequencies_time_series(\n",
+    "    ds, taxa=(\"gambiae\", \"arabiensis\"), height=500, width=1000\n",
+    ")"
    ]
   },
   {
@@ -506,7 +508,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "type(extra_metadata_df['random_year_as_period'][0])"
+    "type(extra_metadata_df[\"random_year_as_period\"][0])"
    ]
   },
   {
@@ -546,7 +548,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "extra_sample_metadata_df['random_year_as_period'][:3]"
+    "extra_sample_metadata_df[\"random_year_as_period\"][:3]"
    ]
   },
   {
diff --git a/notebooks/plot_g123_gwss.ipynb b/notebooks/plot_g123_gwss.ipynb
index 73ac584af..f2f31a42f 100644
--- a/notebooks/plot_g123_gwss.ipynb
+++ b/notebooks/plot_g123_gwss.ipynb
@@ -56,7 +56,7 @@
     "contig = \"3L\"\n",
     "sample_set = \"AG1000G-BF-A\"\n",
     "sample_query = 'taxon == \"gambiae\"'\n",
-    "site_mask = \"gamb_colu\"\n"
+    "site_mask = \"gamb_colu\""
    ]
   },
   {
@@ -208,7 +208,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "af1.sample_metadata(sample_sets=\"1.0\").groupby([\"sample_set\", \"cohort_admin1_year\"]).size()"
+    "af1.sample_metadata(sample_sets=\"1.0\").groupby(\n",
+    "    [\"sample_set\", \"cohort_admin1_year\"]\n",
+    ").size()"
    ]
   },
   {
diff --git a/notebooks/plot_genes.ipynb b/notebooks/plot_genes.ipynb
index ee5ac1a17..8e7e9ff02 100644
--- a/notebooks/plot_genes.ipynb
+++ b/notebooks/plot_genes.ipynb
@@ -243,15 +243,15 @@
    "outputs": [],
    "source": [
     "ag3.plot_genes(\n",
-    "  region=\"2R\",\n",
-    "  gene_labels={\n",
-    "    \"AGAP001096\": \"far left + gene\",\n",
-    "    \"AGAP001099\": \"far left - gene\",\n",
-    "    \"AGAP002942\": \"central + gene\",\n",
-    "    \"AGAP002949\": \"central - gene\",\n",
-    "    \"AGAP004676\": \"far right + gene\",\n",
-    "    \"AGAP004674\": \"far right - gene\",\n",
-    "  }\n",
+    "    region=\"2R\",\n",
+    "    gene_labels={\n",
+    "        \"AGAP001096\": \"far left + gene\",\n",
+    "        \"AGAP001099\": \"far left - gene\",\n",
+    "        \"AGAP002942\": \"central + gene\",\n",
+    "        \"AGAP002949\": \"central - gene\",\n",
+    "        \"AGAP004676\": \"far right + gene\",\n",
+    "        \"AGAP004674\": \"far right - gene\",\n",
+    "    },\n",
     ")"
    ]
   },
@@ -264,29 +264,30 @@
    "source": [
     "import bokeh\n",
     "import pandas as pd\n",
+    "\n",
     "data = pd.DataFrame.from_dict(\n",
-    "  [\n",
-    "    {'pos': 10_000_000, 'y': 2, 'label': 'Custom Label A'},\n",
-    "    {'pos': 30_000_000, 'y': 0.9, 'label': 'Custom Label B'},\n",
-    "    {'pos': 50_000_000, 'y': -0.1, 'label': 'Custom Label C'},\n",
-    "  ]\n",
+    "    [\n",
+    "        {\"pos\": 10_000_000, \"y\": 2, \"label\": \"Custom Label A\"},\n",
+    "        {\"pos\": 30_000_000, \"y\": 0.9, \"label\": \"Custom Label B\"},\n",
+    "        {\"pos\": 50_000_000, \"y\": -0.1, \"label\": \"Custom Label C\"},\n",
+    "    ]\n",
     ")\n",
     "data_as_cds = bokeh.models.ColumnDataSource(data)\n",
     "gene_labelset = bokeh.models.LabelSet(\n",
     "    source=data_as_cds,\n",
-    "    x='pos',\n",
-    "    y='y',\n",
-    "    text='label',\n",
-    "    text_align='center',\n",
-    "    text_baseline='middle',\n",
-    "    text_font_size='9pt',\n",
-    "    text_color='blue',\n",
+    "    x=\"pos\",\n",
+    "    y=\"y\",\n",
+    "    text=\"label\",\n",
+    "    text_align=\"center\",\n",
+    "    text_baseline=\"middle\",\n",
+    "    text_font_size=\"9pt\",\n",
+    "    text_color=\"blue\",\n",
     ")\n",
     "\n",
     "ag3.plot_genes(\n",
-    "  region=\"2R\",\n",
-    "  gene_labelset=gene_labelset,\n",
-    "  height=200,\n",
+    "    region=\"2R\",\n",
+    "    gene_labelset=gene_labelset,\n",
+    "    height=200,\n",
     ")"
    ]
   },
diff --git a/notebooks/plot_h12_h1x.ipynb b/notebooks/plot_h12_h1x.ipynb
index 33e00f53a..2f751b5c8 100644
--- a/notebooks/plot_h12_h1x.ipynb
+++ b/notebooks/plot_h12_h1x.ipynb
@@ -206,7 +206,7 @@
     "    sample_sets=\"3.0\",\n",
     "    analysis=\"gamb_colu\",\n",
     "    cohort_size=20,\n",
-    "    contig_colors=[\"red\", \"green\"]\n",
+    "    contig_colors=[\"red\", \"green\"],\n",
     ")"
    ]
   },
diff --git a/notebooks/plot_haplotype_networks.ipynb b/notebooks/plot_haplotype_networks.ipynb
index 38adaa9a8..0ecc0d121 100644
--- a/notebooks/plot_haplotype_networks.ipynb
+++ b/notebooks/plot_haplotype_networks.ipynb
@@ -121,10 +121,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "color_mapping = {\n",
-    "    \"Ghana\": \"country == 'Ghana'\",\n",
-    "    \"Other\": \"country != 'Ghana'\"\n",
-    "}\n",
+    "color_mapping = {\"Ghana\": \"country == 'Ghana'\", \"Other\": \"country != 'Ghana'\"}\n",
     "ag3.plot_haplotype_network(\n",
     "    region=\"2L:2,358,158-2,431,617\",\n",
     "    analysis=\"gamb_colu\",\n",
@@ -276,10 +273,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "color_mapping = {\n",
-    "    \"2012\": \"year == 2012\",\n",
-    "    \"2014\": \"year == 2014\"\n",
-    "}\n",
+    "color_mapping = {\"2012\": \"year == 2012\", \"2014\": \"year == 2014\"}\n",
     "af1.plot_haplotype_network(\n",
     "    region=\"2RL:2,358,158-2,431,617\",\n",
     "    sample_query=\"country == 'Ghana'\",\n",
diff --git a/notebooks/plot_haplotypes_frequencies.ipynb b/notebooks/plot_haplotypes_frequencies.ipynb
index 0704d537c..9ad777363 100644
--- a/notebooks/plot_haplotypes_frequencies.ipynb
+++ b/notebooks/plot_haplotypes_frequencies.ipynb
@@ -32,7 +32,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "hap_df = ag3.haplotypes_frequencies(\"2L:2,358,158-2,431,617\", \"admin1_year\", sample_sets=(\"AG1000G-BF-A\", \"AG1000G-BF-B\", \"AG1000G-BF-C\"))\n",
+    "hap_df = ag3.haplotypes_frequencies(\n",
+    "    \"2L:2,358,158-2,431,617\",\n",
+    "    \"admin1_year\",\n",
+    "    sample_sets=(\"AG1000G-BF-A\", \"AG1000G-BF-B\", \"AG1000G-BF-C\"),\n",
+    ")\n",
     "hap_df"
    ]
   },
@@ -53,7 +57,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "hap_xr = ag3.haplotypes_frequencies_advanced(region=\"2L:2,358,158-2,431,617\", area_by=\"admin1_iso\", period_by=\"year\", sample_sets = [\"AG1000G-BF-A\", \"AG1000G-BF-B\"])"
+    "hap_xr = ag3.haplotypes_frequencies_advanced(\n",
+    "    region=\"2L:2,358,158-2,431,617\",\n",
+    "    area_by=\"admin1_iso\",\n",
+    "    period_by=\"year\",\n",
+    "    sample_sets=[\"AG1000G-BF-A\", \"AG1000G-BF-B\"],\n",
+    ")"
    ]
   },
   {
diff --git a/notebooks/plot_pairwise_average_fst.ipynb b/notebooks/plot_pairwise_average_fst.ipynb
index cefddfe75..0b41014a6 100644
--- a/notebooks/plot_pairwise_average_fst.ipynb
+++ b/notebooks/plot_pairwise_average_fst.ipynb
@@ -47,9 +47,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "region=\"3L:15,000,000-16,000,000\"\n",
-    "site_mask='gamb_colu'\n",
-    "n_jack=200"
+    "region = \"3L:15,000,000-16,000,000\"\n",
+    "site_mask = \"gamb_colu\"\n",
+    "n_jack = 200"
    ]
   },
   {
@@ -65,7 +65,8 @@
     "    cohort2_query=\"cohort_admin2_year == 'ML-2_Kati_gamb_2014'\",\n",
     "    sample_sets=\"3.0\",\n",
     "    n_jack=n_jack,\n",
-    "    site_mask=site_mask)\n",
+    "    site_mask=site_mask,\n",
+    ")\n",
     "fst_hudson, se_hudson"
    ]
   },
@@ -134,9 +135,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "region=\"3L:15,000,000-16,000,000\"\n",
-    "site_mask='arab'\n",
-    "n_jack=200"
+    "region = \"3L:15,000,000-16,000,000\"\n",
+    "site_mask = \"arab\"\n",
+    "n_jack = 200"
    ]
   },
   {
@@ -203,7 +204,7 @@
     "    \"Kilifi_2012\": \"taxon == 'arabiensis' and location == 'Kilifi' and year == 2012\",\n",
     "}\n",
     "fst_df = ag3.pairwise_average_fst(\n",
-    "    region=\"3L:15,000,000-16,000,000\", \n",
+    "    region=\"3L:15,000,000-16,000,000\",\n",
     "    cohorts=wild_cohorts,\n",
     "    min_cohort_size=10,\n",
     "    site_mask=\"arab\",\n",
diff --git a/notebooks/plot_pca.ipynb b/notebooks/plot_pca.ipynb
index fcda6d53a..74421ccc3 100644
--- a/notebooks/plot_pca.ipynb
+++ b/notebooks/plot_pca.ipynb
@@ -314,7 +314,9 @@
     "ag3.plot_pca_coords_3d(\n",
     "    df_pca,\n",
     "    color=\"taxon\",\n",
-    "    category_orders=dict(taxon=[\"coluzzii\", \"gambiae\", \"arabiensis\", \"gcx1\", \"gcx2\", \"gcx3\"]),\n",
+    "    category_orders=dict(\n",
+    "        taxon=[\"coluzzii\", \"gambiae\", \"arabiensis\", \"gcx1\", \"gcx2\", \"gcx3\"]\n",
+    "    ),\n",
     "    marker_size=2,\n",
     ")"
    ]
diff --git a/notebooks/plot_samples.ipynb b/notebooks/plot_samples.ipynb
index b4b9f2834..2f7f31c8a 100644
--- a/notebooks/plot_samples.ipynb
+++ b/notebooks/plot_samples.ipynb
@@ -116,10 +116,7 @@
    "outputs": [],
    "source": [
     "ag3.plot_sample_location_mapbox(\n",
-    "  color='country',\n",
-    "  sample_sets=[\"3.0\"],\n",
-    "  sample_query=\"year > 2012\",\n",
-    "  zoom=2\n",
+    "    color=\"country\", sample_sets=[\"3.0\"], sample_query=\"year > 2012\", zoom=2\n",
     ")"
    ]
   },
@@ -130,9 +127,9 @@
    "outputs": [],
    "source": [
     "ag3.plot_sample_location_geo(\n",
-    "  color='country',\n",
-    "  sample_sets=[\"3.0\"],\n",
-    "  sample_query=\"year > 2012\",\n",
+    "    color=\"country\",\n",
+    "    sample_sets=[\"3.0\"],\n",
+    "    sample_query=\"year > 2012\",\n",
     ")"
    ]
   },
@@ -196,10 +193,7 @@
    "outputs": [],
    "source": [
     "af1.plot_sample_location_mapbox(\n",
-    "  color='country',\n",
-    "  sample_sets=[\"1.0\"],\n",
-    "  sample_query=\"year > 2015\",\n",
-    "  zoom=2\n",
+    "    color=\"country\", sample_sets=[\"1.0\"], sample_query=\"year > 2015\", zoom=2\n",
     ")"
    ]
   },
@@ -210,9 +204,9 @@
    "outputs": [],
    "source": [
     "af1.plot_sample_location_geo(\n",
-    "  color='country',\n",
-    "  sample_sets=[\"1.0\"],\n",
-    "  sample_query=\"year > 2015\",\n",
+    "    color=\"country\",\n",
+    "    sample_sets=[\"1.0\"],\n",
+    "    sample_query=\"year > 2015\",\n",
     ")"
    ]
   },
diff --git a/tests/anoph/test_distance_errors.py b/tests/anoph/test_distance_errors.py
new file mode 100644
index 000000000..0d224d15d
--- /dev/null
+++ b/tests/anoph/test_distance_errors.py
@@ -0,0 +1,70 @@
+import pytest
+from malariagen_data import ag3 as _ag3
+from malariagen_data.anoph.distance import AnophelesDistanceAnalysis
+
+
+@pytest.fixture
+def ag3_sim_api(ag3_sim_fixture):
+    """Build an AnophelesDistanceAnalysis API backed by `ag3_sim_fixture`."""
+    fixture_url = ag3_sim_fixture.url
+    cache_path = ag3_sim_fixture.results_cache_path.as_posix()
+    aim_dtypes = {
+        "aim_species_fraction_arab": "float64",
+        "aim_species_fraction_colu": "float64",
+        "aim_species_fraction_colu_no2l": "float64",
+        "aim_species_gambcolu_arabiensis": object,
+        "aim_species_gambiae_coluzzii": object,
+        "aim_species": object,
+    }
+    return AnophelesDistanceAnalysis(
+        url=fixture_url,
+        public_url=fixture_url,
+        config_path=_ag3.CONFIG_PATH,
+        major_version_number=_ag3.MAJOR_VERSION_NUMBER,
+        major_version_path=_ag3.MAJOR_VERSION_PATH,
+        pre=True,
+        aim_metadata_dtype=aim_dtypes,
+        gff_gene_type="gene",
+        gff_gene_name_attribute="Name",
+        gff_default_attributes=("ID", "Parent", "Name", "description"),
+        default_site_mask="gamb_colu_arab",
+        results_cache=cache_path,
+        taxon_colors=_ag3.TAXON_COLORS,
+        virtual_contigs=_ag3.VIRTUAL_CONTIGS,
+    )
+
+
+def test_plot_njt_no_samples(ag3_sim_api):
+    """plot_njt raises ValueError when the sample query matches no samples."""
+    empty_query = "sex_call == 'Impossible_Value'"
+    with pytest.raises(ValueError) as excinfo:
+        ag3_sim_api.plot_njt(region="2L", n_snps=10, sample_query=empty_query)
+    message = str(excinfo.value)
+    # Either of these two wordings is accepted.
+    expected = ("No samples found for query", "No relevant samples found")
+    assert any(fragment in message for fragment in expected)
+
+
+def test_plot_njt_not_enough_snps(ag3_sim_api):
+    """plot_njt raises ValueError when n_snps exceeds the SNPs available."""
+    with pytest.raises(ValueError) as excinfo:
+        ag3_sim_api.plot_njt(region="2L", n_snps=10_000_000, sample_query=None)
+    message = str(excinfo.value)
+    assert "Not enough SNPs." in message and "Requested 10000000" in message
+
+
+def test_plot_njt_one_sample(ag3_sim_api):
+    """plot_njt raises ValueError when the query selects exactly one sample."""
+    # Take the first sample in the metadata so the query below matches a
+    # single sample, which should trigger plot_njt's minimum sample check.
+    first_row = ag3_sim_api.sample_metadata().iloc[0]
+    single_sample_query = f"sample_id == '{first_row['sample_id']}'"
+
+    with pytest.raises(ValueError) as excinfo:
+        ag3_sim_api.plot_njt(region="2L", n_snps=10, sample_query=single_sample_query)
+
+    message = str(excinfo.value)
+    # The error should name the problem, the count found and the minimum.
+    assert "Not enough samples for neighbour-joining tree" in message
+    assert "Found 1" in message
+    assert "needed at least 2" in message
diff --git a/tests/anoph/test_pca.py b/tests/anoph/test_pca.py
index 4a603da39..13b0eec42 100644
--- a/tests/anoph/test_pca.py
+++ b/tests/anoph/test_pca.py
@@ -160,7 +160,7 @@ def test_pca_plotting(fixture, api: AnophelesPca):
     # Check sizes.
     assert len(pca_df) == ds.sizes["samples"]
     for i in range(n_components):
-        assert f"PC{i+1}" in pca_df.columns, (
+        assert f"PC{i + 1}" in pca_df.columns, (
             "n_components",
             n_components,
             "n_samples",
@@ -252,7 +252,7 @@ def test_pca_exclude_samples(fixture, api: AnophelesPca):
     # Check sizes.
     assert len(pca_df) == n_samples
     for i in range(n_components):
-        assert f"PC{i+1}" in pca_df.columns, (
+        assert f"PC{i + 1}" in pca_df.columns, (
             "n_components",
             n_components,
             "n_samples",
@@ -262,7 +262,7 @@ def test_pca_exclude_samples(fixture, api: AnophelesPca):
             "n_snps",
             n_snps,
         )
-    assert f"PC{n_components+1}" not in pca_df.columns
+    assert f"PC{n_components + 1}" not in pca_df.columns
     assert "pca_fit" in pca_df.columns
     assert pca_df["pca_fit"].all()
     assert pca_evr.ndim == 1
@@ -315,7 +315,7 @@ def test_pca_fit_exclude_samples(fixture, api: AnophelesPca):
     # Check sizes.
     assert len(pca_df) == n_samples
     for i in range(n_components):
-        assert f"PC{i+1}" in pca_df.columns, (
+        assert f"PC{i + 1}" in pca_df.columns, (
             "n_components",
             n_components,
             "n_samples",
@@ -325,7 +325,7 @@ def test_pca_fit_exclude_samples(fixture, api: AnophelesPca):
             "n_snps",
             n_snps,
         )
-    assert f"PC{n_components+1}" not in pca_df.columns
+    assert f"PC{n_components + 1}" not in pca_df.columns
     assert "pca_fit" in pca_df.columns
     assert pca_evr.ndim == 1
     assert pca_evr.shape[0] == n_components