diff --git a/malariagen_data/anoph/dipclust.py b/malariagen_data/anoph/dipclust.py
index d2852816f..92d69329e 100644
--- a/malariagen_data/anoph/dipclust.py
+++ b/malariagen_data/anoph/dipclust.py
@@ -88,112 +88,117 @@ def plot_diplotype_clustering(
distance_sort = False
# This is needed to avoid RecursionError on some clustering analyses
- # with larger numbers of nodes.
- sys.setrecursionlimit(10_000)
-
- # Load sample metadata.
- df_samples = self.sample_metadata(
- sample_sets=sample_sets,
- sample_query=sample_query,
- sample_query_options=sample_query_options,
- )
+ # with larger numbers of nodes. Save and restore the original limit to
+ # avoid permanently modifying global interpreter state.
+ _original_limit = sys.getrecursionlimit()
+ try:
+ sys.setrecursionlimit(10_000)
- dist, gt_samples, n_snps_used = self.diplotype_pairwise_distances(
- region=region,
- site_mask=site_mask,
- sample_sets=sample_sets,
- sample_query=sample_query,
- sample_query_options=sample_query_options,
- cohort_size=cohort_size,
- distance_metric=distance_metric,
- random_seed=random_seed,
- chunks=chunks,
- inline_array=inline_array,
- )
+ # Load sample metadata.
+ df_samples = self.sample_metadata(
+ sample_sets=sample_sets,
+ sample_query=sample_query,
+ sample_query_options=sample_query_options,
+ )
- # Align sample metadata with genotypes.
- df_samples = (
- df_samples.set_index("sample_id").loc[gt_samples.tolist()].reset_index()
- )
+ dist, gt_samples, n_snps_used = self.diplotype_pairwise_distances(
+ region=region,
+ site_mask=site_mask,
+ sample_sets=sample_sets,
+ sample_query=sample_query,
+ sample_query_options=sample_query_options,
+ cohort_size=cohort_size,
+ distance_metric=distance_metric,
+ random_seed=random_seed,
+ chunks=chunks,
+ inline_array=inline_array,
+ )
- # Normalise color and symbol parameters.
- symbol_prepped = self._setup_sample_symbol(
- data=df_samples,
- symbol=symbol,
- )
- del symbol
- (
- color_prepped,
- color_discrete_map_prepped,
- category_orders_prepped,
- ) = self._setup_sample_colors_plotly(
- data=df_samples,
- color=color,
- color_discrete_map=color_discrete_map,
- color_discrete_sequence=color_discrete_sequence,
- category_orders=category_orders,
- )
- del color
- del color_discrete_map
- del color_discrete_sequence
+ # Align sample metadata with genotypes.
+ df_samples = (
+ df_samples.set_index("sample_id").loc[gt_samples.tolist()].reset_index()
+ )
- # Configure hover data.
- hover_data = self._setup_sample_hover_data_plotly(
- color=color_prepped, symbol=symbol_prepped
- )
+ # Normalise color and symbol parameters.
+ symbol_prepped = self._setup_sample_symbol(
+ data=df_samples,
+ symbol=symbol,
+ )
+ del symbol
+ (
+ color_prepped,
+ color_discrete_map_prepped,
+ category_orders_prepped,
+ ) = self._setup_sample_colors_plotly(
+ data=df_samples,
+ color=color,
+ color_discrete_map=color_discrete_map,
+ color_discrete_sequence=color_discrete_sequence,
+ category_orders=category_orders,
+ )
+ del color
+ del color_discrete_map
+ del color_discrete_sequence
- # Construct plot title.
- if title is True:
- title_lines = []
- if sample_sets is not None:
- title_lines.append(f"Sample sets: {sample_sets}")
- if sample_query is not None:
- title_lines.append(f"Sample query: {sample_query}")
- title_lines.append(f"Genomic region: {region} ({n_snps_used:,} SNPs)")
- title = "
".join(title_lines)
-
- # Create the plot.
- with self._spinner("Plot dendrogram"):
- fig, leaf_data = _plot_dendrogram(
- dist=dist,
- linkage_method=linkage_method,
- count_sort=count_sort,
- distance_sort=distance_sort,
- render_mode=render_mode,
- width=width,
- height=height,
- title=title,
- line_width=line_width,
- line_color=line_color,
- marker_size=marker_size,
- leaf_data=df_samples,
- leaf_hover_name="sample_id",
- leaf_hover_data=hover_data,
- leaf_color=color_prepped,
- leaf_symbol=symbol_prepped,
- leaf_y=leaf_y,
- leaf_color_discrete_map=color_discrete_map_prepped,
- leaf_category_orders=category_orders_prepped,
- template="simple_white",
- y_axis_title=f"Distance ({distance_metric})",
- y_axis_buffer=0.1,
+ # Configure hover data.
+ hover_data = self._setup_sample_hover_data_plotly(
+ color=color_prepped, symbol=symbol_prepped
)
- # Tidy up.
- fig.update_layout(
- title_font=dict(
- size=title_font_size,
- ),
- legend=dict(itemsizing=legend_sizing, tracegroupgap=0),
- )
+ # Construct plot title.
+ if title is True:
+ title_lines = []
+ if sample_sets is not None:
+ title_lines.append(f"Sample sets: {sample_sets}")
+ if sample_query is not None:
+ title_lines.append(f"Sample query: {sample_query}")
+ title_lines.append(f"Genomic region: {region} ({n_snps_used:,} SNPs)")
+ title = "
".join(title_lines)
+
+ # Create the plot.
+ with self._spinner("Plot dendrogram"):
+ fig, leaf_data = _plot_dendrogram(
+ dist=dist,
+ linkage_method=linkage_method,
+ count_sort=count_sort,
+ distance_sort=distance_sort,
+ render_mode=render_mode,
+ width=width,
+ height=height,
+ title=title,
+ line_width=line_width,
+ line_color=line_color,
+ marker_size=marker_size,
+ leaf_data=df_samples,
+ leaf_hover_name="sample_id",
+ leaf_hover_data=hover_data,
+ leaf_color=color_prepped,
+ leaf_symbol=symbol_prepped,
+ leaf_y=leaf_y,
+ leaf_color_discrete_map=color_discrete_map_prepped,
+ leaf_category_orders=category_orders_prepped,
+ template="simple_white",
+ y_axis_title=f"Distance ({distance_metric})",
+ y_axis_buffer=0.1,
+ )
- if show: # pragma: no cover
- fig.show(renderer=renderer)
- return {
- "figure": fig,
- "dendro_sample_id_order": np.asarray(leaf_data["sample_id"].to_list()),
- "n_snps": n_snps_used,
- }
+ # Tidy up.
+ fig.update_layout(
+ title_font=dict(
+ size=title_font_size,
+ ),
+ legend=dict(itemsizing=legend_sizing, tracegroupgap=0),
+ )
+
+ if show: # pragma: no cover
+ fig.show(renderer=renderer)
+ return {
+ "figure": fig,
+ "dendro_sample_id_order": np.asarray(leaf_data["sample_id"].to_list()),
+ "n_snps": n_snps_used,
+ }
+ finally:
+ sys.setrecursionlimit(_original_limit)
def diplotype_pairwise_distances(
self,
diff --git a/malariagen_data/anoph/hapclust.py b/malariagen_data/anoph/hapclust.py
index 6a7000647..75eecc656 100644
--- a/malariagen_data/anoph/hapclust.py
+++ b/malariagen_data/anoph/hapclust.py
@@ -86,120 +86,127 @@ def plot_haplotype_clustering(
distance_sort = False
# This is needed to avoid RecursionError on some haplotype clustering analyses
- # with larger numbers of haplotypes.
- sys.setrecursionlimit(10_000)
+ # with larger numbers of haplotypes. Save and restore the original limit to
+ # avoid permanently modifying global interpreter state.
+ _original_limit = sys.getrecursionlimit()
+ try:
+ sys.setrecursionlimit(10_000)
- # Load sample metadata.
- df_samples = self.sample_metadata(
- sample_sets=sample_sets,
- sample_query=sample_query,
- sample_query_options=sample_query_options,
- )
+ # Load sample metadata.
+ df_samples = self.sample_metadata(
+ sample_sets=sample_sets,
+ sample_query=sample_query,
+ sample_query_options=sample_query_options,
+ )
- # Compute pairwise distances.
- dist, phased_samples, n_snps_used = self.haplotype_pairwise_distances(
- region=region,
- analysis=analysis,
- distance_metric=distance_metric,
- sample_sets=sample_sets,
- sample_query=sample_query,
- sample_query_options=sample_query_options,
- cohort_size=cohort_size,
- random_seed=random_seed,
- chunks=chunks,
- inline_array=inline_array,
- )
+ # Compute pairwise distances.
+ dist, phased_samples, n_snps_used = self.haplotype_pairwise_distances(
+ region=region,
+ analysis=analysis,
+ distance_metric=distance_metric,
+ sample_sets=sample_sets,
+ sample_query=sample_query,
+ sample_query_options=sample_query_options,
+ cohort_size=cohort_size,
+ random_seed=random_seed,
+ chunks=chunks,
+ inline_array=inline_array,
+ )
- # Align sample metadata with haplotypes.
- df_samples_phased = (
- df_samples.set_index("sample_id").loc[phased_samples.tolist()].reset_index()
- )
+ # Align sample metadata with haplotypes.
+ df_samples_phased = (
+ df_samples.set_index("sample_id")
+ .loc[phased_samples.tolist()]
+ .reset_index()
+ )
- # Normalise color and symbol parameters.
- symbol_prepped = self._setup_sample_symbol(
- data=df_samples_phased,
- symbol=symbol,
- )
- del symbol
- (
- color_prepped,
- color_discrete_map_prepped,
- category_orders_prepped,
- ) = self._setup_sample_colors_plotly(
- data=df_samples_phased,
- color=color,
- color_discrete_map=color_discrete_map,
- color_discrete_sequence=color_discrete_sequence,
- category_orders=category_orders,
- )
- del color
- del color_discrete_map
- del color_discrete_sequence
-
- # Repeat the dataframe so there is one row of metadata for each haplotype.
- df_haps = pd.DataFrame(np.repeat(df_samples_phased.values, 2, axis=0))
- df_haps.columns = df_samples_phased.columns
- leaf_data = df_haps.assign(sample_id=_make_unique(df_haps.sample_id))
-
- # Configure hover data.
- hover_data = self._setup_sample_hover_data_plotly(
- color=color_prepped, symbol=symbol_prepped
- )
+ # Normalise color and symbol parameters.
+ symbol_prepped = self._setup_sample_symbol(
+ data=df_samples_phased,
+ symbol=symbol,
+ )
+ del symbol
+ (
+ color_prepped,
+ color_discrete_map_prepped,
+ category_orders_prepped,
+ ) = self._setup_sample_colors_plotly(
+ data=df_samples_phased,
+ color=color,
+ color_discrete_map=color_discrete_map,
+ color_discrete_sequence=color_discrete_sequence,
+ category_orders=category_orders,
+ )
+ del color
+ del color_discrete_map
+ del color_discrete_sequence
+
+ # Repeat the dataframe so there is one row of metadata for each haplotype.
+ df_haps = pd.DataFrame(np.repeat(df_samples_phased.values, 2, axis=0))
+ df_haps.columns = df_samples_phased.columns
+ leaf_data = df_haps.assign(sample_id=_make_unique(df_haps.sample_id))
+
+ # Configure hover data.
+ hover_data = self._setup_sample_hover_data_plotly(
+ color=color_prepped, symbol=symbol_prepped
+ )
- # Construct plot title.
- if title is True:
- title_lines = []
- if sample_sets is not None:
- title_lines.append(f"Sample sets: {sample_sets}")
- if sample_query is not None:
- title_lines.append(f"Sample query: {sample_query}")
- title_lines.append(f"Genomic region: {region} ({n_snps_used:,} SNPs)")
- title = "
".join(title_lines)
+ # Construct plot title.
+ if title is True:
+ title_lines = []
+ if sample_sets is not None:
+ title_lines.append(f"Sample sets: {sample_sets}")
+ if sample_query is not None:
+ title_lines.append(f"Sample query: {sample_query}")
+ title_lines.append(f"Genomic region: {region} ({n_snps_used:,} SNPs)")
+ title = "
".join(title_lines)
+
+ # Create the plot.
+ with self._spinner("Plot dendrogram"):
+ fig, leaf_data = _plot_dendrogram(
+ dist=dist,
+ linkage_method=linkage_method,
+ count_sort=count_sort,
+ distance_sort=distance_sort,
+ render_mode=render_mode,
+ width=width,
+ height=height,
+ title=title,
+ line_width=line_width,
+ line_color=line_color,
+ marker_size=marker_size,
+ leaf_data=leaf_data,
+ leaf_hover_name="sample_id",
+ leaf_hover_data=hover_data,
+ leaf_color=color_prepped,
+ leaf_symbol=symbol_prepped,
+ leaf_y=leaf_y,
+ leaf_color_discrete_map=color_discrete_map_prepped,
+ leaf_category_orders=category_orders_prepped,
+ template="simple_white",
+ y_axis_title=f"Distance ({distance_metric})",
+ y_axis_buffer=1,
+ )
- # Create the plot.
- with self._spinner("Plot dendrogram"):
- fig, leaf_data = _plot_dendrogram(
- dist=dist,
- linkage_method=linkage_method,
- count_sort=count_sort,
- distance_sort=distance_sort,
- render_mode=render_mode,
- width=width,
- height=height,
- title=title,
- line_width=line_width,
- line_color=line_color,
- marker_size=marker_size,
- leaf_data=leaf_data,
- leaf_hover_name="sample_id",
- leaf_hover_data=hover_data,
- leaf_color=color_prepped,
- leaf_symbol=symbol_prepped,
- leaf_y=leaf_y,
- leaf_color_discrete_map=color_discrete_map_prepped,
- leaf_category_orders=category_orders_prepped,
- template="simple_white",
- y_axis_title=f"Distance ({distance_metric})",
- y_axis_buffer=1,
+ # Tidy up.
+ fig.update_layout(
+ title_font=dict(
+ size=title_font_size,
+ ),
+ legend=dict(itemsizing=legend_sizing, tracegroupgap=0),
)
- # Tidy up.
- fig.update_layout(
- title_font=dict(
- size=title_font_size,
- ),
- legend=dict(itemsizing=legend_sizing, tracegroupgap=0),
- )
-
- if show: # pragma: no cover
- fig.show(renderer=renderer)
- return {
- "figure": fig,
- "n_snps": n_snps_used,
- "dist": dist,
- "dist_samples": phased_samples,
- "leaf_data": leaf_data,
- }
+ if show: # pragma: no cover
+ fig.show(renderer=renderer)
+ return {
+ "figure": fig,
+ "n_snps": n_snps_used,
+ "dist": dist,
+ "dist_samples": phased_samples,
+ "leaf_data": leaf_data,
+ }
+ finally:
+ sys.setrecursionlimit(_original_limit)
@doc(
summary="""