Skip to content

Commit 9295ec4

Browse files
authored
Merge branch 'master' into fix/issue-1303-cloud-storage-retry-backoff
2 parents 170ecf3 + 6e15613 commit 9295ec4

File tree

2 files changed

+216
-204
lines changed

2 files changed

+216
-204
lines changed

malariagen_data/anoph/dipclust.py

Lines changed: 103 additions & 98 deletions
Original file line number | Diff line number | Diff line change
@@ -88,112 +88,117 @@ def plot_diplotype_clustering(
8888
distance_sort = False
8989

9090
# This is needed to avoid RecursionError on some clustering analyses
91-
# with larger numbers of nodes.
92-
sys.setrecursionlimit(10_000)
93-
94-
# Load sample metadata.
95-
df_samples = self.sample_metadata(
96-
sample_sets=sample_sets,
97-
sample_query=sample_query,
98-
sample_query_options=sample_query_options,
99-
)
91+
# with larger numbers of nodes. Save and restore the original limit to
92+
# avoid permanently modifying global interpreter state.
93+
_original_limit = sys.getrecursionlimit()
94+
try:
95+
sys.setrecursionlimit(10_000)
10096

101-
dist, gt_samples, n_snps_used = self.diplotype_pairwise_distances(
102-
region=region,
103-
site_mask=site_mask,
104-
sample_sets=sample_sets,
105-
sample_query=sample_query,
106-
sample_query_options=sample_query_options,
107-
cohort_size=cohort_size,
108-
distance_metric=distance_metric,
109-
random_seed=random_seed,
110-
chunks=chunks,
111-
inline_array=inline_array,
112-
)
97+
# Load sample metadata.
98+
df_samples = self.sample_metadata(
99+
sample_sets=sample_sets,
100+
sample_query=sample_query,
101+
sample_query_options=sample_query_options,
102+
)
113103

114-
# Align sample metadata with genotypes.
115-
df_samples = (
116-
df_samples.set_index("sample_id").loc[gt_samples.tolist()].reset_index()
117-
)
104+
dist, gt_samples, n_snps_used = self.diplotype_pairwise_distances(
105+
region=region,
106+
site_mask=site_mask,
107+
sample_sets=sample_sets,
108+
sample_query=sample_query,
109+
sample_query_options=sample_query_options,
110+
cohort_size=cohort_size,
111+
distance_metric=distance_metric,
112+
random_seed=random_seed,
113+
chunks=chunks,
114+
inline_array=inline_array,
115+
)
118116

119-
# Normalise color and symbol parameters.
120-
symbol_prepped = self._setup_sample_symbol(
121-
data=df_samples,
122-
symbol=symbol,
123-
)
124-
del symbol
125-
(
126-
color_prepped,
127-
color_discrete_map_prepped,
128-
category_orders_prepped,
129-
) = self._setup_sample_colors_plotly(
130-
data=df_samples,
131-
color=color,
132-
color_discrete_map=color_discrete_map,
133-
color_discrete_sequence=color_discrete_sequence,
134-
category_orders=category_orders,
135-
)
136-
del color
137-
del color_discrete_map
138-
del color_discrete_sequence
117+
# Align sample metadata with genotypes.
118+
df_samples = (
119+
df_samples.set_index("sample_id").loc[gt_samples.tolist()].reset_index()
120+
)
139121

140-
# Configure hover data.
141-
hover_data = self._setup_sample_hover_data_plotly(
142-
color=color_prepped, symbol=symbol_prepped
143-
)
122+
# Normalise color and symbol parameters.
123+
symbol_prepped = self._setup_sample_symbol(
124+
data=df_samples,
125+
symbol=symbol,
126+
)
127+
del symbol
128+
(
129+
color_prepped,
130+
color_discrete_map_prepped,
131+
category_orders_prepped,
132+
) = self._setup_sample_colors_plotly(
133+
data=df_samples,
134+
color=color,
135+
color_discrete_map=color_discrete_map,
136+
color_discrete_sequence=color_discrete_sequence,
137+
category_orders=category_orders,
138+
)
139+
del color
140+
del color_discrete_map
141+
del color_discrete_sequence
144142

145-
# Construct plot title.
146-
if title is True:
147-
title_lines = []
148-
if sample_sets is not None:
149-
title_lines.append(f"Sample sets: {sample_sets}")
150-
if sample_query is not None:
151-
title_lines.append(f"Sample query: {sample_query}")
152-
title_lines.append(f"Genomic region: {region} ({n_snps_used:,} SNPs)")
153-
title = "<br>".join(title_lines)
154-
155-
# Create the plot.
156-
with self._spinner("Plot dendrogram"):
157-
fig, leaf_data = _plot_dendrogram(
158-
dist=dist,
159-
linkage_method=linkage_method,
160-
count_sort=count_sort,
161-
distance_sort=distance_sort,
162-
render_mode=render_mode,
163-
width=width,
164-
height=height,
165-
title=title,
166-
line_width=line_width,
167-
line_color=line_color,
168-
marker_size=marker_size,
169-
leaf_data=df_samples,
170-
leaf_hover_name="sample_id",
171-
leaf_hover_data=hover_data,
172-
leaf_color=color_prepped,
173-
leaf_symbol=symbol_prepped,
174-
leaf_y=leaf_y,
175-
leaf_color_discrete_map=color_discrete_map_prepped,
176-
leaf_category_orders=category_orders_prepped,
177-
template="simple_white",
178-
y_axis_title=f"Distance ({distance_metric})",
179-
y_axis_buffer=0.1,
143+
# Configure hover data.
144+
hover_data = self._setup_sample_hover_data_plotly(
145+
color=color_prepped, symbol=symbol_prepped
180146
)
181147

182-
# Tidy up.
183-
fig.update_layout(
184-
title_font=dict(
185-
size=title_font_size,
186-
),
187-
legend=dict(itemsizing=legend_sizing, tracegroupgap=0),
188-
)
148+
# Construct plot title.
149+
if title is True:
150+
title_lines = []
151+
if sample_sets is not None:
152+
title_lines.append(f"Sample sets: {sample_sets}")
153+
if sample_query is not None:
154+
title_lines.append(f"Sample query: {sample_query}")
155+
title_lines.append(f"Genomic region: {region} ({n_snps_used:,} SNPs)")
156+
title = "<br>".join(title_lines)
157+
158+
# Create the plot.
159+
with self._spinner("Plot dendrogram"):
160+
fig, leaf_data = _plot_dendrogram(
161+
dist=dist,
162+
linkage_method=linkage_method,
163+
count_sort=count_sort,
164+
distance_sort=distance_sort,
165+
render_mode=render_mode,
166+
width=width,
167+
height=height,
168+
title=title,
169+
line_width=line_width,
170+
line_color=line_color,
171+
marker_size=marker_size,
172+
leaf_data=df_samples,
173+
leaf_hover_name="sample_id",
174+
leaf_hover_data=hover_data,
175+
leaf_color=color_prepped,
176+
leaf_symbol=symbol_prepped,
177+
leaf_y=leaf_y,
178+
leaf_color_discrete_map=color_discrete_map_prepped,
179+
leaf_category_orders=category_orders_prepped,
180+
template="simple_white",
181+
y_axis_title=f"Distance ({distance_metric})",
182+
y_axis_buffer=0.1,
183+
)
189184

190-
if show: # pragma: no cover
191-
fig.show(renderer=renderer)
192-
return {
193-
"figure": fig,
194-
"dendro_sample_id_order": np.asarray(leaf_data["sample_id"].to_list()),
195-
"n_snps": n_snps_used,
196-
}
185+
# Tidy up.
186+
fig.update_layout(
187+
title_font=dict(
188+
size=title_font_size,
189+
),
190+
legend=dict(itemsizing=legend_sizing, tracegroupgap=0),
191+
)
192+
193+
if show: # pragma: no cover
194+
fig.show(renderer=renderer)
195+
return {
196+
"figure": fig,
197+
"dendro_sample_id_order": np.asarray(leaf_data["sample_id"].to_list()),
198+
"n_snps": n_snps_used,
199+
}
200+
finally:
201+
sys.setrecursionlimit(_original_limit)
197202

198203
def diplotype_pairwise_distances(
199204
self,

0 commit comments

Comments
 (0)