Skip to content

Commit 46e0cfe

Browse files
committed
Add gene_labels and labels to plot_genes()
1 parent 20efee0 commit 46e0cfe

3 files changed

Lines changed: 164 additions & 2 deletions

File tree

malariagen_data/anoph/genome_features.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,8 @@ def plot_genes(
332332
x_range: Optional[gplt_params.x_range] = None,
333333
title: Optional[gplt_params.title] = None,
334334
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
335+
gene_labels: Optional[gplt_params.gene_labels] = None,
336+
labels: Optional[gplt_params.labels] = None,
335337
) -> gplt_params.figure:
336338
debug = self._log.debug
337339

@@ -407,6 +409,101 @@ def plot_genes(
407409
line_width=0,
408410
)
409411

412+
if gene_labels:
413+
debug("determine new figure height and range to accommodate gene labels")
414+
415+
# Increase the figure height by a certain factor, to accommodate labels.
416+
height_increase_factor = 1.2
417+
fig.height = int(fig.height * height_increase_factor)
418+
419+
# Get the original y_range.
420+
# Note: fig.y_range is not subscriptable.
421+
orig_y_range = fig.y_range.start, fig.y_range.end
422+
423+
# Determine the midpoint of the original range, to rescale outward from there.
424+
orig_mid_y_range = (orig_y_range[0] + orig_y_range[1]) / 2
425+
orig_y_range_extent = orig_y_range[1] - orig_y_range[0]
426+
427+
# Determine the new start and end points of the extended range.
428+
new_y_range_extent = orig_y_range_extent * height_increase_factor
429+
new_y_range_extent_half = new_y_range_extent / 2
430+
new_y_start = orig_mid_y_range - new_y_range_extent_half
431+
new_y_end = orig_mid_y_range + new_y_range_extent_half
432+
433+
# Set the new y_range.
434+
fig.y_range = bokeh.models.Range1d(new_y_start, new_y_end)
435+
436+
debug("determine midpoint of each gene rectangle")
437+
data["mid_x"] = (data["start"] + data["end"]) / 2
438+
439+
debug("make gene labels and pointers")
440+
441+
# Put gene_labels into a new column, where the gene_id matches.
442+
# Fill unmapped genes with empty strings, otherwise "NaN" would be displayed.
443+
data["gene_label"] = data["ID"].map(gene_labels).fillna("")
444+
445+
# Put gene pointers (▲ or ▼) in a new column, depending on the strand.
446+
# Leave the pointer empty for genes without a label, so that no pointer is drawn for them.
447+
data["gene_pointer"] = data.apply(
448+
lambda row: ("▼" if row["strand"] == "+" else "▲")
449+
if row["gene_label"]
450+
else "",
451+
axis=1,
452+
)
453+
454+
# Put the pointer above or below the gene rectangle, depending on + or - strand.
455+
neg_strand_pointer_y = orig_mid_y_range - 1.2
456+
pos_strand_pointer_y = orig_mid_y_range + 1.2
457+
data["pointer_y"] = data["strand"].apply(
458+
lambda strand: pos_strand_pointer_y
459+
if strand == "+"
460+
else neg_strand_pointer_y
461+
)
462+
463+
# Put the label above or below the gene rectangle, depending on + or - strand.
464+
neg_strand_label_y = orig_mid_y_range - 1.15
465+
pos_strand_label_y = orig_mid_y_range + 1.25
466+
data["label_y"] = data["strand"].apply(
467+
lambda strand: pos_strand_label_y
468+
if strand == "+"
469+
else neg_strand_label_y
470+
)
471+
472+
# Get the data as a ColumnDataSource.
473+
data_as_cds = bokeh.models.ColumnDataSource(data)
474+
475+
# Create a LabelSet for the gene pointers.
476+
gene_pointers_ls = bokeh.models.LabelSet(
477+
source=data_as_cds,
478+
x="mid_x",
479+
y="pointer_y",
480+
text="gene_pointer",
481+
text_align="center",
482+
text_baseline="middle",
483+
text_font_size="9pt",
484+
text_color="#444444",
485+
)
486+
487+
# Create a LabelSet for the gene labels.
488+
gene_labels_ls = bokeh.models.LabelSet(
489+
source=data_as_cds,
490+
x="mid_x",
491+
y="label_y",
492+
text="gene_label",
493+
text_align="left",
494+
text_baseline="middle",
495+
text_font_size="9pt",
496+
text_color="#444444",
497+
x_offset=8,
498+
)
499+
500+
# Add the gene pointers and gene labels to the figure.
501+
fig.add_layout(gene_pointers_ls)
502+
fig.add_layout(gene_labels_ls)
503+
504+
if labels:
505+
fig.add_layout(labels)
506+
410507
debug("tidy up the plot")
411508
fig.ygrid.visible = False
412509
yticks = [0.4, 1.4]

malariagen_data/anoph/gplt_params.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,13 @@
113113
]
114114

115115
colors: TypeAlias = Annotated[Sequence[str], "List of colors."]
116+
117+
gene_labels: TypeAlias = Annotated[
118+
Mapping[str, str],
119+
"A mapping of gene identifiers to custom labels, which will appear in the plot.",
120+
]
121+
122+
labels: TypeAlias = Annotated[
123+
bokeh.models.LabelSet,
124+
"A LabelSet to use in the plot.",
125+
]

notebooks/plot_genes.ipynb

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,12 +241,67 @@
241241
"id": "c27741d9",
242242
"metadata": {},
243243
"outputs": [],
244+
"source": [
245+
"ag3.plot_genes(\n",
246+
" region=\"2R\",\n",
247+
" gene_labels={\n",
248+
" \"AGAP001096\": \"far left + gene\",\n",
249+
" \"AGAP001099\": \"far left - gene\",\n",
250+
" \"AGAP002942\": \"central + gene\",\n",
251+
" \"AGAP002949\": \"central - gene\",\n",
252+
" \"AGAP004676\": \"far right + gene\",\n",
253+
" \"AGAP004674\": \"far right - gene\",\n",
254+
" }\n",
255+
")"
256+
]
257+
},
258+
{
259+
"cell_type": "code",
260+
"execution_count": null,
261+
"id": "523197c3",
262+
"metadata": {},
263+
"outputs": [],
264+
"source": [
265+
"import bokeh\n",
266+
"import pandas as pd\n",
267+
"data = pd.DataFrame.from_dict(\n",
268+
" [\n",
269+
" {'pos': 10_000_000, 'y': 2, 'label': 'Custom Label A'},\n",
270+
" {'pos': 30_000_000, 'y': 0.9, 'label': 'Custom Label B'},\n",
271+
" {'pos': 50_000_000, 'y': -0.1, 'label': 'Custom Label C'},\n",
272+
" ]\n",
273+
")\n",
274+
"data_as_cds = bokeh.models.ColumnDataSource(data)\n",
275+
"labels = bokeh.models.LabelSet(\n",
276+
" source=data_as_cds,\n",
277+
" x='pos',\n",
278+
" y='y',\n",
279+
" text='label',\n",
280+
" text_align='center',\n",
281+
" text_baseline='middle',\n",
282+
" text_font_size='9pt',\n",
283+
" text_color='blue',\n",
284+
")\n",
285+
"\n",
286+
"ag3.plot_genes(\n",
287+
" region=\"2R\",\n",
288+
" labels=labels,\n",
289+
" height=200,\n",
290+
")"
291+
]
292+
},
293+
{
294+
"cell_type": "code",
295+
"execution_count": null,
296+
"id": "e2a3a927",
297+
"metadata": {},
298+
"outputs": [],
244299
"source": []
245300
}
246301
],
247302
"metadata": {
248303
"kernelspec": {
249-
"display_name": "Python 3 (ipykernel)",
304+
"display_name": "mgen_data_py3.11",
250305
"language": "python",
251306
"name": "python3"
252307
},
@@ -260,7 +315,7 @@
260315
"name": "python",
261316
"nbconvert_exporter": "python",
262317
"pygments_lexer": "ipython3",
263-
"version": "3.10.12"
318+
"version": "3.11.5"
264319
},
265320
"widgets": {
266321
"application/vnd.jupyter.widget-state+json": {

0 commit comments

Comments
 (0)