Skip to content

Commit c88f248

Browse files
committed
Clearing the way to use haplotype_frequencies in the haplotype_frequencies code.
1 parent ed875a8 commit c88f248

4 files changed

Lines changed: 21 additions & 18 deletions

File tree

malariagen_data/anoph/h12.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from collections import Counter
21
from typing import Optional, Tuple, Dict, Mapping
32

43
import allel # type: ignore
@@ -7,7 +6,7 @@
76
import bokeh.plotting
87

98
from .hap_data import AnophelesHapData
10-
from ..util import hash_columns, check_types, CacheMiss
9+
from ..util import check_types, CacheMiss, haplotype_frequencies
1110
from . import base_params
1211
from . import h12_params, gplt_params, hap_params
1312

@@ -515,21 +514,11 @@ def plot_h12_gwss(
515514
return fig
516515

517516

518-
def haplotype_frequencies(h):
519-
"""Compute haplotype frequencies, returning a dictionary that maps
520-
haplotype hash values to frequencies."""
521-
n = h.shape[1]
522-
hashes = hash_columns(np.asarray(h))
523-
counts = Counter(hashes)
524-
freqs = {key: count / n for key, count in counts.items()}
525-
return freqs
526-
527-
528517
def garud_h12(ht):
529518
"""Compute Garud's H12."""
530519

531520
# Compute haplotype frequencies.
532-
frq_counter = haplotype_frequencies(ht)
521+
frq_counter, _, _ = haplotype_frequencies(ht)
533522

534523
# Convert to array of sorted frequencies.
535524
f = np.sort(np.fromiter(frq_counter.values(), dtype=float))[::-1]

malariagen_data/anoph/h1x.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,9 @@
66
import bokeh.plotting
77

88
from .hap_data import AnophelesHapData
9-
from ..util import check_types, CacheMiss
9+
from ..util import check_types, CacheMiss, haplotype_frequencies
1010
from . import base_params
1111
from . import h12_params, gplt_params, hap_params
12-
from .h12 import haplotype_frequencies
1312

1413

1514
class AnophelesH1XAnalysis(
@@ -342,8 +341,8 @@ def haplotype_joint_frequencies(ha, hb):
342341
"""Compute the joint frequency of haplotypes in two different
343342
cohorts. Returns a dictionary mapping haplotype hash values to
344343
the product of frequencies in each cohort."""
345-
frqa = haplotype_frequencies(ha)
346-
frqb = haplotype_frequencies(hb)
344+
frqa, _, _ = haplotype_frequencies(ha)
345+
frqb, _, _ = haplotype_frequencies(hb)
347346
keys = set(frqa.keys()) | set(frqb.keys())
348347
joint_freqs = {key: frqa.get(key, 0) * frqb.get(key, 0) for key in keys}
349348
return joint_freqs

malariagen_data/util.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import re
55
import sys
66
import warnings
7+
from collections import Counter
78
from enum import Enum
89
from math import prod
910
from functools import wraps
@@ -1606,6 +1607,18 @@ def hash_columns(x):
16061607
return out
16071608

16081609

1610+
def haplotype_frequencies(h):
1611+
"""Compute haplotype frequencies, returning three dictionaries keyed by
1612+
haplotype hash value: frequencies, counts, and the total number of haplotypes."""
1613+
n = h.shape[1]
1614+
hashes = hash_columns(np.asarray(h))
1615+
count = Counter(hashes)
1616+
freqs = {key: count / n for key, count in count.items()}
1617+
counts = {key: count for key, count in count.items()}
1618+
nobs = {key: n for key, count in count.items()}
1619+
return freqs, counts, nobs
1620+
1621+
16091622
def distributed_client():
16101623
from distributed import get_client
16111624

tests/anoph/test_h12.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,10 @@ def test_haplotype_frequencies():
8787
dtype="i1",
8888
)
8989

90-
f = haplotype_frequencies(h1)
90+
f, c, o = haplotype_frequencies(h1)
9191
assert isinstance(f, dict)
92+
assert isinstance(c, dict)
93+
assert isinstance(o, dict)
9294
vals = np.array(list(f.values()))
9395
vals.sort()
9496
assert np.all(vals >= 0)

0 commit comments

Comments
 (0)