Skip to content

Commit d24b5c5

Browse files
jonbrenasYashsingh045
authored andcommitted
Merge branch 'master' into GH1223-seed-random
2 parents ae4e0ff + 10b360b commit d24b5c5

File tree

5 files changed

+177
-13
lines changed

5 files changed

+177
-13
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ To get setup for development, see [this video if you prefer VS Code](https://you
4949
For detailed setup instructions, see:
5050
- [Linux setup guide](LINUX_SETUP.md)
5151
- [macOS setup guide](MACOS_SETUP.md)
52+
- [Windows setup guide](WINDOWS_SETUP.md)
5253
- [Google Colab (TPU) setup guide](docs/source/colab_tpu_runtime.rst)
5354
Detailed instructions can be found in the [Contributors guide](https://github.com/malariagen/malariagen-data-python/blob/master/CONTRIBUTING.md).
5455

WINDOWS_SETUP.md

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Windows Setup Guide
2+
3+
To get set up for development on Windows, see
4+
[this video if you prefer VS Code](https://youtu.be/zddl3n1DCFM),
5+
or [this older video if you prefer PyCharm](https://youtu.be/QniQi-Hoo9A),
6+
and the instructions below.
7+
8+
## 1. Fork and clone this repo
9+
```bash
10+
git clone https://github.com/[username]/malariagen-data-python.git
11+
cd malariagen-data-python
12+
```
13+
14+
## 2. Install Python
15+
16+
Download and install Python 3.10 from the official website:
17+
https://www.python.org/downloads/windows/
18+
19+
During installation, check the box that says Add Python to PATH
20+
before clicking Install.
21+
22+
Verify the installation worked:
23+
```bash
24+
python --version
25+
```
26+
27+
## 3. Install pipx and poetry
28+
```bash
29+
python -m pip install --user pipx
30+
python -m pipx ensurepath
31+
pipx install poetry
32+
```
33+
34+
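After running `ensurepath`, close and reopen PowerShell so that `pipx` is on your PATH. If `pipx install poetry` failed because `pipx` was not recognized, run it again in the new window before continuing.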
35+
36+
## 4. Create and activate development environment
37+
```bash
38+
poetry install
39+
poetry shell
40+
```
41+
42+
## 5. Install pre-commit hooks
43+
```bash
44+
pipx install pre-commit
45+
pre-commit install
46+
```
47+
48+
## 6. Add upstream remote and get latest code
49+
```bash
50+
git remote add upstream https://github.com/malariagen/malariagen-data-python
51+
git pull upstream master
52+
```
53+
54+
Note: The default branch of this repository is called `master`, not `main`. This is a property of the repository, not of Windows.
55+
56+
## 7. Verify everything works
57+
```bash
58+
python -c "import malariagen_data; print('Setup successful!')"
59+
```
60+
61+
## Common Issues on Windows
62+
63+
**poetry not found after install**
64+
65+
Close and reopen PowerShell, then try again.
66+
67+
**git not recognized**
68+
69+
Install Git from https://git-scm.com/download/win
70+
and restart PowerShell.
71+
72+
**python not recognized**
73+
74+
Reinstall Python and make sure to check
75+
Add Python to PATH during installation.
76+
77+
**fatal: not a git repository**
78+
79+
Make sure you are inside the malariagen-data-python
80+
folder before running any git commands.
81+
```bash
82+
cd malariagen-data-python
83+
```
84+
85+
**error: pathspec main did not match**
86+
87+
This repository's default branch is `master`, so use `master` instead of `main`.
88+
```bash
89+
git checkout master
90+
```

malariagen_data/util.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,9 @@ def __eq__(self, other):
570570
and (self.end == other.end)
571571
)
572572

573+
def __repr__(self):
    """Return a constructor-style representation of this region.

    The contig, start and end are each rendered with ``repr()``, in
    that order, inside ``Region(...)``.
    """
    parts = (self._contig, self._start, self._end)
    return "Region({!r}, {!r}, {!r})".format(*parts)
575+
573576
def __str__(self):
574577
out = self._contig
575578
if self._start is not None or self._end is not None:
@@ -927,7 +930,20 @@ def _jitter(a, fraction, random_state=np.random):
927930

928931

929932
class CacheMiss(Exception):
    """Raised when a requested item is not present in the cache.

    Parameters
    ----------
    key : object, optional
        The cache key whose lookup failed. When given (and not None) it is
        included in the exception message and in ``repr()``.

    Notes
    -----
    The key is kept on the instance as ``self.key``. The exception message
    (``str(exc)``) is derived from the key at construction time.
    """

    def __init__(self, key=None):
        self.key = key
        if key is not None:
            message = f"Cache miss for key: {key!r}"
        else:
            message = "Cache miss: requested item not found in cache."
        super().__init__(message)

    def __reduce__(self):
        # The inherited BaseException.__reduce__ rebuilds the exception from
        # ``self.args``, which holds the already-formatted message. That
        # message would then be passed back in as ``key`` and wrapped a
        # second time, corrupting ``str(exc)`` after copy/pickle round-trips.
        # Rebuild from the original key instead, and carry the instance
        # dict along so any extra attributes survive.
        return type(self), (self.key,), dict(vars(self))

    def __repr__(self):
        if self.key is not None:
            return f"CacheMiss({self.key!r})"
        return "CacheMiss()"
931947

932948

933949
class LoggingHelper:
@@ -1531,12 +1547,10 @@ def _apply_allele_mapping(x, mapping, max_allele):
15311547

15321548
def _dask_apply_allele_mapping(v, mapping, max_allele):
15331549
if not isinstance(v, da.Array):
1534-
raise TypeError(
1535-
f"Expected v to be a dask.array.Array, " f"got {type(v).__name__}"
1536-
)
1550+
raise TypeError(f"Expected v to be a dask.array.Array, got {type(v).__name__}")
15371551
if not isinstance(mapping, np.ndarray):
15381552
raise TypeError(
1539-
f"Expected mapping to be a numpy.ndarray, " f"got {type(mapping).__name__}"
1553+
f"Expected mapping to be a numpy.ndarray, got {type(mapping).__name__}"
15401554
)
15411555
assert v.ndim == 2
15421556
assert mapping.ndim == 2
@@ -1558,12 +1572,10 @@ def _genotype_array_map_alleles(gt, mapping):
15581572
# N.B., scikit-allel does not handle empty blocks well, so we
15591573
# include some extra logic to handle that better.
15601574
if not isinstance(gt, np.ndarray):
1561-
raise TypeError(
1562-
f"Expected gt to be a numpy.ndarray, " f"got {type(gt).__name__}"
1563-
)
1575+
raise TypeError(f"Expected gt to be a numpy.ndarray, got {type(gt).__name__}")
15641576
if not isinstance(mapping, np.ndarray):
15651577
raise TypeError(
1566-
f"Expected mapping to be a numpy.ndarray, " f"got {type(mapping).__name__}"
1578+
f"Expected mapping to be a numpy.ndarray, got {type(mapping).__name__}"
15671579
)
15681580
assert gt.ndim == 3
15691581
assert mapping.ndim == 3
@@ -1585,11 +1597,11 @@ def _genotype_array_map_alleles(gt, mapping):
15851597
def _dask_genotype_array_map_alleles(gt, mapping):
15861598
if not isinstance(gt, da.Array):
15871599
raise TypeError(
1588-
f"Expected gt to be a dask.array.Array, " f"got {type(gt).__name__}"
1600+
f"Expected gt to be a dask.array.Array, got {type(gt).__name__}"
15891601
)
15901602
if not isinstance(mapping, np.ndarray):
15911603
raise TypeError(
1592-
f"Expected mapping to be a numpy.ndarray, " f"got {type(mapping).__name__}"
1604+
f"Expected mapping to be a numpy.ndarray, got {type(mapping).__name__}"
15931605
)
15941606
assert gt.ndim == 3
15951607
assert mapping.ndim == 2

tests/conftest.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
import random
21
import numpy as np
32
import pytest
43

54

65
@pytest.fixture(autouse=True, scope="session")
def seed_random():
    """Seed NumPy's global random state with a fixed value.

    Declared as an autouse, session-scoped fixture. Only NumPy's global
    RNG is seeded here.
    """
    fixed_seed = 42
    np.random.seed(fixed_seed)

tests/test_util.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
"""Tests for Region.__repr__ and CacheMiss.__repr__ / default message."""
2+
3+
import pytest
4+
5+
from malariagen_data.util import CacheMiss, Region
6+
7+
8+
# ---------------------------------------------------------------------------
9+
# Region
10+
# ---------------------------------------------------------------------------
11+
12+
13+
def test_region_repr_contig_only():
    """A contig-only region reprs with None bounds and prints as the contig."""
    region = Region("2L")
    expected_repr = "Region('2L', None, None)"
    assert repr(region) == expected_repr
    expected_str = "2L"
    assert str(region) == expected_str
17+
18+
19+
def test_region_repr_with_coords():
    """A fully specified region reprs with raw ints; str() uses separators."""
    region = Region("2L", 100_000, 200_000)
    expected_repr = "Region('2L', 100000, 200000)"
    assert repr(region) == expected_repr
    expected_str = "2L:100,000-200,000"
    assert str(region) == expected_str
23+
24+
25+
def test_region_repr_in_list():
    """repr() of a list of regions shows each element's own repr."""
    first = Region("2L", 10, 20)
    second = Region("3R", 30, 40)
    expected = "[Region('2L', 10, 20), Region('3R', 30, 40)]"
    assert repr([first, second]) == expected
28+
29+
30+
def test_region_repr_start_only():
    """A region with only a start reprs the end as None and prints open-ended."""
    region = Region("X", start=500, end=None)
    expected_repr = "Region('X', 500, None)"
    assert repr(region) == expected_repr
    expected_str = "X:500-"
    assert str(region) == expected_str
34+
35+
36+
# ---------------------------------------------------------------------------
37+
# CacheMiss
38+
# ---------------------------------------------------------------------------
39+
40+
41+
def test_cache_miss_no_key():
    """Without a key, repr() is bare and the message still says 'Cache miss'."""
    miss = CacheMiss()
    assert repr(miss) == "CacheMiss()"
    message = str(miss)
    assert "Cache miss" in message
45+
46+
47+
def test_cache_miss_string_key():
    """A string key appears in both repr() and the exception message."""
    miss = CacheMiss("my_key")
    assert repr(miss) == "CacheMiss('my_key')"
    message = str(miss)
    assert "my_key" in message
51+
52+
53+
def test_cache_miss_tuple_key():
    """A tuple key is rendered with its repr in both repr() and the message."""
    miss = CacheMiss(("contig", 100))
    assert repr(miss) == "CacheMiss(('contig', 100))"
    message = str(miss)
    assert "('contig', 100)" in message
57+
58+
59+
def test_cache_miss_is_exception():
    """CacheMiss can be raised and caught, keeping its key in str() and repr()."""
    with pytest.raises(CacheMiss) as caught:
        raise CacheMiss("lookup_key")
    error = caught.value
    assert "lookup_key" in str(error)
    assert repr(error) == "CacheMiss('lookup_key')"

0 commit comments

Comments
 (0)