Skip to content

Commit 894ce4e

Browse files
authored
Test on more platforms (#41)
Testing on macOS, Windows, Python 3.10-3.12. We encounter strange issues on macOS, which look like a bug in pytorch-geometric: the _first_ attempt to download a dataset fails, unless we have called sleep() first.
1 parent 387e686 commit 894ce4e

9 files changed

Lines changed: 225 additions & 147 deletions

File tree

.github/workflows/test.yml

Lines changed: 72 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,13 @@ concurrency:
1616
cancel-in-progress: true
1717

1818
jobs:
19-
test_qek_ubuntu:
20-
name: Run unit/integration tests (ubuntu)
21-
runs-on: ubuntu-latest
19+
test_qek:
20+
name: Run unit/integration tests
2221
strategy:
2322
matrix:
24-
python-version: ["3.10", "3.11"]
23+
os: [ubuntu-latest, windows-latest, macos-latest]
24+
python-version: ["3.10", "3.11", "3.12"]
25+
runs-on: ${{ matrix.os }}
2526
steps:
2627
- name: Checkout QEK
2728
uses: actions/checkout@v4
@@ -32,6 +33,22 @@ jobs:
3233
- name: Install Hatch
3334
run: |
3435
pip install hatch
36+
- name: Pre-download dataset
37+
# On macOS (and only on macOS), we encounter a strange issue:
38+
#
39+
# - loading the dataset causes a FileNotFoundError to be raised
40+
# - checking afterwards, the dataset *is* on the disk
41+
# - rerunning the process, the dataset is found.
42+
#
43+
# The running hypothesis is that pytorch-geometric downloads the
44+
# dataset asynchronously, but fails to wait until the download is
45+
# complete.
46+
#
47+
# With this small script, we force the pipeline to wait until the
48+
# download is complete. So far, this seems to solve the issue.
49+
if: ${{ matrix.os == 'macos-latest' }}
50+
run: |
51+
hatch -v run before_tests
3552
- name: Run tests
3653
run: |
3754
hatch -v run test
@@ -44,11 +61,12 @@ jobs:
4461
if-no-files-found: ignore
4562

4663
test_notebook:
47-
name: Run the Jupyter notebook tutorials (ubuntu)
48-
runs-on: ubuntu-latest
64+
name: Run the Jupyter notebook tutorial (Unix)
4965
strategy:
5066
matrix:
51-
python-version: ["3.10", "3.11"]
67+
os: [ubuntu-latest, macos-latest]
68+
python-version: ["3.10", "3.11", "3.12"]
69+
runs-on: ${{ matrix.os }}
5270
steps:
5371
- name: Checkout QEK
5472
uses: actions/checkout@v4
@@ -59,20 +77,64 @@ jobs:
5977
- name: Install Hatch
6078
run: |
6179
pip install hatch
80+
- name: Pre-download dataset
81+
if: ${{ matrix.os == 'macos-latest' }}
82+
# On macOS (and only on macOS), we encounter a strange issue:
83+
#
84+
# - loading the dataset causes a FileNotFoundError to be raised
85+
# - checking afterwards, the dataset *is* on the disk
86+
# - rerunning the process, the dataset is found.
87+
#
88+
# The running hypothesis is that pytorch-geometric downloads the
89+
# dataset asynchronously, but fails to wait until the download is
90+
# complete.
91+
#
92+
# With this small script, we force the pipeline to wait until the
93+
# download is complete. So far, this seems to solve the issue.
94+
run: |
95+
hatch -v run before_tests
6296
- name: Copy samples
6397
run: |
6498
cp examples/ptcfm_processed_dataset.json .
65-
- name: Run notebook
99+
- name: Run notebooks
66100
run: |
67101
hatch run pip install jupyter
68102
for tutorial in examples/tutorial*.ipynb; do
69103
hatch run jupyter execute "$tutorial";
70104
done
71105
106+
test_notebook_windows:
107+
name: Run the Jupyter notebook tutorial (Windows)
108+
runs-on: ${{ matrix.os }}
109+
strategy:
110+
matrix:
111+
os: [windows-latest]
112+
python-version: ["3.10", "3.11", "3.12"]
113+
steps:
114+
- name: Checkout QEK
115+
uses: actions/checkout@v4
116+
- name: Set up Python ${{ matrix.python-version }}
117+
uses: actions/setup-python@v5
118+
with:
119+
python-version: ${{ matrix.python-version }}
120+
- name: Install Hatch
121+
run: |
122+
pip install hatch
123+
- name: Copy samples
124+
run: |
125+
cp examples/ptcfm_processed_dataset.json .
126+
- name: Run notebook
127+
run: |
128+
hatch run pip install jupyter
129+
$files = Get-ChildItem -Path examples -Include *.ipynb
130+
foreach($tutorial in $files) {
131+
hatch run jupyter execute "$tutorial"
132+
}
133+
72134
publish:
73135
name: Publish to PyPI
74136
if: startsWith(github.ref, 'refs/tags/v')
75-
needs: test_qek_ubuntu
137+
needs: test_qek
76138
runs-on: ubuntu-latest
77139
permissions:
78140
# IMPORTANT: this permission is mandatory for trusted publishing
@@ -108,7 +170,7 @@ jobs:
108170
deploy_docs:
109171
name: Deploy QEK docs (ubuntu)
110172
if: startsWith(github.ref, 'refs/tags/v')
111-
needs: test_qek_ubuntu
173+
needs: test_qek
112174
runs-on: ubuntu-latest
113175
steps:
114176
- name: Checkout QEK

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ dependencies = [
7474
]
7575

7676
[tool.hatch.envs.default.scripts]
77+
before_tests = "python scripts/before_tests.py"
7778
test = "pytest -n auto --cov-report=term-missing --cov-config=pyproject.toml --cov=qek --cov=tests --markdown-docs {args}"
7879
test_readme = "pytest --markdown-docs README.md"
7980

qek/data/extractors.py

Lines changed: 90 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from uuid import UUID
99
import time
1010
from typing import Any, Callable, Generator, Generic, Sequence, TypeVar, cast
11-
import emu_mps
1211
from numpy.typing import NDArray
1312
from pasqal_cloud import SDK
1413
from pasqal_cloud.batch import Batch
@@ -17,6 +16,7 @@
1716
from pasqal_cloud.utils.filters import BatchFilters
1817
from pathlib import Path
1918
import numpy as np
19+
import os
2020
import pulser as pl
2121
from pulser.devices import Device
2222
from pulser.json.abstract_repr.deserializer import deserialize_device
@@ -438,97 +438,102 @@ def run(self, max_qubits: int = 8) -> SyncExtracted:
438438
return result
439439

440440

441-
class EmuMPSExtractor(BaseExtractor[GraphType]):
442-
"""
443-
A Extractor that uses the emu-mps Emulator to run sequences compiled
444-
from graphs.
445-
446-
Performance note: emulating a quantum device on a classical
447-
computer requires considerable amount of resources, so this
448-
Extractor may be slow or require too much memory. If should,
449-
however, be faster than QutipExtractor in most cases.
441+
if os.name == "posix":
442+
# Any Unix including Linux and macOS
450443

451-
See also:
452-
- QPUExtractor (run on a physical QPU)
453-
454-
Args:
455-
path: Path to store the result of the run, for future uses.
456-
To reload the result of a previous run, use `LoadExtractor`.
457-
compiler: A graph compiler, in charge of converting graphs to Pulser Sequences,
458-
the format that can be executed on a quantum device.
459-
device: A device to use. For general experiments, the default
460-
device `AnalogDevice` is a perfectly reasonable choice.
461-
"""
462-
463-
def __init__(
464-
self,
465-
compiler: BaseGraphCompiler[GraphType],
466-
device: Device = pl.devices.AnalogDevice,
467-
path: Path | None = None,
468-
):
469-
super().__init__(device=device, compiler=compiler, path=path)
470-
self.graphs: list[BaseGraph]
471-
self.device = device
444+
import emu_mps
472445

473-
def run(self, max_qubits: int = 10, dt: int = 10) -> BaseExtracted:
446+
class EmuMPSExtractor(BaseExtractor[GraphType]):
474447
"""
475-
Run the compiled graphs.
448+
An Extractor that uses the emu-mps Emulator to run sequences compiled
449+
from graphs.
476450
477-
As emulating a quantum device is slow consumes resources and time exponential in the
478-
number of qubits, for the sake of performance, we limit the number of qubits in the execution
479-
of this extractor.
480-
481-
Args:
482-
max_qubits: Skip any sequence that require strictly more than `max_qubits`. Defaults to 8.
483-
dt: The duration of the simulation step, in us. Defaults to 10.
484-
485-
Returns:
486-
Processed data for all the sequences that were executed.
487-
"""
488-
if len(self.sequences) == 0:
489-
logger.warning("No sequences to run, did you forget to call compile()?")
490-
return SyncExtracted(raw_data=[], targets=[], sequences=[], states=[])
451+
Performance note: emulating a quantum device on a classical
452+
computer requires a considerable amount of resources, so this
453+
Extractor may be slow or require too much memory. It should,
454+
however, be faster than QutipExtractor in most cases.
491455
492-
backend = emu_mps.MPSBackend()
493-
raw_data = []
494-
targets: list[int] = []
495-
sequences = []
496-
states = []
497-
for compiled in self.sequences:
498-
qubits_used = len(compiled.sequence.qubit_info)
499-
if qubits_used > max_qubits:
500-
logger.info(
501-
"Graph %s exceeds the qubit limit specified in EmuMPSExtractor (%s > %s), skipping",
502-
id,
503-
qubits_used,
504-
max_qubits,
505-
)
506-
continue
507-
logger.debug("Executing compiled graph # %s", id)
456+
See also:
457+
- QPUExtractor (run on a physical QPU)
508458
509-
# Configure observable.
510-
cutoff_duration = int(ceil(compiled.sequence.get_duration() / dt) * dt)
511-
observable = emu_mps.BitStrings(evaluation_times={cutoff_duration})
512-
config = emu_mps.MPSConfig(observables=[observable], dt=dt)
513-
counter: dict[str, Any] = backend.run(compiled.sequence, config)[observable.name][
514-
cutoff_duration
515-
]
516-
logger.debug("Execution of compiled graph # %s complete", id)
517-
raw_data.append(compiled.graph)
518-
if compiled.graph.target is not None:
519-
targets.append(compiled.graph.target)
520-
sequences.append(compiled.sequence)
521-
states.append(counter)
459+
Args:
460+
path: Path to store the result of the run, for future uses.
461+
To reload the result of a previous run, use `LoadExtractor`.
462+
compiler: A graph compiler, in charge of converting graphs to Pulser Sequences,
463+
the format that can be executed on a quantum device.
464+
device: A device to use. For general experiments, the default
465+
device `AnalogDevice` is a perfectly reasonable choice.
466+
"""
467+
468+
def __init__(
469+
self,
470+
compiler: BaseGraphCompiler[GraphType],
471+
device: Device = pl.devices.AnalogDevice,
472+
path: Path | None = None,
473+
):
474+
super().__init__(device=device, compiler=compiler, path=path)
475+
self.graphs: list[BaseGraph]
476+
self.device = device
477+
478+
def run(self, max_qubits: int = 10, dt: int = 10) -> BaseExtracted:
    """
    Run the compiled graphs on the emu-mps emulator.

    Emulating a quantum device consumes time and resources that grow
    exponentially with the number of qubits, so, for the sake of
    performance, sequences that need too many qubits are skipped.

    Args:
        max_qubits: Skip any sequence that requires strictly more than
            `max_qubits` qubits. Defaults to 10.
        dt: The duration of the simulation step, expressed in the same time
            unit as `compiled.sequence.get_duration()` (previously documented
            as "us" -- NOTE(review): confirm the unit). Defaults to 10.

    Returns:
        Processed data for all the sequences that were executed. If
        `self.path` is set, the result is also saved to that path.
    """
    if len(self.sequences) == 0:
        logger.warning("No sequences to run, did you forget to call compile()?")
        return SyncExtracted(raw_data=[], targets=[], sequences=[], states=[])

    backend = emu_mps.MPSBackend()
    raw_data = []
    targets: list[int] = []
    sequences = []
    states = []
    # `index` is the position of the compiled graph in `self.sequences`. It is
    # used to identify the graph in log messages; the previous code passed the
    # builtin `id` function there, which logged "<built-in function id>".
    for index, compiled in enumerate(self.sequences):
        qubits_used = len(compiled.sequence.qubit_info)
        if qubits_used > max_qubits:
            logger.info(
                "Graph %s exceeds the qubit limit specified in EmuMPSExtractor (%s > %s), skipping",
                index,
                qubits_used,
                max_qubits,
            )
            continue
        logger.debug("Executing compiled graph # %s", index)

        # Configure observable: sample bitstrings once, at the first multiple
        # of `dt` that is at or after the end of the sequence.
        cutoff_duration = int(ceil(compiled.sequence.get_duration() / dt) * dt)
        observable = emu_mps.BitStrings(evaluation_times={cutoff_duration})
        config = emu_mps.MPSConfig(observables=[observable], dt=dt)
        counter: dict[str, Any] = backend.run(compiled.sequence, config)[observable.name][
            cutoff_duration
        ]
        logger.debug("Execution of compiled graph # %s complete", index)
        raw_data.append(compiled.graph)
        # Graphs without a target contribute no entry to `targets`.
        if compiled.graph.target is not None:
            targets.append(compiled.graph.target)
        sequences.append(compiled.sequence)
        states.append(counter)

    result = SyncExtracted(
        raw_data=raw_data, targets=targets, sequences=sequences, states=states
    )
    # Logged once; the previous code emitted this message twice.
    logger.debug("Emulation step complete, %s compiled graphs executed", len(raw_data))
    if self.path is not None:
        result.save_dataset(self.path)
    return result
532537

533538

534539
# How many seconds to sleep while waiting for the results from the cloud.
@@ -781,7 +786,7 @@ def run(
781786
"""
782787
Launch the extraction.
783788
"""
784-
raise Exception("Not implemented")
789+
raise NotImplementedError()
785790

786791
def _run(
787792
self,

qek/main.py

Lines changed: 0 additions & 15 deletions
This file was deleted.

scripts/before_tests.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from time import sleep
2+
import torch_geometric.datasets as pyg_dataset
3+
4+
5+
if __name__ == "__main__":
    # Pre-fetch the PTC_FM dataset so that later test runs find it on disk.
    # On some platforms the load raises FileNotFoundError, possibly due to a
    # race condition in pytorch geometric, so we retry with growing pauses.
    last_error = None
    attempt = 0
    while attempt < 10:
        # Quadratic back-off: 0s, 1s, 4s, ... before each attempt.
        sleep(attempt * attempt)
        number = attempt + 1
        try:
            print(f"Attempt {number} to download dataset")
            pyg_dataset.TUDataset(root="dataset", name="PTC_FM")
            print(f"Attempt {number} to download dataset succeeded")
            last_error = None
            break
        except FileNotFoundError as e:
            print(f"Attempt {number} to download failed: {e}")
            last_error = e
        attempt += 1
    # Every attempt failed: surface the most recent error to fail the step.
    if last_error is not None:
        raise last_error

0 commit comments

Comments
 (0)