pasqal-io
diff --git a/‎.github/workflows/test.yml‎
Lines changed: 72 additions & 10 deletions b/‎.github/workflows/test.yml‎
Lines changed: 72 additions & 10 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 0 deletions b/‎pyproject.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎qek/data/extractors.py‎
Lines changed: 90 additions & 85 deletions b/‎qek/data/extractors.py‎
Lines changed: 90 additions & 85 deletions
diff --git a/‎qek/main.py‎
Lines changed: 0 additions & 15 deletions b/‎qek/main.py‎
Lines changed: 0 additions & 15 deletions
diff --git a/‎scripts/before_tests.py‎
Lines changed: 21 additions & 0 deletions b/‎scripts/before_tests.py‎
Lines changed: 21 additions & 0 deletions
@@ -16,12 +16,13 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  test_qek_ubuntu:
-    name: Run unit/integration tests (ubuntu)
-    runs-on: ubuntu-latest
+  test_qek:
+    name: Run unit/integration tests
     strategy:
       matrix:
-        python-version: ["3.10", "3.11"]
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: ["3.10", "3.11", "3.12"]
+    runs-on: ${{ matrix.os }}
     steps:
     - name: Checkout QEK
       uses: actions/checkout@v4
@@ -32,6 +33,22 @@ jobs:
     - name: Install Hatch
       run: |
         pip install hatch
+    - name: Pre-download dataset
+      if: ${{ matrix.os == 'macos-latest' }}
+      # On macOS (and only on macOS), we encounter a strange issue:
+      #
+      # - loading the dataset causes a FileNotFoundError to be raised
+      # - checking afterwards, the dataset *is* on the disk
+      # - rerunning the process, the dataset is found.
+      #
+      # The running hypothesis is that pytorch-geometric downloads the
+      # dataset asynchronously, but fails to wait until the download is
+      # complete.
+      #
+      # With this small script, we force the pipeline to wait until the
+      # download is complete. So far, this seems to solve the issue.
+      run: |
+        hatch -v run before_tests
     - name: Run tests
       run: |
         hatch -v run test
@@ -44,11 +61,12 @@ jobs:
         if-no-files-found: ignore
 
   test_notebook:
-    name: Run the Jupyter notebook tutorials (ubuntu)
-    runs-on: ubuntu-latest
+    name: Run the Jupyter notebook tutorial (Unix)
     strategy:
       matrix:
-        python-version: ["3.10", "3.11"]
+        os: [ubuntu-latest, macos-latest]
+        python-version: ["3.10", "3.11", "3.12"]
+    runs-on: ${{ matrix.os }}
     steps:
       - name: Checkout QEK
         uses: actions/checkout@v4
@@ -59,20 +77,64 @@ jobs:
       - name: Install Hatch
         run: |
           pip install hatch
+      - name: Pre-download dataset
+        if: ${{ matrix.os == 'macos-latest' }}
+        # On macOS (and only on macOS), we encounter a strange issue:
+        #
+        # - loading the dataset causes a FileNotFoundError to be raised
+        # - checking afterwards, the dataset *is* on the disk
+        # - rerunning the process, the dataset is found.
+        #
+        # The running hypothesis is that pytorch-geometric downloads the
+        # dataset asynchronously, but fails to wait until the download is
+        # complete.
+        #
+        # With this small script, we force the pipeline to wait until the
+        # download is complete. So far, this seems to solve the issue.
+        run: |
+          hatch -v run before_tests
       - name: Copy samples
         run: |
           cp examples/ptcfm_processed_dataset.json .
-      - name: Run notebook
+      - name: Run notebooks
         run: |
           hatch run pip install jupyter
           for tutorial in examples/tutorial*.ipynb; do
             hatch run jupyter execute "$tutorial";
           done
 
+  test_notebook_windows:
+    name: Run the Jupyter notebook tutorial (Windows)
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [windows-latest]
+        python-version: ["3.10", "3.11", "3.12"]
+    steps:
+      - name: Checkout QEK
+        uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install Hatch
+        run: |
+          pip install hatch
+      - name: Copy samples
+        run: |
+          cp examples/ptcfm_processed_dataset.json .
+      - name: Run notebook
+        run: |
+          hatch run pip install jupyter
+          # -Filter is used rather than -Include: without -Recurse or a wildcard
+          # in -Path, -Include matches nothing and no notebook would be run.
+          # The pattern mirrors the one used by the Unix notebook job.
+          $files = Get-ChildItem -Path examples -Filter 'tutorial*.ipynb'
+          foreach($tutorial in $files) {
+            hatch run jupyter execute $tutorial.FullName
+            # The runner only checks the exit code of the last command, so
+            # stop at the first notebook that fails.
+            if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
+          }
+
   publish:
     name: Publish to PyPI
     if: startsWith(github.ref, 'refs/tags/v')
-    needs: test_qek_ubuntu
+    needs: test_qek
     runs-on: ubuntu-latest
     permissions:
       # IMPORTANT: this permission is mandatory for trusted publishing
@@ -108,7 +170,7 @@ jobs:
   deploy_docs:
     name: Deploy QEK docs (ubuntu)
     if: startsWith(github.ref, 'refs/tags/v')
-    needs: test_qek_ubuntu
+    needs: test_qek
     runs-on: ubuntu-latest
     steps:
     - name: Checkout QEK
 
@@ -74,6 +74,7 @@ dependencies = [
 ]
 
 [tool.hatch.envs.default.scripts]
+before_tests = "python scripts/before_tests.py"
 test = "pytest -n auto --cov-report=term-missing --cov-config=pyproject.toml --cov=qek --cov=tests --markdown-docs {args}"
 test_readme = "pytest --markdown-docs README.md"
 
 
@@ -8,7 +8,6 @@
 from uuid import UUID
 import time
 from typing import Any, Callable, Generator, Generic, Sequence, TypeVar, cast
-import emu_mps
 from numpy.typing import NDArray
 from pasqal_cloud import SDK
 from pasqal_cloud.batch import Batch
@@ -17,6 +16,7 @@
 from pasqal_cloud.utils.filters import BatchFilters
 from pathlib import Path
 import numpy as np
+import os
 import pulser as pl
 from pulser.devices import Device
 from pulser.json.abstract_repr.deserializer import deserialize_device
@@ -438,97 +438,102 @@ def run(self, max_qubits: int = 8) -> SyncExtracted:
         return result
 
 
-class EmuMPSExtractor(BaseExtractor[GraphType]):
-    """
-    A Extractor that uses the emu-mps Emulator to run sequences compiled
-    from graphs.
-
-    Performance note: emulating a quantum device on a classical
-    computer requires considerable amount of resources, so this
-    Extractor may be slow or require too much memory. If should,
-    however, be faster than QutipExtractor in most cases.
+if os.name == "posix":
+    # Any Unix including Linux and macOS
 
-    See also:
-    - QPUExtractor (run on a physical QPU)
-
-    Args:
-        path: Path to store the result of the run, for future uses.
-            To reload the result of a previous run, use `LoadExtractor`.
-        compiler: A graph compiler, in charge of converting graphs to Pulser Sequences,
-            the format that can be executed on a quantum device.
-        device: A device to use. For general experiments, the default
-            device `AnalogDevice` is a perfectly reasonable choice.
-    """
-
-    def __init__(
-        self,
-        compiler: BaseGraphCompiler[GraphType],
-        device: Device = pl.devices.AnalogDevice,
-        path: Path | None = None,
-    ):
-        super().__init__(device=device, compiler=compiler, path=path)
-        self.graphs: list[BaseGraph]
-        self.device = device
+    import emu_mps
 
-    def run(self, max_qubits: int = 10, dt: int = 10) -> BaseExtracted:
+    class EmuMPSExtractor(BaseExtractor[GraphType]):
         """
-        Run the compiled graphs.
+        An Extractor that uses the emu-mps Emulator to run sequences compiled
+        from graphs.
 
-        As emulating a quantum device is slow consumes resources and time exponential in the
-        number of qubits, for the sake of performance, we limit the number of qubits in the execution
-        of this extractor.
-
-        Args:
-            max_qubits: Skip any sequence that require strictly more than `max_qubits`. Defaults to 8.
-            dt: The duration of the simulation step, in us. Defaults to 10.
-
-        Returns:
-            Processed data for all the sequences that were executed.
-        """
-        if len(self.sequences) == 0:
-            logger.warning("No sequences to run, did you forget to call compile()?")
-            return SyncExtracted(raw_data=[], targets=[], sequences=[], states=[])
+        Performance note: emulating a quantum device on a classical
+        computer requires a considerable amount of resources, so this
+        Extractor may be slow or require too much memory. It should,
+        however, be faster than QutipExtractor in most cases.
 
-        backend = emu_mps.MPSBackend()
-        raw_data = []
-        targets: list[int] = []
-        sequences = []
-        states = []
-        for compiled in self.sequences:
-            qubits_used = len(compiled.sequence.qubit_info)
-            if qubits_used > max_qubits:
-                logger.info(
-                    "Graph %s exceeds the qubit limit specified in EmuMPSExtractor (%s > %s), skipping",
-                    id,
-                    qubits_used,
-                    max_qubits,
-                )
-                continue
-            logger.debug("Executing compiled graph # %s", id)
+        See also:
+        - QPUExtractor (run on a physical QPU)
 
-            # Configure observable.
-            cutoff_duration = int(ceil(compiled.sequence.get_duration() / dt) * dt)
-            observable = emu_mps.BitStrings(evaluation_times={cutoff_duration})
-            config = emu_mps.MPSConfig(observables=[observable], dt=dt)
-            counter: dict[str, Any] = backend.run(compiled.sequence, config)[observable.name][
-                cutoff_duration
-            ]
-            logger.debug("Execution of compiled graph # %s complete", id)
-            raw_data.append(compiled.graph)
-            if compiled.graph.target is not None:
-                targets.append(compiled.graph.target)
-            sequences.append(compiled.sequence)
-            states.append(counter)
+        Args:
+            path: Path to store the result of the run, for future uses.
+                To reload the result of a previous run, use `LoadExtractor`.
+            compiler: A graph compiler, in charge of converting graphs to Pulser Sequences,
+                the format that can be executed on a quantum device.
+            device: A device to use. For general experiments, the default
+                device `AnalogDevice` is a perfectly reasonable choice.
+        """
+
+        def __init__(
+            self,
+            compiler: BaseGraphCompiler[GraphType],
+            device: Device = pl.devices.AnalogDevice,
+            path: Path | None = None,
+        ):
+            super().__init__(device=device, compiler=compiler, path=path)
+            self.graphs: list[BaseGraph]
+            self.device = device
+
+        def run(self, max_qubits: int = 10, dt: int = 10) -> BaseExtracted:
+            """
+            Run the compiled graphs.
+
+            As emulating a quantum device is slow consumes resources and time exponential in the
+            number of qubits, for the sake of performance, we limit the number of qubits in the execution
+            of this extractor.
+
+            Args:
+                max_qubits: Skip any sequence that require strictly more than `max_qubits`. Defaults to 8.
+                dt: The duration of the simulation step, in us. Defaults to 10.
+
+            Returns:
+                Processed data for all the sequences that were executed.
+            """
+            if len(self.sequences) == 0:
+                logger.warning("No sequences to run, did you forget to call compile()?")
+                return SyncExtracted(raw_data=[], targets=[], sequences=[], states=[])
+
+            backend = emu_mps.MPSBackend()
+            raw_data = []
+            targets: list[int] = []
+            sequences = []
+            states = []
+            for compiled in self.sequences:
+                qubits_used = len(compiled.sequence.qubit_info)
+                if qubits_used > max_qubits:
+                    logger.info(
+                        "Graph %s exceeds the qubit limit specified in EmuMPSExtractor (%s > %s), skipping",
+                        id,
+                        qubits_used,
+                        max_qubits,
+                    )
+                    continue
+                logger.debug("Executing compiled graph # %s", id)
+
+                # Configure observable.
+                cutoff_duration = int(ceil(compiled.sequence.get_duration() / dt) * dt)
+                observable = emu_mps.BitStrings(evaluation_times={cutoff_duration})
+                config = emu_mps.MPSConfig(observables=[observable], dt=dt)
+                counter: dict[str, Any] = backend.run(compiled.sequence, config)[observable.name][
+                    cutoff_duration
+                ]
+                logger.debug("Execution of compiled graph # %s complete", id)
+                raw_data.append(compiled.graph)
+                if compiled.graph.target is not None:
+                    targets.append(compiled.graph.target)
+                sequences.append(compiled.sequence)
+                states.append(counter)
 
-        logger.debug("Emulation step complete, %s compiled graphs executed", len(raw_data))
+            logger.debug("Emulation step complete, %s compiled graphs executed", len(raw_data))
 
-        result = SyncExtracted(
-            raw_data=raw_data, targets=targets, sequences=sequences, states=states
-        )
-        logger.debug("Emulation step complete, %s compiled graphs executed", len(raw_data))
-        if self.path is not None:
-            result.save_dataset(self.path)
-        return result
+            result = SyncExtracted(
+                raw_data=raw_data, targets=targets, sequences=sequences, states=states
+            )
+            logger.debug("Emulation step complete, %s compiled graphs executed", len(raw_data))
+            if self.path is not None:
+                result.save_dataset(self.path)
+            return result
 
 
 # How many seconds to sleep while waiting for the results from the cloud.
@@ -781,7 +786,7 @@ def run(
         """
         Launch the extraction.
         """
-        raise Exception("Not implemented")
+        raise NotImplementedError()
 
     def _run(
         self,
 
@@ -0,0 +1,21 @@
+from time import sleep
+import torch_geometric.datasets as pyg_dataset
+
+
+if __name__ == "__main__":
+    # Attempt to force download of the PTC_FM dataset, which has difficulties on some
+    # platforms. We suspect that it's a race condition somewhere in pytorch geometric.
+    exn = None
+    for i in range(0, 10):
+        sleep(i * i)
+        try:
+            print(f"Attempt {i+1} to download dataset")
+            pyg_dataset.TUDataset(root="dataset", name="PTC_FM")
+            print(f"Attempt {i+1} to download dataset succeeded")
+            exn = None
+            break
+        except FileNotFoundError as e:
+            print(f"Attempt {i+1} to download failed: {e}")
+            exn = e
+    if exn is not None:
+        raise exn
Original file line number	Diff line number	Diff line change
`@@ -74,6 +74,7 @@ dependencies = [`
`74`	`74`	`]`
`75`	`75`
`76`	`76`	`[tool.hatch.envs.default.scripts]`
	`77`	`+before_tests = "python scripts/before_tests.py"`
`77`	`78`	`test = "pytest -n auto --cov-report=term-missing --cov-config=pyproject.toml --cov=qek --cov=tests --markdown-docs {args}"`
`78`	`79`	`test_readme = "pytest --markdown-docs README.md"`
`79`	`80`