pasqal-io
diff --git a/‎examples/tutorial.ipynb‎
Lines changed: 87 additions & 47 deletions b/‎examples/tutorial.ipynb‎
Lines changed: 87 additions & 47 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 2 additions & 2 deletions b/‎pyproject.toml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎qek/data/dataset.py‎
Lines changed: 133 additions & 4 deletions b/‎qek/data/dataset.py‎
Lines changed: 133 additions & 4 deletions
diff --git a/‎qek/data/datatools.py‎
Lines changed: 8 additions & 57 deletions b/‎qek/data/datatools.py‎
Lines changed: 8 additions & 57 deletions
@@ -28,15 +28,14 @@ classifiers = [
 # always specify a version for each package
 # to maintain consistency
 dependencies = [
-  "matplotlib",
   "networkx",
   "numpy",
   "pulser==1.1.1",
   "rdkit",
   "scikit-learn",
   "torch",
   "torch_geometric",
-  "tqdm",
+  "matplotlib",
 ]
 
 [tool.hatch.metadata]
@@ -46,6 +45,7 @@ allow-ambiguous-features = true
 [project.optional-dependencies]
 extras = [
   "jupyter",
+  "tqdm",
 ]
 
 [project.urls]
 
@@ -1,13 +1,13 @@
 from __future__ import annotations
 
+import collections
 import json
-from dataclasses import dataclass
+import matplotlib
 
 import numpy as np
 import pulser as pl
 
 
-@dataclass
 class ProcessedData:
     """
     Data on a single graph obtained from the Quantum Device.
@@ -37,10 +37,14 @@ class ProcessedData:
 
     sequence: pl.Sequence
     state_dict: dict[str, int]
+    _dist_excitation: np.ndarray
     target: int
 
-    def __post_init__(self) -> None:
-        self.state_dict = _convert_np_int64_to_int(data=self.state_dict)
+    def __init__(self, sequence: pl.Sequence, state_dict: dict[str, np.int64], target: int):
+        """
+        Build a `ProcessedData` from a sequence, its measured counts and a target.
+
+        Arguments:
+            sequence: Stored unchanged as `self.sequence`.
+            state_dict: Mapping from bitstring to count. Numpy integer counts
+                are converted to plain `int` before being stored.
+            target: Stored unchanged as `self.target`.
+        """
+        counts = _convert_np_int64_to_int(data=state_dict)
+        self.sequence = sequence
+        self.target = target
+        self.state_dict = counts
+        # Computed once here from the converted counts; the `dist_excitation`
+        # method serves copies of this cached array.
+        self._dist_excitation = dist_excitation(counts)
 
     def save_to_file(self, file_path: str) -> None:
         with open(file_path, "w") as file:
@@ -61,6 +65,20 @@ def load_from_file(cls, file_path: str) -> "ProcessedData":
                 target=tmp_data["target"],
             )
 
+    def dist_excitation(self, size: int | None = None) -> np.ndarray:
+        """
+        Return the distribution of excitations for this graph.
+
+        The result is always a new array, so callers may modify it without
+        affecting the distribution stored on this instance.
+
+        Arguments:
+            size: If specified, truncate or zero-pad the array to this
+                size. If `None`, return the distribution at its own size.
+        """
+        stored = self._dist_excitation
+        current = len(stored)
+        if size is None or size == current:
+            return stored.copy()
+        if size > current:
+            # Extend on the right with zeros up to the requested size.
+            return np.pad(stored, (0, size - current))
+        # Requested size is smaller: keep only the leading entries.
+        return np.resize(stored, size)
+
     def draw_sequence(self) -> None:
         """
         Draw the sequence on screen
@@ -73,8 +91,119 @@ def draw_register(self) -> None:
         """
         self.sequence.register.draw(blockade_radius=self.sequence.device.min_atom_distance + 0.01)
 
+    def draw_excitation(self) -> None:
+        """
+        Draw a histogram of the excitation levels.
+
+        One bar is drawn per entry of the stored excitation distribution; the
+        bar is labelled with the entry's index and its height is the entry's
+        value.
+        """
+        # `import matplotlib` alone does not load the `pyplot` submodule, so
+        # import it explicitly rather than relying on some other package
+        # having imported it as a side effect.
+        import matplotlib.pyplot as plt
+
+        levels = [str(level) for level in range(len(self._dist_excitation))]
+        plt.bar(levels, self._dist_excitation)
+
+
+def dist_excitation(state_dict: dict[str, int], size: int | None = None) -> np.ndarray:
+    """
+    Calculates the distribution of excitation levels from a dictionary of
+    bitstrings to their respective counts.
+
+    Args:
+        state_dict (dict[str, int]): Mapping from a bitstring to the number
+            of samples in which this bitstring was observed.
+        size (int | None): If specified, the highest excitation level kept in
+            the output; samples with more than `size` excitations are left
+            out of the histogram (but still count towards the total used for
+            normalization). Otherwise, use the length of the bitstrings, so
+            that every level is kept.
+
+    Returns:
+        A histogram of excitation levels with `size + 1` entries, or an empty
+        array if `state_dict` is empty.
+        - index: an excitation level (i.e. a number of `1` bits in a
+            bitstring)
+        - value: count of samples with this excitation level, divided by the
+            total count of all samples in `state_dict`.
+    """
+
+    if not state_dict:
+        return np.zeros(0, dtype=float)
+
+    if size is None:
+        # If size is not specified, it's the length of bitstrings.
+        # We assume that all bitstrings in `state_dict` have the same length,
+        # and we have just checked that `state_dict` is not empty, so pick
+        # the length of the first bitstring.
+        size = len(next(iter(state_dict)))
+
+    # Make mypy realize that `size` is now always an `int`.
+    assert isinstance(size, int)
+
+    count_occupation: dict[int, int] = collections.defaultdict(int)
+    total = 0.0
+    for bitstring, number in state_dict.items():
+        count_occupation[bitstring.count("1")] += number
+        total += number
+
+    result = np.zeros(size + 1, dtype=float)
+    if total == 0:
+        # Every count is zero: there is nothing to normalize, and dividing
+        # would fail, so report an all-zero histogram.
+        return result
+
+    for occupation, count in count_occupation.items():
+        # `result` has indices 0..size inclusive, so the fully excited level
+        # (`occupation == size`) must be kept as well.
+        if occupation <= size:
+            result[occupation] = count / total
+
+    return result
+
 
 def _convert_np_int64_to_int(data: dict[str, np.int64]) -> dict[str, int]:
+    """
+    Utility function: convert the values of a dict from `np.int64` to `int`,
+    for serialization purposes.
+
+    Every value that is an instance of `np.integer` is converted with `int()`;
+    any other value is kept as it is. The keys are unchanged and a new dict is
+    returned, `data` itself is not modified.
+    """
     return {
         key: (int(value) if isinstance(value, np.integer) else value) for key, value in data.items()
     }
+
+
+def save_dataset(dataset: list[ProcessedData], file_path: str) -> None:
+    """Saves a dataset to a JSON file.
+
+    Args:
+        dataset (list[ProcessedData]): The dataset to be saved, containing
+            ProcessedData instances.
+        file_path (str): The path where the dataset will be saved as a JSON
+            file.
+
+    Note:
+        The data is stored in a format suitable for loading with load_dataset:
+        a JSON list with one object per instance, holding its "sequence"
+        (as returned by `to_abstract_repr()`), "state_dict" and "target".
+
+    Returns:
+        None
+    """
+    data = [
+        {
+            "sequence": instance.sequence.to_abstract_repr(),
+            "state_dict": instance.state_dict,
+            "target": instance.target,
+        }
+        for instance in dataset
+    ]
+    # Serialize completely before touching the file: opening with "w"
+    # truncates it, so a failure while building or encoding the data would
+    # otherwise leave an empty or partially written file behind.
+    serialized = json.dumps(data)
+    with open(file_path, "w") as file:
+        file.write(serialized)
+
+
+def load_dataset(file_path: str) -> list[ProcessedData]:
+    """Loads a dataset from a JSON file.
+
+    Args:
+        file_path (str): The path to the JSON file containing the dataset.
+
+    Note:
+        The file is expected to hold a JSON list in which every item has
+            the keys "sequence", "state_dict" and "target", i.e. the format
+            written by save_dataset.
+
+    Returns:
+        A list of ProcessedData instances, one per item of the JSON list and
+            in the same order.
+    """
+    with open(file_path) as file:
+        entries = json.load(file)
+        dataset: list[ProcessedData] = []
+        for entry in entries:
+            # Rebuild the pulser sequence from its abstract representation.
+            sequence = pl.Sequence.from_abstract_repr(entry["sequence"])
+            dataset.append(
+                ProcessedData(
+                    sequence=sequence,
+                    state_dict=entry["state_dict"],
+                    target=entry["target"],
+                )
+            )
+        return dataset
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import json
 from typing import Final
 
 import networkx as nx
@@ -13,7 +12,6 @@
 import torch_geometric.utils as pyg_utils
 from rdkit.Chem import AllChem
 
-from qek.data.dataset import ProcessedData
 from qek.utils import graph_to_mol
 
 
@@ -44,59 +42,6 @@ def split_train_test(
     return train, val
 
 
-def save_dataset(dataset: list[ProcessedData], file_path: str) -> None:
-    """Saves a dataset to a JSON file.
-
-    Args:
-        dataset (list[ProcessedData]): The dataset to be saved, containing
-            RegisterData instances.
-        file_path (str): The path where the dataset will be saved as a JSON
-            file.
-
-    Note:
-        The data is stored in a format suitable for loading with load_dataset.
-
-    Returns:
-        None
-    """
-    with open(file_path, "w") as file:
-        data = [
-            {
-                "sequence": instance.sequence.to_abstract_repr(),
-                "state_dict": instance.state_dict,
-                "target": instance.target,
-            }
-            for instance in dataset
-        ]
-        json.dump(data, file)
-
-
-def load_dataset(file_path: str) -> list[ProcessedData]:
-    """Loads a dataset from a JSON file.
-
-    Args:
-        file_path (str): The path to the JSON file containing the dataset.
-
-    Note:
-        The data is loaded in the format that was used when saving with
-            save_dataset.
-
-    Returns:
-        A list of ProcessedData instances, corresponding to the data stored in
-            the JSON file.
-    """
-    with open(file_path) as file:
-        data = json.load(file)
-        return [
-            ProcessedData(
-                sequence=pl.Sequence.from_abstract_repr(item["sequence"]),
-                state_dict=item["state_dict"],
-                target=item["target"],
-            )
-            for item in data
-        ]
-
-
 EPSILON_DISTANCE_UM = 0.01
 
 
@@ -157,8 +102,10 @@ def is_disk_graph(self, radius: float) -> bool:
         if len(self.nx_graph) == 0 or not nx.is_connected(self.nx_graph):
             return False
 
-        # Check the distances between all pairs of nodes.
         pos = self.pyg.pos
+        assert pos is not None
+
+        # Check the distances between all pairs of nodes.
         for u, v in nx.non_edges(self.nx_graph):
             distance_um = np.linalg.norm(np.array(pos[u]) - np.array(pos[v]))
             if distance_um <= radius:
@@ -204,7 +151,9 @@ def is_embeddable(self) -> bool:
             return False
 
         # Check the distance from the center
+
         pos = self.pyg.pos
+        assert pos is not None
         distance_from_center = np.linalg.norm(pos, ord=2, axis=-1)
         if any(distance_from_center > self.device.max_radial_distance):
             return False
@@ -228,7 +177,9 @@ def compute_register(self) -> pl.Register:
         Returns:
             pulser.Register: register
         """
-        return pl.Register.from_coordinates(coords=self.pyg.pos)
+        pos = self.pyg.pos
+        assert pos is not None
+        return pl.Register.from_coordinates(coords=pos)
 
     def compute_sequence(self) -> pl.Sequence:
         """