|
1 | 1 | import io |
2 | 2 | from itertools import cycle |
3 | | -from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union |
| 3 | +from typing import ( |
| 4 | + Any, |
| 5 | + Callable, |
| 6 | + Dict, |
| 7 | + List, |
| 8 | + Mapping, |
| 9 | + Optional, |
| 10 | + Sequence, |
| 11 | + Tuple, |
| 12 | + Union, |
| 13 | + cast, |
| 14 | +) |
4 | 15 | import warnings |
5 | | -from collections import defaultdict |
6 | 16 |
|
7 | 17 | import ipyleaflet # type: ignore |
8 | 18 | import numpy as np |
@@ -41,11 +51,21 @@ def __init__( |
41 | 51 | # data resources, and so column names and dtype need to be |
42 | 52 | # passed in as parameters. |
43 | 53 | self._aim_metadata_columns: Optional[List[str]] = None |
44 | | - # `dtype` of `dict[str, Any]` is incompatible with `read_csv` |
45 | | - self._aim_metadata_dtype: defaultdict[str, Any] = defaultdict() |
| 54 | + # `dtype` of `dict[str, Any]` is incompatible with `pd.read_csv` |
| 55 | + self._aim_metadata_dtype: Mapping[ |
| 56 | + str, Union[str, type, np.dtype, pd.api.extensions.ExtensionDtype] |
| 57 | + ] = {} |
46 | 58 | if isinstance(aim_metadata_dtype, Mapping): |
47 | 59 | self._aim_metadata_columns = list(aim_metadata_dtype.keys()) |
48 | | - self._aim_metadata_dtype.update(aim_metadata_dtype) |
| 60 | + self._aim_metadata_dtype.update( |
| 61 | + cast( |
| 62 | + Mapping[ |
| 63 | + str, |
| 64 | + Union[str, type, np.dtype, pd.api.extensions.ExtensionDtype], |
| 65 | + ], |
| 66 | + aim_metadata_dtype, |
| 67 | + ) |
| 68 | + ) |
49 | 69 | self._aim_metadata_dtype["sample_id"] = "object" |
50 | 70 |
|
51 | 71 | # Set up taxon colors. |
@@ -143,9 +163,14 @@ def _parse_general_metadata( |
143 | 163 | "longitude": "float64", |
144 | 164 | "sex_call": "object", |
145 | 165 | } |
146 | | - # `dtype` of `dict[str, str]` is incompatible with `read_csv` |
147 | | - dtype = defaultdict(str, dtype) |
148 | | - df = pd.read_csv(io.BytesIO(data), dtype=dtype, na_values="") |
| 166 | + # `dtype` of `dict[str, str]` is incompatible with `pd.read_csv` |
| 167 | + dtype_mapping = cast( |
| 168 | + Mapping[ |
| 169 | + str, Union[str, type, np.dtype, pd.api.extensions.ExtensionDtype] |
| 170 | + ], |
| 171 | + dtype, |
| 172 | + ) |
| 173 | + df = pd.read_csv(io.BytesIO(data), dtype=dtype_mapping, na_values="") |
149 | 174 |
|
150 | 175 | # Ensure all column names are lower case. |
151 | 176 | df.columns = [c.lower() for c in df.columns] # type: ignore |
@@ -349,12 +374,15 @@ def _parse_surveillance_flags( |
349 | 374 | # Specify the expected data type for each column. |
350 | 375 | # Note: "bool" is not nullable and does not support `NaN`, which is required when missing data. |
351 | 376 | # Otherwise `NaN` will be mis-translated to `True` when the dtype is applied to the DataFrame. |
352 | | - dtype = { |
| 377 | + dtype_dict = { |
353 | 378 | "sample_id": "object", |
354 | 379 | "is_surveillance": "boolean", |
355 | 380 | } |
356 | 381 | # `dtype` of `dict[str, str]` is incompatible with `read_csv` |
357 | | - dtype = defaultdict(str, dtype) |
| 382 | + dtype = cast( |
| 383 | + Mapping[str, Union[str, type, np.dtype, pd.api.extensions.ExtensionDtype]], |
| 384 | + dtype_dict, |
| 385 | + ) |
358 | 386 |
|
359 | 387 | if isinstance(data, bytes): |
360 | 388 | # Read the CSV data. |
|
0 commit comments