Skip to content

Commit fa998c6

Browse files
committed
fix dask PerformanceWarning: Slicing is producing a large chunk
1 parent 5b9585c commit fa998c6

7 files changed

Lines changed: 21 additions & 0 deletions

File tree

PyStemmusScope/global_data/cams_co2.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
"""Module for loading and validating the CAMS CO2 dataset."""
22
from pathlib import Path
33
from typing import Union
4+
import dask
45
import numpy as np
56
import xarray as xr
67
from PyStemmusScope.global_data import utils
78

9+
# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
10+
dask.config.set(**{'array.slicing.split_large_chunks': True})
811

912
RESOLUTION_CAMS = 0.75 # Resolution of the dataset in degrees
1013

PyStemmusScope/global_data/cci_landcover.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
"""Module for loading and validating the ESA CCI land cover dataset."""
22
from pathlib import Path
33
from typing import Union
4+
import dask
45
import numpy as np
56
import pandas as pd
67
import xarray as xr
78
from PyStemmusScope.global_data import utils
89

10+
# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
11+
dask.config.set(**{'array.slicing.split_large_chunks': True})
912

1013
RESOLUTION_CCI = 1 / 360 # Resolution of the dataset in degrees
1114
FILEPATH_LANDCOVER_TABLE = Path(__file__).parent / "assets" / "lccs_to_igbp_table.csv"

PyStemmusScope/global_data/copernicus_lai.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
"""Module for loading and validating the Copernicus LAI dataset."""
22
from pathlib import Path
33
from typing import Union
4+
import dask
45
import numpy as np
56
import xarray as xr
67
from PyStemmusScope.global_data import utils
78

9+
# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
10+
dask.config.set(**{'array.slicing.split_large_chunks': True})
811

912
RESOLUTION_LAI = 1 / 112 # Resolution of the LAI dataset in degrees
1013

PyStemmusScope/global_data/era5.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22
from pathlib import Path
33
from typing import Literal
44
from typing import Union
5+
import dask
56
import numpy as np
67
import PyStemmusScope.variable_conversion as vc
78
import xarray as xr
89
from PyStemmusScope.global_data import utils
910

11+
# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
12+
dask.config.set(**{'array.slicing.split_large_chunks': True})
1013

1114
ERA5_VARIABLES = ["u10", "v10", "mtpr", "sp", "ssrd", "strd"]
1215
ERA5LAND_VARIABLES = ["t2m", "d2m"]

PyStemmusScope/global_data/eth_canopy_height.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22
import gzip
33
from pathlib import Path
44
from typing import Union
5+
import dask
56
import xarray as xr
67
from PyStemmusScope.global_data import utils
78

9+
# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
10+
dask.config.set(**{'array.slicing.split_large_chunks': True})
811

912
MAX_DISTANCE = 0.01 # Maximum lat/lon distance to be considered nearby.
1013

PyStemmusScope/global_data/prism_dem.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22
import gzip
33
from pathlib import Path
44
from typing import Union
5+
import dask
56
import xarray as xr
67
from PyStemmusScope.global_data import utils
78

9+
# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
10+
dask.config.set(**{'array.slicing.split_large_chunks': True})
811

912
MAX_DISTANCE = 0.01 # Maximum lat/lon distance to be considered nearby. Approx 1km.
1013

PyStemmusScope/global_data/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
"""Utility functions for the global data IO."""
22
from typing import Union
3+
import dask
34
import numpy as np
45
import xarray as xr
56

7+
# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
8+
dask.config.set(**{'array.slicing.split_large_chunks': True})
69

710
class MissingDataError(Exception):
811
"""Error to be raised when requested data is missing."""

0 commit comments

Comments
 (0)