Skip to content

Commit e97a9f6

Browse files
Merge pull request #95 from EcoExtreML/fix_94
Setting chunks auto in open_mfdataset
2 parents 177b897 + bb3b7f9 commit e97a9f6

6 files changed

Lines changed: 25 additions & 21 deletions

File tree

PyStemmusScope/forcing_io.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Module for forcing data input and output operations."""
22
from pathlib import Path
3+
import dask
34
import hdf5storage
45
import numpy as np
56
import xarray as xr
@@ -114,7 +115,7 @@ def read_forcing_data_global( # noqa:PLR0913 (too many arguments)
114115
lon: float,
115116
start_time: np.datetime64,
116117
end_time: np.datetime64,
117-
timestep: str = "1800S",
118+
timestep: str = "1800s",
118119
) -> dict:
119120
"""Read forcing data for a certain location, based on global datasets.
120121
@@ -130,12 +131,14 @@ def read_forcing_data_global( # noqa:PLR0913 (too many arguments)
130131
Returns:
131132
Dictionary containing the forcing data.
132133
"""
133-
return global_data.collect_datasets(
134-
global_data_dir=global_data_dir,
135-
latlon=(lat, lon),
136-
time_range=(start_time, end_time),
137-
timestep=timestep,
138-
)
134+
# Have dask split oversized chunks produced by slicing instead of building them whole; see https://docs.dask.org/en/latest/array-slicing.html#efficiency
135+
with dask.config.set(**{"array.slicing.split_large_chunks": True}): # type: ignore
136+
return global_data.collect_datasets(
137+
global_data_dir=global_data_dir,
138+
latlon=(lat, lon),
139+
time_range=(start_time, end_time),
140+
timestep=timestep,
141+
)
139142

140143

141144
def write_dat_files(data: dict, input_dir: Path):

PyStemmusScope/global_data/cams_co2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def retrieve_co2_data(
2222
latlon: Latitude and longitude of the site.
2323
time_range: Start and end time of the model run.
2424
timestep: Desired timestep of the model, this is derived from the forcing data.
25-
In a pandas-timedelta compatible format. For example: "1800S"
25+
In a pandas-timedelta compatible format. For example: "1800s"
2626
2727
Returns:
2828
DataArray containing the CO2 at the specified site for the given time range.
@@ -55,12 +55,12 @@ def extract_cams_data(
5555
latlon: Latitude and longitude of the site.
5656
time_range: Start and end time of the model run.
5757
timestep: Desired timestep of the model, this is derived from the forcing data.
58-
In a pandas-timedelta compatible format. For example: "1800S"
58+
In a pandas-timedelta compatible format. For example: "1800s"
5959
6060
Returns:
6161
DataArray containing the CO2 concentration.
6262
"""
63-
ds = xr.open_mfdataset(files_cams)
63+
ds = xr.open_mfdataset(files_cams, chunks="auto")
6464

6565
check_cams_dataset(cams_data=ds, latlon=latlon, time_range=time_range)
6666

PyStemmusScope/global_data/cci_landcover.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def retrieve_landcover_data(
2424
latlon: Latitude and longitude of the site.
2525
time_range: Start and end time of the model run.
2626
timestep: Desired timestep of the model, this is derived from the forcing data.
27-
In a pandas-timedelta compatible format. For example: "1800S"
27+
In a pandas-timedelta compatible format. For example: "1800s"
2828
2929
Returns:
3030
Dictionary containing IGBP and LCCS land cover classes.
@@ -57,12 +57,12 @@ def extract_landcover_data(
5757
latlon: Latitude and longitude of the site.
5858
time_range: Start and end time of the model run.
5959
timestep: Desired timestep of the model, this is derived from the forcing data.
60-
In a pandas-timedelta compatible format. For example: "1800S"
60+
In a pandas-timedelta compatible format. For example: "1800s"
6161
6262
Returns:
6363
Dictionary containing IGBP and LCCS land cover classes.
6464
"""
65-
cci_dataset = xr.open_mfdataset(files_cci)
65+
cci_dataset = xr.open_mfdataset(files_cci, chunks="auto")
6666

6767
check_cci_dataset(cci_dataset, latlon, time_range) # Assert spatial/temporal bounds
6868

PyStemmusScope/global_data/copernicus_lai.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def retrieve_lai_data(
2222
latlon: Latitude and longitude of the site.
2323
time_range: Start and end time of the model run.
2424
timestep: Desired timestep of the model, this is derived from the forcing data.
25-
In a pandas-timedelta compatible format. For example: "1800S"
25+
In a pandas-timedelta compatible format. For example: "1800s"
2626
2727
Returns:
2828
DataArray containing the LAI of the specified site for the given time range.
@@ -55,12 +55,12 @@ def extract_lai_data(
5555
latlon: Latitude and longitude of the site.
5656
time_range: Start and end time of the model run.
5757
timestep: Desired timestep of the model, this is derived from the forcing data.
58-
In a pandas-timedelta compatible format. For example: "1800S"
58+
In a pandas-timedelta compatible format. For example: "1800s"
5959
6060
Returns:
6161
DataArray containing the LAI of the specified site for the given time range.
6262
"""
63-
ds = xr.open_mfdataset(files_lai)
63+
ds = xr.open_mfdataset(files_lai, chunks="auto")
6464

6565
check_lai_dataset(ds, latlon, time_range)
6666

PyStemmusScope/global_data/era5.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def retrieve_era5_data(
2727
latlon: Latitude and longitude of the site.
2828
time_range: Start and end time of the model run.
2929
timestep: Desired timestep of the model, this is derived from the forcing data.
30-
In a pandas-timedelta compatible format. For example: "1800S"
30+
In a pandas-timedelta compatible format. For example: "1800s"
3131
3232
Returns:
3333
Dictionary containing the variables extracted from ERA5.
@@ -68,7 +68,7 @@ def load_era5_data(
6868
latlon: Latitude and longitude of the site.
6969
time_range: Start and end time of the model run.
7070
timestep: Desired timestep of the model, this is derived from the forcing data.
71-
In a pandas-timedelta compatible format. For example: "1800S"
71+
In a pandas-timedelta compatible format. For example: "1800s"
7272
7373
Returns:
7474
Dictionary containing the variables extracted from ERA5.
@@ -117,14 +117,15 @@ def get_era5_dataset(
117117
name: Either "ERA5" or "ERA5-land".
118118
time_range: Start and end time of the model run.
119119
timestep: Desired timestep of the model, this is derived from the forcing data.
120-
In a pandas-timedelta compatible format. For example: "1800S"
120+
In a pandas-timedelta compatible format. For example: "1800s"
121121
122122
Returns:
123123
The ERA5 or ERA5-land dataset.
124124
"""
125125
tol = RESOLUTION_ERA5 if name == "ERA5" else RESOLUTION_ERA5LAND
126126

127-
ds = xr.open_mfdataset(files)
127+
ds = xr.open_mfdataset(files, chunks="auto")
128+
128129
check_era5_dataset(ds, name, latlon, time_range)
129130

130131
try:

PyStemmusScope/global_data/global_data_selection.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def collect_datasets(
2222
latlon: Latitude and longitude of the site.
2323
time_range: Start and end time of the model run.
2424
timestep: Desired timestep of the model, this is derived from the forcing data.
25-
In a pandas-timedelta compatible format. For example: "1800S"
25+
In a pandas-timedelta compatible format. For example: "1800s"
2626
2727
Returns:
2828
Dictionary containing the variables extracted from the global datasets.

0 commit comments

Comments
 (0)