Skip to content

Commit 031b99c

Browse files
YangBSchilperoortSarahAlidoost
authored
Added support to select location and time range (#50)
* get forcing file from location * check location format for bbox * get forcing file with given site name * add start time and end time * update forcing_filename in all relevant functions * slice forcing file based on the given time range * replace NumberOfTimeSteps with start/end time * fix save module * update run model notebook * fix save module * fix typo * fix old tests * fix linter * add tests for locations and time check * add test for get forcing file * test to check minutes * add doc string * fix linter * Update PyStemmusScope/forcing_io.py Co-authored-by: Bart Schilperoort <b.schilperoort@esciencecenter.nl> * revise regex and use parametrize for test * silent linter * Refactored loc parsing with re.findall. Support +/- numbers * Added support for "NA" start/end time input. * Pin prospector version * Fix linter/sonarcloud/pyroma issues * Apply Sarah's suggestions Co-authored-by: SarahAlidoost <55081872+SarahAlidoost@users.noreply.github.com> * Added "validate config" function to config_io * Refactoring to remove "forcing_filename" kwarg * Last changes based on Sarah's review, Updated notebook * Apply Sarah's suggestions from code review Co-authored-by: SarahAlidoost <55081872+SarahAlidoost@users.noreply.github.com> Co-authored-by: Bart Schilperoort <b.schilperoort@esciencecenter.nl> Co-authored-by: Bart Schilperoort <b.schilperoort@gmail.com> Co-authored-by: SarahAlidoost <55081872+SarahAlidoost@users.noreply.github.com>
1 parent 7e008ea commit 031b99c

18 files changed

Lines changed: 459 additions & 175 deletions

PyStemmusScope/config_io.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import os
88
import shutil
99
import time
10+
from pathlib import Path
11+
from typing import Union
1012
from . import utils
1113

1214

@@ -29,9 +31,24 @@ def read_config(path_to_config_file):
2931
(key, val) = line.split("=")
3032
config[key] = val.rstrip('\n')
3133

34+
validate_config(config)
35+
3236
return config
3337

34-
def create_io_dir(forcing_filename, config):
38+
39+
def validate_config(config: Union[Path, dict]):
    """Validate the location and time entries of a model configuration.

    Args:
        config: Either the configuration dictionary itself, or a Path to a
            configuration file, which is first loaded with `read_config`.

    Raises:
        ValueError: If `config` is neither a Path nor a dict. The location and
            time format checks in `utils` presumably raise on malformed values
            as well - confirm against `utils`.
        KeyError: If one of the required keys ("Location", "StartTime",
            "EndTime") is missing from the configuration.
    """
    if isinstance(config, Path):
        config = read_config(config)
    elif not isinstance(config, dict):
        raise ValueError("The input to validate_config should be either a Path or dict"
                         f" object, but a {type(config)} object was passed.")

    # Report all missing keys at once with a readable message, instead of letting
    # the first failing dictionary lookup surface as a bare KeyError.
    required_keys = ("Location", "StartTime", "EndTime")
    missing_keys = [key for key in required_keys if key not in config]
    if missing_keys:
        raise KeyError(
            "The configuration is missing the required key(s): "
            f"{', '.join(missing_keys)}."
        )

    # TODO: add check if the input data directories/file exist, and return clear error to user.
    # Only the validation performed by these helpers matters here; the parsed
    # location returned by check_location_fmt is not needed.
    utils.check_location_fmt(config["Location"])
    utils.check_time_fmt(config["StartTime"], config["EndTime"])
49+
50+
51+
def create_io_dir(config):
3552
"""Create input directory and copy required files.
3653
3754
Work flow executor to create work directory and all sub-directories.
@@ -41,7 +58,12 @@ def create_io_dir(forcing_filename, config):
4158
"""
4259
# get start time with the format Y-M-D-HM
4360
timestamp = time.strftime('%Y-%m-%d-%H%M')
44-
station_name = forcing_filename.split('_')[0]
61+
62+
loc, fmt = utils.check_location_fmt(config["Location"])
63+
if fmt == "site":
64+
station_name = loc
65+
else:
66+
raise NotImplementedError()
4567

4668
# create input directory
4769
work_dir = utils.to_absolute_path(config['WorkDir'])
@@ -60,11 +82,12 @@ def create_io_dir(forcing_filename, config):
6082
logger.info("%s", message)
6183

6284
# update config file for InputPath and OutputPath
63-
config_file_path = _update_config_file(forcing_filename, input_dir, output_dir,
85+
config_file_path = _update_config_file(input_dir, output_dir,
6486
config, station_name, timestamp)
6587

6688
return str(input_dir), str(output_dir), config_file_path
6789

90+
6891
def _copy_data(input_dir, config):
6992
"""Copy required data to the work directory.
7093
@@ -83,7 +106,8 @@ def _copy_data(input_dir, config):
83106
# copy input_data.xlsx
84107
shutil.copy(str(config["input_data"]), str(input_dir))
85108

86-
def _update_config_file(nc_file, input_dir, output_dir, config, station_name, timestamp): #pylint: disable=too-many-arguments
109+
110+
def _update_config_file(input_dir, output_dir, config, station_name, timestamp):
87111
"""Update config file for each station.
88112
89113
Create config file for each forcing/station under the work directory.
@@ -102,9 +126,7 @@ def _update_config_file(nc_file, input_dir, output_dir, config, station_name, ti
102126
config_file_path = input_dir / f"{station_name}_{timestamp}_config.txt"
103127
with open(config_file_path, 'w', encoding="utf8") as f:
104128
for key, value in config.items():
105-
if key == "ForcingFileName":
106-
update_entry = f"{key}={nc_file}\n"
107-
elif key == "InputPath":
129+
if key == "InputPath":
108130
update_entry = f"{key}={str(input_dir)}/\n"
109131
elif key == "OutputPath":
110132
update_entry = f"{key}={str(output_dir)}/\n"

PyStemmusScope/forcing_io.py

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@ def _write_matlab_ascii(fname, data, ncols):
2222
np.savetxt(fname, data, multi_fmt)
2323

2424

25-
def read_forcing_data(forcing_file):
25+
def read_forcing_data(forcing_file, start_time, end_time):
2626
"""Reads the forcing data from the provided netCDF file, and applies the required
2727
unit conversions before returning the read data.
2828
2929
Args:
3030
forcing_file (Path): Path to the netCDF file containing the forcing data
31+
start_time (str): Start of time range in ISO format string e.g. 'YYYY-MM-DDTHH:MM:SS'.
32+
end_time (str): End of time range in ISO format string e.g. 'YYYY-MM-DDTHH:MM:SS'.
3133
3234
Returns:
3335
dict: Dictionary containing the different variables required by STEMMUS_SCOPE
@@ -38,6 +40,14 @@ def read_forcing_data(forcing_file):
3840
# remove the x and y coordinates from the data variables to make the numpy arrays 1D
3941
ds_forcing = ds_forcing.squeeze(['x', 'y'])
4042

43+
# check if time range is covered by forcing
44+
# if so, return a subset of forcing matching the given time range
45+
ds_forcing = _slice_forcing_file(
46+
ds_forcing,
47+
start_time,
48+
end_time,
49+
)
50+
4151
data = {}
4252

4353
# Expected time format is days (as floating point) since Jan 1st 00:00.
@@ -138,7 +148,7 @@ def write_meteo_file(data, fname):
138148
_write_matlab_ascii(fname, meteo_file_data, ncols=len(meteo_data_vars))
139149

140150

141-
def prepare_global_variables(data, input_path, config):
151+
def prepare_global_variables(data, input_path):
142152
"""Function to read and calculate global variables for STEMMUS_SCOPE from the
143153
forcing data. Data will be written to a Matlab binary file (v7.3), under the name
144154
'forcing_globals.mat' in the specified input directory.
@@ -149,10 +159,7 @@ def prepare_global_variables(data, input_path, config):
149159
input_path (Path): Path to which the file should be written to.
150160
config (dict): The PyStemmusScope configuration dictionary.
151161
"""
152-
if config['NumberOfTimeSteps'] != 'NA':
153-
total_duration = min(int(config['NumberOfTimeSteps']), data['total_timesteps'])
154-
else:
155-
total_duration = data['total_timesteps']
162+
total_duration = data['total_timesteps']
156163

157164
matfile_vars = ['latitude', 'longitude', 'elevation', 'IGBP_veg_long',
158165
'reference_height', 'canopy_height', 'DELT', 'sitename']
@@ -165,8 +172,11 @@ def prepare_global_variables(data, input_path, config):
165172

166173

167174
def prepare_forcing(config):
168-
"""Function to prepare the forcing files required by STEMMUS_SCOPE. The input
169-
directory should be taken from the model configuration file.
175+
"""Function to prepare the forcing files required by STEMMUS_SCOPE.
176+
177+
The input directory should be taken from the model configuration file.
178+
A subset of forcing file will be generated if the time range is covered
179+
by the time of existing forcing file.
170180
171181
Args:
172182
config (dict): The PyStemmusScope configuration dictionary.
@@ -175,8 +185,8 @@ def prepare_forcing(config):
175185
input_path = Path(config["InputPath"])
176186

177187
# Read the required data from the forcing file into a dictionary
178-
forcing_file = Path(config["ForcingPath"]) / config["ForcingFileName"]
179-
data = read_forcing_data(forcing_file)
188+
forcing_file = utils.get_forcing_file(config)
189+
data = read_forcing_data(forcing_file, config["StartTime"], config["EndTime"])
180190

181191
# Write the single-column ascii '.dat' files to the input directory
182192
write_dat_files(data, input_path)
@@ -189,4 +199,36 @@ def prepare_forcing(config):
189199

190200
# Write the remaining variables (without time dependency) to the matlab v7.3
191201
# file 'forcing_globals.mat'
192-
prepare_global_variables(data, input_path, config)
202+
prepare_global_variables(data, input_path)
203+
204+
205+
def _slice_forcing_file(ds_forcing, start_time, end_time):
    """Get the subset of the forcing dataset that matches the given time range.

    Also checks that the desired time range is covered by the forcing data. The
    first and last values of the "time" coordinate are taken as the covered range
    (this assumes the coordinate is sorted in ascending order - TODO confirm).

    Args:
        ds_forcing (xr.Dataset): Dataset of forcing file.
        start_time (str): Start of time range in ISO format string e.g. 'YYYY-MM-DDTHH:MM:SS'.
            If "NA", start time will be the first timestamp of the forcing input data.
        end_time (str): End of time range in ISO format string e.g. 'YYYY-MM-DDTHH:MM:SS'.
            If "NA", end time will be the last timestamp of the forcing input data.

    Returns:
        Forcing dataset, sliced with the start and end time.

    Raises:
        ValueError: If the start time lies before the first forcing timestamp, or
            the end time lies after the last forcing timestamp.
    """
    # "NA" means "no bound on this side"; a None bound leaves the slice open-ended.
    start_time = None if start_time == "NA" else np.datetime64(start_time)
    end_time = None if end_time == "NA" else np.datetime64(end_time)

    start_time_forcing = ds_forcing.coords["time"].values[0]
    end_time_forcing = ds_forcing.coords["time"].values[-1]

    # Test for None explicitly, so that whether a bound gets checked never depends
    # on the truth value of a numpy datetime64 scalar.
    start_time_valid = start_time is None or start_time >= start_time_forcing
    end_time_valid = end_time is None or end_time <= end_time_forcing
    if not (start_time_valid and end_time_valid):
        raise ValueError(
            f"Given time range (from {start_time} to {end_time}) cannot be covered by "
            f"the time range of forcing file (from {start_time_forcing} to "
            f"{end_time_forcing}).")

    return ds_forcing.sel(time=slice(start_time, end_time))

PyStemmusScope/save.py

Lines changed: 10 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,16 @@
1515
`STEMMUS_SCOPE_Processing repository <https://github.com/EcoExtreML/STEMMUS_SCOPE_Processing>`_
1616
1717
"""
18-
1918
import logging
2019
from pathlib import Path
2120
from typing import Dict
2221
from typing import List
23-
from typing import Union
2422
import numpy as np
2523
import pandas as pd
2624
import xarray as xr
2725
from PyStemmusScope import config_io
2826
from PyStemmusScope import forcing_io
27+
from PyStemmusScope import utils
2928
from . import variable_conversion as vc
3029

3130

@@ -64,24 +63,6 @@ def _select_forcing_variables(forcing_dict: Dict, forcing_var: str, alma_var: st
6463
return data_array
6564

6665

67-
def _shorten_data_array(data: Union[xr.DataArray, xr.Dataset], time_steps: str)-> Union[xr.DataArray, xr.Dataset]:
68-
"""Shorten data based on time_steps.
69-
70-
Args:
71-
data(xr.DataArray or xr.Dataset): data to be shortend.
72-
time_steps(str): number of time steps to shorten.
73-
74-
Returns:
75-
xr.DataArray or xr.Dataset: subset of data with the lenght of time equal to time_steps.
76-
"""
77-
78-
if time_steps != "NA":
79-
time_length = int(time_steps)
80-
data = data.isel(time=np.arange(0, time_length))
81-
82-
return data
83-
84-
8566
def _prepare_soil_data(file_name: str, var_name: str, time: List) -> xr.DataArray:
8667
"""Return simulated soil temperature and soil moisture as `xr.DataArray`.
8768
@@ -250,7 +231,7 @@ def _update_dataset_attrs_dims(dataset: xr.Dataset, forcing_dict: Dict) -> xr.Da
250231

251232
return dataset
252233

253-
234+
#pylint: disable=too-many-locals
254235
def to_netcdf(config_file: str, cf_filename: str) -> str:
255236
"""Save csv files generated by Stemmus_Scope model to a netcdf file using
256237
information provided by ALMA conventions.
@@ -262,9 +243,8 @@ def to_netcdf(config_file: str, cf_filename: str) -> str:
262243
Returns:
263244
str: path to the netCDF file written under the output directory.
264245
"""
265-
266-
# read config file
267246
config = config_io.read_config(config_file)
247+
forcing_filename = utils.get_forcing_file(config)
268248

269249
# list of required forcing variables, Alma_short_name: forcing_io_name, # model_name
270250
var_names = {
@@ -280,11 +260,13 @@ def to_netcdf(config_file: str, cf_filename: str) -> str:
280260

281261
# read forcing file into a dict
282262
forcing_dict = forcing_io.read_forcing_data(
283-
Path(config["ForcingPath"]) / config["ForcingFileName"]
263+
forcing_filename,
264+
config["StartTime"],
265+
config["EndTime"],
284266
)
285267

286268
# get time info
287-
time = _shorten_data_array(forcing_dict["time"], config["NumberOfTimeSteps"])
269+
time = forcing_dict["time"].values
288270

289271
# read convention file
290272
conventions = pd.read_csv(cf_filename)
@@ -298,14 +280,13 @@ def to_netcdf(config_file: str, cf_filename: str) -> str:
298280
if alma_name in var_names:
299281
# select data
300282
data_array = _select_forcing_variables(forcing_dict, var_names[alma_name], alma_name)
301-
data_array = _shorten_data_array(data_array, config["NumberOfTimeSteps"])
302283

303284
# create data array
304285
elif alma_name in {"SoilTemp", "SoilMoist"}:
305-
data_array = _prepare_soil_data(file_name, alma_name, time.values)
286+
data_array = _prepare_soil_data(file_name, alma_name, time)
306287
else:
307288
data_array = _prepare_simulated_data(
308-
file_name, df["short_name_STEMMUS-SCOPE"], alma_name, time.values
289+
file_name, df["short_name_STEMMUS-SCOPE"], alma_name, time
309290
)
310291

311292
# update attributes of array
@@ -328,6 +309,6 @@ def to_netcdf(config_file: str, cf_filename: str) -> str:
328309

329310
# # save to nc file
330311
nc_filename = Path(config["OutputPath"]) / f"{Path(config['OutputPath']).stem}_STEMMUS_SCOPE.nc"
331-
332312
dataset.to_netcdf(path= nc_filename)
313+
333314
return str(nc_filename)

PyStemmusScope/soil_io.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,10 +214,10 @@ def prepare_soil_data(config):
214214
config (dict): The PyStemmusScope configuration dictionary.
215215
"""
216216

217-
forcing_file = Path(config["ForcingPath"]) / config["ForcingFileName"]
217+
forcing_file = utils.get_forcing_file(config)
218218

219219
# Data missing at ID-Pag site. See github.com/EcoExtreML/STEMMUS_SCOPE/issues/77
220-
if config["ForcingFileName"].startswith("ID"):
220+
if config["Location"].startswith("ID"):
221221
lat, lon = -1., 112.
222222
else:
223223
lat, lon = _retrieve_latlon(forcing_file)

PyStemmusScope/stemmus_scope.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import subprocess
77
from pathlib import Path
88
from typing import Dict
9+
from typing import Tuple
910
from . import config_io
1011
from . import forcing_io
1112
from . import soil_io
@@ -111,9 +112,10 @@ def __init__(self, config_file: str, model_src_path: str, interpreter: str = Non
111112
def setup(
112113
self,
113114
WorkDir: str = None,
114-
ForcingFileName: str = None,
115-
NumberOfTimeSteps: str = None,
116-
) -> str:
115+
Location: str = None,
116+
StartTime: str = None,
117+
EndTime: str = None,
118+
) -> Tuple[str, str]:
117119
"""Configure model run.
118120
119121
1. Creates config file and input/output directories based on the config template.
@@ -122,34 +124,35 @@ def setup(
122124
Args:
123125
WorkDir: path to a directory where input/output directories should be created.
124126
Location: location of the model run (e.g. a site name), used to select the forcing file. Forcing file should be in netcdf format.
125-
NumberOfTimeSteps: total number of time steps in which model runs. It can be
126-
`NA` or a number. Example `10` runs the model for 10 time steps.
127+
StartTime: Start time of the model run. It must be in
128+
ISO format (e.g. 2007-01-01T00:00).
129+
EndTime: End time of the model run. It must be in ISO format (e.g. 2007-01-01T00:00).
127130
128131
Returns:
129-
Paths to config file and input/output directories
132+
Path to the config file
130133
"""
131134
# update config template if needed
132135
if WorkDir:
133136
self._config["WorkDir"] = WorkDir
134137

135-
if ForcingFileName:
136-
self._config["ForcingFileName"] = ForcingFileName
138+
if Location:
139+
self._config["Location"] = Location
137140

138-
if NumberOfTimeSteps:
139-
self._config["NumberOfTimeSteps"] = NumberOfTimeSteps
141+
if StartTime:
142+
self._config["StartTime"] = StartTime
143+
144+
if EndTime:
145+
self._config["EndTime"] = EndTime
146+
147+
# validate config *before* directory creation
148+
config_io.validate_config(self._config)
140149

141150
# create customized config file and input/output directories for model run
142-
_, _, self.cfg_file = config_io.create_io_dir(
143-
self._config["ForcingFileName"], self._config
144-
)
151+
_, _, self.cfg_file = config_io.create_io_dir(self._config)
145152

146-
# read the run config file
147153
self._config = config_io.read_config(self.cfg_file)
148154

149-
# prepare forcing data
150155
forcing_io.prepare_forcing(self._config)
151-
152-
# prepare soil data
153156
soil_io.prepare_soil_data(self._config)
154157

155158
return str(self.cfg_file)
@@ -192,7 +195,6 @@ def run(self) -> str:
192195
result = _run_sub_process(args, self.model_src)
193196
return result
194197

195-
196198
@property
197199
def config(self) -> Dict:
198200
"""Return the configurations for this model."""

0 commit comments

Comments
 (0)