
Commit 3d7daa4

Merge pull request #23 from EcoExtreML/write_outputs
Write csv files in netcdf format
2 parents ff97fd8 + de8f8a8 commit 3d7daa4

8 files changed

Lines changed: 846 additions & 117 deletions


PyStemmusScope/forcing_io.py

Lines changed: 9 additions & 3 deletions
@@ -57,11 +57,13 @@ def read_forcing_data(forcing_file):
     data['rh'] = ds_forcing['RH']
     data['vpd'] = ds_forcing['VPD']
     data['lai'] = vc.mask_data(ds_forcing['LAI'], min_value=0.01)
+
     # calculate ea, conversion from kPa to hPa:
     data['ea'] = vc.calculate_ea(data['t_air_celcius'], data['rh']) * 10

     # Load in non-timedependent variables
     data['sitename'] = forcing_file.name.split('_')[0]
+
     # Forcing data timestep size in seconds
     time_delta = (ds_forcing.time.values[1] -
                   ds_forcing.time.values[0]) / np.timedelta64(1, 's')
@@ -75,6 +77,10 @@ def read_forcing_data(forcing_file):
     data['reference_height'] = ds_forcing['reference_height'].values
     data['canopy_height'] = ds_forcing['canopy_height'].values

+    # these are needed by save.py
+    data['time'] = ds_forcing["time"]
+    data['Qair'] = ds_forcing['Qair']
+
     return data
@@ -142,10 +148,10 @@ def prepare_global_variables(data, input_path, config):
         input_path (Path): Path to which the file should be written to.
         config (dict): The PyStemmusScope configuration dictionary.
     """
-    if int(config['NumberOfTimeSteps']) > data['total_timesteps']:
-        total_duration = data['total_timesteps']
+    if config['NumberOfTimeSteps'] != 'NA':
+        total_duration = min(int(config['NumberOfTimeSteps']), data['total_timesteps'])
     else:
-        total_duration = int(config['NumberOfTimeSteps'])
+        total_duration = data['total_timesteps']

     matfile_vars = ['latitude', 'longitude', 'elevation', 'IGBP_veg_long',
                     'reference_height', 'canopy_height', 'DELT', 'sitename']
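
The updated branch treats 'NA' as "run the full forcing period" and otherwise caps the requested number of time steps at the records available in the forcing file. A minimal sketch of the resolved duration (the helper name and numbers are illustrative, not part of the commit):

def resolve_total_duration(number_of_time_steps: str, total_timesteps: int) -> int:
    # Mirrors the new branch: 'NA' means use all forcing records,
    # otherwise the request is capped at what the forcing file provides.
    if number_of_time_steps != 'NA':
        return min(int(number_of_time_steps), total_timesteps)
    return total_timesteps

print(resolve_total_duration('17520', total_timesteps=35040))   # 17520
print(resolve_total_duration('100000', total_timesteps=35040))  # 35040 (capped)
print(resolve_total_duration('NA', total_timesteps=35040))      # 35040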

PyStemmusScope/save.py

Lines changed: 332 additions & 0 deletions
@@ -0,0 +1,332 @@
+"""PyStemmusScope save module.
+
+Module designed to create a netcdf file following the `ALMA
+convention <https://web.lmd.jussieu.fr/~polcher/ALMA/>`_ from csv files following
+the `SCOPE format <https://scope-model.readthedocs.io/en/latest/outfiles.html>`_ in
+the output directory.
+
+The file
+`required_netcf_variables.csv <https://github.com/EcoExtreML/STEMMUS_SCOPE/blob/main/utils/csv_to_nc/required_netcf_variables.csv>`_
+lists the required variable names and their attributes based on the `ALMA+CF
+convention table <https://docs.google.com/spreadsheets/d/1CA3aTvI9piXqRqO-3MGrsH1vW-Sd87D8iZXHGrqK42o/edit#gid=2085475627>`_.
+
+Example:
+    See notebooks/run_model_in_notebook.ipynb in the
+    `STEMMUS_SCOPE_Processing repository <https://github.com/EcoExtreML/STEMMUS_SCOPE_Processing>`_
+
+"""
+
+import logging
+from pathlib import Path
+from typing import Dict
+from typing import List
+from typing import Union
+import numpy as np
+import pandas as pd
+import xarray as xr
+from PyStemmusScope import forcing_io
+from . import variable_conversion as vc
+
+
+logger = logging.getLogger(__name__)
+
+DATASET_ATTRS = {
+    'model': 'STEMMUS_SCOPE',
+    'institution': 'University of Twente; Northwest A&F University',
+    'contact': (
+        'Zhongbo Su, z.su@utwente.nl; '
+        'Yijian Zeng, y.zeng@utwente.nl; '
+        'Yunfei Wang, y.wang-3@utwente.nl'
+    ),
+    'license_type': 'CC BY 4.0',
+    'license_url': 'https://creativecommons.org/licenses/by/4.0/',
+}
+
+
+def _select_forcing_variables(forcing_dict: Dict, forcing_var: str, alma_var: str) -> xr.DataArray:
+    """Select the forcing variable needed by the ALMA convention.
+
+    Args:
+        forcing_dict(dict): a dictionary returned by `PyStemmusScope.forcing_io.read_forcing_data()`.
+        forcing_var(str): variable name in the forcing dataset.
+        alma_var(str): variable name in the ALMA convention.
+
+    Returns:
+        xr.DataArray: a data array whose variable name is alma_var.
+    """
+
+    # select the forcing variable
+    data_array = forcing_dict[forcing_var]
+
+    # rename the variable to alma_var
+    data_array = data_array.rename(alma_var)
+    return data_array
+
+
+def _shorten_data_array(data: Union[xr.DataArray, xr.Dataset], time_steps: str) -> Union[xr.DataArray, xr.Dataset]:
+    """Shorten data based on time_steps.
+
+    Args:
+        data(xr.DataArray or xr.Dataset): data to be shortened.
+        time_steps(str): number of time steps to keep.
+
+    Returns:
+        xr.DataArray or xr.Dataset: subset of data whose time dimension has a length equal to time_steps.
+    """
+
+    if time_steps != "NA":
+        time_length = int(time_steps)
+        data = data.isel(time=np.arange(0, time_length))
+
+    return data
+
+
+def _prepare_soil_data(file_name: str, var_name: str, time: List) -> xr.DataArray:
+    """Return simulated soil temperature and soil moisture as `xr.DataArray`.
+
+    Args:
+        file_name(str): csv file name generated by the Stemmus_Scope model.
+        var_name(str): variable name in the ALMA convention.
+        time(list): time values to be used for the time coordinates.
+
+    Returns:
+        xr.DataArray: a data array with two dimensions, time and z.
+    """
+    # the first two rows are depth and thickness
+    data = pd.read_csv(file_name, delimiter=",", header=[0, 1])
+
+    # skip the first row, which holds the units
+    data = data.iloc[1:]
+
+    # make sure it is float and not str
+    data = data.astype('float32')
+
+    # get depth, thickness info
+    depths = []
+    thicknesses = []
+    for depth, thickness in data.columns:
+        depths.append(np.float32(depth))
+        thicknesses.append(np.float32(thickness))
+
+    # soil layer metadata
+    soil_metadata = _create_soil_layer_metadata(thicknesses, depths)
+
+    # drop thickness
+    data = data.droplevel(level=1, axis=1)
+
+    if var_name == "SoilTemp":
+        # Celsius to Kelvin: K = 273.15 + C
+        data = data + 273.15
+
+    elif var_name == "SoilMoist":
+        # cm to m
+        thicknesses = np.array(thicknesses) / 100.0
+
+        for index in data.index:
+            # m3/m3 to kg/m2
+            volumetric_water_content = np.array(data.loc[index])
+            data.loc[index] = vc.soil_moisture(volumetric_water_content, thicknesses)
+
+    # reshape the data frame, it returns a Series
+    data = data.stack()
+
+    # set values
+    layers = range(1, data.index.levshape[1] + 1)
+    data.index.names = ["time", "z"]
+    data.index = data.index.set_levels([time, layers], level=["time", "z"])
+    data.name = var_name
+
+    # convert data to an xarray data array
+    data_array = data.to_xarray()
+
+    # add z attributes
+    data_array["z"].attrs = {
+        "long_name": "Soil layer",
+        "standard_name": "Soil layer",
+        "definition": soil_metadata,
+        "units": "-",
+    }
+
+    return data_array
+
+
+def _prepare_simulated_data(file_name: str, model_name: str, alma_name: str, time: List) -> xr.DataArray:
+    """Return a model simulation variable as `xr.DataArray`.
+
+    Args:
+        file_name(str): csv file name generated by the Stemmus_Scope model.
+        model_name(str): variable name used by the Stemmus_Scope model.
+        alma_name(str): variable name in the ALMA convention.
+        time(list): time values to be used for the time coordinates.
+
+    Returns:
+        xr.DataArray: a data array with one dimension, time.
+    """
+    # the first rows are names and units
+    data = pd.read_csv(file_name, delimiter=",")
+
+    # select the variable and skip the first row, which holds the unit
+    data = data[model_name].iloc[1:]
+
+    # set time values
+    data.index = time
+    data.index.names = ["time"]
+
+    # rename it to the ALMA name
+    data.name = alma_name
+
+    # make sure it is float and not str
+    data = data.astype('float32')
+
+    # convert the series to an xarray data array
+    return data.to_xarray()
+
+
+def _create_soil_layer_metadata(thicknesses: List[float], depths: List[float]) -> List[str]:
+    """Create soil layer metadata, e.g.:
+    layer_1: 0.0 - 1.0 cm
+    layer_2: 1.0 - 2.0 cm
+    layer_3: 2.0 - 3.0 cm
+    """
+
+    metadata = []
+    for index, (thickness, depth) in enumerate(zip(thicknesses, depths)):
+        metadata.append(f"layer_{index + 1}: {(depth - thickness)} - {depth} cm")
+
+    return metadata
+
+
+def _update_dataset_attrs_dims(dataset: xr.Dataset, forcing_dict: Dict) -> xr.Dataset:
+    """Update the dimensions of a dataset according to ALMA conventions.
+
+    Args:
+        dataset(xr.Dataset): a dataset with variables in ALMA conventions.
+
+    Returns:
+        xr.Dataset: the dataset with dimensions ("time", "x", "y").
+    """
+
+    # add x/y dims to the dataset
+    dataset_expanded = dataset.expand_dims(["x", "y"])
+
+    # change the order of dims
+    req_dims = ['time', 'x', 'y']
+    if any(dim not in dataset_expanded.dims for dim in req_dims):
+        raise ValueError("Data should have dimensions time, y, x.")
+
+    if "z" in dataset_expanded.dims:
+        dataset_reordered = dataset_expanded.transpose("time", "z", "y", "x")
+    else:
+        dataset_reordered = dataset_expanded.transpose("time", "y", "x")
+
+    # additional metadata
+    lat = forcing_dict["latitude"]
+    lon = forcing_dict["longitude"]
+    dataset_reordered.attrs = DATASET_ATTRS
+    dataset_reordered.attrs['latitude'] = lat
+    dataset_reordered.attrs['longitude'] = lon
+
+    # update values of x and y coords
+    dataset = dataset_reordered.assign_coords(
+        {
+            "x": [lon],
+            "y": [lat],
+        }
+    )
+
+    # update x, y attributes
+    dataset["x"].attrs = {
+        "long_name": "Gridbox longitude",
+        "standard_name": "longitude",
+        "units": "degrees",
+    }
+
+    dataset["y"].attrs = {
+        "long_name": "Gridbox latitude",
+        "standard_name": "latitude",
+        "units": "degrees",
+    }
+
+    return dataset
+
+
+def to_netcdf(config: Dict, cf_filename: str) -> str:
+    """Save csv files generated by the Stemmus_Scope model to a netcdf file using
+    the information provided by the ALMA conventions.
+
+    Args:
+        config(Dict): PyStemmusScope configuration dictionary.
+        cf_filename(str): Path to a csv file with the ALMA convention variables.
+
+    Returns:
+        str: path to the netcdf file under the output directory.
+    """
+
+    # list of required forcing variables, Alma_short_name: forcing_io_name,  # model_name
+    var_names = {
+        "RH": "rh",  # RH
+        "SWdown_ec": "sw_down",  # Rin
+        "LWdown_ec": "lw_down",  # Rli
+        "Qair": "Qair",
+        "Tair": "t_air_celcius",  # Ta
+        "Psurf": "psurf_hpa",  # P
+        "Wind": "wind_speed",  # u
+        "Precip": "precip_conv",  # Pre
+    }
+
+    # Number of time steps from the configuration file
+    time_steps = config["NumberOfTimeSteps"]
+
+    # read the forcing file into a dict
+    forcing_dict = forcing_io.read_forcing_data(
+        Path(config["ForcingPath"]) / config["ForcingFileName"]
+    )
+
+    # get time info
+    time = _shorten_data_array(forcing_dict["time"], time_steps)
+
+    # read the convention file
+    conventions = pd.read_csv(cf_filename)
+
+    alma_short_names = conventions["short_name_alma"]
+    data_list = []
+    for alma_name in alma_short_names:
+        df = conventions.loc[alma_short_names == alma_name].iloc[0]
+        file_name = Path(config["OutputPath"]) / df["file_name_STEMMUS-SCOPE"]
+
+        if alma_name in var_names:
+            # select data
+            data_array = _select_forcing_variables(forcing_dict, var_names[alma_name], alma_name)
+            data_array = _shorten_data_array(data_array, time_steps)
+
+        # create data array
+        elif alma_name in {"SoilTemp", "SoilMoist"}:
+            data_array = _prepare_soil_data(file_name, alma_name, time.values)
+        else:
+            data_array = _prepare_simulated_data(
+                file_name, df["short_name_STEMMUS-SCOPE"], alma_name, time.values
+            )
+
+        # update attributes of the array
+        data_array.attrs = {
+            "units": df["unit"],
+            "long_name": df["long_name"],
+            "standard_name": df["standard_name"],
+            "STEMMUS-SCOPE_name": df["short_name_STEMMUS-SCOPE"],
+            "definition": df["definition"],
+        }
+
+        # add to list
+        data_list.append(data_array)
+
+    # merge to a dataset
+    dataset = xr.merge(data_list)
+
+    # update dimensions
+    dataset = _update_dataset_attrs_dims(dataset, forcing_dict)
+
+    # save to nc file
+    nc_filename = Path(config["OutputPath"]) / f"{Path(config['OutputPath']).stem}_STEMMUS_SCOPE.nc"
+
+    dataset.to_netcdf(path=nc_filename)
+    return str(nc_filename)
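
A minimal usage sketch of the new writer. The configuration values and paths below are illustrative; only the config keys and the save.to_netcdf(config, cf_filename) signature come from this commit, and required_netcf_variables.csv is the convention file shipped in the STEMMUS_SCOPE repository under utils/csv_to_nc/.

from PyStemmusScope import save

# Hypothetical configuration; in practice these keys come from the
# PyStemmusScope configuration file used to run the model.
config = {
    "ForcingPath": "/data/forcing/",
    "ForcingFileName": "FI-Hyy_1996-2014_FLUXNET2015_Met.nc",
    "OutputPath": "/data/output/FI-Hyy_2022-09-05-1200/",
    "NumberOfTimeSteps": "17520",  # or "NA" to write the full forcing period
}

nc_file = save.to_netcdf(config, cf_filename="required_netcf_variables.csv")
print(nc_file)  # .../FI-Hyy_2022-09-05-1200/FI-Hyy_2022-09-05-1200_STEMMUS_SCOPE.nc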
