|
| 1 | +"""PyStemmusScope save module. |
| 2 | +
|
| 3 | +Module designed to create a netcdf file following `ALMA |
| 4 | +convention <https://web.lmd.jussieu.fr/~polcher/ALMA/>`_ from csv files following |
| 5 | +`SCOPE format <https://scope-model.readthedocs.io/en/latest/outfiles.html>`_ in |
| 6 | +the output directory. |
| 7 | +
|
| 8 | +The file |
| 9 | +`required_netcf_variables.csv <https://github.com/EcoExtreML/STEMMUS_SCOPE/blob/main/utils/csv_to_nc/required_netcf_variables.csv>`_ |
| 10 | +lists required variable names and their attributes based on `ALMA+CF |
| 11 | +convention table <https://docs.google.com/spreadsheets/d/1CA3aTvI9piXqRqO-3MGrsH1vW-Sd87D8iZXHGrqK42o/edit#gid=2085475627>`_. |
| 12 | +
|
| 13 | +Example: |
| 14 | + See notebooks/run_model_in_notebook.ipynb in |
| 15 | + `STEMMUS_SCOPE_Processing repository <https://github.com/EcoExtreML/STEMMUS_SCOPE_Processing>`_ |
| 16 | +
|
| 17 | +""" |
| 18 | + |
| 19 | +import logging |
| 20 | +from pathlib import Path |
| 21 | +from typing import Dict |
| 22 | +from typing import List |
| 23 | +from typing import Union |
| 24 | +import numpy as np |
| 25 | +import pandas as pd |
| 26 | +import xarray as xr |
| 27 | +from PyStemmusScope import forcing_io |
| 28 | +from . import variable_conversion as vc |
| 29 | + |
| 30 | + |
# Module-level logger; handlers/levels are configured by the application, not here.
logger = logging.getLogger(__name__)

# Global attributes written to every generated netCDF file:
# model provenance, maintainer contacts, and license information.
# _update_dataset_attrs_dims() attaches these (plus site lat/lon) to the dataset.
DATASET_ATTRS = {
    'model': 'STEMMUS_SCOPE',
    'institution': 'University of Twente; Northwest A&F University',
    'contact': (
        'Zhongbo Su, z.su@utwente.nl; '
        'Yijian Zeng, y.zeng@utwente.nl; '
        'Yunfei Wang, y.wang-3@utwente.nl'
    ),
    'license_type': 'CC BY 4.0',
    'license_url': 'https://creativecommons.org/licenses/by/4.0/',
}
| 44 | + |
| 45 | + |
def _select_forcing_variables(forcing_dict: Dict, forcing_var: str, alma_var: str) -> xr.DataArray:
    """Pick one variable from the forcing data and relabel it with its ALMA name.

    Args:
        forcing_dict(dict): a dictionary returned by `PyStemmusScope.forcing_io.read_forcing_data()`.
        forcing_var(str): variable name in forcing dataset.
        alma_var(str): variable name in ALMA convention.

    Returns:
        xr.DataArray: the selected data array, renamed to alma_var.
    """
    # Look the variable up under its forcing name, hand it back under its ALMA name.
    return forcing_dict[forcing_var].rename(alma_var)
| 64 | + |
| 65 | + |
def _shorten_data_array(data: Union[xr.DataArray, xr.Dataset], time_steps: str) -> Union[xr.DataArray, xr.Dataset]:
    """Truncate data along the time dimension.

    Args:
        data(xr.DataArray or xr.Dataset): data to be shortened.
        time_steps(str): number of time steps to keep, or "NA" to keep all
            of them.

    Returns:
        xr.DataArray or xr.Dataset: subset of data whose time length equals
            time_steps (the input unchanged when time_steps is "NA").
    """
    if time_steps != "NA":
        # A slice indexer selects the leading time steps lazily, without
        # materializing an integer index array (as np.arange would).
        data = data.isel(time=slice(0, int(time_steps)))

    return data
| 82 | + |
| 83 | + |
def _prepare_soil_data(file_name: str, var_name: str, time: List) -> xr.DataArray:
    """Return simulated soil temperature and soil moisture as `xr.DataArray`.

    Reads a Stemmus_Scope soil csv whose two header rows hold, per column,
    the layer depth and thickness (presumably in cm — see the layer metadata
    strings), applies the unit conversion required by the ALMA convention,
    and reshapes the table into a (time, z) data array.

    Args:
        file_name(str): csv file name generated by Stemmus_Scope model.
        var_name(str): variable name by ALMA convention ("SoilTemp" or
            "SoilMoist"; any other name is passed through unconverted).
        time(list): time values to be used for the time coordinates.

    Returns:
        xr.DataArray: a dataarray with two dimensions of time and z.
    """
    # the first two rows are depth and thickness (becomes a 2-level column header)
    data = pd.read_csv(file_name, delimiter=",", header=[0, 1])

    # skip first row that is unit
    data = data.iloc[1:]

    # make sure it is float and not str
    data = data.astype('float32')

    # get depth, thickness info from the two header levels, one pair per column
    depths = []
    thicknesses = []
    for depth, thickness in data.columns:
        depths.append(np.float32(depth))
        thicknesses.append(np.float32(thickness))

    # soil layer metadata (human-readable depth intervals, stored on "z" below)
    soil_metadata = _create_soil_layer_metadata(thicknesses, depths)

    # drop thickness level; columns are now indexed by depth only
    data = data.droplevel(level=1, axis=1)

    if var_name == "SoilTemp":
        # Celsius to Kelvin : K = 273.15 + C
        data = data + 273.15

    elif var_name == "SoilMoist":
        # cm to m
        thicknesses = np.array(thicknesses) / 100.0

        for index in data.index:
            # m3/m3 to kg/m2, row by row via the project's conversion helper
            volumetric_water_content = np.array(data.loc[index])
            data.loc[index] = vc.soil_moisture(volumetric_water_content, thicknesses)

    # reshape the data frame, it returns Series with a (row, column) MultiIndex
    data = data.stack()

    # relabel the MultiIndex levels: rows -> time values, columns -> 1-based layer numbers
    layers = range(1, data.index.levshape[1] + 1)
    data.index.names = ["time", "z"]
    data.index = data.index.set_levels([time, layers], level=["time", "z"])
    data.name = var_name

    # convert data to xarray data array with dims (time, z)
    data_array = data.to_xarray()

    # add z attributes
    data_array["z"].attrs = {
        "long_name": "Soil layer",
        "standard_name": "Soil layer",
        "definition": soil_metadata,
        "units": "-",
    }

    return data_array
| 151 | + |
| 152 | + |
def _prepare_simulated_data(file_name: str, model_name: str, alma_name: str, time: List) -> xr.DataArray:
    """Return model simulation as `xr.DataArray`.

    Args:
        file_name(str): csv file name generated by Stemmus_Scope model.
        model_name(str): variable name by Stemmus_Scope model.
        alma_name(str): variable name by ALMA conventions.
        time(list): time values to be used for the time coordinates.

    Returns:
        xr.DataArray: a dataarray with one dimension of time.
    """
    # the first three rows are names and units
    table = pd.read_csv(file_name, delimiter=",")

    # pick the model column, drop the units row, and force numeric dtype
    series = table[model_name].iloc[1:].astype('float32')

    # attach the time coordinate and the ALMA variable name
    series.index = time
    series.index.names = ["time"]
    series.name = alma_name

    # hand back an xarray data array built from the series
    return series.to_xarray()
| 183 | + |
| 184 | + |
| 185 | +def _create_soil_layer_metadata(thicknesses: List[float], depths: List[float]) -> List[str]: |
| 186 | + """ |
| 187 | + layer_1: 0.0 - 1.0 cm |
| 188 | + layer_2: 1.0 - 2.0 cm |
| 189 | + layer_3: 2.0 - 3.0 cm |
| 190 | + """ |
| 191 | + |
| 192 | + metadata = [] |
| 193 | + for index, (thickness, depth) in enumerate(zip(thicknesses, depths)): |
| 194 | + metadata.append(f"layer_{index + 1}: {(depth - thickness)} - {depth} cm") |
| 195 | + |
| 196 | + return metadata |
| 197 | + |
| 198 | + |
def _update_dataset_attrs_dims(dataset: xr.Dataset, forcing_dict: Dict) -> xr.Dataset:
    """Update dimensions and attributes of a dataset according to ALMA conventions.

    Args:
        dataset(xr.Dataset): a dataset with variables in ALMA conventions.
        forcing_dict(dict): a dictionary returned by
            `PyStemmusScope.forcing_io.read_forcing_data()`; provides the
            site "latitude" and "longitude".

    Returns:
        xr.Dataset: the dataset with dimensions ("time", "y", "x") — plus
            "z" after "time" when present — and site/provenance metadata.
    """
    # add scalar x/y dims so the single site becomes a 1x1 grid
    dataset_expanded = dataset.expand_dims(["x", "y"])

    # all required dims must exist before reordering
    req_dims = ['time', 'x', 'y']
    if any(dim not in dataset_expanded.dims for dim in req_dims):
        raise ValueError("Data should have dimensions time, y, x.")

    # put time first; keep the soil-layer dim ("z") right after it when present
    if "z" in dataset_expanded.dims:
        dataset_reordered = dataset_expanded.transpose("time", "z", "y", "x")
    else:
        dataset_reordered = dataset_expanded.transpose("time", "y", "x")

    # additional metadata: copy the module-level template so repeated calls
    # can never mutate the shared DATASET_ATTRS dict
    lat = forcing_dict["latitude"]
    lon = forcing_dict["longitude"]
    dataset_reordered.attrs = {**DATASET_ATTRS, 'latitude': lat, 'longitude': lon}

    # update values of x and y coords with the site location
    dataset = dataset_reordered.assign_coords(
        {
            "x": [lon],
            "y": [lat],
        }
    )

    # update x, y attributes
    dataset["x"].attrs = {
        "long_name": "Gridbox longitude",
        "standard_name": "longitude",
        "units": "degrees",
    }

    dataset["y"].attrs = {
        "long_name": "Gridbox latitude",
        "standard_name": "latitude",
        "units": "degrees",
    }

    return dataset
| 251 | + |
| 252 | + |
def to_netcdf(config: Dict, cf_filename: str) -> str:
    """Save csv files generated by Stemmus_Scope model to a netcdf file using
    information provided by ALMA conventions.

    For every variable listed in the convention csv, the data is taken either
    from the forcing file (forcing variables), from the soil csv outputs
    ("SoilTemp"/"SoilMoist"), or from the other model output csv files; the
    arrays are merged into one dataset and written under the output directory.

    Args:
        config(Dict): PyStemmusScope configuration dictionary. Keys read here:
            "NumberOfTimeSteps", "ForcingPath", "ForcingFileName", "OutputPath".
        cf_filename(str): Path to a csv file for ALMA conventions.

    Returns:
        str: path to the generated netcdf file under the output directory.
    """

    # list of required forcing variables, Alma_short_name: forcing_io_name, # model_name
    var_names = {
        "RH": "rh",  # RH
        "SWdown_ec": "sw_down",  # Rin
        "LWdown_ec": "lw_down",  # Rli
        "Qair": "Qair",
        "Tair": "t_air_celcius",  # Ta
        "Psurf": "psurf_hpa",  # P
        "Wind": "wind_speed",  # u
        "Precip": "precip_conv",  # Pre
    }

    # Number of time steps from configuration file (string; "NA" means all)
    time_steps = config["NumberOfTimeSteps"]

    # read forcing file into a dict
    forcing_dict = forcing_io.read_forcing_data(
        Path(config["ForcingPath"]) / config["ForcingFileName"]
    )

    # get time info, truncated to the requested number of steps
    time = _shorten_data_array(forcing_dict["time"], time_steps)

    # read convention file; one row per required output variable
    conventions = pd.read_csv(cf_filename)

    alma_short_names = conventions["short_name_alma"]
    data_list = []
    for alma_name in alma_short_names:
        # convention row for this variable (names, units, source file)
        df = conventions.loc[alma_short_names == alma_name].iloc[0]
        file_name = Path(config["OutputPath"]) / df["file_name_STEMMUS-SCOPE"]

        if alma_name in var_names:
            # forcing-derived variable: select from forcing data and truncate
            data_array = _select_forcing_variables(forcing_dict, var_names[alma_name], alma_name)
            data_array = _shorten_data_array(data_array, time_steps)

        # soil variables come from the depth-layered csv outputs
        elif alma_name in {"SoilTemp", "SoilMoist"}:
            data_array = _prepare_soil_data(file_name, alma_name, time.values)
        else:
            # all remaining variables come from plain model output csv files
            data_array = _prepare_simulated_data(
                file_name, df["short_name_STEMMUS-SCOPE"], alma_name, time.values
            )

        # update attributes of array from the convention table
        data_array.attrs = {
            "units": df["unit"],
            "long_name": df["long_name"],
            "standard_name": df["standard_name"],
            "STEMMUS-SCOPE_name": df["short_name_STEMMUS-SCOPE"],
            "definition": df["definition"],
        }

        # add to list
        data_list.append(data_array)

    # merge to a dataset
    dataset = xr.merge(data_list)

    # update dimensions and attach site/provenance metadata
    dataset = _update_dataset_attrs_dims(dataset, forcing_dict)

    # save to nc file named after the output directory
    nc_filename = Path(config["OutputPath"]) / f"{Path(config['OutputPath']).stem}_STEMMUS_SCOPE.nc"

    dataset.to_netcdf(path= nc_filename)
    return str(nc_filename)
0 commit comments