Skip to content

Commit a3dc480

Browse files
Merge pull request #17 from EcoExtreML/soil_io
Replacing MATLAB soil reading routines with Python
2 parents 378774e + 808bf8a commit a3dc480

84 files changed

Lines changed: 460 additions & 93 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

PyStemmusScope/forcing_io.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def read_forcing_data(forcing_file):
3838
ds_forcing = ds_forcing.squeeze(['x', 'y'])
3939

4040
data = {}
41-
41+
4242
# Expected time format is days (as floating point) since Jan 1st 00:00.
4343
data['doy_float'] = (
4444
ds_forcing['time'].dt.dayofyear - 1 +
@@ -47,17 +47,18 @@ def read_forcing_data(forcing_file):
4747
)
4848
data['year'] = ds_forcing['time'].dt.year.astype(float)
4949

50-
data['t_air_celcius'] = vc.kelvin_to_celcius(ds_forcing['Tair'])
51-
data['psurf_hpa'] = vc.pa_to_hpa(ds_forcing['Psurf'])
50+
data['t_air_celcius'] = ds_forcing['Tair'] - 273.15 # conversion from K to degC.
51+
data['psurf_hpa'] = ds_forcing['Psurf'] / 100 # conversion from Pa to hPa
5252
data['co2_conv'] = vc.co2_molar_fraction_to_kg_per_m3(ds_forcing['CO2air'])
53-
data['precip_conv'] = vc.precipitation_mm_s_to_cm_s(ds_forcing['Precip'])
53+
data['precip_conv'] = ds_forcing['Precip'] / 10 # conversion from mm/s to cm/s
5454
data['lw_down'] = ds_forcing['LWdown']
5555
data['sw_down'] = ds_forcing['SWdown']
5656
data['wind_speed'] = ds_forcing['Wind']
5757
data['rh'] = ds_forcing['RH']
5858
data['vpd'] = ds_forcing['VPD']
5959
data['lai'] = vc.mask_data(ds_forcing['LAI'], min_value=0.01)
60-
data['ea'] = vc.kpa_to_hpa(vc.calculate_ea(data['t_air_celcius'], data['rh']))
60+
# calculate ea, conversion from kPa to hPa:
61+
data['ea'] = vc.calculate_ea(data['t_air_celcius'], data['rh']) * 10
6162

6263
# Load in non-timedependent variables
6364
data['sitename'] = forcing_file.name.split('_')[0]
@@ -167,7 +168,7 @@ def prepare_forcing(input_dir, forcing_file, config):
167168
config (dict): The PyStemmusScope configuration dictionary.
168169
"""
169170
input_path = Path(input_dir)
170-
171+
171172
# Read the required data from the forcing file into a dictionary
172173
data = read_forcing_data(forcing_file)
173174

PyStemmusScope/soil_io.py

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
from pathlib import Path
2+
import hdf5storage
3+
import numpy as np
4+
import xarray as xr
5+
from . import utils
6+
from . import variable_conversion as vc
7+
8+
9+
def _open_multifile_datasets(paths, lat, lon, lat_key='lat', lon_key='lon'):
    """Internal function to open multifile netCDF files, and selects the lat & lon
    before merging them by coordinates. xarray's open_mfdataset does not support this
    type of functionality.

    Args:
        paths (iterable): Iterable containing the paths to the netCDF files
        lat (float): Latitude of the site of interest (in degrees North)
        lon (float): Longitude of the site of interest (in degrees East)
        lat_key (str): Name of the latitude coordinate in the netCDF files
        lon_key (str): Name of the longitude coordinate in the netCDF files

    Returns:
        xarray.Dataset: Dataset containing the merged data for a single location in
            space.
    """
    datasets = []
    for file in paths:
        ds = xr.open_dataset(file)
        # Drop attributes to avoid combine conflicts. Use a dict, not '' — the
        # attrs setter expects a mapping ('' only worked because dict('') == {}).
        ds.attrs = {}
        datasets.append(ds.sel({lat_key: lat, lon_key: lon}, method='nearest'))
    ds = xr.combine_by_coords(datasets)

    return ds
31+
32+
33+
def _read_lambda_coef(lambda_directory, lat, lon, depth_indices):
    """Internal function that reads the lambda coefficient files and returns the data
    of interest in a dictionary.

    Args:
        lambda_directory (Path): Path to the directory which contains the lambda data.
        lat (float): Latitude of the site of interest (in degrees North)
        lon (float): Longitude of the site of interest (in degrees East)
        depth_indices (list): List of which indices (0 - 7) should be selected from the
            lambda variable dataset.

    Returns:
        dict: Dictionary containing the lambda coefficient data under the key
            'Coef_Lamda' (spelling as expected by the downstream Matlab code).

    Raises:
        ValueError: If any depth index falls outside the range 0 - 7.
    """
    # Plain all() suffices here; no need for np.all over a temporary list.
    if not all(d in range(8) for d in depth_indices):
        raise ValueError("Incorrect depth indices provided. Indices range from 0 to 7")

    lambda_files = sorted(lambda_directory.glob("lambda_l*.nc"))

    ds = _open_multifile_datasets(lambda_files, lat, lon)

    # Sort depths in increasing order so depth_indices select the layers
    # the STEMMUS_SCOPE model expects.
    ds = ds.sortby("depth")
    coef_lambda = ds['lambda'].isel(depth=depth_indices).values

    return {'Coef_Lamda': coef_lambda}
59+
60+
61+
def _read_soil_composition(soil_data_path, lat, lon, depth_indices):
    """Internal function that reads the soil composition files and returns them in a
    dictionary.

    Args:
        soil_data_path (Path): Path to the directory which contains the soil data.
        lat (float): Latitude of the site of interest (in degrees North)
        lon (float): Longitude of the site of interest (in degrees East)
        depth_indices (list): List of which indices (0 - 7) should be selected from the
            soil composition dataset.

    Returns:
        dict: Dictionary with the clay ('FOC'), sand ('FOS') and organic
            carbon ('MSOC') fractions.

    Raises:
        ValueError: If any depth index falls outside the range 0 - 7.
    """
    # Validate before opening any files (fail fast; consistent with the
    # other readers in this module).
    if not all(d in range(8) for d in depth_indices):
        raise ValueError("Incorrect depth indices provided. Indices range from 0 to 7")

    soil_comp_fnames = ['CLAY1.nc', 'CLAY2.nc', 'OC1.nc', 'OC2.nc', 'SAND1.nc',
                        'SAND2.nc', 'SILT1.nc', 'SILT2.nc']
    soil_comp_paths = [soil_data_path / fname for fname in soil_comp_fnames]

    ds = _open_multifile_datasets(soil_comp_paths, lat, lon)

    ds = ds.sortby("depth")  # make sure that the depths are sorted in increasing order
    ds = ds.isel(depth=depth_indices)

    clay_fraction = ds['CLAY'].values / 100  # convert % to fraction
    sand_fraction = ds['SAND'].values / 100  # convert % to fraction
    organic_fraction = ds['OC'].values / 10000  # convert from 1/100th % to fraction.

    return {'FOC': clay_fraction, 'FOS': sand_fraction, 'MSOC': organic_fraction}
92+
93+
94+
def _read_hydraulic_parameters(soil_data_path, lat, lon, depths):
    """Internal function that reads the soil hydraulic parameters from the Schaap
    dataset and returns them in a dictionary.

    Args:
        soil_data_path (Path): Path to the directory which contains the soil data.
        lat (float): Latitude of the site of interest (in degrees North)
        lon (float): Longitude of the site of interest (in degrees East)
        depths (list): List of depths which should be selected from the dataset. The
            valid depths are: 0, 5, 15, 30, 60, 100 and 200 cm.

    Returns:
        dict: Dictionary containing the hydraulic parameters.

    Raises:
        ValueError: If any requested depth is not one of the valid depths.
    """
    valid_depths = [0, 5, 15, 30, 60, 100, 200]
    # Validate before opening any files. The message previously read
    # "...Available depths are[0, ...]" — a space was missing between the
    # two adjacent string literals.
    if not all(d in valid_depths for d in depths):
        raise ValueError("Incorrect depth value(s) provided. Available depths are "
                         f"{valid_depths}")

    ptf_files = sorted((soil_data_path / 'Schaap').glob('PTF_*.nc'))
    ds = _open_multifile_datasets(
        ptf_files, lat, lon, lat_key='latitude', lon_key='longitude'
    )

    schaap_vars = ['alpha', 'Ks', 'thetas', 'thetar', 'n']
    schaap_data = {key: np.zeros(len(depths)) for key in schaap_vars}

    # Variables are stored per depth as e.g. "alpha_5cm".
    for i, depth in enumerate(depths):
        for var in schaap_vars:
            schaap_data[var][i] = ds[f"{var}_{depth}cm"]

    fieldmc = vc.field_moisture_content(schaap_data['thetar'], schaap_data['thetas'],
                                        schaap_data['alpha'], schaap_data['n'])

    hydraulic_matfiledata = {
        'SaturatedMC': schaap_data['thetas'],
        'ResidualMC': schaap_data['thetar'],
        'Coefficient_n': schaap_data['n'],
        'Coefficient_Alpha': schaap_data['alpha'],
        'porosity': schaap_data['thetas'],
        # NOTE(review): Ks0 keeps the dataset's per-day units while SaturatedK
        # is converted to per-second — confirm this asymmetry is intended.
        'Ks0': schaap_data['Ks'][0],
        'SaturatedK': schaap_data['Ks'] / (24 * 3600),  # convert 1/day -> 1/s
        'fieldMC': fieldmc,
        'theta_s0': schaap_data['thetas'][0]
    }

    return hydraulic_matfiledata
141+
142+
143+
def _read_surface_data(soil_data_path, lat, lon):
    """Internal function that reads the fmax variable from the surface dataset and
    returns it in a dictionary.

    Args:
        soil_data_path (Path): Path to the directory which contains the surface data.
        lat (float): Latitude of the site of interest (in degrees North)
        lon (float): Longitude of the site of interest (in degrees East)

    Returns:
        dict: Dictionary containing the `fmax` value (maximum fractional saturated area)
    """
    surface_dataset = xr.open_dataset(soil_data_path / 'surfdata.nc')
    # Map degrees N/E onto the LSM grid indices used by the surface file.
    grid_lat, grid_lon = utils.convert_to_lsm_coordinates(lat, lon)
    site_data = surface_dataset.sel(lsmlat=grid_lat, lsmlon=grid_lon)
    return {'fmax': site_data['FMAX'].values}
163+
164+
165+
def _collect_soil_data(soil_data_path, lat, lon):
    """Internal function that calls the individual data collectors and merges them into
    a single dictionary ready to be written.

    Args:
        soil_data_path (Path): Path to the directory which contains the soil data.
        lat (float): Latitude of the site of interest (in degrees North)
        lon (float): Longitude of the site of interest (in degrees East)

    Returns:
        dict: Dictionary containing all the processed soil property data.
    """
    # Depths (cm) requested from the Schaap dataset, and the index-based
    # selection applied to the lambda / composition datasets.
    schaap_depths = [0, 5, 30, 60, 100, 200]
    depth_indices = [0, 2, 4, 5, 6, 7]

    matfiledata = _read_lambda_coef(soil_data_path / "lambda", lat, lon, depth_indices)
    matfiledata.update(
        _read_hydraulic_parameters(soil_data_path, lat, lon, schaap_depths))
    matfiledata.update(_read_soil_composition(soil_data_path, lat, lon, depth_indices))
    matfiledata.update(_read_surface_data(soil_data_path, lat, lon))

    return matfiledata
189+
190+
191+
def _retrieve_latlon(file):
    """Retrieves the latitude and longitude coordinates from the dataset file.

    Args:
        file (Path): Full path to the netCDF file containing the site latitude
            and longitude

    Returns:
        tuple: Flattened latitude and longitude arrays. Latitude in
            degrees N, longitude in degrees E.
    """
    dataset = xr.open_dataset(file)
    latitude = dataset.latitude.values.flatten()
    longitude = dataset.longitude.values.flatten()
    return latitude, longitude
206+
207+
208+
def prepare_soil_data(soil_data_dir, matfile_path, run_config):
    """Function that prepares the soil input data for the STEMMUS_SCOPE model. It parses
    the data for the input location, and writes a file that can be easily read in by
    Matlab.

    Args:
        soil_data_dir (Path): Path to the directory which contains the soil data.
        matfile_path (Path): Path to the directory where soil parameter file
            should be written to.
        run_config (dict): Dictionary containing the configuration for the current
            STEMMUS_SCOPE run.
    """
    forcing_file = Path(run_config["ForcingPath"]) / run_config["ForcingFileName"]

    # Data missing at ID-Pag site. See github.com/EcoExtreML/STEMMUS_SCOPE/issues/77
    if run_config["ForcingFileName"].startswith("ID"):
        lat, lon = -1., 112.
    else:
        lat, lon = _retrieve_latlon(forcing_file)

    matfiledata = _collect_soil_data(Path(soil_data_dir), lat, lon)

    hdf5storage.savemat(
        Path(matfile_path) / "soil_parameters.mat", mdict=matfiledata, appendmat=False,
    )

PyStemmusScope/utils.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import numpy as np
2+
3+
4+
def convert_to_lsm_coordinates(lat, lon):
    """Converts latitude in degrees North to NCAR's LSM coordinate system: Grid with lat
    values ranging from 0 -- 360, where 0 is the South Pole, and lon values ranging
    from 0 -- 720, where 0 is the prime meridian. Both representing a 0.5 degree
    resolution.

    Args:
        lat (float): Latitude in degrees North
        lon (float): longitude in degrees East

    Returns:
        (int, int): (nearest) latitude grid coordinates
    """
    # Shift the latitude origin to the South Pole, then scale to half-degree cells.
    grid_lat = (lat + 90.0) * 2.0
    # Scale longitude to half-degree cells; wrap negative (western) values.
    grid_lon = lon * 2.0 + (720.0 if lon < 0 else 0.0)

    return np.round(grid_lat).astype(int), np.round(grid_lon).astype(int)

0 commit comments

Comments
 (0)