Skip to content

Commit a5be424

Browse files
Add support for land cover data (#73)
* Add LAI to global data selection, tests * Correct test data generation * Add instructions, download script & parsing nb * Move global data operations to separate folder * Modify nearest_non_nan to check max_distance * Fix typing issue * Start split of global data, add verification * Finish refactoring global data * Fix bugs * Add checks for tile existence for DEM and h_canopy * Compress txt assets * Improve err msgs. Add tests for raises * Fix linter issues * Add landcover table, download+parse scripts * Add land cover data support & test data * Add checks, cleanup landcover parse notebook * Apply suggestions from Sarah's code review Co-authored-by: SarahAlidoost <55081872+SarahAlidoost@users.noreply.github.com> --------- Co-authored-by: SarahAlidoost <55081872+SarahAlidoost@users.noreply.github.com>
1 parent 4d5ec56 commit a5be424

11 files changed

Lines changed: 481 additions & 31784 deletions

File tree

PyStemmusScope/global_data/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Module for operations related to the 'global' datasets."""
22
from PyStemmusScope.global_data import cams_co2
3+
from PyStemmusScope.global_data import cci_landcover
34
from PyStemmusScope.global_data import copernicus_lai
45
from PyStemmusScope.global_data import era5
56
from PyStemmusScope.global_data import eth_canopy_height
@@ -16,4 +17,5 @@
1617
"prism_dem",
1718
"cams_co2",
1819
"copernicus_lai",
20+
"cci_landcover",
1921
]
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"lccs_class","IGBP_STEMMUS_SCOPE"
2+
0,"No vegetation"
3+
10,"Croplands"
4+
20,"Croplands"
5+
30,"Cropland mosaics"
6+
40,"Cropland mosaics"
7+
50,"Evergreen Broadleaf Forest"
8+
60,"Deciduous Broadleaf Forests"
9+
70,"Evergreen Needleleaf Forests"
10+
80,"Deciduous needleleaf forests"
11+
90,"Mixed Forests"
12+
100,"Woody savannas"
13+
110,"Savannas"
14+
120,"Closed shrublands"
15+
130,"Grasslands"
16+
140,"Not available in IGBP"
17+
150,"Open shrublands"
18+
160,"Permanent Wetlands"
19+
170,"Permanent Wetlands"
20+
180,"Permanent Wetlands"
21+
190,"Urban and built-up lands"
22+
200,"Barren"
23+
210,"Water bodies"
24+
220,"Snow and ice"
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
"""Module for loading and validating the ESA CCI land cover dataset."""
2+
from pathlib import Path
3+
from typing import Dict
4+
from typing import List
5+
from typing import Tuple
6+
from typing import Union
7+
import numpy as np
8+
import pandas as pd
9+
import xarray as xr
10+
from PyStemmusScope.global_data import utils
11+
12+
13+
# Grid resolution of the CCI land cover dataset, expressed in degrees.
# NOTE(review): not referenced by any function visible in this module - confirm
# whether it is used elsewhere before relying on it.
RESOLUTION_CCI = 1 / 360  # Resolution of the dataset in degrees
14+
# Location of the CSV file (in the "assets" folder next to this module) that is
# read by get_lccs_to_igbp_table, using its "lccs_class" column as the index and
# its "IGBP_STEMMUS_SCOPE" column as the values.
FILEPATH_LANDCOVER_TABLE = Path(__file__).parent / "assets" / "lccs_to_igbp_table.csv"
15+
16+
17+
def retrieve_landcover_data(
    global_data_dir: Path,
    latlon: Union[Tuple[int, int], Tuple[float, float]],
    time_range: Tuple[np.datetime64, np.datetime64],
    timestep: str,
) -> Dict[str, np.ndarray]:
    """Find the CCI land cover netCDF files and extract the data for a site.

    The files are searched for (non-recursively) in the "landcover" subfolder of
    the global data directory, and then handed to `extract_landcover_data`.

    Args:
        global_data_dir: Path to the directory containing the global datasets.
        latlon: Latitude and longitude of the site.
        time_range: Start and end time of the model run.
        timestep: Desired timestep of the model, this is derived from the forcing
            data. In a pandas-timedelta compatible format. For example: "1800S"

    Returns:
        Dictionary containing IGBP and LCCS land cover classes.

    Raises:
        FileNotFoundError: If the "landcover" subfolder holds no "*.nc" files.
    """
    landcover_dir = global_data_dir / "landcover"
    files_cci = [*landcover_dir.glob("*.nc")]

    # Fail early with a clear message instead of letting xarray choke on an
    # empty file list further down.
    if not files_cci:
        raise FileNotFoundError(
            f"No netCDF files found in the folder '{global_data_dir / 'landcover'}'"
        )

    return extract_landcover_data(
        files_cci=files_cci,
        latlon=latlon,
        time_range=time_range,
        timestep=timestep,
    )
48+
49+
50+
def extract_landcover_data(
    files_cci: List[Path],
    latlon: Union[Tuple[int, int], Tuple[float, float]],
    time_range: Tuple[np.datetime64, np.datetime64],
    timestep: str,
) -> Dict[str, np.ndarray]:
    """Extract the land cover data from the CCI netCDF files.

    The grid cell containing the site is selected using the "lat_bounds" and
    "lon_bounds" variables, its "lccs_class" values are mapped onto the model
    time axis with nearest-neighbour interpolation, and the class ids are then
    translated to land cover names.

    Args:
        files_cci: List of CCI land cover files.
        latlon: Latitude and longitude of the site.
        time_range: Start and end time of the model run.
        timestep: Desired timestep of the model, this is derived from the forcing
            data. In a pandas-timedelta compatible format. For example: "1800S"

    Returns:
        Dictionary with the keys "LCCS_landcover" and "IGBP_veg_long". Each value
        is an array with one land cover name per timestep in
        ``pd.date_range(time_range[0], time_range[1], freq=timestep)``.

    Raises:
        utils.MissingDataError: Raised by `check_cci_dataset` if the location or
            time range is not covered by the dataset.
        KeyError: If a class id found in the data is missing from one of the
            lookup tables.
    """
    # Open the files in a context manager so the underlying file handles are
    # released again; everything that needs the (lazy) data happens inside.
    with xr.open_mfdataset(files_cci) as cci_dataset:
        # Assert spatial/temporal bounds
        check_cci_dataset(cci_dataset, latlon, time_range)

        # Load the bounds so that they are not dask arrays
        lat_bounds = cci_dataset["lat_bounds"].load()
        lon_bounds = cci_dataset["lon_bounds"].load()

        # argmax on the boolean mask yields the index of the first matching cell.
        # For latitude, bound 0 is treated as the upper edge and bound 1 as the
        # lower edge - presumably because latitude is stored north-to-south;
        # confirm against the netCDF files.
        lat_idx = np.logical_and(  # type: ignore
            lat_bounds.isel(bounds=0) >= latlon[0],
            lat_bounds.isel(bounds=1) < latlon[0],
        ).argmax(dim="lat")
        lon_idx = np.logical_and(  # type: ignore
            lon_bounds.isel(bounds=0) <= latlon[1],
            lon_bounds.isel(bounds=1) > latlon[1],
        ).argmax(dim="lon")

        lccs_id = cci_dataset.isel(lat=lat_idx, lon=lon_idx)["lccs_class"]

        # If time is size 1, interp fails. Adding an extra datapoint prevents this.
        if lccs_id["time"].size == 1:
            data_copy = lccs_id.copy()
            data_copy["time"] = lccs_id["time"] + np.timedelta64(1, "D")
            lccs_id = xr.concat((lccs_id, data_copy), dim="time")

        lccs_id = lccs_id.interp(
            time=pd.date_range(time_range[0], time_range[1], freq=timestep),
            method="nearest",
            kwargs={"fill_value": "extrapolate", "bounds_error": False},
        )

        # Materialise the class ids once; they are needed for both lookups.
        lccs_values = lccs_id.to_numpy()
        landcover_lookup_table = get_landcover_table(cci_dataset)

    igbp_lookup_table = get_lccs_to_igbp_table()

    return {
        "LCCS_landcover": np.array(
            [landcover_lookup_table[_id] for _id in lccs_values]
        ),
        "IGBP_veg_long": np.array([igbp_lookup_table[_id] for _id in lccs_values]),
    }
106+
107+
108+
def get_lccs_to_igbp_table() -> Dict[int, str]:
    """Load the LCCS to IGBP translation table as a lookup dictionary.

    Returns:
        Dictionary mapping each value of the "lccs_class" column of the CSV file
        to the matching entry of its "IGBP_STEMMUS_SCOPE" column.
    """
    translation_table = pd.read_csv(FILEPATH_LANDCOVER_TABLE, index_col="lccs_class")
    igbp_column = translation_table["IGBP_STEMMUS_SCOPE"]
    return igbp_column.to_dict()
112+
113+
114+
def get_landcover_table(cci_dataset: xr.Dataset) -> Dict[int, str]:
    """Get the lookup table to convert the flag values to a land cover name.

    The lookup table for the land cover classes is contained in the netCDF file,
    under the attributes of the "lccs_class" variable ("flag_values" and
    "flag_meanings"). This function extracts it and turns it into a (dict)
    lookup table.

    Args:
        cci_dataset: The CCI dataset netCDF file.

    Returns:
        The landcover class lookup table, pairing every entry of "flag_values"
        with the entry of "flag_meanings" at the same position.
    """
    lccs_attrs = cci_dataset["lccs_class"].attrs
    # split() without a separator collapses runs of whitespace and ignores
    # leading/trailing blanks. split(" ") would emit empty strings there, which
    # shifts every following meaning onto the wrong flag value.
    flag_meanings = lccs_attrs["flag_meanings"].split()
    flag_values = lccs_attrs["flag_values"]
    return dict(zip(flag_values, flag_meanings))
130+
131+
132+
def check_cci_dataset(
    cci_dataset: xr.Dataset,
    latlon: Union[Tuple[int, int], Tuple[float, float]],
    time_range: Tuple[np.datetime64, np.datetime64],
) -> None:
    """Validate the cci dataset for spatial and temporal bounds.

    Args:
        cci_dataset: The opened CCI land cover dataset.
        latlon: Latitude and longitude of the site.
        time_range: Start and end time of the model run.

    Raises:
        utils.MissingDataError: If the site lies outside the range spanned by the
            dataset's "lat"/"lon" coordinates, or if the requested years extend
            more than one year beyond the years covered by the dataset.
    """
    # Assert spatial bounds. The comparison is made against the extreme values of
    # the "lat" and "lon" coordinates themselves.
    lat_out_of_bounds = (
        latlon[0] > cci_dataset["lat"].max() or latlon[0] < cci_dataset["lat"].min()
    )
    if lat_out_of_bounds or (
        latlon[1] > cci_dataset["lon"].max() or latlon[1] < cci_dataset["lon"].min()
    ):
        raise utils.MissingDataError(
            f"\nThe specified location {latlon} was not within bounds of the CCI land"
            f"\ncover dataset."
            f"\nPlease check the netCDF files or select a different location"
        )

    # Assert temporal bounds
    first_time_cci = pd.to_datetime(cci_dataset["time"].min().to_numpy())
    last_time_cci = pd.to_datetime(cci_dataset["time"].max().to_numpy())
    first_year_range = pd.to_datetime(time_range[0]).year
    last_year_range = pd.to_datetime(time_range[-1]).year
    # As the data is yearly, allow some leeway with the time bounds
    if (first_year_range + 1 < first_time_cci.year) or (
        last_year_range - 1 > last_time_cci.year
    ):
        # Report the dataset's coverage as plain timestamps (not DataArray reprs)
        # and point the user at the time range, which is what failed here.
        raise utils.MissingDataError(
            f"\nThe specified time range {time_range} was not within the range of the"
            f"\nCCI land cover dataset:"
            f"\n({first_time_cci} - {last_time_cci})"
            f"\nPlease check the netCDF files or select a different time range"
        )

PyStemmusScope/global_data/global_data_selection.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,15 @@ def collect_datasets(
100100
data["latitude"] = latlon[0]
101101
data["longitude"] = latlon[1]
102102

103-
# TODO: Add land cover data retrieval.
104-
data["IGBP_veg_long"] = "Evergreen Needleleaf Forests"
103+
landcover_data = gd.cci_landcover.retrieve_landcover_data(
104+
global_data_dir,
105+
latlon,
106+
time_range,
107+
timestep,
108+
)
109+
# TODO see issue github.com/EcoExtreML/STEMMUS_SCOPE/issues/137
110+
# for now, we only use the first value of the time series
111+
data["IGBP_veg_long"] = landcover_data["IGBP_veg_long"][0]
112+
data["LCCS_landcover"] = landcover_data["LCCS_landcover"][0]
105113

106114
return data

global_data/data_analysis_notebooks/parse_landcover.ipynb

Lines changed: 123 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""Download land cover data using the cdsapi."""
2+
from pathlib import Path
3+
import cdsapi
4+
5+
6+
# Read the CDS credentials from the user's home directory: the first line of the
# file is used as the user id, the second line as the API key.
credentials_path = Path.home() / ".cdsloginrc"
with credentials_path.open(encoding="utf8") as credentials_file:
    uid = credentials_file.readline().strip()
    api_key = credentials_file.readline().strip()


client = cdsapi.Client(
    url="https://cds.climate.copernicus.eu/api/v2",
    key=f"{uid}:{api_key}",
    verify=True,
)


# Years for which the land cover data is downloaded.
years = [2013]


# One request (and one zip file in the current working directory) per year.
for year in years:
    request = {
        "variable": "all",
        "format": "zip",
        "year": f"{year}",
        "version": "v2.0.7cds",
    }
    client.retrieve(
        "satellite-land-cover",
        request,
        f"cds_landcover_{year}.zip",
    )

0 commit comments

Comments
 (0)