Skip to content

Commit b274e1e

Browse files
committed
function, docs, tests
1 parent 28ea577 commit b274e1e

6 files changed

Lines changed: 292 additions & 1 deletion

File tree

docs/sphinx/source/reference/iotools.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,17 @@ lower quality.
237237
iotools.read_crn
238238

239239

240+
ECMWF ERA5
241+
^^^^^^^^^^
242+
243+
A global reanalysis dataset providing weather and solar resource data.
244+
245+
.. autosummary::
246+
:toctree: generated/
247+
248+
iotools.get_era5
249+
250+
240251
Generic data file readers
241252
-------------------------
242253

docs/sphinx/source/whatsnew/v0.13.2.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ Enhancements
2727
:py:func:`~pvlib.singlediode.bishop88_mpp`,
2828
:py:func:`~pvlib.singlediode.bishop88_v_from_i`, and
2929
:py:func:`~pvlib.singlediode.bishop88_i_from_v`. (:issue:`2497`, :pull:`2498`)
30-
30+
* Add :py:func:`~pvlib.iotools.get_era5`, a function for accessing
31+
ERA5 reanalysis data. (:pull:`2573`)
3132

3233

3334
Documentation

pvlib/iotools/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,4 @@
4545
from pvlib.iotools.meteonorm import get_meteonorm_observation_training # noqa: F401, E501
4646
from pvlib.iotools.meteonorm import get_meteonorm_tmy # noqa: F401
4747
from pvlib.iotools.nasa_power import get_nasa_power # noqa: F401
48+
from pvlib.iotools.ecmwf import get_era5 # noqa: F401

pvlib/iotools/ecmwf.py

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
import requests
2+
import pandas as pd
3+
from io import BytesIO, StringIO
4+
import zipfile
5+
import time
6+
7+
8+
# Translation table from ERA5 variable names to pvlib variable names.
# Each pvlib name appears twice: once under the ERA5 short name and once
# under the ERA5 long name. Keep the long names AFTER the short names:
# get_era5 inverts this dict, and the later entry wins on duplicate values.
VARIABLE_MAP = {
    # ERA5 short names
    'd2m': 'temp_dew',
    't2m': 'temp_air',
    'sp': 'pressure',
    'ssrd': 'ghi',
    'tp': 'precipitation',

    # ERA5 long names
    '2m_dewpoint_temperature': 'temp_dew',
    '2m_temperature': 'temp_air',
    'surface_pressure': 'pressure',
    'surface_solar_radiation_downwards': 'ghi',
    'total_precipitation': 'precipitation',
}
23+
24+
25+
def same(x):
    """Return ``x`` untouched (converter for values needing no change)."""
    unchanged = x
    return unchanged
27+
28+
29+
def k_to_c(temp_k):
    """Convert a temperature from kelvin to degrees Celsius."""
    kelvin_offset = 273.15
    return temp_k - kelvin_offset
31+
32+
33+
def j_to_w(j):
    """Divide an energy value by 3600 to turn joules into watts."""
    seconds = 3600
    return j / seconds
35+
36+
37+
def m_to_cm(m):
    """
    Convert a length from meters to centimeters.

    Parameters
    ----------
    m : numeric
        Value(s) in meters.

    Returns
    -------
    numeric
        Value(s) in centimeters.
    """
    # 1 m == 100 cm, so the conversion multiplies (it previously divided,
    # shrinking values by a factor of 10,000 relative to the correct result)
    return m * 100
39+
40+
# Unit converter applied to each ERA5 column (keyed by ERA5 short name)
# when get_era5 is called with map_variables=True.
UNITS = {
    # wind components: passed through unchanged
    'u100': same,
    'v100': same,
    'u10': same,
    'v10': same,
    # 2 m dew point and air temperature
    'd2m': k_to_c,
    't2m': k_to_c,
    'msl': same,
    # sea surface and skin temperature
    'sst': k_to_c,
    'skt': k_to_c,
    'sp': same,
    # downward radiation
    'ssrd': j_to_w,
    'strd': j_to_w,
    # total precipitation
    'tp': m_to_cm,
}
55+
56+
57+
def get_era5(latitude, longitude, start, end, variables, api_key,
             map_variables=True, timeout=60,
             url='https://cds.climate.copernicus.eu/api/retrieve/v1/'):
    """
    Retrieve ERA5 reanalysis data from the ECMWF's Copernicus Data Store.

    This API [1]_ provides a subset of the full ERA5 dataset. See [2]_ for
    the available variables. Data are available on a 0.25° x 0.25° grid.

    The request is submitted to a queue, polled about once per second until
    it finishes, and the resulting zipped CSV file is downloaded and parsed.

    Parameters
    ----------
    latitude : float
        In decimal degrees, north is positive (ISO 19115).
    longitude : float
        In decimal degrees, east is positive (ISO 19115).
    start : datetime like or str
        First day of the requested period.
    end : datetime like or str
        Last day of the requested period.
    variables : list of str
        List of variable names to retrieve. See [1]_ for options.
        pvlib names listed in :const:`VARIABLE_MAP` are also accepted and
        are translated to ERA5 names before the request is submitted.
        A single name may be given as a plain string.
    api_key : str
        ECMWF API key.
    map_variables : bool, default True
        When true, converts units using the functions in :const:`UNITS`
        and renames columns of the DataFrame to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.
        When false, data are returned as delivered by the API.
    timeout : int, default 60
        Number of seconds to wait for the requested data to become available
        before timeout.
    url : str, optional
        API endpoint URL.

    Returns
    -------
    data : pd.DataFrame
        Time series data. The index corresponds to the start of the interval.
    meta : dict
        Metadata. Contains the metadata returned by ECMWF when the request
        was submitted, the ``jobID``, and (when data were returned) the
        ``latitude`` and ``longitude`` reported in the data file.

    Raises
    ------
    Exception
        If ECMWF reports that the job failed, or if ``timeout`` is reached
        without the job finishing.

    References
    ----------
    .. [1] https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-timeseries?tab=overview
    .. [2] https://confluence.ecmwf.int/pages/viewpage.action?pageId=505390919
    """  # noqa: E501
    start = pd.to_datetime(start).strftime("%Y-%m-%d")
    end = pd.to_datetime(end).strftime("%Y-%m-%d")

    headers = {'PRIVATE-TOKEN': api_key}

    # a bare string would otherwise be iterated character by character
    if isinstance(variables, str):
        variables = [variables]

    # allow variables to be specified with pvlib names. VARIABLE_MAP lists
    # short names before long names, so the inverted map yields long names.
    reverse_map = {v: k for k, v in VARIABLE_MAP.items()}
    variables = [reverse_map.get(k, k) for k in variables]

    # Step 1: submit data request (add it to the queue)
    params = {
        "inputs": {
            "variable": variables,
            "location": {"longitude": longitude, "latitude": latitude},
            "date": [f"{start}/{end}"],
            "data_format": "csv"
        }
    }
    slug = "processes/reanalysis-era5-single-levels-timeseries/execution"
    response = requests.post(url + slug, json=params, headers=headers)
    submission_response = response.json()
    job_id = submission_response['jobID']

    # Step 2: poll until the data request is ready
    slug = "jobs/" + job_id
    poll_interval = 1  # seconds between status checks
    num_polls = 0
    while True:
        response = requests.get(url + slug, headers=headers)
        job_status = response.json()['status']

        if job_status == 'successful':
            break  # ready to proceed to next step
        if job_status == 'failed':
            msg = (
                'Request failed. Please check the ECMWF website for details: '
                'https://cds.climate.copernicus.eu/requests?tab=all'
            )
            raise Exception(msg)

        # elapsed time is estimated from the number of polls so far
        num_polls += 1
        if num_polls * poll_interval > timeout:
            raise Exception(
                'Request timed out. Try increasing the timeout parameter or '
                'reducing the request size.'
            )

        # sleep for the same interval used in the timeout estimate
        time.sleep(poll_interval)

    # Step 3: get the download link for our requested dataset
    slug = "jobs/" + job_id + "/results"
    response = requests.get(url + slug, headers=headers)
    results_response = response.json()
    download_url = results_response['asset']['value']['href']

    # Step 4: finally, download our dataset. it's a zipfile of one CSV
    response = requests.get(download_url)
    with zipfile.ZipFile(BytesIO(response.content)) as archive:
        filename = archive.filelist[0].filename
        csvbuffer = StringIO(archive.read(filename).decode('utf-8'))
    df = pd.read_csv(csvbuffer)

    # and parse into the usual formats
    metadata = submission_response['metadata']  # include messages from ECMWF
    metadata['jobID'] = job_id
    if not df.empty:
        metadata['latitude'] = df['latitude'].values[0]
        metadata['longitude'] = df['longitude'].values[0]

    df.index = pd.to_datetime(df['valid_time']).dt.tz_localize('UTC')
    df = df.drop(columns=['valid_time', 'latitude', 'longitude'])

    if map_variables:
        # convert units and rename; columns without a registered converter
        # are left unchanged instead of raising KeyError
        for shortname in df.columns:
            converter = UNITS.get(shortname)
            if converter is not None:
                df[shortname] = converter(df[shortname])
        df = df.rename(columns=VARIABLE_MAP)

    return df, metadata

tests/conftest.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,19 @@ def nrel_api_key():
130130
reason='requires solaranywhere credentials')
131131

132132

133+
try:
    # Attempt to load ECMWF API key used for testing
    # pvlib.iotools.get_era5
    ecmwf_api_key = os.environ["ECMWF_API_KEY"]
    has_ecmwf_credentials = True
except KeyError:
    has_ecmwf_credentials = False

# Skip marker for tests that need a real ECMWF API key. It must be keyed on
# the ECMWF flag; keying it on the SolarAnywhere flag would skip (or run)
# the ERA5 tests based on unrelated credentials.
requires_ecmwf_credentials = pytest.mark.skipif(
    not has_ecmwf_credentials,
    reason='requires ECMWF credentials')
144+
145+
133146
try:
134147
import statsmodels # noqa: F401
135148
has_statsmodels = True

tests/iotools/test_ecmwf.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
"""
2+
tests for pvlib/iotools/ecmwf.py
3+
"""
4+
5+
import pandas as pd
6+
import pytest
7+
import pvlib
8+
import os
9+
from tests.conftest import RERUNS, RERUNS_DELAY, requires_ecmwf_credentials
10+
11+
12+
@pytest.fixture
def params():
    """Keyword arguments for a small single-day ``get_era5`` request."""
    api_key = os.environ["ECMWF_API_KEY"]

    return {
        'latitude': 40.01, 'longitude': -80.01,
        # ``end`` is documented as the last day of the requested period, so
        # a one-day request (matching the 24-hour ``expected`` fixture)
        # uses the same date for both bounds
        'start': '2020-06-01', 'end': '2020-06-01',
        'variables': ['ghi', 'temp_air'],
        'api_key': api_key,
    }
22+
23+
24+
@pytest.fixture
def expected():
    """Reference hourly frame for 2020-06-01 (UTC), pvlib column names."""
    hours = pd.date_range("2020-06-01 00:00", "2020-06-01 23:59", freq="h",
                          tz="UTC")
    hours.name = 'valid_time'
    columns = {
        'temp_air': [
            16.6, 15.2, 13.5, 11.2, 10.8, 9.1, 7.3, 6.8, 7.6, 7.4, 8.5,
            8.1, 9.8, 11.5, 14.1, 17.4, 18.3, 20., 20.7, 20.9, 21.5,
            21.6, 21., 20.7,
        ],
        'ghi': [
            153., 18.4, 0., 0., 0., 0., 0., 0., 0., 0., 0., 60., 229.5,
            427.8, 620.1, 785.5, 910.1, 984.2, 1005.9, 962.4, 844.1, 685.2,
            526.9, 331.4,
        ],
    }
    return pd.DataFrame(columns, index=hours)
37+
38+
39+
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5(params, expected):
    """Default call returns pvlib-named columns and grid-point metadata."""
    df, meta = pvlib.iotools.get_era5(**params)
    # reference values are rounded to one decimal place, so compare with an
    # absolute tolerance rather than the default near-exact comparison
    pd.testing.assert_frame_equal(df, expected, check_freq=False, atol=0.1)
    assert meta['longitude'] == -80.0
    assert meta['latitude'] == 40.0
    assert isinstance(meta['jobID'], str)
48+
49+
50+
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_map_variables(params, expected):
    """With map_variables=False, ERA5 column names and units are kept."""
    df, meta = pvlib.iotools.get_era5(**params, map_variables=False)
    expected = expected.rename(columns={'temp_air': 't2m', 'ghi': 'ssrd'})
    # get_era5 only converts units when map_variables=True, so apply the
    # same conversions here before comparing with the reference values
    df['t2m'] -= 273.15
    df['ssrd'] /= 3600
    # reference values are rounded to one decimal place
    pd.testing.assert_frame_equal(df, expected, check_freq=False, atol=0.1)
    assert meta['longitude'] == -80.0
    assert meta['latitude'] == 40.0
    assert isinstance(meta['jobID'], str)
60+
61+
62+
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_error(params):
    """Requesting an unknown variable surfaces the failed-request error."""
    bad_request = dict(params)
    bad_request['variables'] = ['nonexistent']
    with pytest.raises(
            Exception,
            match='Request failed. Please check the ECMWF website'):
        pvlib.iotools.get_era5(**bad_request)
70+
71+
72+
@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_timeout(params):
    """A very short timeout raises the timed-out error, not the failed one."""
    # get_era5 raises a distinct message when polling exceeds ``timeout``
    match = 'Request timed out. Try increasing the timeout parameter'
    with pytest.raises(Exception, match=match):
        df, meta = pvlib.iotools.get_era5(**params, timeout=1)

0 commit comments

Comments
 (0)