forked from pvlib/pvlib-python
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpsm3.py
More file actions
365 lines (325 loc) · 13.3 KB
/
psm3.py
File metadata and controls
365 lines (325 loc) · 13.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
"""
Get PSM3 TMY
see https://developer.nrel.gov/docs/solar/nsrdb/psm3_data_download/
"""
import io
import requests
import pandas as pd
from json import JSONDecodeError
from pvlib._deprecation import deprecated
from pvlib import tools
# NREL Developer Network base URL and the three PSM3 CSV download endpoints.
NSRDB_API_BASE = "https://developer.nrel.gov"
# hourly / 30-minute single-year endpoint (PSM v3.2.2)
PSM_URL = NSRDB_API_BASE + "/api/nsrdb/v2/solar/psm3-2-2-download.csv"
# typical-year endpoint (used for tmy/tgy/tdy requests)
TMY_URL = NSRDB_API_BASE + "/api/nsrdb/v2/solar/psm3-tmy-download.csv"
# sub-30-minute endpoint (used for 5- and 15-minute intervals)
PSM5MIN_URL = NSRDB_API_BASE + "/api/nsrdb/v2/solar/psm3-5min-download.csv"

# Default set of meteorological fields requested from the PSM3 API when the
# caller does not pass ``attributes`` explicitly.
ATTRIBUTES = (
    'air_temperature', 'dew_point', 'dhi', 'dni', 'ghi', 'surface_albedo',
    'surface_pressure', 'wind_direction', 'wind_speed')
# Default value sent for the API's full_name, affiliation and reason fields.
PVLIB_PYTHON = 'pvlib python'

# Dictionary mapping PSM3 response names to pvlib names
VARIABLE_MAP = {
    'GHI': 'ghi',
    'DHI': 'dhi',
    'DNI': 'dni',
    'Clearsky GHI': 'ghi_clear',
    'Clearsky DHI': 'dhi_clear',
    'Clearsky DNI': 'dni_clear',
    'Solar Zenith Angle': 'solar_zenith',
    'Temperature': 'temp_air',
    'Dew Point': 'temp_dew',
    'Relative Humidity': 'relative_humidity',
    'Pressure': 'pressure',
    'Wind Speed': 'wind_speed',
    'Wind Direction': 'wind_direction',
    'Surface Albedo': 'albedo',
    'Precipitable Water': 'precipitable_water',
}

# Dictionary mapping pvlib names to PSM3 request names
# Note, PSM3 uses different names for the same variables in the
# response and the request
REQUEST_VARIABLE_MAP = {
    'ghi': 'ghi',
    'dhi': 'dhi',
    'dni': 'dni',
    'ghi_clear': 'clearsky_ghi',
    'dhi_clear': 'clearsky_dhi',
    'dni_clear': 'clearsky_dni',
    'solar_zenith': 'solar_zenith_angle',
    'temp_air': 'air_temperature',
    'temp_dew': 'dew_point',
    'relative_humidity': 'relative_humidity',
    'pressure': 'surface_pressure',
    'wind_speed': 'wind_speed',
    'wind_direction': 'wind_direction',
    'albedo': 'surface_albedo',
    'precipitable_water': 'total_precipitable_water',
}
def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
             attributes=ATTRIBUTES, leap_day=True, full_name=PVLIB_PYTHON,
             affiliation=PVLIB_PYTHON, map_variables=True, url=None,
             timeout=30):
    """
    Retrieve NSRDB PSM3 timeseries weather data from the PSM3 API.

    The NSRDB is described in [1]_ and the PSM3 API endpoints in [2]_,
    [3]_, and [4]_.

    .. versionchanged:: 0.9.0
        The return values were swapped: the dataframe now comes first and
        the metadata dictionary second.

    .. versionchanged:: 0.10.0
        Hourly single-year requests now default to the v3.2.2 endpoint.
        The previous datasets remain reachable (for now) by passing the
        original endpoint via ``url``
        (``"https://developer.nrel.gov/api/nsrdb/v2/solar/psm3-download.csv"``).

    Parameters
    ----------
    latitude : float or int
        in decimal degrees, between -90 and 90, north is positive
    longitude : float or int
        in decimal degrees, between -180 and 180, east is positive
    api_key : str
        NREL Developer Network API key
    email : str
        used by the NREL API to communicate messages back to the user
        only if necessary
    names : str, default 'tmy'
        PSM3 API parameter specifying the year (e.g. ``2020``) or TMY
        variant (e.g. ``'tmy'`` or ``'tgy-2019'``) to download.  The
        allowed values update periodically, so consult the NSRDB
        references below for the current set of options.
    interval : int, {60, 5, 15, 30}
        interval size in minutes, must be 5, 15, 30 or 60.  Must be 60
        for typical year requests (i.e., tmy/tgy/tdy).
    attributes : list of str, optional
        meteorological fields to fetch.  If not specified, defaults to
        ``pvlib.iotools.psm3.ATTRIBUTES``.  See references [2]_, [3]_,
        and [4]_ for lists of available fields.  pvlib names (e.g. 'ghi'
        rather than 'GHI') are also accepted; see
        :const:`REQUEST_VARIABLE_MAP`.  Pass ``attributes=[]`` to
        retrieve every available field.
    leap_day : bool, default : True
        include leap day in the results.  Only used for single-year
        requests (i.e., it is ignored for tmy/tgy/tdy requests).
    full_name : str, default 'pvlib python'
        optional
    affiliation : str, default 'pvlib python'
        optional
    map_variables : bool, default True
        When true, renames columns of the Dataframe to pvlib variable
        names where applicable. See variable :const:`VARIABLE_MAP`.
    url : str, optional
        API endpoint URL.  If not specified, the endpoint is determined
        from the ``names`` and ``interval`` parameters.
    timeout : int, default 30
        time in seconds to wait for server response before timeout

    Returns
    -------
    data : pandas.DataFrame
        timeseries data from NREL PSM3
    metadata : dict
        metadata from NREL PSM3 about the record, see
        :func:`pvlib.iotools.read_psm3` for fields

    Raises
    ------
    requests.HTTPError
        if the request response status is not ok; the ``'errors'`` field
        from the JSON response, or any error message in the content, is
        raised as the exception message — for example when the `api_key`
        is rejected or the coordinates are not found in the NSRDB.

    Notes
    -----
    The required NREL developer key, `api_key`, is available for free by
    registering at the `NREL Developer Network
    <https://developer.nrel.gov/>`_.

    .. warning:: The "DEMO_KEY" `api_key` is severely rate limited and
       may result in rejected requests.

    .. warning:: PSM3 is limited to data found in the NSRDB; please
       consult the references below for locations with available data.
       Additionally, querying data with < 30-minute resolution uses a
       different API endpoint with fewer available fields (see [4]_).

    See Also
    --------
    pvlib.iotools.read_psm3

    References
    ----------
    .. [1] `NREL National Solar Radiation Database (NSRDB)
       <https://nsrdb.nrel.gov/>`_
    .. [2] `Physical Solar Model (PSM) v3.2.2
       <https://developer.nrel.gov/docs/solar/nsrdb/psm3-2-2-download/>`_
    .. [3] `Physical Solar Model (PSM) v3 TMY
       <https://developer.nrel.gov/docs/solar/nsrdb/psm3-tmy-download/>`_
    .. [4] `Physical Solar Model (PSM) v3 - Five Minute Temporal Resolution
       <https://developer.nrel.gov/docs/solar/nsrdb/psm3-5min-download/>`_
    """
    # The well-known-text (WKT) geometry notation is strict: a POINT is
    # longitude first, then latitude, four decimals each, separated by
    # exactly one space.
    lon_str = f'{longitude:9.4f}'.strip()
    lat_str = f'{latitude:8.4f}'.strip()
    # TODO: make format_WKT(object_type, *args) in tools.py

    # years may arrive as integers; the API expects a string
    names = str(names)

    # translate any pvlib variable names into the PSM3 request vocabulary
    requested = [REQUEST_VARIABLE_MAP.get(attr, attr) for attr in attributes]

    # required query-string parameters for the PSM3 request
    params = {
        'api_key': api_key,
        'full_name': full_name,
        'email': email,
        'affiliation': affiliation,
        'reason': PVLIB_PYTHON,
        'mailing_list': 'false',
        'wkt': 'POINT(%s %s)' % (lon_str, lat_str),
        'names': names,
        'attributes': ','.join(requested),
        'leap_day': str(leap_day).lower(),
        'utc': 'false',
        'interval': interval,
    }

    if url is None:
        # pick the endpoint matching the requested dataset and interval
        is_typical_year = any(tag in names for tag in ('tmy', 'tgy', 'tdy'))
        if is_typical_year:
            url = TMY_URL
        elif interval in (5, 15):
            url = PSM5MIN_URL
        else:
            url = PSM_URL

    response = requests.get(url, params=params, timeout=timeout)
    if not response.ok:
        # A rejected API key yields 403 Forbidden with a non-JSON body,
        # so fall back to the raw content when JSON decoding fails.
        try:
            errors = response.json()['errors']
        except JSONDecodeError:
            errors = response.content.decode('utf-8')
        raise requests.HTTPError(errors, response=response)

    # the CSV arrives as a UTF-8 bytestring; wrap it in a file-like
    # buffer so the parser can consume it
    fbuf = io.StringIO(response.content.decode('utf-8'))
    return read_psm3(fbuf, map_variables)
def read_psm3(filename, map_variables=True):
    """
    Read an NSRDB PSM3 weather file (formatted as SAM CSV).

    The NSRDB is described in [1]_ and the SAM CSV format in [2]_.

    .. versionchanged:: 0.9.0
        The return values were swapped: the dataframe now comes first and
        the metadata dictionary second.

    Parameters
    ----------
    filename: str, path-like, or buffer
        Filename or in-memory buffer of a file containing data to read.
    map_variables: bool, default True
        When true, renames columns of the Dataframe to pvlib variable
        names where applicable. See variable :const:`VARIABLE_MAP`.

    Returns
    -------
    data : pandas.DataFrame
        timeseries data from NREL PSM3
    metadata : dict
        metadata from NREL PSM3 about the record, see notes for fields

    Notes
    -----
    The return is a tuple with two items.  The first item is a dataframe
    with the PSM3 timeseries data.

    The second item is a dictionary with metadata from NREL PSM3 about
    the record, containing the following fields:

    * Source
    * Location ID
    * City
    * State
    * Country
    * Latitude
    * Longitude
    * Time Zone
    * Elevation
    * Local Time Zone
    * Clearsky DHI Units
    * Clearsky DNI Units
    * Clearsky GHI Units
    * Dew Point Units
    * DHI Units
    * DNI Units
    * GHI Units
    * Solar Zenith Angle Units
    * Temperature Units
    * Pressure Units
    * Relative Humidity Units
    * Precipitable Water Units
    * Wind Direction Units
    * Wind Speed Units
    * Cloud Type -15
    * Cloud Type 0
    * Cloud Type 1
    * Cloud Type 2
    * Cloud Type 3
    * Cloud Type 4
    * Cloud Type 5
    * Cloud Type 6
    * Cloud Type 7
    * Cloud Type 8
    * Cloud Type 9
    * Cloud Type 10
    * Cloud Type 11
    * Cloud Type 12
    * Fill Flag 0
    * Fill Flag 1
    * Fill Flag 2
    * Fill Flag 3
    * Fill Flag 4
    * Fill Flag 5
    * Surface Albedo Units
    * Version

    Examples
    --------
    >>> # Read a local PSM3 file:
    >>> df, metadata = iotools.read_psm3("data.csv")  # doctest: +SKIP

    >>> # Read a file object or an in-memory buffer:
    >>> with open(filename, 'r') as f:  # doctest: +SKIP
    ...     df, metadata = iotools.read_psm3(f)  # doctest: +SKIP

    See Also
    --------
    pvlib.iotools.get_psm3

    References
    ----------
    .. [1] `NREL National Solar Radiation Database (NSRDB)
       <https://nsrdb.nrel.gov/>`_
    .. [2] `Standard Time Series Data File Format
       <https://web.archive.org/web/20170207203107/https://sam.nrel.gov/sites/default/files/content/documents/pdf/wfcsv.pdf>`_
    """
    with tools._file_context_manager(filename) as fbuf:
        # The first two lines hold metadata: field names, then values.
        meta_keys = fbuf.readline().split(',')
        meta_vals = fbuf.readline().split(',')
        # The third line holds the data column headers.
        header = fbuf.readline().split(',')
        header[-1] = header[-1].strip()  # drop the trailing newline
        # Excel pads the wide header with blank columns when saving, so
        # discard any empty names before using them as usecols.
        header = [name for name in header if name != '']
        # Every column is float except the date vector and flag columns.
        dtypes = dict.fromkeys(header, float)
        for int_col in ('Year', 'Month', 'Day', 'Hour', 'Minute',
                        'Cloud Type', 'Fill Flag'):
            dtypes[int_col] = int
        data = pd.read_csv(
            fbuf, header=None, names=header, usecols=header, dtype=dtypes,
            delimiter=',', lineterminator='\n')  # skip carriage returns \r

    meta_keys[-1] = meta_keys[-1].strip()  # drop the trailing newline
    meta_vals[-1] = meta_vals[-1].strip()  # drop the trailing newline
    metadata = dict(zip(meta_keys, meta_vals))
    # the raw metadata values are all strings; coerce the numeric fields
    metadata['Local Time Zone'] = int(metadata['Local Time Zone'])
    metadata['Time Zone'] = int(metadata['Time Zone'])
    metadata['Latitude'] = float(metadata['Latitude'])
    metadata['Longitude'] = float(metadata['Longitude'])
    metadata['Elevation'] = int(metadata['Elevation'])

    # the first five data columns form a date vector; build the index
    timestamps = pd.to_datetime(
        data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
    # in USA all timezones are whole-hour integer offsets
    tz = 'Etc/GMT%+d' % -metadata['Time Zone']
    data.index = pd.DatetimeIndex(timestamps).tz_localize(tz)

    if map_variables:
        data = data.rename(columns=VARIABLE_MAP)
        metadata['latitude'] = metadata.pop('Latitude')
        metadata['longitude'] = metadata.pop('Longitude')
        metadata['altitude'] = metadata.pop('Elevation')
    return data, metadata
# Backwards-compatible alias: parse_psm3 was renamed to read_psm3 and now
# emits a deprecation warning (since pvlib 0.12.1) when called.
parse_psm3 = deprecated(since="0.12.1", name="parse_psm3",
                        alternative="read_psm3")(read_psm3)