forked from pvlib/pvlib-python
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpsm3.py
More file actions
365 lines (325 loc) · 13.3 KB
/
psm3.py
File metadata and controls
365 lines (325 loc) · 13.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
"""
Get PSM3 TMY
see https://developer.nrel.gov/docs/solar/nsrdb/psm3_data_download/
"""
import io
import requests
import pandas as pd
from json import JSONDecodeError
from pvlib._deprecation import deprecated
from pvlib import tools
# NREL Developer Network base URL and the three PSM3 CSV download endpoints.
NSRDB_API_BASE = "https://developer.nrel.gov"
# hourly / 30-minute single-year endpoint (PSM v3.2.2)
PSM_URL = NSRDB_API_BASE + "/api/nsrdb/v2/solar/psm3-2-2-download.csv"
# typical-year endpoint (used for tmy/tgy/tdy requests)
TMY_URL = NSRDB_API_BASE + "/api/nsrdb/v2/solar/psm3-tmy-download.csv"
# sub-30-minute endpoint (used for 5- and 15-minute intervals)
PSM5MIN_URL = NSRDB_API_BASE + "/api/nsrdb/v2/solar/psm3-5min-download.csv"

# Default set of meteorological fields requested from the PSM3 API when the
# caller does not pass ``attributes`` explicitly.
ATTRIBUTES = (
    'air_temperature', 'dew_point', 'dhi', 'dni', 'ghi', 'surface_albedo',
    'surface_pressure', 'wind_direction', 'wind_speed')
# Default value sent for the API's full_name, affiliation and reason fields.
PVLIB_PYTHON = 'pvlib python'

# Dictionary mapping PSM3 response names to pvlib names
VARIABLE_MAP = {
    'GHI': 'ghi',
    'DHI': 'dhi',
    'DNI': 'dni',
    'Clearsky GHI': 'ghi_clear',
    'Clearsky DHI': 'dhi_clear',
    'Clearsky DNI': 'dni_clear',
    'Solar Zenith Angle': 'solar_zenith',
    'Temperature': 'temp_air',
    'Dew Point': 'temp_dew',
    'Relative Humidity': 'relative_humidity',
    'Pressure': 'pressure',
    'Wind Speed': 'wind_speed',
    'Wind Direction': 'wind_direction',
    'Surface Albedo': 'albedo',
    'Precipitable Water': 'precipitable_water',
}

# Dictionary mapping pvlib names to PSM3 request names
# Note, PSM3 uses different names for the same variables in the
# response and the request
REQUEST_VARIABLE_MAP = {
    'ghi': 'ghi',
    'dhi': 'dhi',
    'dni': 'dni',
    'ghi_clear': 'clearsky_ghi',
    'dhi_clear': 'clearsky_dhi',
    'dni_clear': 'clearsky_dni',
    'solar_zenith': 'solar_zenith_angle',
    'temp_air': 'air_temperature',
    'temp_dew': 'dew_point',
    'relative_humidity': 'relative_humidity',
    'pressure': 'surface_pressure',
    'wind_speed': 'wind_speed',
    'wind_direction': 'wind_direction',
    'albedo': 'surface_albedo',
    'precipitable_water': 'total_precipitable_water',
}
def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
             attributes=ATTRIBUTES, leap_day=True, full_name=PVLIB_PYTHON,
             affiliation=PVLIB_PYTHON, map_variables=True, url=None,
             timeout=30):
    """
    Retrieve NSRDB PSM3 timeseries weather data from the PSM3 API.

    The NSRDB is described in [1]_ and the PSM3 API endpoints in [2]_,
    [3]_, and [4]_.

    .. versionchanged:: 0.9.0
        The return values were swapped: the dataframe now comes first and
        the metadata dictionary second.

    .. versionchanged:: 0.10.0
        Hourly single-year requests now default to the v3.2.2 endpoint.
        The previous datasets remain reachable (for now) by passing the
        original endpoint via ``url``
        (``"https://developer.nrel.gov/api/nsrdb/v2/solar/psm3-download.csv"``).

    Parameters
    ----------
    latitude : float or int
        in decimal degrees, between -90 and 90, north is positive
    longitude : float or int
        in decimal degrees, between -180 and 180, east is positive
    api_key : str
        NREL Developer Network API key
    email : str
        used by the NREL API to communicate messages back to the user
        only if necessary
    names : str, default 'tmy'
        PSM3 API parameter specifying the year (e.g. ``2020``) or TMY
        variant (e.g. ``'tmy'`` or ``'tgy-2019'``) to download.  The
        allowed values update periodically, so consult the NSRDB
        references below for the current set of options.
    interval : int, {60, 5, 15, 30}
        interval size in minutes, must be 5, 15, 30 or 60.  Must be 60
        for typical year requests (i.e., tmy/tgy/tdy).
    attributes : list of str, optional
        meteorological fields to fetch.  If not specified, defaults to
        ``pvlib.iotools.psm3.ATTRIBUTES``.  See references [2]_, [3]_,
        and [4]_ for lists of available fields.  pvlib names (e.g. 'ghi'
        rather than 'GHI') are also accepted; see
        :const:`REQUEST_VARIABLE_MAP`.  Pass ``attributes=[]`` to
        retrieve every available field.
    leap_day : bool, default : True
        include leap day in the results.  Only used for single-year
        requests (i.e., it is ignored for tmy/tgy/tdy requests).
    full_name : str, default 'pvlib python'
        optional
    affiliation : str, default 'pvlib python'
        optional
    map_variables : bool, default True
        When true, renames columns of the Dataframe to pvlib variable
        names where applicable. See variable :const:`VARIABLE_MAP`.
    url : str, optional
        API endpoint URL.  If not specified, the endpoint is determined
        from the ``names`` and ``interval`` parameters.
    timeout : int, default 30
        time in seconds to wait for server response before timeout

    Returns
    -------
    data : pandas.DataFrame
        timeseries data from NREL PSM3
    metadata : dict
        metadata from NREL PSM3 about the record, see
        :func:`pvlib.iotools.read_psm3` for fields

    Raises
    ------
    requests.HTTPError
        if the request response status is not ok; the ``'errors'`` field
        from the JSON response, or any error message in the content, is
        raised as the exception message — for example when the `api_key`
        is rejected or the coordinates are not found in the NSRDB.

    Notes
    -----
    The required NREL developer key, `api_key`, is available for free by
    registering at the `NREL Developer Network
    <https://developer.nrel.gov/>`_.

    .. warning:: The "DEMO_KEY" `api_key` is severely rate limited and
       may result in rejected requests.

    .. warning:: PSM3 is limited to data found in the NSRDB; please
       consult the references below for locations with available data.
       Additionally, querying data with < 30-minute resolution uses a
       different API endpoint with fewer available fields (see [4]_).

    See Also
    --------
    pvlib.iotools.read_psm3

    References
    ----------
    .. [1] `NREL National Solar Radiation Database (NSRDB)
       <https://nsrdb.nrel.gov/>`_
    .. [2] `Physical Solar Model (PSM) v3.2.2
       <https://developer.nrel.gov/docs/solar/nsrdb/psm3-2-2-download/>`_
    .. [3] `Physical Solar Model (PSM) v3 TMY
       <https://developer.nrel.gov/docs/solar/nsrdb/psm3-tmy-download/>`_
    .. [4] `Physical Solar Model (PSM) v3 - Five Minute Temporal Resolution
       <https://developer.nrel.gov/docs/solar/nsrdb/psm3-5min-download/>`_
    """
    # The well-known-text (WKT) geometry notation is strict: a POINT is
    # longitude first, then latitude, four decimals each, separated by
    # exactly one space.
    lon_str = f'{longitude:9.4f}'.strip()
    lat_str = f'{latitude:8.4f}'.strip()
    # TODO: make format_WKT(object_type, *args) in tools.py

    # years may arrive as integers; the API expects a string
    names = str(names)

    # translate any pvlib variable names into the PSM3 request vocabulary
    requested = [REQUEST_VARIABLE_MAP.get(attr, attr) for attr in attributes]

    # required query-string parameters for the PSM3 request
    params = {
        'api_key': api_key,
        'full_name': full_name,
        'email': email,
        'affiliation': affiliation,
        'reason': PVLIB_PYTHON,
        'mailing_list': 'false',
        'wkt': 'POINT(%s %s)' % (lon_str, lat_str),
        'names': names,
        'attributes': ','.join(requested),
        'leap_day': str(leap_day).lower(),
        'utc': 'false',
        'interval': interval,
    }

    if url is None:
        # pick the endpoint matching the requested dataset and interval
        is_typical_year = any(tag in names for tag in ('tmy', 'tgy', 'tdy'))
        if is_typical_year:
            url = TMY_URL
        elif interval in (5, 15):
            url = PSM5MIN_URL
        else:
            url = PSM_URL

    response = requests.get(url, params=params, timeout=timeout)
    if not response.ok:
        # A rejected API key yields 403 Forbidden with a non-JSON body,
        # so fall back to the raw content when JSON decoding fails.
        try:
            errors = response.json()['errors']
        except JSONDecodeError:
            errors = response.content.decode('utf-8')
        raise requests.HTTPError(errors, response=response)

    # the CSV arrives as a UTF-8 bytestring; wrap it in a file-like
    # buffer so the parser can consume it
    fbuf = io.StringIO(response.content.decode('utf-8'))
    return read_psm3(fbuf, map_variables)
def read_psm3(filename, map_variables=True):
    """
    Read an NSRDB PSM3 weather file (formatted as SAM CSV).

    The NSRDB is described in [1]_ and the SAM CSV format in [2]_.

    .. versionchanged:: 0.9.0
        The return values were swapped: the dataframe now comes first and
        the metadata dictionary second.

    Parameters
    ----------
    filename: str, path-like, or buffer
        Filename or in-memory buffer of a file containing data to read.
    map_variables: bool, default True
        When true, renames columns of the Dataframe to pvlib variable
        names where applicable. See variable :const:`VARIABLE_MAP`.

    Returns
    -------
    data : pandas.DataFrame
        timeseries data from NREL PSM3
    metadata : dict
        metadata from NREL PSM3 about the record, see notes for fields

    Notes
    -----
    The return is a tuple with two items.  The first item is a dataframe
    with the PSM3 timeseries data.

    The second item is a dictionary with metadata from NREL PSM3 about
    the record, containing the following fields:

    * Source
    * Location ID
    * City
    * State
    * Country
    * Latitude
    * Longitude
    * Time Zone
    * Elevation
    * Local Time Zone
    * Clearsky DHI Units
    * Clearsky DNI Units
    * Clearsky GHI Units
    * Dew Point Units
    * DHI Units
    * DNI Units
    * GHI Units
    * Solar Zenith Angle Units
    * Temperature Units
    * Pressure Units
    * Relative Humidity Units
    * Precipitable Water Units
    * Wind Direction Units
    * Wind Speed Units
    * Cloud Type -15
    * Cloud Type 0
    * Cloud Type 1
    * Cloud Type 2
    * Cloud Type 3
    * Cloud Type 4
    * Cloud Type 5
    * Cloud Type 6
    * Cloud Type 7
    * Cloud Type 8
    * Cloud Type 9
    * Cloud Type 10
    * Cloud Type 11
    * Cloud Type 12
    * Fill Flag 0
    * Fill Flag 1
    * Fill Flag 2
    * Fill Flag 3
    * Fill Flag 4
    * Fill Flag 5
    * Surface Albedo Units
    * Version

    Examples
    --------
    >>> # Read a local PSM3 file:
    >>> df, metadata = iotools.read_psm3("data.csv")  # doctest: +SKIP

    >>> # Read a file object or an in-memory buffer:
    >>> with open(filename, 'r') as f:  # doctest: +SKIP
    ...     df, metadata = iotools.read_psm3(f)  # doctest: +SKIP

    See Also
    --------
    pvlib.iotools.get_psm3

    References
    ----------
    .. [1] `NREL National Solar Radiation Database (NSRDB)
       <https://nsrdb.nrel.gov/>`_
    .. [2] `Standard Time Series Data File Format
       <https://web.archive.org/web/20170207203107/https://sam.nrel.gov/sites/default/files/content/documents/pdf/wfcsv.pdf>`_
    """
    with tools._file_context_manager(filename) as fbuf:
        # The first two lines hold metadata: field names, then values.
        meta_keys = fbuf.readline().split(',')
        meta_vals = fbuf.readline().split(',')
        # The third line holds the data column headers.
        header = fbuf.readline().split(',')
        header[-1] = header[-1].strip()  # drop the trailing newline
        # Excel pads the wide header with blank columns when saving, so
        # discard any empty names before using them as usecols.
        header = [name for name in header if name != '']
        # Every column is float except the date vector and flag columns.
        dtypes = dict.fromkeys(header, float)
        for int_col in ('Year', 'Month', 'Day', 'Hour', 'Minute',
                        'Cloud Type', 'Fill Flag'):
            dtypes[int_col] = int
        data = pd.read_csv(
            fbuf, header=None, names=header, usecols=header, dtype=dtypes,
            delimiter=',', lineterminator='\n')  # skip carriage returns \r

    meta_keys[-1] = meta_keys[-1].strip()  # drop the trailing newline
    meta_vals[-1] = meta_vals[-1].strip()  # drop the trailing newline
    metadata = dict(zip(meta_keys, meta_vals))
    # the raw metadata values are all strings; coerce the numeric fields
    metadata['Local Time Zone'] = int(metadata['Local Time Zone'])
    metadata['Time Zone'] = int(metadata['Time Zone'])
    metadata['Latitude'] = float(metadata['Latitude'])
    metadata['Longitude'] = float(metadata['Longitude'])
    metadata['Elevation'] = int(metadata['Elevation'])

    # the first five data columns form a date vector; build the index
    timestamps = pd.to_datetime(
        data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
    # in USA all timezones are whole-hour integer offsets
    tz = 'Etc/GMT%+d' % -metadata['Time Zone']
    data.index = pd.DatetimeIndex(timestamps).tz_localize(tz)

    if map_variables:
        data = data.rename(columns=VARIABLE_MAP)
        metadata['latitude'] = metadata.pop('Latitude')
        metadata['longitude'] = metadata.pop('Longitude')
        metadata['altitude'] = metadata.pop('Elevation')
    return data, metadata
# Backwards-compatible alias: parse_psm3 was renamed to read_psm3 and now
# emits a deprecation warning (since pvlib 0.12.1) when called.
parse_psm3 = deprecated(since="0.12.1", name="parse_psm3",
                        alternative="read_psm3")(read_psm3)