forked from pvlib/pvlib-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbsrn.py
More file actions
459 lines (405 loc) · 21.2 KB
/
bsrn.py
File metadata and controls
459 lines (405 loc) · 21.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
"""Functions to read data from the Baseline Surface Radiation Network (BSRN).
.. codeauthor:: Adam R. Jensen <adam-r-j@hotmail.com>
"""
import pandas as pd
import gzip
import ftplib
import warnings
import io
import os
# Hostname of the BSRN FTP server hosting the station-to-archive files
BSRN_FTP_URL = "ftp.bsrn.awi.de"

# Fixed-width (start, stop) column specifications passed to pd.read_fwf for
# logical records (LR) 0100, 0300, and 0500 of a station-to-archive file.
BSRN_LR0100_COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27),
                         (27, 32), (32, 39), (39, 45), (45, 50), (50, 55),
                         (55, 64), (64, 70), (70, 75)]
BSRN_LR0300_COL_SPECS = [(1, 3), (4, 9), (10, 16), (16, 22), (22, 27),
                         (27, 31), (31, 38), (38, 44), (44, 49), (49, 54),
                         (54, 61), (61, 67), (67, 72), (72, 78)]
BSRN_LR0500_COL_SPECS = [(0, 3), (3, 8), (8, 14), (14, 20), (20, 26), (26, 32),
                         (32, 38), (38, 44), (44, 50), (50, 56), (56, 62),
                         (62, 68), (68, 74), (74, 80)]

# Column names assigned after parsing each LR. 'empty' marks placeholder
# fields that are dropped before the data is returned. LR0100 and LR0500
# records span two physical lines each (the parser pairs consecutive rows and
# unstacks them), which is why these lists hold twice as many names as there
# are column specs; LR0300 records are a single line.
BSRN_LR0100_COLUMNS = ['day', 'minute',
                       'ghi', 'ghi_std', 'ghi_min', 'ghi_max',
                       'dni', 'dni_std', 'dni_min', 'dni_max',
                       'empty', 'empty', 'empty', 'empty', 'empty',
                       'dhi', 'dhi_std', 'dhi_min', 'dhi_max',
                       'lwd', 'lwd_std', 'lwd_min', 'lwd_max',
                       'temp_air', 'relative_humidity', 'pressure']
BSRN_LR0300_COLUMNS = ['day', 'minute', 'gri', 'gri_std', 'gri_min', 'gri_max',
                       'lwu', 'lwu_std', 'lwu_min', 'lwu_max', 'net_radiation',
                       'net_radiation_std', 'net_radiation_min',
                       'net_radiation_max']
# NOTE(review): 'uvb_diffuse' and 'uvb_diffuse_std' appear twice below, which
# yields duplicate column labels in the parsed DataFrame — verify against the
# BSRN LR0500 record layout (GCOS-174) whether that is intentional.
BSRN_LR0500_COLUMNS = ['day', 'minute', 'uva_global', 'uva_global_std',
                       'uva_global_min', 'uva_global_max', 'uvb_direct',
                       'uvb_direct_std', 'uvb_direct_min', 'uvb_direct_max',
                       'empty', 'empty', 'empty', 'empty',
                       'uvb_global', 'uvb_global_std', 'uvb_global_min',
                       'uvb_global_max', 'uvb_diffuse', 'uvb_diffuse_std',
                       'uvb_diffuse', 'uvb_diffuse_std',
                       'uvb_diffuse_min', 'uvb_diffuse_max',
                       'uvb_reflected', 'uvb_reflected_std',
                       'uvb_reflected_min', 'uvb_reflected_max']

# Mapping from 4-digit LR identifier to its list of column names
BSRN_COLUMNS = {'0100': BSRN_LR0100_COLUMNS, '0300': BSRN_LR0300_COLUMNS,
                '0500': BSRN_LR0500_COLUMNS}
def _empty_dataframe_from_logical_records(logical_records):
    """Return an empty DataFrame whose columns correspond to the requested
    logical records, omitting the 'day'/'minute' fields (the first two names
    of each record) and the 'empty' placeholder columns."""
    names = [name
             for record in logical_records
             for name in BSRN_COLUMNS[record][2:]
             if name != 'empty']
    return pd.DataFrame(columns=names)
def get_bsrn(station, start, end, username, password,
             logical_records=('0100',), save_path=None):
    """
    Retrieve ground measured irradiance data from the BSRN FTP server.

    The BSRN (Baseline Surface Radiation Network) is a world wide network
    of high-quality solar radiation monitoring stations as described in [1]_.
    Data is retrieved from the BSRN FTP server [2]_.

    Data is returned for the entire months between and including start and end.

    Parameters
    ----------
    station: str
        3-letter BSRN station abbreviation
    start: datetime-like
        First day of the requested period
    end: datetime-like
        Last day of the requested period
    username: str
        username for accessing the BSRN FTP server
    password: str
        password for accessing the BSRN FTP server
    logical_records: list or tuple, default: ('0100',)
        List of the logical records (LR) to parse. Options include: '0100',
        '0300', and '0500'.
    save_path: str or path-like, optional
        If specified, a directory path of where to save files.

    Returns
    -------
    data: DataFrame
        timeseries data from the BSRN archive, see
        :func:`pvlib.iotools.read_bsrn` for fields. An empty DataFrame is
        returned if no data was found for the time period.
    metadata: dict
        metadata for the last available monthly file.

    Raises
    ------
    KeyError
        If the specified station does not exist on the FTP server.

    Warns
    -----
    UserWarning
        If one or more requested files are missing a UserWarning is returned
        with a list of the filenames missing. If no files match the specified
        station and timeframe a separate UserWarning is given.

    Notes
    -----
    The username and password for the BSRN FTP server can be obtained for free
    as described in the BSRN's Data Release Guidelines [3]_.

    Currently only parsing of logical records 0100, 0300 and 0500 is supported.
    Note not all stations measure LR0300 and LR0500. However, LR0100 is
    mandatory as it contains the basic irradiance and auxiliary measurements.
    See [4]_ for a description of the different logical records. Future updates
    may include parsing of additional data and metadata.

    Important
    ---------
    While data from the BSRN is generally of high-quality, measurement data
    should always be quality controlled before usage!

    Examples
    --------
    >>> # Retrieve two months irradiance data from the Cabauw BSRN station
    >>> data, metadata = pvlib.iotools.get_bsrn(  # doctest: +SKIP
    >>>     start=pd.Timestamp(2020,1,1), end=pd.Timestamp(2020,12,1),  # doctest: +SKIP
    >>>     station='cab', username='yourusername', password='yourpassword')  # doctest: +SKIP

    See Also
    --------
    pvlib.iotools.read_bsrn, pvlib.iotools.parse_bsrn

    References
    ----------
    .. [1] `World Radiation Monitoring Center - Baseline Surface Radiation
       Network (BSRN)
       <https://bsrn.awi.de/>`_
    .. [2] `BSRN Data Retrieval via FTP
       <https://bsrn.awi.de/data/data-retrieval-via-ftp/>`_
    .. [3] `BSRN Data Release Guidelines
       <https://bsrn.awi.de/data/conditions-of-data-release/>`_
    .. [4] `Update of the Technical Plan for BSRN Data Management, 2013,
       Global Climate Observing System (GCOS) GCOS-174.
       <https://bsrn.awi.de/fileadmin/user_upload/bsrn.awi.de/Publications/gcos-174.pdf>`_
    """  # noqa: E501
    # The FTP server uses lowercase station abbreviations
    station = station.lower()

    # Generate list of files to download based on start/end (SSSMMYY.dat.gz)
    filenames = pd.date_range(
        start, end.replace(day=1) + pd.DateOffset(months=1), freq='1M')\
        .strftime(f"{station}%m%y.dat.gz").tolist()

    # Create FTP connection
    with ftplib.FTP(BSRN_FTP_URL, username, password) as ftp:
        # Change to station sub-directory (checks that the station exists)
        try:
            ftp.cwd(f'/{station}')
        except ftplib.error_perm as e:
            raise KeyError('Station sub-directory does not exist. Specified '
                           'station is probably not a proper three letter '
                           'station abbreviation.') from e
        dfs = []  # Initialize list for monthly dataframes
        non_existing_files = []  # Initialize list of files that were not found
        for filename in filenames:
            try:
                bio = io.BytesIO()  # Initialize BytesIO object
                # Retrieve binary file from server and write to BytesIO object
                # (the RETR command must name the remote file being fetched)
                response = ftp.retrbinary(f'RETR {filename}', bio.write)
                # Check that transfer was successful
                if not response.startswith('226 Transfer complete'):
                    raise ftplib.Error(response)
                # Save file locally if save_path is specified
                if save_path is not None:
                    # Create local file
                    with open(os.path.join(save_path, filename), 'wb') as f:
                        f.write(bio.getbuffer())  # Write local file
                # Open gzip file and convert to StringIO
                bio.seek(0)  # reset buffer to start of file
                gzip_file = io.TextIOWrapper(gzip.GzipFile(fileobj=bio),
                                             encoding='latin1')
                dfi, metadata = parse_bsrn(gzip_file, logical_records)
                dfs.append(dfi)
            # FTP client raises an error if the file does not exist on server
            except ftplib.error_perm as e:
                if str(e) == '550 Failed to open file.':
                    non_existing_files.append(filename)
                else:
                    raise ftplib.error_perm(e)
        ftp.quit()  # Close and exit FTP connection
    # Raise user warnings
    if not dfs:  # If no files were found
        warnings.warn('No files were available for the specified timeframe.')
    elif non_existing_files:  # If only some files were missing
        warnings.warn(f'The following files were not found: {non_existing_files}')  # noqa: E501
    # Concatenate monthly dataframes to one dataframe
    if len(dfs):
        data = pd.concat(dfs, axis='rows')
    else:  # Return empty dataframe
        data = _empty_dataframe_from_logical_records(logical_records)
        metadata = {}
    # Return dataframe and metadata (metadata belongs to last available file)
    return data, metadata
def parse_bsrn(fbuf, logical_records=('0100',)):
    """
    Parse a file-like buffer of a BSRN station-to-archive file.

    Parameters
    ----------
    fbuf: file-like buffer
        Buffer of a BSRN station-to-archive data file. Must be seekable, as
        the buffer is rewound once per parsed logical record.
    logical_records: list or tuple, default: ('0100',)
        List of the logical records (LR) to parse. Options include: '0100',
        '0300', and '0500'.

    Returns
    -------
    data: DataFrame
        timeseries data from the BSRN archive, see
        :func:`pvlib.iotools.read_bsrn` for fields. An empty DataFrame is
        returned if the specified logical records were not found.
    metadata: dict
        Dictionary containing metadata (primarily from LR0004).

    See Also
    --------
    pvlib.iotools.read_bsrn, pvlib.iotools.get_bsrn
    """
    # Parse metadata
    fbuf.readline()  # first line should be *U0001, so read it and discard
    date_line = fbuf.readline()  # second line contains important metadata
    # The date line encodes station number (chars 0-2), month (3-5),
    # year (7-10), and the data version as its last whitespace-separated field
    start_date = pd.Timestamp(year=int(date_line[7:11]),
                              month=int(date_line[3:6]), day=1,
                              tz='UTC')  # BSRN timestamps are UTC
    metadata = {}  # Initialize dictionary containing metadata
    metadata['start date'] = start_date
    metadata['station identification number'] = int(date_line[:3])
    metadata['version of data'] = int(date_line.split()[-1])
    # Advance the buffer until the LR0004 header line is reached
    for line in fbuf:
        if line[2:6] == '0004':  # stop once LR0004 has been reached
            break
        elif line == '':
            raise ValueError('Mandatory record LR0004 not found.')
    # LR0004 fields are consumed strictly in order. readline(n) reads at most
    # n characters, so several fixed-width fields sharing one physical line
    # (e.g. surface/topography type) are read piecewise.
    metadata['date when station description changed'] = fbuf.readline().strip()
    metadata['surface type'] = int(fbuf.readline(3))
    metadata['topography type'] = int(fbuf.readline())
    metadata['address'] = fbuf.readline().strip()
    metadata['telephone no. of station'] = fbuf.readline(20).strip()
    metadata['FAX no. of station'] = fbuf.readline().strip()
    metadata['TCP/IP no. of station'] = fbuf.readline(15).strip()
    metadata['e-mail address of station'] = fbuf.readline().strip()
    metadata['latitude_bsrn'] = float(fbuf.readline(8))  # BSRN convention
    metadata['latitude'] = metadata['latitude_bsrn'] - 90  # ISO 19115
    metadata['longitude_bsrn'] = float(fbuf.readline(8))  # BSRN convention
    metadata['longitude'] = metadata['longitude_bsrn'] - 180  # ISO 19115
    metadata['altitude'] = int(fbuf.readline(5))
    metadata['identification of "SYNOP" station'] = fbuf.readline().strip()
    metadata['date when horizon changed'] = fbuf.readline().strip()
    # Parse last section of LR0004 containing the horizon elevation data,
    # which continues until the next record marker ('*') or end of file
    horizon = []  # list for raw horizon elevation data
    while True:
        line = fbuf.readline()
        if ('*' in line) | (line == ''):
            break
        else:
            horizon += [int(i) for i in line.split()]
    # Values alternate azimuth, elevation; -1 entries are dropped as missing
    horizon = pd.Series(horizon[1::2], horizon[::2], name='horizon_elevation',
                        dtype=int).drop(-1, errors='ignore').sort_index()
    horizon.index.name = 'azimuth'
    metadata['horizon'] = horizon
    # Read file and store the starting line number and number of lines for
    # each logical record (LR)
    fbuf.seek(0)  # reset buffer to start of file
    lr_startrow = {}  # Dictionary of starting line number for each LR
    lr_nrows = {}  # Dictionary of number of lines for each LR
    for num, line in enumerate(fbuf):
        if line.startswith('*'):  # Find start of all logical records
            # Each new LR header closes the previous LR; 'lr' deliberately
            # refers to the LR found in the previous iteration (hence the
            # noqa for the first-iteration "undefined name" warning)
            if len(lr_startrow) >= 1:
                lr_nrows[lr] = num - lr_startrow[lr] - 1  # noqa: F821
            lr = line[2:6]  # string of 4 digit LR number
            lr_startrow[lr] = num
    # The final LR extends to the end of the file ('num' is the last index)
    lr_nrows[lr] = num - lr_startrow[lr]
    # Reject unsupported logical records before any parsing work
    for lr in logical_records:
        if lr not in ['0100', '0300', '0500']:
            raise ValueError(f"Logical record {lr} not in "
                             "['0100', '0300','0500'].")
    dfs = []  # Initialize empty list for dataframes
    # Parse LR0100 - basic measurements including GHI, DNI, DHI and temperature
    if ('0100' in lr_startrow.keys()) & ('0100' in logical_records):
        fbuf.seek(0)  # reset buffer to start of file
        LR_0100 = pd.read_fwf(fbuf, skiprows=lr_startrow['0100'] + 1,
                              nrows=lr_nrows['0100'], header=None,
                              colspecs=BSRN_LR0100_COL_SPECS,
                              na_values=[-999.0, -99.9])
        # Create multi-index and unstack, resulting in 1 col for each variable
        # (each 1-minute record spans two physical lines, paired by index//2)
        LR_0100 = LR_0100.set_index([LR_0100.index // 2, LR_0100.index % 2])
        LR_0100 = LR_0100.unstack(level=1).swaplevel(i=0, j=1, axis='columns')
        # Sort columns to match original order and assign column names
        LR_0100 = LR_0100.reindex(sorted(LR_0100.columns), axis='columns')
        LR_0100.columns = BSRN_LR0100_COLUMNS
        # Set datetime index from day-of-month and minute-of-day columns
        LR_0100.index = (start_date+pd.to_timedelta(LR_0100['day']-1, unit='d')
                         + pd.to_timedelta(LR_0100['minute'], unit='T'))
        # Drop empty, minute, and day columns
        LR_0100 = LR_0100.drop(columns=['empty', 'day', 'minute'])
        dfs.append(LR_0100)
    # Parse LR0300 - other time series data, including upward and net radiation
    if ('0300' in lr_startrow.keys()) & ('0300' in logical_records):
        fbuf.seek(0)  # reset buffer to start of file
        # LR0300 records are a single physical line, so no unstacking needed
        LR_0300 = pd.read_fwf(fbuf, skiprows=lr_startrow['0300']+1,
                              nrows=lr_nrows['0300'], header=None,
                              na_values=[-999.0, -99.9],
                              colspecs=BSRN_LR0300_COL_SPECS,
                              names=BSRN_LR0300_COLUMNS)
        LR_0300.index = (start_date+pd.to_timedelta(LR_0300['day']-1, unit='d')
                         + pd.to_timedelta(LR_0300['minute'], unit='T'))
        LR_0300 = LR_0300.drop(columns=['day', 'minute']).astype(float)
        dfs.append(LR_0300)
    # Parse LR0500 - UV measurements
    if ('0500' in lr_startrow.keys()) & ('0500' in logical_records):
        fbuf.seek(0)  # reset buffer to start of file
        LR_0500 = pd.read_fwf(fbuf, skiprows=lr_startrow['0500']+1,
                              nrows=lr_nrows['0500'], na_values=[-99.9],
                              header=None, colspecs=BSRN_LR0500_COL_SPECS)
        # Create multi-index and unstack, resulting in 1 col for each variable
        # (same two-lines-per-record layout as LR0100)
        LR_0500 = LR_0500.set_index([LR_0500.index // 2, LR_0500.index % 2])
        LR_0500 = LR_0500.unstack(level=1).swaplevel(i=0, j=1, axis='columns')
        # Sort columns to match original order and assign column names
        LR_0500 = LR_0500.reindex(sorted(LR_0500.columns), axis='columns')
        LR_0500.columns = BSRN_LR0500_COLUMNS
        LR_0500.index = (start_date+pd.to_timedelta(LR_0500['day']-1, unit='d')
                         + pd.to_timedelta(LR_0500['minute'], unit='T'))
        LR_0500 = LR_0500.drop(columns=['empty', 'day', 'minute'])
        dfs.append(LR_0500)
    if len(dfs):
        data = pd.concat(dfs, axis='columns')
    else:
        # None of the requested LRs were present in the file
        data = _empty_dataframe_from_logical_records(logical_records)
        metadata = {}
    return data, metadata
def read_bsrn(filename, logical_records=('0100',)):
    """
    Read a BSRN station-to-archive file into a DataFrame.

    The BSRN (Baseline Surface Radiation Network) is a world wide network
    of high-quality solar radiation monitoring stations as described in [1]_.
    The function is able to parse logical records (LR) 0100, 0300, and 0500.
    LR0100 contains the basic measurements, which include global, diffuse, and
    direct irradiance, as well as downwelling long-wave radiation [2]_. Future
    updates may include parsing of additional data and metadata.

    BSRN files are freely available and can be accessed via FTP [3]_. The
    username and password for the BSRN FTP server can be obtained for free as
    described in the BSRN's Data Release Guidelines [3]_.

    Parameters
    ----------
    filename: str or path-like
        Name or path of a BSRN station-to-archive data file
    logical_records: list or tuple, default: ('0100',)
        List of the logical records (LR) to parse. Options include: '0100',
        '0300', and '0500'.

    Returns
    -------
    data: DataFrame
        A DataFrame with the columns as described below. For a more extensive
        description of the variables, consult [2]_. An empty DataFrame is
        returned if the specified logical records were not found.
    metadata: dict
        Dictionary containing metadata (primarily from LR0004).

    Notes
    -----
    The data DataFrame for LR0100 includes the following fields:

    ======================= ====== ==========================================
    Key                     Format Description
    ======================= ====== ==========================================
    **Logical record 0100**
    ---------------------------------------------------------------------------
    ghi†                    float  Mean global horizontal irradiance [W/m^2]
    dni†                    float  Mean direct normal irradiance [W/m^2]
    dhi†                    float  Mean diffuse horizontal irradiance [W/m^2]
    lwd†                    float  Mean. downward long-wave radiation [W/m^2]
    temp_air                float  Air temperature [°C]
    relative_humidity       float  Relative humidity [%]
    pressure                float  Atmospheric pressure [hPa]
    ----------------------- ------ ------------------------------------------
    **Logical record 0300**
    ---------------------------------------------------------------------------
    gri†                    float  Mean ground-reflected irradiance [W/m^2]
    lwu†                    float  Mean long-wave upwelling irradiance [W/m^2]
    net_radiation†          float  Mean net radiation (net radiometer) [W/m^2]
    ----------------------- ------ ------------------------------------------
    **Logical record 0500**
    ---------------------------------------------------------------------------
    uva_global†             float  Mean UV-A global irradiance [W/m^2]
    uvb_direct†             float  Mean UV-B direct irradiance [W/m^2]
    uvb_global†             float  Mean UV-B global irradiance [W/m^2]
    uvb_diffuse†            float  Mean UV-B diffuse irradiance [W/m^2]
    uvb_reflected†          float  Mean UV-B reflected irradiance [W/m^2]
    ======================= ====== ==========================================

    † Marked variables have corresponding columns for the standard deviation
    (_std), minimum (_min), and maximum (_max) calculated from the 60 samples
    that are averaged into each 1-minute measurement.

    Hint
    ----
    According to [2]_ "All time labels in the station-to-archive files denote
    the start of a time interval." This corresponds to left bin edge labeling.

    See Also
    --------
    pvlib.iotools.parse_bsrn, pvlib.iotools.get_bsrn

    References
    ----------
    .. [1] `World Radiation Monitoring Center - Baseline Surface Radiation
       Network (BSRN)
       <https://bsrn.awi.de/>`_
    .. [2] `Update of the Technical Plan for BSRN Data Management, 2013,
       Global Climate Observing System (GCOS) GCOS-174.
       <https://bsrn.awi.de/fileadmin/user_upload/bsrn.awi.de/Publications/gcos-174.pdf>`_
    .. [3] `BSRN Data Retrieval via FTP
       <https://bsrn.awi.de/data/data-retrieval-via-ftp/>`_
    .. [4] `BSRN Data Release Guidelines
       <https://bsrn.awi.de/data/conditions-of-data-release/>`_
    """  # noqa: E501
    if str(filename).endswith('.gz'):  # check if file is a gzipped (.gz) file
        # gzip must be opened in text mode so parse_bsrn receives str lines
        open_func, mode = gzip.open, 'rt'
    else:
        open_func, mode = open, 'r'
    with open_func(filename, mode) as f:
        content = parse_bsrn(f, logical_records)
    return content