forked from pvlib/pvlib-python
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmerra2.py
More file actions
154 lines (124 loc) · 4.61 KB
/
merra2.py
File metadata and controls
154 lines (124 loc) · 4.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import pandas as pd
import requests
from io import StringIO
# Translation table from MERRA-2 variable names to pvlib column names.
# Variables that are not listed here keep their MERRA-2 name.
VARIABLE_MAP = {
    # variables listed under the M2T1NXRAD dataset
    "SWGDN": "ghi",
    "SWGDNCLR": "ghi_clear",
    "ALBEDO": "albedo",
    # variables listed under the M2T1NXSLV dataset
    "T2M": "temp_air",
    "T2MDEW": "temp_dew",
    "PS": "pressure",
    # variables listed under the M2T1NXAER dataset
    "TOTEXTTAU": "aod550",
}
def get_merra2(latitude, longitude, start, end, username, password, dataset,
               variables, map_variables=True):
    """
    Retrieve MERRA-2 time-series irradiance and meteorological data from
    NASA's GESDISC data archive.

    MERRA-2 [1]_ offers modeled data for many atmospheric quantities at hourly
    resolution on a 0.5° x 0.625° global grid.

    Access must be granted to the GESDISC data archive before EarthData
    credentials will work. See [2]_ for instructions.

    Parameters
    ----------
    latitude : float
        In decimal degrees, north is positive (ISO 19115).
    longitude : float
        In decimal degrees, east is positive (ISO 19115).
    start : datetime like or str
        First timestamp of the requested period. If a timezone is not
        specified, UTC is assumed.
    end : datetime like or str
        Last timestamp of the requested period. If a timezone is not
        specified, UTC is assumed.
    username : str
        NASA EarthData username.
    password : str
        NASA EarthData password.
    dataset : str
        Dataset name (with version), e.g. "M2T1NXRAD.5.12.4".
    variables : str or list of str
        Variable name(s) to retrieve. A single name may be given as a
        plain string. See the documentation of the specific dataset you
        are accessing for options.
    map_variables : bool, default True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.

    Returns
    -------
    data : pd.DataFrame
        Time series data. The index corresponds to the middle of the interval.
    meta : dict
        Metadata with the keys ``'dataset'``, ``'station'``, ``'latitude'``,
        ``'longitude'`` and ``'units'``. ``'units'`` maps each returned
        (MERRA-2) variable name to the unit string reported by the service,
        or to ``None`` if the column carried no unit annotation.

    Raises
    ------
    ValueError
        If ``start`` and ``end`` are in different years, when converted to UTC.
    requests.HTTPError
        If the data request returns an HTTP error status.

    Notes
    -----
    The following datasets provide quantities useful for PV modeling:

    - M2T1NXRAD.5.12.4: SWGDN, SWGDNCLR, ALBEDO
    - M2T1NXSLV.5.12.4: T2M, U10M, V10M, T2MDEW, PS
    - M2T1NXAER.5.12.4: TOTEXTTAU

    Note that MERRA2 does not currently provide DNI or DHI.

    References
    ----------
    .. [1] https://gmao.gsfc.nasa.gov/gmao-products/merra-2/
    .. [2] https://disc.gsfc.nasa.gov/earthdata-login
    """
    # general API info here:
    # https://docs.unidata.ucar.edu/tds/5.0/userguide/netcdf_subset_service_ref.html  # noqa: E501

    def _to_utc_dt_notz(dt):
        # Normalize to a tz-naive timestamp expressed in UTC.
        dt = pd.to_datetime(dt)
        if dt.tzinfo is not None:
            # convert to utc, then drop tz so that isoformat() is clean
            dt = dt.tz_convert("UTC").tz_localize(None)
        return dt

    start = _to_utc_dt_notz(start)
    end = _to_utc_dt_notz(end)

    # The request URL embeds a single year, so one call cannot span
    # multiple years.
    if (year := start.year) != end.year:
        raise ValueError("start and end must be in the same year (in UTC)")

    # A bare string would otherwise be joined character by character
    # ("T2M" -> "T,2,M"), silently requesting the wrong variables.
    if isinstance(variables, str):
        variables = [variables]

    url = (
        "https://goldsmr4.gesdisc.eosdis.nasa.gov/thredds/ncss/grid/"
        f"MERRA2_aggregation/{dataset}/{dataset}_Aggregation_{year}.ncml"
    )

    parameters = {
        'var': ",".join(variables),
        'latitude': latitude,
        'longitude': longitude,
        'time_start': start.isoformat() + "Z",
        'time_end': end.isoformat() + "Z",
        'accept': 'csv',
    }

    auth = (username, password)

    with requests.Session() as session:
        session.auth = auth
        # Two-step request: the first call resolves the final URL, the second
        # re-issues the request against it with credentials passed explicitly.
        # NOTE(review): presumably needed because of the EarthData login
        # redirect -- confirm before simplifying.
        login = session.request('get', url, params=parameters)
        response = session.get(login.url, auth=auth, params=parameters)

    response.raise_for_status()

    content = response.content.decode('utf-8')
    buffer = StringIO(content)
    df = pd.read_csv(buffer)
    df.index = pd.to_datetime(df['time'])

    meta = {}
    meta['dataset'] = dataset
    meta['station'] = df['station'].values[0]
    meta['latitude'] = df['latitude[unit="degrees_north"]'].values[0]
    meta['longitude'] = df['longitude[unit="degrees_east"]'].values[0]

    # drop the non-data columns
    dropcols = ['time', 'station', 'latitude[unit="degrees_north"]',
                'longitude[unit="degrees_east"]']
    df = df.drop(columns=dropcols)

    # column names are like T2M[unit="K"] by default.  extract the unit
    # for the metadata, then rename col to just T2M
    units = {}
    rename = {}
    for col in df.columns:
        name, bracket, attrs = col.partition("[")
        quoted = attrs.split('"')
        # Tolerate columns without a [unit="..."] suffix instead of failing
        # with an opaque unpacking/index error.
        units[name] = quoted[1] if bracket and len(quoted) > 1 else None
        rename[col] = name

    meta['units'] = units
    df = df.rename(columns=rename)

    if map_variables:
        df = df.rename(columns=VARIABLE_MAP)

    return df, meta