-
Notifications
You must be signed in to change notification settings - Fork 0
/
nwis.py
180 lines (155 loc) · 5.53 KB
/
nwis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
from __future__ import division, print_function
import datetime
import json
import numpy as np
import pandas as pd
import requests
def nwis_json(
site="01646500",
parm="00065",
start=None,
end=None,
period=None,
freq="iv",
xarray=False,
):
"""Obtain NWIS data via JSON.
Parameters
----------
site : str, optional
NWIS site code. Default '01646500'
parm : str, optional
Parameter code. Default '00065'
e.g. '00065' for water level, '00060' for discharge.
freq : str, optional
Data frequency. Valid values are:
- 'iv' for unit-value data (default)
- 'dv' for daily average data
start : str, optional
end : str, optional
Start and end dates in ISO-8601 format (e.g. '2016-07-01').
If specifying start and end, do not specify period.
period : str, optional
Duration following ISO-8601 duration format, e.g. 'P1D' for one day
Default one day.
If specifying period, do not specify start and end.
xarray : bool, optional
If True, return an xarray Dataset instead of pandas DataFrame
Returns
-------
pandas.DataFrame
A Pandas DataFrame of the returned data with the following columns:
- 'time': time in UTC
- 'sitename': site name
- 'sitecode': site code
- 'val': value
- 'unit': unit
- 'variableName': variable name
- 'timelocal': local time
- 'latitude': site latitude
- 'longitude': site longitude
- 'srs': latitude and longitude spatial reference system
If xarray is True, return an xarray Dataset instead
Warnings
--------
This will fail on Windows systems when trying to retrieve data from before
1970. See https://bugs.python.org/issue36759 for more details.
Notes
-----
More info about the URL format is at https://waterservices.usgs.gov
[email protected] 2016-07
"""
if start is not None and (pd.Timestamp(start) < pd.Timestamp("1970-01-01")):
import warnings
warnings.warn(
"Requesting data from before 1970 on Windows systems will fail.",
RuntimeWarning,
)
url = (
"http://waterservices.usgs.gov/nwis/"
+ freq
+ "/?format=json&sites="
+ str(site)
+ "¶meterCd="
+ str(parm)
)
if period is None and start is None and end is None:
period = "P1D"
if period is None:
url = url + "&startDT=" + start + "&endDt=" + end
else:
url = url + "&period=" + period
try:
payload = requests.get(url).json()
except json.JSONDecodeError:
raise ValueError(
f"Error decoding JSON. For more details check the following URL in a browser: <{url}>"
)
try:
pvt = payload["value"]["timeSeries"][0]
except IndexError:
raise IndexError(
f"Data may not be available during the specified time period. For more details check the following URL in a browser: <{url}>"
)
dss = []
for n in range(len(pvt["values"])):
dss.append(get_single_value(pvt, n, freq, xarray))
# if only a single value, return it not as a list
# otherwise return as a list
if len(dss) == 1:
dss = dss[0]
return dss
def get_single_value(pvt, n, freq, xarray):
v = pvt["values"][n]["value"]
nwis = {}
nwis["timelocal"] = pd.to_datetime([v[i]["dateTime"] for i in range(len(v))])
# Convert local time to UTC if unit values
if freq == "iv":
nwis["time"] = np.array(
[x.astimezone(datetime.timezone.utc) for x in nwis["timelocal"]]
)
elif freq == "dv":
nwis["time"] = nwis["timelocal"] # keep naive date
nwis["sitename"] = pvt["sourceInfo"]["siteName"]
nwis["sitecode"] = pvt["sourceInfo"]["siteCode"][0]["value"]
nwis["latitude"] = pvt["sourceInfo"]["geoLocation"]["geogLocation"]["latitude"]
nwis["longitude"] = pvt["sourceInfo"]["geoLocation"]["geogLocation"]["longitude"]
nwis["srs"] = pvt["sourceInfo"]["geoLocation"]["geogLocation"]["srs"]
nwis["unit"] = pvt["variable"]["unit"]["unitCode"]
nwis["variableName"] = pvt["variable"]["variableName"]
nwis["methodDescription"] = pvt["values"][n]["method"][0]["methodDescription"]
nwis["val"] = np.array([float(v[i]["value"]) for i in range(len(v))])
nwis["val"][nwis["val"] == pvt["variable"]["noDataValue"]] = np.nan
# There can be times when certain values for a variable are empty, but others have data.
# For example, this is sometimes the case with turbidity.
if not len(nwis["val"]):
return None
df = pd.DataFrame(
nwis,
columns=[
"time",
"sitename",
"sitecode",
"val",
"unit",
"variableName",
"methodDescription",
"timelocal",
"latitude",
"longitude",
"srs",
],
).set_index("time")
if xarray:
ds = df.to_xarray()
ds["time"] = pd.DatetimeIndex(ds["time"].values)
ds["val"].attrs["units"] = ds["unit"].values[0]
ds["val"].attrs["variableName"] = ds["variableName"].values[0]
ds["val"].attrs["methodDescription"] = ds["methodDescription"].values[0]
ds = ds.drop_vars(["unit", "variableName", "methodDescription", "timelocal"])
for k in ["sitename", "latitude", "longitude", "sitecode", "srs"]:
ds.attrs[k] = ds[k].values[0]
ds = ds.drop_vars(k)
return ds
else:
return df