import numpy as np
import pandas as pd
from ulmo.usgs import nwis
%matplotlib inline
# Barnegat site
sta_id = '394540074062901'
# Fetch this site's data and store it in the local cache. The first run
# takes a long time, and at present every available parameter is downloaded.
nwis.hdf5.update_site_data(sta_id)
INFO:ulmo.usgs.nwis.core:processing data from request: http://waterservices.usgs.gov/nwis/dv/?startDT=2015-05-18&site=394540074062901&format=waterml INFO:ulmo.usgs.nwis.core:processing data from request: http://waterservices.usgs.gov/nwis/iv/?startDT=2015-05-18T10%3A49%3A13&site=394540074062901&format=waterml
# Read this site's data back from the local cache, restricted to
# parameter code '00035'.
sit = nwis.hdf5.get_site_data(sta_id, parameter_code='00035')
{}
# wind speed and direction
# NOTE(review): `vars` shadows the Python builtin of the same name and is not
# used by any code visible in this section; the get_site_data call above
# passes only '00035' -- confirm whether both codes were meant to be requested.
vars=['00035','00036']
# Display the query result (the recorded output that follows is an empty dict).
sit
{}
# Try reading discharge data from a different site: point sta_id at it and
# refresh the local cache for that site.
sta_id = '06043500'
nwis.hdf5.update_site_data(sta_id)
# read daily mean discharge data from cache (statistics code 00003)
INFO:ulmo.usgs.nwis.core:processing data from request: http://waterservices.usgs.gov/nwis/dv/?startDT=1851-01-01&site=06043500&format=waterml INFO:ulmo.usgs.nwis.core:processing data from request: http://nwis.waterservices.usgs.gov/nwis/iv/?startDT=2007-10-01T00%3A00%3A00&site=06043500&format=waterml
# Pull the cached series stored under code '00060:00003' for this site and
# compute, for every calendar day, the mean of that day's values across all
# years on record.
data = nwis.hdf5.get_site_data(sta_id, parameter_code='00060:00003')['00060:00003']

# convert data to a pandas dataframe indexed by datetime, dropping the
# bookkeeping columns that are not needed for the statistics below
df = (
    pd.DataFrame(data['values'])
    .drop(['last_checked', 'last_modified', 'qualifiers'], axis=1)
    .set_index('datetime')
)
# Convert with the builtin float: the np.float alias was deprecated in
# NumPy 1.20 and removed in 1.24, so apply(np.float) fails on current NumPy.
df['value'] = df['value'].astype(float)
# one period per calendar day, so .month / .day are available for grouping
df.index = pd.to_datetime(df.index).to_period('D')

# mark bad data as NaN (-999999 is treated as the bad-data marker)
df[df.values == -999999] = np.nan

# group the data by month, day & calculate means; the keys are passed as a
# list because newer pandas interprets a tuple as one single key
daily_groups = df.groupby([lambda d: d.month, lambda d: d.day])
means = daily_groups.mean()

# .ix was removed in pandas 1.0; .loc with a (month, day) label does the same
# lookup. The parenthesised single-argument print works on Python 2 and 3.
print('historic daily mean on March 23rd is %s' % means.loc[(3, 23), 'value'])
historic daily mean on March 23rd is 318.717647059
# Plot the DataFrame's contents against its index; the figure is rendered
# inline because of the %matplotlib inline magic near the top of the notebook.
df.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f692aa67950>