Hamblin Pond 30 min (collected from 2013-06-18 20:28:08 to 2013-06-21 14:58:14) and continuous (collected from 2013-06-21 15:22:00 to 2013-06-26 02:00:00)
Retrieve data: The data must first be available as a text file on Dropbox.
import pandas as pd
import urllib
import datetime
# To obtain a download URL: right-click the file in the Dropbox web interface
# and select "share link", then right-click the download button and select
# "copy link address". Paste that address as the first argument to
# urlretrieve below; the second argument is the desired local destination.
datafile = '/usgs/data2/notebook/data/ICO2_Hamblin_clipped_asc.txt'
# Try opening the file to see if it has already been downloaded.
try:
    with open(datafile):
        pass
except IOError:
    # Only a failed open should trigger the download. A bare ``except:`` would
    # also swallow KeyboardInterrupt and unrelated programming errors.
    print('File not found. Downloading %s from Dropbox...' % datafile)
    urllib.urlretrieve('https://dl.dropboxusercontent.com/s/qbnp51ab1dfaiia/ICO2sensordata_Hamblin_clipped_asc.txt?token_hash=AAGDuJOiJnjRlr81kRaPy1tlvMaWwkeFtSj3Jz3QMfRlPA&dl=1', datafile)
def _to_float(text):
    """Return *text* as a float, or NaN when it is empty or only whitespace."""
    text = text.strip()
    return float(text) if text else float('nan')


# Parse the sensor file line by line. The splits below imply each record is
# laid out as:
#   <timestamp>- G<co2>T<temp>HT0000 RH0000 P<press>X<remainder>
date = []; co2 = []; temp = []; press = []
fmt = '%Y/%m/%d %H:%M:%S'
# The with-block guarantees the file handle is closed, and iterating the
# handle avoids loading every line into memory at once.
with open(datafile) as f:
    for line in f:
        date_val, frag1 = line.split('- G')
        date.append(datetime.datetime.strptime(date_val.strip(), fmt))
        frag2, frag3 = frag1.split('HT0000 RH0000 P')
        co2_val, temp_val = frag2.split('T')
        press_val, frag4 = frag3.split('X')
        # Missing readings (blank or empty fields) are stored as NaN.
        co2.append(_to_float(co2_val))
        temp.append(_to_float(temp_val))
        press.append(_to_float(press_val))
d = {}
d['date'] = asarray(date)
d['co2'] = asarray(co2)
d['temp'] = asarray(temp)
p = asarray(press)
# correct for pressure values where last decimal place was dropped
p = where(p < 500, p * 10., p)
d['press'] = p
df = pd.DataFrame(d)
# Index the frame by timestamp so it can be sliced and resampled by time.
df.index = pd.to_datetime(df['date'])
# NOTE(review): presumably a unit conversion of the raw pressure -- confirm.
df['press'] = df['press'] / 10.
# Download the YSI file (fetched again on every run) and load it. The first
# line is skipped, columns 2 and 3 (date, time) are merged into one datetime
# column, and that merged column becomes the index.
urllib.urlretrieve('https://dl.dropboxusercontent.com/s/skihdky9rqchx66/YSI_Hamblin.txt?token_hash=AAH8XcrQL5NTwRK1QG1JYHmOKWAMfbsoZFDdoGiRnp8beA&dl=1', '/usgs/data2/notebook/data/YSI_Hamblin.txt')
# sep is r"\s+" rather than r"\s*": a pattern that can match the empty string
# only tokenises correctly on old re.split semantics. engine='python' keeps
# the regex-based splitting the original pattern relied on.
df2 = pd.read_csv('/usgs/data2/notebook/data/YSI_Hamblin.txt', skiprows=[0], parse_dates=[[2, 3]], index_col=0, sep=r"\s+", engine='python',
                  names=['foo1', 'foo2', 'date', 'time', 'pH', 'depth', 'temp', 'sal'], header=None)
# Same procedure for the lab pH file: skip the first two lines and merge
# columns 0 and 1 (date, time) into the datetime index.
urllib.urlretrieve('https://dl.dropboxusercontent.com/s/yxex97qk8deh5tj/lab_pH.txt?token_hash=AAEb0-rZ0Y2nlOsMuFsL2kyltvyiuqYng600F8FQLVJz2w&dl=1','/usgs/data2/notebook/data/lab_pH.txt')
df3 = pd.read_csv('/usgs/data2/notebook/data/lab_pH.txt', skiprows=[0, 1], parse_dates=[[0, 1]], index_col=0, sep=r"\s+", engine='python',
                  names=['date', 'time', 'Rave', 'Rstdev', 'pHave', 'pHstdev'], header=None)
# Clip all three frames to the analysis window (df2 runs 30 minutes longer).
df = df['2013-06-19 00:00:00':'2013-06-26 02:00:00']
df2 = df2['2013-06-19 00:00:00':'2013-06-26 02:30:00']
df3 = df3['2013-06-19 00:00:00':'2013-06-26 02:00:00']
Data synthesis: calculating the 30 min, 10 min, and 1 min means in order to make the data more manageable
# Downsample both frames (df, then df2) to 30, 10 and 1 minute means.
# Generator expressions are used so no loop variable is left behind.
df_30min, df2_30min = (frame.resample('30min', how='mean') for frame in (df, df2))
df_10min, df2_10min = (frame.resample('10min', how='mean') for frame in (df, df2))
df_1min, df2_1min = (frame.resample('1min', how='mean') for frame in (df, df2))
Explore the data: In this next section the colors have meaning: blue is for CO2, green is for pH, red is for depth, black is for temp, and yellow is for pressure
# Quick look at the full-resolution CO2 series.
df.co2.plot(figsize=(12, 4))
<matplotlib.axes.AxesSubplot at 0x1541aed0>
# Start a new figure and overlay series from two different frames:
# pH from df2 in green, CO2 from df on a secondary y-axis.
plt.figure()
df2.pH.plot(style='g')
df.co2.plot(secondary_y=True, figsize=(15, 6))
<matplotlib.axes.AxesSubplot at 0x12887c90>
# Start a new figure and overlay series from two different frames:
# lab pH averages from df3 as green circles, CO2 from df on a secondary y-axis.
plt.figure()
df3.pHave.plot(style='go')
df.co2.plot(secondary_y=True, figsize=(15, 6))
<matplotlib.axes.AxesSubplot at 0xbce5c50>
# CO2 (blue) against sensor temperature (black, secondary y-axis).
df[['co2', 'temp']].plot(style=['b', 'k'], secondary_y='temp', figsize=(12, 4));
Making the data manageable: in this section the data is resampled to compile more manageably sized data sets.
# Average every column of df over 30 minute bins, then plot the binned CO2
# (blue) with the binned temperature (black) on a secondary y-axis.
df_30min = df.resample(how='mean', rule='30min')
df_30min[['co2', 'temp']].plot(style=['b', 'k'], secondary_y='temp', figsize=(12, 4));
# Start a new figure and overlay the 30 minute means from two frames:
# pH from df2_30min in green, CO2 from df_30min on a secondary y-axis.
plt.figure()
df2_30min.pH.plot(style='g')
df_30min.co2.plot(secondary_y=True, figsize=(15, 6))
<matplotlib.axes.AxesSubplot at 0x10057a10>
# Shift the CO2-sensor temperature (black) down by 5.3 degrees to match the
# YSI temperature (cyan).
df['temp_adjusted'] = df['temp'] - 5.3
# df_30min was resampled before 'temp_adjusted' existed, so rebuild it here.
# Without this, the df_30min['temp_adjusted'] lookup below raises KeyError
# when the file is run top to bottom.
df_30min = df.resample('30min', how='mean')
plt.figure()
df2_30min['temp'].plot(style='c')
df_30min['temp_adjusted'].plot(figsize=(15, 6), secondary_y=False, style='k')
<matplotlib.axes.AxesSubplot at 0x15e185d0>
# 30 minute mean temperature (black) against pressure (yellow, secondary y-axis).
df_30min[['temp', 'press']].plot(style=['k', 'y'], secondary_y='press', figsize=(12, 4));
# Recompute the 30 minute means of both frames.
df_30min = df.resample(how='mean', rule='30min')
df2_30min = df2.resample(how='mean', rule='30min')
# Scatter of 30 minute mean pH against 30 minute mean CO2 (green circles).
plot(df_30min.co2, df2_30min.pH, 'go');
grid();
# Correlation matrix between the 30 minute CO2 and pH series.
corrcoef(df_30min.co2, df2_30min.pH)
array([[ 1. , -0.77234924], [-0.77234924, 1. ]])