Sage Pond continuous (collected from 2013-06-04 20:38:51 to 2013-06-11 17:17:11) and stopping just before tidal effort
Retrieve data: the data must first be available as a text file on Dropbox
import pandas as pd
import urllib
# Download the clipped ICO2 CO2-sensor export and the YSI export from Dropbox
# and store each one in the data directory under the given file name.
ico2_url = 'https://dl.dropboxusercontent.com/s/9x75i5prcl95bhk/ICO2sensordata_Sage_clipped_asc.txt?token_hash=AAG2bFyl5yuW9RtMWbagkFiPP1nCYM-gPcDFcC5Ay6i48w&dl=1'
ico2_path = '/usgs/data2/notebook/data/ICO2_Sage_clipped_asc.txt'
urllib.urlretrieve(ico2_url, ico2_path)
ysi_url = 'https://dl.dropboxusercontent.com/s/7rpzn295mlvvmhj/YSI_Sage.txt?token_hash=AAFqTDOTebMZeGMuUI4uN2LdulqPV13doh3oK8D2tI7OeQ&dl=1'
ysi_path = '/usgs/data2/notebook/data/YSI_Sage.txt'
urllib.urlretrieve(ysi_url, ysi_path)
('/usgs/data2/notebook/data/YSI_Sage.txt', <httplib.HTTPMessage instance at 0x288b098>)
# Download the lab pH text file from Dropbox into the data directory.
lab_ph_url = 'https://dl.dropboxusercontent.com/s/yxex97qk8deh5tj/lab_pH.txt?token_hash=AAEb0-rZ0Y2nlOsMuFsL2kyltvyiuqYng600F8FQLVJz2w&dl=1'
lab_ph_path = '/usgs/data2/notebook/data/lab_pH.txt'
urllib.urlretrieve(lab_ph_url, lab_ph_path)
('/usgs/data2/notebook/data/lab_pH.txt', <httplib.HTTPMessage instance at 0x2ae13f8>)
# Parse the fixed-width ICO2 file. Only the character ranges listed in
# col_specs are read, which chops out the unneeded characters between fields.
ico2_path = '/usgs/data2/notebook/data/ICO2_Sage_clipped_asc.txt'
ico2_fields = ['date', 'time', 'co2', 'temp', 'press']
col_specs = [(0, 10), (11, 19), (23, 30), (32, 37), (58, 62)]
df = pd.read_fwf(
    ico2_path,
    colspecs=col_specs,
    names=ico2_fields,
    header=None,
    skiprows=2,
    nrows=500000,
    parse_dates=[[0, 1]],  # combine the date and time columns into one timestamp
    index_col=0,           # ...and use that combined column as the index
)
# Divide the raw pressure column by ten (float division).
df['press'] = df['press'] / 10.
# Load the whitespace-delimited YSI and lab pH tables. The first two lines of
# each file are skipped, and the date and time columns are combined into a
# single timestamp column that becomes the index.
#
# The separator is r"\s+" (one or more whitespace characters) rather than
# r"\s*": a pattern that can match the empty string is not a real delimiter,
# and regex splitting on Python 3.7+ splits on empty matches too, which would
# break every field apart character by character.
# NOTE(review): assumes data lines have no leading whitespace - confirm the
# parsed columns are unchanged against the raw files.
df2 = pd.read_csv(
    '/usgs/data2/notebook/data/YSI_Sage.txt',
    skiprows=[0,1],
    parse_dates=[[0,1]],
    index_col=0,
    sep=r"\s+",
    names=['date','time','pH','depth', 'temp', 'sal'],
    header=None,
)
df3 = pd.read_csv(
    '/usgs/data2/notebook/data/lab_pH.txt',
    skiprows=[0,1],
    parse_dates=[[0,1]],
    index_col=0,
    sep=r"\s+",
    names=['date','time','Rave', 'Rstdev', 'pHave', 'pHstdev'],
    header=None,
)
# Keep only the rows recorded while the instrument was in the water.
sensor_start = '2013-06-04 20:38:51'
lab_start = '2013-06-04 19:49:51'  # the lab pH window opens earlier than the sensor window
deploy_end = '2013-06-11 17:17:11'
df = df[sensor_start:deploy_end]
df2 = df2[sensor_start:deploy_end]
df3 = df3[lab_start:deploy_end]
# Overview plots: one subplot per column, all sharing the x axis.
panel_opts = dict(subplots=True, sharex=True)
df.plot(**panel_opts)
df2.plot(**panel_opts)
array([<matplotlib.axes.AxesSubplot object at 0x2f389d0>, <matplotlib.axes.AxesSubplot object at 0xd3f84d0>, <matplotlib.axes.AxesSubplot object at 0xaa10490>, <matplotlib.axes.AxesSubplot object at 0x9f70510>], dtype=object)
Data synthesis: calculating the 30 min, 10 min, and 1 min means in order to make the data more manageable
def _bin_means(table, rule):
    """Return *table* resampled to bins of width *rule*, averaging each bin."""
    # `how='mean'` is the resample signature of the older pandas release this
    # notebook was written against.
    return table.resample(rule, how='mean')


# 30-minute, 10-minute and 1-minute means of both sensor tables.
df_30min = _bin_means(df, '30min')
df2_30min = _bin_means(df2, '30min')
df_10min = _bin_means(df, '10min')
df2_10min = _bin_means(df2, '10min')
df_1min = _bin_means(df, '1min')
df2_1min = _bin_means(df2, '1min')
Explore the data: In this next section the colors have meaning: blue is for CO2, green is for pH, red is for depth, black is for temp, and yellow is for pressure
# Plot the 10-minute mean CO2 series on a 15x6 figure.
co2_10min = df_10min['co2']
co2_10min.plot(figsize=(15, 6))
<matplotlib.axes.AxesSubplot at 0x2ac61d0>
# Subtract 0.3 from the YSI pH and store it as a new column on df2.
df2['pH_adjusted'] = df2['pH'] - .3
# df2_30min was resampled before 'pH_adjusted' was added to df2, so it has no
# such column and df2_30min['pH_adjusted'] raises KeyError when the cells run
# top to bottom. Resample the adjusted series here instead.
pH_adjusted_30min = df2['pH_adjusted'].resample('30min', how='mean')
# Construct the figure, drawing from both data frames: lab pH averages as
# green dots, 30-minute adjusted YSI pH as a green line on the same axis.
plt.figure()
df3['pHave'].plot(style='go')
pH_adjusted_30min.plot(figsize=(15,4), secondary_y=False, style='g')
<matplotlib.axes.AxesSubplot at 0xcf2c910>
# Construct a figure that draws from two data frames: 30-minute CO2 on the
# primary axis and 30-minute pH (green) on a secondary y axis.
plt.figure()
co2_30min = df_30min['co2']
ph_30min = df2_30min['pH']
co2_30min.plot()
ph_30min.plot(style='g', secondary_y=True, figsize=(15, 4))
<matplotlib.axes.AxesSubplot at 0xb2b61d0>
# Shift the CO2-sensor temperature down by 5.3 degrees so it matches the YSI
# temperature, then draw both: adjusted sensor in black, YSI in cyan.
temp_offset = 5.3
df['temp_adjusted'] = df['temp'] - temp_offset
plt.figure()
sensor_temp = df['temp_adjusted']
ysi_temp = df2['temp']
sensor_temp.plot(style='k')
ysi_temp.plot(style='c', secondary_y=False, figsize=(15, 4))
<matplotlib.axes.AxesSubplot at 0x426dc50>
# Construct a figure that draws from two data frames: 10-minute CO2 on the
# primary axis and 10-minute depth (red) on a secondary y axis.
plt.figure()
co2_10min = df_10min['co2']
depth_10min = df2_10min['depth']
co2_10min.plot()
depth_10min.plot(style='r', secondary_y=True, figsize=(15, 4))
<matplotlib.axes.AxesSubplot at 0x3d8bb10>
# 10-minute CO2 (blue) with sensor temperature (black) on a secondary y axis.
co2_and_temp = df_10min[['co2', 'temp']]
co2_and_temp.plot(style=['b', 'k'], secondary_y='temp', figsize=(15, 4));
# 10-minute sensor temperature (black) with depth (red) on a secondary y axis.
plt.figure()
temp_10min = df_10min['temp']
depth_10min = df2_10min['depth']
temp_10min.plot(style='k')
depth_10min.plot(style='r', secondary_y=True, figsize=(15, 4))
<matplotlib.axes.AxesSubplot at 0x7907850>
# 10-minute depth (red) with pH (green) on a secondary y axis.
depth_and_ph = df2_10min[['depth', 'pH']]
depth_and_ph.plot(style=['r', 'g'], secondary_y='pH', figsize=(15, 4));
# Correlation-coefficient matrix between 10-minute CO2 and 10-minute pH.
co2_10min = df_10min['co2']
ph_10min = df2_10min['pH']
corrcoef(co2_10min, ph_10min)
array([[ 1. , -0.84751757], [-0.84751757, 1. ]])
# Scatter of 10-minute CO2 (y) against 10-minute pH (x) as green dots, with
# grid lines switched on.
ph_10min, co2_10min = df2_10min['pH'], df_10min['co2']
plot(ph_10min, co2_10min, 'go');
grid();
1 min means for Aleck
# Write the 1-minute means (co2, temp, press) to a text file, then read the
# file back to check what was written.
df_1min.to_csv('/usgs/data2/notebook/data/ICO2_Sage_1min.txt', cols=['co2','temp','press'])
df5=pd.read_csv('/usgs/data2/notebook/data/ICO2_Sage_1min.txt')
# head is a method: without the call parentheses the notebook only displays
# the bound-method repr instead of the first rows.
df5.head()
<bound method DataFrame.head of <class 'pandas.core.frame.DataFrame'> Int64Index: 9880 entries, 0 to 9879 Data columns (total 4 columns): date_time 9880 non-null values co2 9880 non-null values temp 9880 non-null values press 9880 non-null values dtypes: float64(3), object(1)>