# IPython session run with --pylab=inline; "import matplotlib.pyplot as plt" was run during startup
import pandas as pd
# ERDDAP data request URLs (decided in advance), selecting only a subset of variables
# Each request returns a csv table with 2 header rows: variable names and units
# Detections table request: endpoint on the first line, requested variables on the second.
willdet_url = (
    "http://MYSERVER/erddap/tabledap/otnnepWILLDetects.csv"
    "?time,transmittername,platform_name,platform_latitude,platform_longitude"
)
# Animal-tags table request, laid out the same way.
antag_url = (
    "http://MYSERVER/erddap/tabledap/otnnepAnTags.csv"
    "?transmittername,vernacularname,project_reference"
)
# Read detection data from the "will" receivers (Willapa Bay) into a pandas dataframe.
# skiprows=[1] drops the second header row (units) so only the variable-name row is
# used as the header; the 'time' column is parsed into datetimes.
willdet = pd.read_csv(willdet_url, skiprows=[1], parse_dates=['time'])
# Read all animal tags into a pandas dataframe (same two-row header, units row skipped).
antag = pd.read_csv(antag_url, skiprows=[1])
# Add a calendar-month column (taken from each detection's timestamp) to the detections.
willdet['month'] = willdet['time'].apply(lambda dt: dt.month)
# Left merge keeps every detection; detections whose transmitter is absent from the
# tag table end up with null vernacularname / project_reference.
# The join key is named explicitly rather than relying on whichever column names the
# two tables happen to share, so changing a request's variable list cannot silently
# change the join.
willdetantag = pd.merge(willdet, antag, how='left', on='transmittername')
willdetantag
<class 'pandas.core.frame.DataFrame'> Int64Index: 100367 entries, 0 to 100366 Data columns (total 8 columns): time 100367 non-null values transmittername 100367 non-null values platform_name 100367 non-null values platform_latitude 100367 non-null values platform_longitude 100367 non-null values month 100367 non-null values vernacularname 53035 non-null values project_reference 53035 non-null values dtypes: datetime64[ns](1), float64(2), int64(1), object(4)
# keep only the detections matched to a tagging project, i.e. drop every row
# whose project_reference is null
d = willdetantag.dropna(subset=['project_reference'])
# number of detections for each vernacular name
d['vernacularname'].value_counts()
green sturgeon 53022 sockeye salmon 13 dtype: int64
# detection totals for each tagging project
d['project_reference'].value_counts()
VOGL 31146 LIND 15335 MOSER 6541 PSS2 13 dtype: int64
# one series per tagging project: number of detections in each calendar month,
# obtained by filtering to the project and counting 'time' values within each month group
dvogl = d.loc[d['project_reference'] == 'VOGL'].groupby('month')['time'].count()
dlind = d.loc[d['project_reference'] == 'LIND'].groupby('month')['time'].count()
dmoser = d.loc[d['project_reference'] == 'MOSER'].groupby('month')['time'].count()
dvogl
month 1 774 2 999 3 941 4 3000 5 6433 6 9524 7 2809 8 1067 9 1875 10 2773 11 462 12 489 dtype: int64
# plot number of Vogl-project detections vs month (plotted independently of year);
# dvogl holds detection counts per month, not counts of distinct tags
dvogl.plot()
<matplotlib.axes.AxesSubplot at 0x4335850>
# now compare the number of detections vs month (independent of year), for each of the 3 tagging projects
# blue line is Vogl (same series as the single-project plot), red is Lind, green is Moser
plt.plot(
    dvogl.index, dvogl.values, 'b-',
    dlind.index, dlind.values, 'r-',
    dmoser.index, dmoser.values, 'g-'
)
[<matplotlib.lines.Line2D at 0x52464d0>, <matplotlib.lines.Line2D at 0x5246990>, <matplotlib.lines.Line2D at 0x5246e90>]