#!/usr/bin/env python
# coding: utf-8

# #Escape Test: Comparison of Taiwanese and Philippines Netting
# **Abstract** Netting from the Philippines retained a significantly higher proportion of beetles that netting from Taiwan. Proportion of beetles remaining in container or entangled in net was 0.62 and 0.21 respectively. Most of the beetles escaping from the Philippines netting were smaller ones (elytra length < 23mm, elytral width < 18). Whereas slightly larger beetles escaped from the Taiwan netting (elytra length < 24mm, elytra width < 19).

# In[29]:


get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymysql


# In[30]:


connection = pymysql.connect(host='mysql.guaminsects.net', 
                     user='readonlyguest', 
                     passwd='readonlypassword', 
                     database='oryctes')


# In[31]:


sql = """
SELECT dateMarked, markNote, releaseNote, COUNT(*) AS count
FROM mark_release_recapture 
WHERE dateMarked IN ('2015-02-16', '2015-03-03', '2015-03-16')
GROUP BY dateMarked, markNote, releaseNote
ORDER BY dateMarked, markNote, releaseNote
"""
df = pd.read_sql(sql, connection)
df

# This query was run to fix typos. It is left here for reference.
# Running this query will require changing to a user which has UPDATE priveledges
sql = """
UPDATE mark_release_recapture
SET releaseNote = 'in peat'
WHERE releaseNote = 'in pete'
"""
cursor = connection.cursor()
cursor.execute(sql)
connection.commit
cursor.close()
connection.close()
# In[32]:


df1 = df[df['markNote'].str.contains('Taiwan')]
total = df1['count'].sum()
escaped = df1['count'][df1['releaseNote'] == 'escaped'].sum()
netting_list = ['Taiwan']
escaped_list = [escaped]
total_list = [total]
df1


# In[33]:


df1 = df[df['markNote'].str.contains('Phil')]
total = df1['count'].sum()
escaped = df1['count'][df1['releaseNote'] == 'escaped'].sum()
netting_list.append('Philippines')
escaped_list.append(escaped)
total_list.append(total)
df1


# In[34]:


df1 = df[df['markNote'].str.contains('no net')]
total = df1['count'].sum()
escaped = df1['count'][df1['releaseNote'] == 'escaped'].sum()
netting_list.append('no net')
escaped_list.append(escaped)
total_list.append(total)
df1


# In[35]:


data = {'netting': netting_list, 'total': total_list, 'escaped': escaped_list}
df2 = pd.DataFrame(data, columns=['netting', 'total', 'escaped'])
df2['contained'] = df2['total'] - df2['escaped']
df2['pcontained'] = df2['contained']/df2['total']
df2


# In[36]:


import astropy.stats
ciContained = astropy.stats.binom_conf_interval(df2['contained'], df2['total'])
ciContained


# In[37]:


df2['lcl'] = ciContained[0]
df2['ucl'] = ciContained[1]
df2['lbar'] = df2['pcontained'] - df2['lcl']
df2['ubar'] = df2['ucl'] - df2['pcontained']
df2.sort('pcontained', inplace=True)
df2.index = range(1,len(df2) + 1)
df2


# In[44]:


yerr = np.array([df2['lbar'],df2['ubar']])
plt.bar(
    left=(1,2,3), 
    height=df2['pcontained'], 
    yerr=yerr,
    color='lightgrey',
    align='center'
)
plt.title('Proportion of beetles remaining in peat or netting')
plt.ylim(-0.1,1)
plt.xticks((1,2,3), ('no net', 'Taiwan netting', 'Philippines netting'))
plt.savefig('barplot.pdf')
plt.show()
# Error bars are 95% binomial confidence intervals


# In[39]:


sql = """
SELECT markNote, releaseNote, elytraLength, elytraWidth
FROM mark_release_recapture 
WHERE dateMarked IN ('2015-02-16', '2015-03-03', '2015-03-16')
  AND markNote LIKE '%Taiwan%'
"""
df = pd.read_sql(sql, connection)
df.head()


# In[40]:


groups = df.groupby('releaseNote')

# Plot
fig, ax = plt.subplots()
ax.margins(0.05) # Optional, just adds 5% padding to the autoscaling
for name, group in groups:
    ax.plot(group.elytraLength, group.elytraWidth, marker='o', linestyle='', ms=12, label=name)
ax.legend(loc='best', numpoints=1)
plt.title('Taiwan netting')
plt.xlabel('elytra length(mm)')
plt.ylabel('elytra width(mm)')
plt.savefig('TaiwanSizes.pdf')
plt.show()


# In[41]:


sql = """
SELECT markNote, releaseNote, elytraLength, elytraWidth
FROM mark_release_recapture 
WHERE dateMarked IN ('2015-02-16', '2015-03-03', '2015-03-16')
  AND markNote LIKE '%Philippines%'
"""
df = pd.read_sql(sql, connection)
df.head()


# In[42]:


groups = df.groupby('releaseNote')

# Plot
fig, ax = plt.subplots()
ax.margins(0.05) # Optional, just adds 5% padding to the autoscaling
for name, group in groups:
    ax.plot(group.elytraLength, group.elytraWidth, marker='o', linestyle='', ms=12, label=name)
ax.legend(loc='best', numpoints=1)
plt.title('Philippines netting')
plt.xlabel('elytra length(mm)')
plt.ylabel('elytra width(mm)')
plt.savefig('PhilippineSizes.pdf')
plt.show()


# In[ ]: