#!/usr/bin/env python
# coding: utf-8

# # Exercise 6.3
# 
# For this practical you will need the Shock response data for sand. This can be found in the file [sand_swdb.txt](https://github.com/ggorman/Introduction-to-stats-for-geoscientists/blob/gh-pages/data/sand_swdb.txt), taken from the shock wave database.
# 
# ## Exercise 6.3.1
# * Open up the text file and have a look around, see what you're working with.
# * Read in the file sand_swdb.txt into two arrays called pressure and energy.
#     * Do NOT use recfromcsv to do this! numpy has other methods too, like genfromtxt, which recfromcsv is based on.
# * Remember you do not want any 'commented' lines in the text file to be included, how would you do this?
# 
# ## Exercise 6.3.2
# 
# * Make a scatter plot of pressure against energy.
# * Label the axes appropriately (include units).
# * Perform a linear regression on the data, and overlay the best fit line on the plot (as a dashed line).
# * Add a legend to the plot and label the best fit line with its r-value. e.g. r = 0.5.
# * Add (or change) the title of your plot to be y = {}x + {} and fill with the fit parameters.
# 
# ## BONUS ROUND (Harder)
# 
# * Remake your plot, but this time with vertical errorbars of 10% on every point.
# 
# For this practical you should make extensive use of the matplotlib documentation.

# In[2]:


get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('cat', 'sand_swdb.txt')


import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

data = np.genfromtxt('sand_swdb.txt', comments='#')
# Genfromtxt will use a delimiter of a space or tab by default, and specifying
# comments as '#' means it will ignore any line in the text file that starts with
# '#' like in a python script. This reads the data in as a 2D numpy array.

"""BONUS POINTS"""
# If you read the data in using the 'usecols' argument that is particularly good! e.g.
P    = np.genfromtxt('sand_swdb.txt', comments='#', usecols=0)
E    = np.genfromtxt('sand_swdb.txt', comments='#', usecols=1)
# However, either way works fine
"""############"""


Pressure = data[:, 0]
Energy   = data[:, 1]
# Split the data into two arrays

M, C, R, P, err = stats.linregress(Energy,Pressure)
# Linear regression on energy and pressure, to give coefficients of M and C
# from y = Mx + C; R-value, p-value and the standard error.

plt.figure()
# Start the figure
plt.plot(Energy, Pressure, linestyle=' ', marker='o', markersize=10)
# Plot Pressure against Energy. No line, circle markers, size 10
plt.plot(Energy, Energy*M+C, linestyle='-', label = 'r-value = {}'.format(R))
# Plot the best-fit line, use the Energy values on the x-axis and calculate
# the corresponding y-values using y = mx + c. 
# Label this with the r-value, and use conditional formatting to insert it.
plt.xlabel('Specific Energy behind Shock [kJ/g]')
plt.ylabel('Pressure behind Shock [GPa]')
# Label the axes, based on data in the text file
plt.title('Best fit: y = {}x + {}'.format(M,C))
# Use conditional formatting to label the title with the equation fo the best-fit line
plt.legend(loc='best')
# Add a legend, this will contain all the labels from each 'plot' function.
# EXTRA: add a 'loc' argument to give a location for the legend. 'best' puts it in the
# best possible place, to display as much data as possible.

plt.figure()
plt.errorbar(Energy, Pressure, yerr=Pressure*.1, linestyle=' ', marker='o', markersize=10)
# To add errorbars, use the errorbar function, and now just add the argument 'yerr'. To make
# the errorbars 10% simply multiply the Pressure values by 0.1 and assign this to yerr.
# The errorbars will be +/- 10%.
plt.plot(Energy, Energy*M+C, linestyle='-', label = 'r-value = {}'.format(R))
plt.xlabel('Specific Energy behind Shock [kJ/g]')
plt.ylabel('Pressure behind Shock [GPa]')
plt.title('Best fit: y = {}x + {}'.format(M,C))
plt.legend(loc='best')
plt.show()


# In[ ]: