# IPython magic: populate the interactive namespace from numpy and matplotlib
# (this is where the bare plot/xlabel/ylabel/title/show names used later come
# from) and select the inline plotting backend.
%pylab inline
# IPython magic: print the raw contents of the CSV data file.
%cat ../data/BrachiopodBiometrics.csv
Populating the interactive namespace from numpy and matplotlib
import numpy as np
# Read in the records.
# np.recfromcsv was removed from the NumPy namespace in NumPy 2.0. The call
# below is what recfromcsv did internally: comma-delimited, column names taken
# from the header row and lower-cased, per-column dtype detection, and the
# result viewed as a record array.
record = np.genfromtxt(
    "../data/BrachiopodBiometrics.csv",
    delimiter=",",
    names=True,
    dtype=None,
    case_sensitive="lower",
).view(np.recarray)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(record.dtype.names)
('locality', 'lengthmm', 'widthmm')
# Pull the two measurement columns out of the record array as plain float
# arrays (astype always returns a fresh copy, like np.array(..., dtype=float)).
lengthmm = record["lengthmm"].astype(float)
widthmm = record["widthmm"].astype(float)
# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(lengthmm)
print(widthmm)
[ 2.3 2.95 3.1 3.7 3.5 2.35 4. 4. 2.75 2.95 5. 2.7 3. 3.2 2.3 2.75 3.4 3.5 3.75 2.8 2.8 2.5 3.6 3.3 3.35 3.7 3.6 3. 3.3 3.3 2.95 3.4 2.3 4.4 2.7 2.8 3.9 2.7 3.7 3.8 3.1 2.7 3.5 3. 3.3 3.3 3.4 3.1 4. 3.7 3.2 3.25 3.8 3.45 5.5 3.6 4. 4.75 4.87 3.6 4. 5.25 4.4 3.5 3.3 2.6 5.15 4.05 3.4 3.5 2.1 4.65 5.1 3.9 3.35] [ 2.1 2.85 2.8 3.5 4.6 2.3 3.9 4.2 2.55 2.7 5.1 2.7 2.9 3.4 2.2 2.7 3.2 3.2 3.6 2.75 2.6 2.7 4. 3.2 3.3 4.1 3.45 3.2 3.3 3.2 2.9 3.4 2.7 3.9 2.5 2.8 3.6 3. 3.55 4. 3.1 2.2 3.2 3.1 3.2 3.3 3.25 3. 4. 3.6 3.5 3.2 3.75 3.65 4.65 3.5 3.75 4.35 4.16 3.3 3.4 4.5 4.45 2.7 2.7 2.7 4.15 3.45 3.75 3.25 2.3 4.5 5.25 3.25 3.5 ]
# Scatter plot of the raw measurements, length on x and width on y.
# 'bx' - blue 'x' markers, 5 points in size,
# drawn in a thickish line.
# zorder=3 to make sure this is in front of grid
plot(lengthmm, widthmm, 'bx', markersize=5, markeredgewidth=2, zorder=3)

# Least-squares linear regression of width (y) on length (x).
from scipy import stats
slope, intercept, r_value, p_value, std_err = stats.linregress(lengthmm, widthmm)

# Draw the fitted line across the range of the x data. Both end points must
# come from lengthmm: the x axis is length, so using widthmm.max() here would
# end the line at the wrong x position.
x = np.array([lengthmm.min(), lengthmm.max()])
y = slope * x + intercept
plot(x, y)

# Formatted as one string so the statement parses on both Python 2 and
# Python 3; on Python 2 the output is identical to the original
# comma-separated print statement (including the doubled space).
print("r and p values of linear regression:  %s %s" % (r_value, p_value))

# Not labeling a graph is unforgivable. This is the minimum that should be in any graph.
# Length was passed as the x data and width as the y data, so label accordingly.
xlabel("Length/mm", weight='bold')
ylabel("Width/mm", weight='bold')
title("Brachiopod Biometric data", weight='bold')
show()
r and p values of linear regression: 0.894057945508 3.5961183914e-27