%pylab inline import numpy as np # Read in the records. record = np.recfromcsv("../data/first_year_marks.csv") print record.dtype.names field_mark = np.array(record["field_mark"], dtype=float) overall_year = np.array(record["overall_year"], dtype=float) from scipy import stats print "Pearson's r and p values: %g, %g"%stats.pearsonr(field_mark, overall_year) print "Spearman's r and p values: %g, %g"%stats.spearmanr(field_mark, overall_year) n, bins, patches = pylab.hist(field_mark, normed=1) # Add a 'best fit' line sigma = np.std(field_mark) mu = np.mean(field_mark) y = pylab.normpdf(bins, mu, sigma) l = pylab.plot(bins, y, 'r--', linewidth=1) pylab.xlabel("Final percentage mark") pylab.ylabel("Proportion of students achieving mark") pylab.show() n, bins, patches = pylab.hist(overall_year, normed=1) # Add a 'best fit' line sigma = np.std(overall_year) mu = np.mean(overall_year) y = pylab.normpdf(bins, mu, sigma) l = pylab.plot(bins, y, 'r--', linewidth=1) pylab.xlabel("Final percentage mark") pylab.ylabel("Proportion of students achieving mark") pylab.show()