# Tip! By starting iPython notebook from the pipeline you will know what
# folder you are in and find it easier to find the file to load. If you find
# relative paths too much for you then just copy the data into the working
# folder for now.
%load ../data/qzpercentages.csv
Quartz Percentages in Samples
53
49
56
61
41
52
24
51
32
34
51
49
41
45
48
57
47
42
36
55
47
50
58
53
45
37
45
41
51
46
42
61
47
40
55
37
35
43
32
43
53
29
56
56
46
36
40
37
50
39
45
43
38
37
53
51
55
51
48
50
55
55
48
46
50
53
51
42
52
54
48
52
60
43
46
42
40
34
44
43
46
48
61
54
46
44
57
56
41
54
60
55
32
38
45
63
44
51
65
45
34
47
42
49
51
41
55
56
48
44
28
50
66
50
42
36
47
51
42
56
33
44
35
44
43
49
38
48
49
34
46
53
41
51
46
45
36
54
45
65
48
45
50
48
52
34
41
44
48
40
40
52
52
45
55
38
48
42
46
46
42
# In the previous lecture course you learned how to read in ASCII files and parse them.
# We could do that here as .csv files are easy to read but...
# ...it is much better/faster/easier to always spend a few minutes online to see if
# there is a "right" way of doing this.
# In this case I would write lots of loops, carefully skip the header (if there were
# multiple columns then I would have to handle that as well) etc. But a quick check
# online reveals I only have to do this...
import numpy as np
# np doesn't have mode, however a quick online search throws up a whole lot more :-)
# http://docs.scipy.org/doc/scipy/reference/stats.html
from scipy import stats

# Read in the records. np.recfromcsv was only ever a thin convenience wrapper
# around np.genfromtxt and is no longer reachable as np.recfromcsv in recent
# NumPy releases, so call genfromtxt directly with the settings the wrapper
# applied: comma delimiter, header row used as (lower-cased) field names,
# column types inferred from the data, result viewed as a record array.
record = np.genfromtxt("../data/qzpercentages.csv", delimiter=",", names=True,
                       dtype=None, case_sensitive="lower").view(np.recarray)

# Convert this to a plain numpy array of floats. The file has a single column,
# so the record array has exactly one named field (inspect record.dtype to see
# its name and that the values were read as integers). Pull that field out
# explicitly rather than relying on an implicit structured -> float cast.
array = np.asarray(record[record.dtype.names[0]], dtype=float)

# Finally you bask in the glory of your cleverness having RTFM'ed...
# http://docs.scipy.org/doc/numpy/reference/routines.statistics.html
# print(...) with a single pre-formatted string runs under Python 2 and 3.
print("Mean %g" % np.mean(array))
print("Median %g" % np.median(array))
# The label says "Max, min", so the maximum has to be the first value.
print("Max, min (%g, %g)" % (np.amax(array), np.amin(array)))
print("Range %g" % np.ptp(array))
print("Interquartile range %g" % (np.percentile(array, 75) - np.percentile(array, 25)))
# np.std / np.var use their default ddof=0 here (divide by N, not N - 1).
print("Standard deviation %g" % np.std(array))
print("Variance %g" % np.var(array))

# Depending on the SciPy version the first element of the mode result is
# either a scalar or a length-1 array; ravel()[0] yields a scalar in both cases.
mode_value = np.ravel(stats.mode(array)[0])[0]
print("Mode %g" % mode_value)
Mean 46.5714 Median 46 Max, min (24, 66) Range 42 Interquartile range 10 Standard deviation 7.76896 Variance 60.3567 Mode 48