# Tip! By starting the IPython notebook from the pipeline you will know what
# folder you are in, which makes it easier to find the file to load. If
# relative paths are too much trouble, just copy the data into the working
# folder for now.
#
# %load ../data/qzpercentages.csv
#
# Contents of the file as displayed by %load: a header followed by the
# values (wrapped here for readability; presumably one value per row in the
# file itself - confirm against the data file).
#
#   Quartz Percentages in Samples
#   53 49 56 61 41 52 24 51 32 34 51 49 41 45 48 57 47 42 36 55
#   47 50 58 53 45 37 45 41 51 46 42 61 47 40 55 37 35 43 32 43
#   53 29 56 56 46 36 40 37 50 39 45 43 38 37 53 51 55 51 48 50
#   55 55 48 46 50 53 51 42 52 54 48 52 60 43 46 42 40 34 44 43
#   46 48 61 54 46 44 57 56 41 54 60 55 32 38 45 63 44 51 65 45
#   34 47 42 49 51 41 55 56 48 44 28 50 66 50 42 36 47 51 42 56
#   33 44 35 44 43 49 38 48 49 34 46 53 41 51 46 45 36 54 45 65
#   48 45 50 48 52 34 41 44 48 40 40 52 52 45 55 38 48 42 46 46
#   42
#
# In the previous lecture course you learned how to read in ASCII files and
# parse them. We could do that here, as .csv files are easy to read, but it
# is much better/faster/easier to always spend a few minutes online to see
# whether there is a "right" way of doing this. Done by hand this would need
# lots of loops, careful skipping of the header (and extra handling if there
# were multiple columns), etc. A quick check online reveals that the few
# lines below are all that is needed.

import numpy as np
# np doesn't have a mode function, however a quick online search throws up a
# whole lot more :-)
# http://docs.scipy.org/doc/scipy/reference/stats.html
from scipy import stats

# Read in the records. np.recfromcsv was removed in NumPy 2.0; this is the
# equivalent np.genfromtxt call: comma delimiter, column names taken from the
# header row and lower-cased, per-column dtype inferred from the data, and
# the result viewed as a record array.
record = np.genfromtxt(
    "../data/qzpercentages.csv",
    delimiter=",",
    names=True,
    dtype=None,
    case_sensitive="lower",
).view(np.recarray)

# Convert this to a plain numpy array of floats. The inferred type of the
# column can be inspected with record.dtype (that is how you know the data
# were read as integers). The single column is selected by its field name so
# that the conversion does not rely on an implicit structured-to-float cast.
array = record[record.dtype.names[0]].astype(float)

# Finally you bask in the glory of your cleverness having RTFM'ed...
# http://docs.scipy.org/doc/numpy/reference/routines.statistics.html
#
# Each print takes a single parenthesised argument so the script runs under
# both Python 2 and Python 3.
print("Mean %g" % np.mean(array))
print("Median %g" % np.median(array))
# The label reads "Max, min", so the maximum has to be passed first.
print("Max, min (%g, %g)" % (np.amax(array), np.amin(array)))
print("Range %g" % np.ptp(array))
lower_quartile, upper_quartile = np.percentile(array, [25, 75])
print("Interquartile range %g" % (upper_quartile - lower_quartile))
# NOTE: np.std and np.var default to ddof=0, i.e. the population (not the
# sample) standard deviation and variance.
print("Standard deviation %g" % np.std(array))
print("Variance %g" % np.var(array))

# Depending on the SciPy version, stats.mode returns the mode either as a
# scalar or as a one-element array; np.ravel(...)[0] yields a scalar that
# "%g" can format in both cases.
print("Mode %g" % np.ravel(stats.mode(array)[0])[0])