%pylab inline # Read in data import numpy as np # Read in the records. record1 = np.recfromcsv("../data/triloshape1.csv") record2 = np.recfromcsv("../data/triloshape2.csv") print record1.dtype.names print record2.dtype.names # Read in the records - take II. record1 = np.recfromtxt("../data/triloshape1.csv") record2 = np.recfromtxt("../data/triloshape2.csv") triloshape1 = np.array(record1, dtype=float) triloshape2 = np.array(record2, dtype=float) # Plot distribution - triloshape1 n, bins, patches = pylab.hist(triloshape1, 10, normed=1) # Add a 'best fit' line sigma = np.std(triloshape1) mu = np.mean(triloshape1) y = pylab.normpdf(bins, mu, sigma) l = pylab.plot(bins, y, 'r--', linewidth=1) pylab.xlabel("Samples") pylab.ylabel("Mean length:width ratio") pylab.show() # Plot distribution - triloshape2 n, bins, patches = pylab.hist(triloshape2, 10, normed=1) # Add a 'best fit' line sigma = np.std(triloshape2) mu = np.mean(triloshape2) y = pylab.normpdf(bins, mu, sigma) l = pylab.plot(bins, y, 'r--', linewidth=1) pylab.xlabel("Samples") pylab.ylabel("Mean length:width ratio") pylab.show() # The plots don't look convincing. However, it is always better to use a quantative test. # Use the D’Agostino & Pearson test to test the null hypothesis that a samples come from # a normal distribution. from scipy import stats k2_1, p_1 = stats.normaltest(triloshape1) k2_2, p_2 = stats.normaltest(triloshape2) print "p-values = ", p_1, p_2 if p_1 <= 0.05 or p_2 <= 0.05: print "Reject", else: print "Accept", print "the hypothesis that the sample comes from a normal distribution." t, p = stats.ttest_ind(triloshape1, triloshape2) print "T-test - (t, p) = %g, %g"%(t, p) if p<=0.05: print "Reject", else: print "Accept" print "the hypothesis that the populations are the same." # Read in data import numpy as np # Read in the records. 
record1 = np.recfromcsv("../data/micapercent1.csv")
record2 = np.recfromcsv("../data/micapercent2.csv")
print(record1.dtype.names)
print(record2.dtype.names)

# Read in the records - take II.
# NOTE(review): presumably the files have no header row, which is why the
# data are re-read with recfromtxt before use - confirm against the files.
record1 = np.recfromtxt("../data/micapercent1.csv")
record2 = np.recfromtxt("../data/micapercent2.csv")

micapercent1 = np.array(record1, dtype=float)
micapercent2 = np.array(record2, dtype=float)

# Plot distribution - micapercent1
n, bins, patches = pylab.hist(micapercent1, 10, normed=1)
# Add a 'best fit' line: normal curve with the sample mean and std. dev.
sigma = np.std(micapercent1)
mu = np.mean(micapercent1)
y = pylab.normpdf(bins, mu, sigma)
l = pylab.plot(bins, y, 'r--', linewidth=1)
# The histogram is normalised, so x is the measured value and y is a density.
pylab.xlabel("Mica percentage")
pylab.ylabel("Probability density")
pylab.show()

# Plot distribution - micapercent2
n, bins, patches = pylab.hist(micapercent2, 10, normed=1)
# Add a 'best fit' line: normal curve with the sample mean and std. dev.
sigma = np.std(micapercent2)
mu = np.mean(micapercent2)
y = pylab.normpdf(bins, mu, sigma)
l = pylab.plot(bins, y, 'r--', linewidth=1)
pylab.xlabel("Mica percentage")
pylab.ylabel("Probability density")
pylab.show()

# Looking at the first of these two plots it would appear unlikely that the
# sample comes from a normal distribution. However, it is always better to
# use a quantitative test. Use the D'Agostino & Pearson test to test the null
# hypothesis that a sample comes from a normal distribution.
from scipy import stats

k2_1, p_1 = stats.normaltest(micapercent1)
k2_2, p_2 = stats.normaltest(micapercent2)
# Single-argument print() behaves the same on Python 2 and Python 3; the
# spacing reproduces the output of the original comma-separated print.
print("p-values =  %s %s" % (p_1, p_2))
# Normality is rejected if either sample fails the test at the 5% level.
verdict = "Reject" if (p_1 <= 0.05 or p_2 <= 0.05) else "Accept"
print("%s the hypothesis that the sample comes from a normal distribution." % verdict)

# Mann-Whitney rank test; we want to consider both tails.
try:
    # Ask for the two-sided p-value explicitly where SciPy supports it, so
    # the result is never doubled a second time.
    u, p = stats.mannwhitneyu(micapercent1, micapercent2,
                              alternative="two-sided")
    # NOTE(review): the U statistic reported with an explicit `alternative`
    # may differ from the one returned without it, depending on the SciPy
    # version - confirm if the U value itself is used anywhere.
except TypeError:
    # SciPy releases without the `alternative` keyword perform a one-tailed
    # test, therefore we multiply the p value by 2.
    u, p = stats.mannwhitneyu(micapercent1, micapercent2)
    p = p * 2
print("Mann-Whitney rank test two-tailed p-value =  %s  and u value =  %s" % (p, u))
# Both outcomes are now printed on one line (the "Accept" branch used to
# break the sentence across two lines).
verdict = "Reject" if p <= 0.05 else "Accept"
print("%s the hypothesis that the populations are the same." % verdict)