class HypothesisTest():
    """Base class for simulation-based hypothesis tests.

    Subclasses must override ``TestStatistic`` and ``RunModel``; overriding
    ``MakeModel`` is optional. Construction stores the observed data, calls
    ``MakeModel`` and records the observed statistic in ``self.actual``.
    """

    def __init__(self, data):
        """Store *data*, build the model and compute the observed statistic."""
        self.data = data
        self.MakeModel()
        self.actual = self.TestStatistic(data)

    def PValue(self, iters=1000):
        """Return the p-value of the actual data based on simulated data.

        Runs the model ``iters`` times, keeps the simulated statistics in
        ``self.test_stats`` and returns the fraction of them that are
        greater than or equal to ``self.actual``.

        Raises:
            ValueError: if ``iters`` is not positive (the fraction would be
                undefined or meaningless).
        """
        if iters <= 0:
            raise ValueError("iters must be a positive integer")
        self.test_stats = [self.TestStatistic(self.RunModel())
                           for _ in range(iters)]
        count = sum(1 for x in self.test_stats if x >= self.actual)
        return count / iters

    def TestStatistic(self, data):
        """Return the test statistic for *data*; subclasses must override."""
        # The built-in exception is used because no custom "unimplemented"
        # exception is defined in this file.
        raise NotImplementedError(
            "TestStatistic must be implemented by a subclass")

    def MakeModel(self):
        """Hook called once from ``__init__``; does nothing by default."""
        pass

    def RunModel(self):
        """Return a simulated data sample; subclasses must override."""
        raise NotImplementedError(
            "RunModel must be implemented by a subclass")
import random
# Scratch call to the random module. The bare float on the next line appears
# to be the value echoed by the interactive session; as a statement it has no
# effect.
random.random()
0.26367687539030094
from math import sin
class PileFaceTest(HypothesisTest):
    """Hypothesis test whose statistic is the mean sine of the sample.

    Example data: ``[0, 0, 0, 0]``.
    """

    def TestStatistic(self, data):
        """Return the average of ``sin(x)`` over the values in *data*."""
        sine_total = sum(map(sin, data))
        return sine_total / len(data)

    def RunModel(self):
        """Return a simulated sample with one ``random.random()`` draw per observed value."""
        sample_size = len(self.data)
        return [random.random() for _ in range(sample_size)]
# Build a test on four zeros. Bare literal lines in this run appear to be the
# values echoed by the interactive session and are kept verbatim.
pft = PileFaceTest([0, 0, 0, 0])
# Draw one simulated sample (four random.random() values).
pft.RunModel()
[0.1917714179922031, 0.41156684614997496, 0.9335259573569674, 0.10698235419583546]
# Statistic for a hand-made sample: sin(1) / 4.
pft.TestStatistic([0, 0, 1, 0])
0.21036774620197413
# Observed statistic for the all-zero data: mean of sin(0) is 0.
pft.actual
0.0
# Every simulated statistic is a mean of sin(x) for x in [0, 1), hence >= 0,
# so all of them are >= the observed 0.0 and the p-value is 1.
pft.PValue(iters=100000)
1.0
import matplotlib.pyplot as plt
# Apply the 'bmh' style sheet to figures created from here on.
plt.style.use('bmh')
%matplotlib inline
def plot_test_stat(test, title=""):
    """Plot the test statistic distribution and the observed value.

    Args:
        test: object exposing ``test_stats`` (the simulated statistics, e.g.
            as set by ``HypothesisTest.PValue``) and ``actual`` (the
            observed statistic).
        title: text used as the plot title.
    """
    # `density=True` replaces the `normed=True` keyword, which was removed
    # from Matplotlib's hist in version 3.1.
    plt.hist(test.test_stats, bins=30, cumulative=False, density=True)
    # Capture the y-limits after drawing the histogram so the marker line
    # spans the full height of the bars.
    ylim = plt.ylim()
    plt.vlines(test.actual, *ylim, label='observed test stat')
    plt.legend(loc='upper left')
    plt.xlabel('test statistic')
    plt.title(title)
# Plot the simulated statistics from the PValue run above, then widen the
# x-range so the observed value at 0 is visible. The tuple below appears to be
# the echoed return value of plt.xlim.
plot_test_stat(pft)
plt.xlim(-0.1, 1)
(-0.1, 1)
# NOTE(review): PileFaceTest inherits __init__(self, data), so calling it with
# no argument raises TypeError as written — confirm the intended data.
pft2 = PileFaceTest()