import numpy as np
import scipy as sp
from scipy import stats
from scipy import interpolate
np.random.seed(12345678)
a = stats.norm.rvs(loc=5,scale=10,size=1000)
b = stats.norm.rvs(loc=5,scale=10,size=1000)
# Now do t-test
result = stats.ttest_ind(a,b)
p_value = result[-1]
print "p-value: ", p_value
p-value: 0.782578705472
def get_data():
data =[]
day_range = xrange(1,119)
for i in range(1000):
days = [0,120] # Add the first and last day
sample_size = randint(6,10) #How many days to sample?
days.extend(list(np.random.choice(day_range,size=sample_size,replace=False)))
days = list(np.sort(days))
measurements = list(stats.norm.rvs(loc=5,scale=10,size=sample_size+2))
data.append([days,measurements])
return data
process_data = get_data()
def calculate_auc(data,start_day,end_day):
auc =[]
for days,measurements in data:
tck = interpolate.splrep(days, measurements, s=0, k=1)
temp_auc = interpolate.splint(start_day, end_day, tck)
auc.append(temp_auc)
return auc
original_process_auc = calculate_auc(process_data,0,120)
pyplot.hist(original_process_auc)
(array([ 4, 28, 88, 195, 275, 231, 133, 36, 8, 2]), array([ -914.27695876, -587.87472991, -261.47250106, 64.9297278 , 391.33195665, 717.7341855 , 1044.13641436, 1370.53864321, 1696.94087206, 2023.34310092, 2349.74532977]), <a list of 10 Patch objects>)
def calculate_interval_auc(data,start_day,end_day,interval):
auc =[]
for days,measurements in data:
tck = interpolate.splrep(days, measurements, s=0, k=1)
new_days = range(start_day,end_day,interval)
new_days.append(end_day)
new_measurements = []
for day in new_days:
new_measurements.append(interpolate.splev(day,tck))
new_tck = interpolate.splrep(new_days, new_measurements, s=0, k=1)
temp_auc = interpolate.splint(start_day, end_day, new_tck)
auc.append(temp_auc)
return auc
interval_process_auc = calculate_interval_auc(data,0,120,30)
pyplot.hist(interval_process_auc)
(array([ 6, 27, 79, 171, 243, 240, 155, 54, 21, 4]), array([-1067.19503284, -728.22618587, -389.25733889, -50.28849192, 288.68035506, 627.64920203, 966.61804901, 1305.58689598, 1644.55574296, 1983.52458993, 2322.49343691]), <a list of 10 Patch objects>)
# Now do t-test for 30 days
t_result = stats.ttest_ind(original_process_auc,interval_process_auc,equal_var=False)
t_p_value = t_result[-1]
print t_p_value
0.347322142161
interval_60_process_auc = calculate_interval_auc(data,0,120,60)
pyplot.hist(interval_60_process_auc)
(array([ 6, 21, 64, 166, 245, 232, 156, 75, 27, 8]), array([-1545.455439 , -1123.48193243, -701.50842587, -279.53491931, 142.43858725, 564.41209381, 986.38560037, 1408.35910694, 1830.3326135 , 2252.30612006, 2674.27962662]), <a list of 10 Patch objects>)
# Now do t-test for 60 days
t_result = stats.ttest_ind(original_process_auc,interval_60_process_auc)
t_p_value = t_result[-1]
print t_p_value
0.33965966423
interval_120_process_auc = calculate_interval_auc(data,0,120,120)
pyplot.hist(interval_120_process_auc)
# Now do t-test for 120 days
t_result = stats.ttest_ind(original_process_auc,interval_120_process_auc,equal_var=False)
t_p_value = t_result[-1]
print "p-value 120 day intervals: ", t_p_value
p-value 120 day intervals: 0.394243496552
interval_15_process_auc = calculate_interval_auc(data,0,120,15)
pyplot.hist(interval_15_process_auc)
# Now do t-test for 15 days
t_result = stats.ttest_ind(original_process_auc,interval_15_process_auc,equal_var=False)
t_p_value = t_result[-1]
print "p-value 15 day intervals: ",t_p_value
p-value 15 day intervals: 0.458344635524
t_result = stats.ttest_ind(original_process_auc,original_process_auc,equal_var=False)
t_p_value = t_result[-1]
print "p-value Original: ",t_p_value
p-value Original: 1.0