%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.stats as stats
from tools import Analysis
# Shared analysis helper from the project's `tools` module; every later
# section of this notebook queries it.
a = Analysis()

# Render a sample of the raw records as a table.
# NOTE(review): the argument 10 is presumably a row limit — confirm in tools.Analysis.
sample_rows = a.raw_names_ages_gender(10)
column_headers = ['Name', 'Age', 'Gender']
t = plt.table(
    cellText=sample_rows,
    colLabels=column_headers,
    loc='center',
    cellLoc='center',
)
plt.axis('off')  # hide the axis frame and ticks around the table
plt.show()
# Histogram of all raw ages, using matplotlib's default binning.
all_ages = a.raw_ages()
n, bins, patches = plt.hist(all_ages)
plt.ylabel('Count')
plt.xlabel('Age')
plt.show()
# Histogram of all raw genders, split into two bins.
all_genders = a.raw_genders()
n, bins, patches = plt.hist(all_genders, bins=2)
# NOTE(review): the tick positions assume genders are encoded as 0 and 1
# (0 = Female, 1 = Male) so the two bins are centred at 0.25 and 0.75 —
# confirm against tools.Analysis.
gender_tick_positions = [0.25, 0.75]
gender_tick_labels = ('Female', 'Male')
plt.xticks(gender_tick_positions, gender_tick_labels)
plt.ylabel('Count')
plt.show()
# Per-uid conversion counts.
# NOTE(review): column 1 is presumably the conversion count (column 0 the
# uid) — confirm against tools.Analysis.
uid_counts = a.uids_with_counts_from_conversions()
# mean conversions per person
# Printed explicitly: a bare expression only displays inside a notebook cell
# and is silently discarded when this file runs as a script.
mean_conversions_per_person = np.mean(uid_counts[:, 1])
print(mean_conversions_per_person)  # recorded output of an earlier run: 47.926000000000002
uid_exposures = a.raw_uids_from_exposures()
# only one exposure per uid, to simplify things: check that no uid appears
# more than once among the exposures
has_unique_exposures = len(uid_exposures) == len(np.unique(uid_exposures))
print(has_unique_exposures)  # recorded output of an earlier run: True
# Distribution of metrics for the (False, False) group.
# NOTE(review): (False, False) is presumably the control group (neither test
# flag set) — confirm against tools.Analysis.
metric_values = a.raw_metrics_for_test(False, False)
n, bins, patches = plt.hist(metric_values, bins=3)
# NOTE(review): the tick positions assume the metrics are encoded as 0, 1, 2
# for A, B, C — confirm.
metric_tick_positions = [0.3, 1, 1.6]
metric_tick_labels = ('A', 'B', 'C')
plt.xticks(metric_tick_positions, metric_tick_labels)
plt.ylabel('Count')
plt.show()
# Age distribution across metrics
# One normalised step histogram per metric, overlaid on the same axes.
# pyplot keeps drawing on the current axes until plt.show(), so the removed
# plt.hold(True) call is not needed. `density=True` replaces the removed
# `normed=True` keyword.
for metric, line_colour in (('A', 'b'), ('B', 'g'), ('C', 'r')):
    n, bins, patches = plt.hist(
        a.raw_ages_for_test_metric(False, False, metric),
        density=True,
        histtype='step',
        color=line_colour,
    )
plt.xlabel('Age')
plt.ylabel('Count (normalised)')
plt.show()
# Gender distribution across metrics
# One normalised two-bin step histogram per metric, overlaid on the same
# axes. pyplot keeps drawing on the current axes until plt.show(), so the
# removed plt.hold(True) call is not needed. `density=True` replaces the
# removed `normed=True` keyword.
for metric, line_colour in (('A', 'b'), ('B', 'g'), ('C', 'r')):
    n, bins, patches = plt.hist(
        a.raw_genders_for_test_metric(False, False, metric),
        bins=2,
        density=True,
        histtype='step',
        color=line_colour,
    )
# NOTE(review): the tick positions assume genders are encoded as 0 and 1
# (0 = Female, 1 = Male) — confirm against tools.Analysis.
plt.xticks([0.25, 0.75], ('Female', 'Male'))
plt.ylabel('Count (normalized)')
plt.show()
# Test 1 Analysis
# Raw metrics for the control group (no test flag set) and for Test 1
# (first flag set).
raw_control_metrics = a.raw_metrics_for_test(False, False)
raw_test1_metrics = a.raw_metrics_for_test(True, False)
# Overlay the two normalised distributions: control in blue, Test 1 in red.
# pyplot keeps drawing on the current axes until plt.show(), so the removed
# plt.hold(True) call is not needed. `density=True` replaces the removed
# `normed=True` keyword.
n, bins, patches = plt.hist(raw_control_metrics, bins=3, histtype='step', color='b', density=True)
n, bins, patches = plt.hist(raw_test1_metrics, bins=3, histtype='step', color='r', density=True)
# NOTE(review): the tick positions assume the metrics are encoded as 0, 1, 2
# for A, B, C — confirm.
plt.xticks([0.3, 1, 1.6], ('A', 'B', 'C'))
# NOTE(review): the histograms are normalised, so the y axis shows a density
# rather than a raw count.
plt.ylabel('Count')
plt.title('Test 1')
plt.show()
# Test 1 mean analysis
# Mean of column 1 of the per-uid counts, one value per metric.
# NOTE(review): column 1 is presumably the conversion count — confirm
# against tools.Analysis.
metric_names = ['A', 'B', 'C']
control_metrics_per_person = np.array(
    [np.mean(a.uid_counts_for_test_metric(False, False, m)[:, 1]) for m in metric_names]
)
test1_metrics_per_person = np.array(
    [np.mean(a.uid_counts_for_test_metric(True, False, m)[:, 1]) for m in metric_names]
)
# Grouped bars: control (blue) and Test 1 (red) side by side per metric.
# plt.hold(True) has been removed from matplotlib and is not needed, since
# pyplot keeps drawing on the current axes until plt.show().
width = 0.35
group_positions = np.arange(len(metric_names))
# align='edge' is explicit because the tick positions below were chosen for
# left-edge-aligned bars; newer matplotlib centres bars by default.
control_bar = plt.bar(group_positions, control_metrics_per_person, width=width, color='b', align='edge')
test1_bar = plt.bar(group_positions + width, test1_metrics_per_person, width=width, color='r', align='edge')
plt.xticks([0.4, 1.4, 2.4], ('A', 'B', 'C'))
plt.ylabel('Mean Conversions Per Person')
plt.title('Test 1')
# `loc` is passed by keyword; passing it positionally is deprecated.
plt.figlegend((control_bar, test1_bar), ('Control', 'Test 1'), loc='upper right')
plt.show()
# Test 2 mean analysis
# Mean of column 1 of the per-uid counts, one value per metric.
# NOTE(review): column 1 is presumably the conversion count — confirm
# against tools.Analysis.
metric_names = ['A', 'B', 'C']
control_metrics_per_person = np.array(
    [np.mean(a.uid_counts_for_test_metric(False, False, m)[:, 1]) for m in metric_names]
)
test2_metrics_per_person = np.array(
    [np.mean(a.uid_counts_for_test_metric(False, True, m)[:, 1]) for m in metric_names]
)
# Grouped bars: control (blue) and Test 2 (green) side by side per metric.
# plt.hold(True) has been removed from matplotlib and is not needed, since
# pyplot keeps drawing on the current axes until plt.show().
width = 0.35
group_positions = np.arange(len(metric_names))
# align='edge' is explicit because the tick positions below were chosen for
# left-edge-aligned bars; newer matplotlib centres bars by default.
control_bar = plt.bar(group_positions, control_metrics_per_person, width=width, color='b', align='edge')
test2_bar = plt.bar(group_positions + width, test2_metrics_per_person, width=width, color='g', align='edge')
plt.xticks([0.4, 1.4, 2.4], ('A', 'B', 'C'))
plt.ylabel('Mean Conversions Per Person')
plt.title('Test 2')
# `loc` is passed by keyword; passing it positionally is deprecated.
plt.figlegend((control_bar, test2_bar), ('Control', 'Test 2'), loc='upper right')
plt.show()
# Test 1 effect by gender
# (Only Test 1 is broken down here: both gender series use the (True, False)
# flags. The control series is not split by gender.)
# NOTE(review): column 1 of each result is presumably the conversion count —
# confirm against tools.Analysis.
metric_names = ['A', 'B', 'C']
control_metrics_per_person = np.array(
    [np.mean(a.uid_counts_for_test_metric(False, False, m)[:, 1]) for m in metric_names]
)
test1_metrics_per_person_male = np.array(
    [np.mean(a.uid_counts_for_test_gender_metric(True, False, 'm', m)[:, 1]) for m in metric_names]
)
test1_metrics_per_person_female = np.array(
    [np.mean(a.uid_counts_for_test_gender_metric(True, False, 'f', m)[:, 1]) for m in metric_names]
)
# Grouped bars: control (blue), Test 1 male (red), Test 1 female (green).
# plt.hold(True) has been removed from matplotlib and is not needed, since
# pyplot keeps drawing on the current axes until plt.show().
width = 0.3
group_positions = np.arange(len(metric_names))
# align='edge' is explicit because the tick positions below were chosen for
# left-edge-aligned bars; newer matplotlib centres bars by default.
control_bar = plt.bar(group_positions, control_metrics_per_person, width=width, color='b', align='edge')
test1_bar_male = plt.bar(group_positions + width, test1_metrics_per_person_male, width=width, color='r', align='edge')
test1_bar_female = plt.bar(group_positions + 2 * width, test1_metrics_per_person_female, width=width, color='g', align='edge')
plt.xticks([0.5, 1.5, 2.5], ('A', 'B', 'C'))
plt.ylabel('Mean Conversions Per Person')
plt.title('Test 1')
# `loc` is passed by keyword; passing it positionally is deprecated.
plt.figlegend(
    (control_bar, test1_bar_male, test1_bar_female),
    ('Control', 'Test 1 (Male)', 'Test 1 (Female)'),
    loc='upper right',
)
plt.show()
# Test 1 analysis with confidence
# Control means per metric, plotted without error bars.
# NOTE(review): column 1 of each result is presumably the conversion count —
# confirm against tools.Analysis.
control_metrics_per_person = np.array(
    [np.mean(a.uid_counts_for_test_metric(False, False, m)[:, 1]) for m in ['A', 'B', 'C']]
)
# Test 1 per-uid counts, kept per metric because the interval needs each
# sample's size and spread, not just its mean.
test1_counts_per_person_a = a.uid_counts_for_test_metric(True, False, 'A')[:, 1]
test1_counts_per_person_b = a.uid_counts_for_test_metric(True, False, 'B')[:, 1]
test1_counts_per_person_c = a.uid_counts_for_test_metric(True, False, 'C')[:, 1]
test1_counts = [test1_counts_per_person_a, test1_counts_per_person_b, test1_counts_per_person_c]
means = np.array([np.mean(counts) for counts in test1_counts])
# standard error of the mean
sems = np.array([stats.sem(counts) for counts in test1_counts])
# Two-sided critical value from the percent-point function (inverse CDF) of
# Student's t with n-1 degrees of freedom. The previous code passed n-1 to
# norm.ppf, where the second positional argument is `loc`, which shifted the
# quantile by n-1 and inflated the error bars.
confidence = 0.95
ppfs = np.array(
    [stats.t.ppf((1 + confidence) / 2., len(counts) - 1) for counts in test1_counts]
)
# half-widths of the confidence intervals around each mean
hhs = sems * ppfs
# plotting the result
# plt.hold(True) has been removed from matplotlib and is not needed, since
# pyplot keeps drawing on the current axes until plt.show().
width = 0.35
# align='edge' is explicit because the tick positions below were chosen for
# left-edge-aligned bars; newer matplotlib centres bars by default.
control_bar = plt.bar(np.arange(3), control_metrics_per_person, width=width, color='b', align='edge')
test1_bar = plt.bar(np.arange(3) + width, means, width=width, yerr=hhs, color='r', align='edge')
plt.xticks([0.4, 1.4, 2.4], ('A', 'B', 'C'))
plt.ylabel('Mean Conversions Per Person')
plt.title('Test 1')
# `loc` is passed by keyword; passing it positionally is deprecated.
plt.figlegend((control_bar, test1_bar), ('Control', 'Test 1'), loc='upper right')
plt.show()