"""Worked examples for three kcbo tests, exported from a notebook.

The script runs three independent demos in order:

1. ``lognormal_comparison_test`` on two synthetic lognormal samples,
2. ``t_test`` on two synthetic normal samples,
3. ``conversion_test`` on two hand-written trial/success counts.

Each demo prints the textual summary returned by kcbo and plots the
arrays it pulls out of the returned ``data`` mapping.
"""
from kcbo import lognormal_comparison_test, t_test, conversion_test
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

# ``%matplotlib inline`` is IPython-only syntax and breaks a plain .py file.
# Issue the same magic through the IPython API when it is available and do
# nothing when the script is run by a regular interpreter.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# NOTE(review): ``sns.distplot`` is deprecated in recent seaborn releases;
# it is kept here because the seaborn version this script targets is not
# visible from this file -- confirm before migrating to histplot/kdeplot.

# ---------------------------------------------------------------------------
# 1. Lognormal comparison
# ---------------------------------------------------------------------------

# Generate some data: two lognormal samples whose log-scale means differ
# slightly (3 vs 3.03), each tagged with its group label.
g1d = np.random.lognormal(mean=3, sigma=1, size=10000)
g1l = ['A'] * g1d.shape[0]

g2d = np.random.lognormal(mean=3.03, sigma=1, size=10000)
g2l = ['B'] * g2d.shape[0]

g1 = pd.DataFrame(data=g1d, columns=['value'])
g1['group'] = g1l

g2 = pd.DataFrame(data=g2d, columns=['value'])
g2['group'] = g2l

lognormal_data = pd.concat([g1, g2])

summary, data = lognormal_comparison_test(lognormal_data, samples=100000)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(summary)

# Per-group results are keyed by group label; the comparison between the
# two groups is keyed by the ('A', 'B') tuple.
A, B = data['A']['median'], data['B']['median']
diff = data[('A', 'B')]['diff_medians']

f, axes = plt.subplots(1, 2, figsize=(12, 7))
sns.set(style="white", palette="muted")
sns.despine(left=True)

sns.distplot(A, ax=axes[0], label='Median Estimate Density for A')
sns.distplot(B, ax=axes[0], label='Median Estimate Density for B')
sns.distplot(diff, ax=axes[1], label='Difference of Densities (B-A)')

axes[0].legend()
axes[1].legend()
plt.show()

# ---------------------------------------------------------------------------
# 2. t-test
# ---------------------------------------------------------------------------

n1, n2 = (140, 200)
group1 = np.random.normal(15, 2, n1)
group2 = np.random.normal(15.7, 2, n2)

# Materialise the zips: on Python 3 ``zip`` is a lazy iterator, and handing
# that straight to DataFrame is not reliable across pandas versions.
A = list(zip(['A'] * n1, group1))
B = list(zip(['B'] * n2, group2))

df = pd.concat([
    pd.DataFrame(A, columns=['group', 'value']),
    pd.DataFrame(B, columns=['group', 'value']),
])
# A bare ``df.head()`` only renders as the last expression of a notebook
# cell; print it so the preview is visible when run as a script too.
print(df.head())

description, data = t_test(
    df,
    groupcol='group',
    valuecol='value',
    samples=60000,
    progress_bar=True,
)
print(description)

diff = data[('A', 'B')]['diff_means']

f, axes = plt.subplots(1, 1, figsize=(12, 7))
sns.despine(left=True)
# No ``ax`` is passed, so distplot draws on the current axes, i.e. the
# figure created just above.
sns.distplot(diff, label='Difference of Means').legend()
plt.show()

# ---------------------------------------------------------------------------
# 3. Conversion test
# ---------------------------------------------------------------------------

# One row per group with its number of trials and successes.
A = {'group': 'A', 'trials': 10000, 'successes': 5000}
B = {'group': 'B', 'trials': 8000, 'successes': 4090}

df = pd.DataFrame([A, B])
print(df)

summary, data = conversion_test(
    df,
    groupcol='group',
    successcol='successes',
    totalcol='trials',
)
print(summary)

A = data['A']['distribution']
B = data['B']['distribution']
diff = data[('A', 'B')]['distribution']

f, axes = plt.subplots(1, 2, figsize=(12, 7))
sns.set(style="white", palette="muted")
sns.despine(left=True)

sns.distplot(A, ax=axes[0], label='Density Estimate for A')
sns.distplot(B, ax=axes[0], label='Density Estimate for B')
sns.distplot(diff, ax=axes[1], label='Difference of Densities (B-A)')

axes[0].legend()
axes[1].legend()
plt.show()