from IPython.display import YouTubeVideo
YouTubeVideo("WESld11iNcQ")

from IPython.display import Image
import prettyplotlib as ppl
from prettyplotlib import plt
import numpy as np
import scipy as sp
import seaborn as sns
sns.set_context('talk')
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica'], 'size': 22})
rc('xtick', labelsize=12) 
rc('ytick', labelsize=12)
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)
%matplotlib inline

from IPython.html import widgets # Widget definitions
from IPython.display import display # Used to display widgets in the notebook
from IPython.html.widgets import interact, interactive
from IPython.display import clear_output, display, HTML

def gen_plot(success=(0, 100), failure=(0, 100)):
    alpha = 5 + success
    beta = 5 + failure
    fig = plt.figure(figsize=(8, 6))
    x = np.linspace(0, 1, 100)
    ax = fig.add_subplot(111, xlabel='Chance of success (beating the market)', 
                         ylabel='Probability of hypothesis', 
                         title=r'Posterior probability distribution of $\theta$')
    ax.plot(x, sp.stats.beta(alpha, beta).pdf(x), linewidth=3.)
    ax.set_xticklabels(['0\%', '20\%', '40\%', '60\%', '80\%', '100\%']);

from IPython.display import Image
import prettyplotlib as ppl
from prettyplotlib import plt
import numpy as np
import scipy as sp
import seaborn as sns
sns.set_context('talk')
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica'], 'size': 22})
rc('xtick', labelsize=12) 
rc('ytick', labelsize=12)
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)
%matplotlib inline

from IPython.html import widgets # Widget definitions
from IPython.display import display # Used to display widgets in the notebook
from IPython.html.widgets import interact, interactive
from IPython.display import clear_output, display, HTML

from IPython.display import Image
import prettyplotlib as ppl
from prettyplotlib import plt
import numpy as np
import scipy as sp
import seaborn as sns
sns.set_context('talk')
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica'], 'size': 22})
rc('xtick', labelsize=12) 
rc('ytick', labelsize=12)
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)
%matplotlib inline

from IPython.html import widgets # Widget definitions
from IPython.display import display # Used to display widgets in the notebook
from IPython.html.widgets import interact, interactive
from IPython.display import clear_output, display, HTML

np.random.seed(9)
algo_a = sp.stats.bernoulli(.5).rvs(10) # 10 samples from 50% chance of beating the market
algo_b = sp.stats.bernoulli(.6).rvs(10) # 10 samples from 60% chance of beating the market

print 'Beat the market algo A: ', algo_a
print 'Beat the market algo B: ', algo_b

print 'Probability algo A beating the market = %.0f%%' 
% (np.mean(algo_a) * 100)
print 'Probability algo B beating the market = %.0f%%' 
% (np.mean(algo_b) * 100)

def run_exp():
    # Generate 50 random binary variables with probability = 0.5
    test_a = sp.stats.bernoulli(.5).rvs(50)
    test_b = sp.stats.bernoulli(.5).rvs(50)

    for i in range(2, 50):
        # Run statistical t-test
        _, p = sp.stats.ttest_ind(test_a[:i], test_b[:i])
        if p < 0.05:
            return True
        
    return False

p_sign_result = np.mean([run_exp() for i in range(1000)])

print 'Probability of getting significant result even though no\n difference exists = %.2f%%' % (p_sign_result * 100)

def gen_plot(successes=(0, 100, 1), failures=(0, 100, 1)):
    alpha_prior=5
    beta_prior=5
    if successes == 0 and failures == 0:
        title = r'Prior probability distribution on $\theta$'
    else:
        title = r'Posterior probability distribution of $\theta$ after having seen data'
        
    alpha = alpha_prior + successes
    beta = beta_prior + failures
    x = np.linspace(0, 1, 201)
    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111, xlabel=r'Chance of success (profit or conversion)', 
                         ylabel=r'Probability',
                         title=title)
    ax.plot(x, sp.stats.beta(alpha, beta).pdf(x), linewidth=3.)
    ax.set_xticklabels([r'0\%', r'20\%', r'40\%', r'60\%', r'80\%', r'100\%']);
    
    return

def gen_plot(success=(0, 100), failure=(0, 100)):
    alpha = 5 + success
    beta = 5 + success
    fig = plt.figure(figsize=(8, 6))
    x = np.linspace(0, 1, 100)
    ax = fig.add_subplot(111, xlabel='Chance of success (beating the market)', 
                         ylabel='Probability of hypothesis', 
                         title=r'Posterior probability distribution of $\theta$')
    ax.plot(x, sp.stats.beta(alpha, beta).pdf(x), linewidth=3.)
    ax.set_xticklabels(['0\%', '20\%', '40\%', '60\%', '80\%', '100\%']);

gen_plot(0, 0)

interactive(gen_plot)

def plot_approx():
    from scipy import stats
    fig = ppl.plt.figure(figsize=(14, 6))
    x_plot = np.linspace(0, 1, 200)
    ax1 = fig.add_subplot(121, title='What we want', ylim=(0, 2.5))# , xlabel=r'$\theta$', ylabel=r'$P(\theta)$')
    ppl.plot(ax1, x_plot, stats.beta(4, 4).pdf(x_plot), linewidth=4.)
    ax2 = fig.add_subplot(122, title='What we get', xlim=(0, 1), ylim=(0, 2300))#, xlabel=r'\theta', ylabel='\# of samples')
    ax2.hist(stats.beta(4, 4).rvs(20000), bins=20);

plot_approx()

np.random.seed(9)
algo_a = sp.stats.bernoulli(.5).rvs(300) # 50% profitable days
algo_b = sp.stats.bernoulli(.6).rvs(300) # 60% profitable days

import pymc as pm
model = pm.Model()
with model: # model specifications in PyMC3 are wrapped in a with-statement
    # Define random variables
    theta_a = pm.Beta('theta_a', alpha=5, beta=5) # prior
    theta_b = pm.Beta('theta_b', alpha=5, beta=5) # prior
    
    # Define how data relates to unknown causes
    data_a = pm.Bernoulli('observed A',
                          p=theta_a, 
                          observed=algo_a)
    
    data_b = pm.Bernoulli('observed B', 
                          p=theta_b, 
                          observed=algo_b)
    
    # Inference!
    start = pm.find_MAP() # Find good starting point
    step = pm.Slice() # Instantiate MCMC sampling algorithm
    trace = pm.sample(10000, step, start=start, progressbar=False) # draw posterior samples using slice sampling 

sns.distplot(trace['theta_a'], label=r'$\theta_a$')
sns.distplot(trace['theta_b'], label=r'$\theta_b$')
plt.title('Posteriors')
plt.legend()

p_b_better_than_a = np.mean(trace['theta_a'] < trace['theta_b'])
print 'Probability that algo B is better than A = %.2f%%' % (p_b_better_than_a * 100)

np.random.seed(9)
algos = []
algos_idx = []
samples = 300
for i, p_algo in enumerate(sp.stats.beta(10, 10).rvs(20)):
    algos.append(sp.stats.bernoulli(p_algo).rvs(samples))
    algos_idx.append(np.ones(samples) * i)
    
algos = np.asarray(algos)
algos_idx = np.asarray(algos_idx, dtype=np.int)
print algos.shape
print algos
print algos_idx

with pm.Model() as model: # model specifications in PyMC3 are wrapped in a with-statement
    # Define random variables
    grp_mean = pm.Beta('grp mean', alpha=2, beta=2) # prior
    grp_scale = pm.Gamma('grp scale', alpha=1, beta=10./10**2) # prior
    
    # Transform
    alpha = grp_mean * grp_scale
    beta = (1 - grp_mean) * grp_scale
    
    # Individual random variables, vector of lenght 20
    theta = pm.Beta('theta', alpha=alpha, beta=beta, shape=20)
    
    # Define how data relates to unknown causes
    data = pm.Bernoulli('observed',
                        p=theta[algos_idx],
                        observed=algos)
    
    # Inference!
    start = pm.find_MAP() # Find good starting point
    step = pm.NUTS (scaling=start) # Instantiate MCMC sampling algorithm
    trace = pm.sample(10000, step, start=start, progressbar=False)[:2000] # draw posterior samples using slice sampling

pm.traceplot(trace);