import numpy as np
import matplotlib.pyplot as plt

def data_to_counts(data):
    ''' Convert the data to an array of counts of the numbers
        of occurrences of each outcome in each row.
    '''
    counts = [np.bincount(row, minlength=10) for row in data]
    return np.asarray(counts)

def draw_assignments(counts, ps, qs):
    ''' Compute the conditional distributions of the assignemnts
        of each row and then draw from these distributions.
    '''
    p_likelihoods = np.product(np.power(ps, counts), axis=1)
    q_likelihoods = np.product(np.power(qs, counts), axis=1)
    assignments_probs = p_likelihoods / (p_likelihoods + q_likelihoods)
    assignments = np.random.uniform(size=len(assignments_probs)) < assignments_probs
    return np.asarray(assignments)

def draw_probabilities(counts, assignments):
    ''' Compute the total counts in all rows assigned to each
        type and draw new sets of rates for each type given
        these counts.
    '''
    ps_rows = counts[np.where(assignments == True)]
    ps_counts = ps_rows.sum(axis=0)
    ps = np.random.dirichlet(ps_counts)
    
    qs_rows = counts[np.where(assignments == False)]
    qs_counts = qs_rows.sum(axis=0)
    qs = np.random.dirichlet(qs_counts)
    
    return ps, qs

num_cycles = 10000

data = np.loadtxt('data/gibbs_data.txt', dtype=int)
num_rows, num_outcomes = data.shape
counts = data_to_counts(data)

ps_history = np.empty((num_cycles, num_outcomes))
qs_history = np.empty((num_cycles, num_outcomes))
assignments_history = np.empty((num_cycles, num_rows), dtype=bool)

# Draw sensible starting values.
ps = np.random.dirichlet(np.ones(num_outcomes) * counts.sum() / 2)
qs = np.random.dirichlet(np.ones(num_outcomes) * counts.sum() / 2)

for i in xrange(num_cycles):
    assignments = draw_assignments(counts, ps, qs)
    ps, qs = draw_probabilities(counts, assignments)
    
    assignments_history[i] = assignments
    ps_history[i] = ps
    qs_history[i] = qs

fig, (ps_ax, qs_ax) = plt.subplots(1, 2, figsize=(16, 8))

components_to_plot = 2
for label, ax, values in [('p', ps_ax, ps_history), ('q', qs_ax, qs_history)]:
    for j in range(components_to_plot):
        ax.hist(values[:, j], bins=100, histtype='step', label='{0}[{1}]'.format(label, j))
    ax.set_title('Posterior distribution of {0}'.format(label))
    ax.legend()

np.mean(ps_history, axis=0)

np.mean(qs_history, axis=0)

np.mean(assignments_history, axis=0)