using Distributions, Gadfly, DataFrames

# Hardcoding these in because gist
bach = [1,0,0,1,2,2,1,5,2,0,0,0,0,0,0,1,1,1,0,0,0,1,1,2,1,3,2,0,0,
    3,0,0,0,2,1,0,2,1,0,0,1,3,0,1,1,0,2,0,0,2,2,1,3,0,0,0,1,1];

none = [2,2,1,1,2,2,1,2,1,0,2,1,1,2,0,2,2,0,2,1,0,0,3,6,1,6,4,0,
    3,2,0,1,0,0,0,3,0,0,0,0,0,1,0,4,2,1,0,0,1,0,3,2,5,0,1,1,
    2,1,2,1,2,0,0,0,2,1,0,2,0,2,4,1,1,1,2,0,1,1,1,1,0,2,3,2,
    0,2,1,3,1,3,2,2,3,2,0,0,0,1,0,0,0,1,2,0,3,3,0,1,2,2,2,0,
    6,0,0,0,2,0,1,1,1,3,3,2,1,1,0,1,0,0,2,0,2,0,1,0,2,0,0,2,
    2,4,1,2,3,2,0,0,0,1,0,0,1,5,2,1,3,2,0,2,1,1,3,0,5,0,0,2,
    4,3,4,0,0,0,0,0,0,2,2,0,0,2,0,0,1,1,0,2,1,3,3,2,2,0,0,2,
    3,2,4,3,3,4,0,3,0,1,0,1,2,3,4,1,2,6,2,1,2,2];

# Priors
a = 2; b = 1;

# Sufficient stats for data
sy1 = sum(bach); n1 = length(bach);
sy2 = sum(none); n2 = length(none);

# Thetas
thetabach_mc = rand(Gamma(a+sy1, 1/(b+n1)), 5000);
thetanone_mc = rand(Gamma(a+sy2, 1/(b+n2)), 5000);

# Posterior predictive
ybach_mc = map(x -> rand(Poisson(x)), thetabach_mc);
ynone_mc = map(x -> rand(Poisson(x)), thetanone_mc);

# DF for plotting
df = DataFrame(bachelors=ybach_mc, none=ynone_mc)
df = stack(df, [1,2])
names!(df, [:education, :children])

plot(df, x="children", color="education", Geom.histogram(position=:dodge))