using Distributions, Gadfly, DataFrames
# Hardcoding these in because gist
bach = [1,0,0,1,2,2,1,5,2,0,0,0,0,0,0,1,1,1,0,0,0,1,1,2,1,3,2,0,0,
3,0,0,0,2,1,0,2,1,0,0,1,3,0,1,1,0,2,0,0,2,2,1,3,0,0,0,1,1];
none = [2,2,1,1,2,2,1,2,1,0,2,1,1,2,0,2,2,0,2,1,0,0,3,6,1,6,4,0,
3,2,0,1,0,0,0,3,0,0,0,0,0,1,0,4,2,1,0,0,1,0,3,2,5,0,1,1,
2,1,2,1,2,0,0,0,2,1,0,2,0,2,4,1,1,1,2,0,1,1,1,1,0,2,3,2,
0,2,1,3,1,3,2,2,3,2,0,0,0,1,0,0,0,1,2,0,3,3,0,1,2,2,2,0,
6,0,0,0,2,0,1,1,1,3,3,2,1,1,0,1,0,0,2,0,2,0,1,0,2,0,0,2,
2,4,1,2,3,2,0,0,0,1,0,0,1,5,2,1,3,2,0,2,1,1,3,0,5,0,0,2,
4,3,4,0,0,0,0,0,0,2,2,0,0,2,0,0,1,1,0,2,1,3,3,2,2,0,0,2,
3,2,4,3,3,4,0,3,0,1,0,1,2,3,4,1,2,6,2,1,2,2];
# Priors
a = 2; b = 1;
# Sufficient stats for data
sy1 = sum(bach); n1 = length(bach);
sy2 = sum(none); n2 = length(none);
# Thetas
thetabach_mc = rand(Gamma(a+sy1, 1/(b+n1)), 5000);
thetanone_mc = rand(Gamma(a+sy2, 1/(b+n2)), 5000);
# Posterior predictive
ybach_mc = map(x -> rand(Poisson(x)), thetabach_mc);
ynone_mc = map(x -> rand(Poisson(x)), thetanone_mc);
# DF for plotting
df = DataFrame(bachelors=ybach_mc, none=ynone_mc)
df = stack(df, [1,2])
names!(df, [:education, :children])
plot(df, x="children", color="education", Geom.histogram(position=:dodge))