# special IPython command to prepare the notebook for matplotlib
%matplotlib inline 

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np

# special matplotlib argument for improved plots
from matplotlib import rcParams

#colorbrewer2 Dark2 qualitative color table
dark2_colors = [(0.10588235294117647, 0.6196078431372549, 0.4666666666666667),
                (0.8509803921568627, 0.37254901960784315, 0.00784313725490196),
                (0.4588235294117647, 0.4392156862745098, 0.7019607843137254),
                (0.9058823529411765, 0.1607843137254902, 0.5411764705882353),
                (0.4, 0.6509803921568628, 0.11764705882352941),
                (0.9019607843137255, 0.6705882352941176, 0.00784313725490196),
                (0.6509803921568628, 0.4627450980392157, 0.11372549019607843)]

rcParams['figure.figsize'] = (10, 6)
rcParams['figure.dpi'] = 150
rcParams['axes.color_cycle'] = dark2_colors
rcParams['lines.linewidth'] = 2
rcParams['axes.facecolor'] = 'white'
rcParams['font.size'] = 14
rcParams['patch.edgecolor'] = 'white'
rcParams['patch.facecolor'] = dark2_colors[0]
rcParams['font.family'] = 'StixGeneral'

# Let's look at the help file for the stats.binom function
# stats.binom?

n = 10
p = 0.3
k = np.arange(0,11) # plot all values between 0 and 10
y = stats.binom.pmf(k, n, p)
print y

plt.plot(k, y, 'o-')
plt.title('Binomial: n=%i , p=%.2f' % (n,p),fontsize=15)
plt.xlabel('Number of successes')
plt.ylabel('Probability of successes', fontsize=15)
plt.show()

# your turn

# Try changing p to p = 0.6.
# Try plotting values values larger than 10.

# stats.binom?

data = stats.binom.rvs(n = 10, p = 0.3, size = 10000)
print "Mean: %g" % np.mean(data)
print "SD: %g" % np.std(data, ddof=1)

plt.figure()
plt.hist(data, bins = 10, normed = True)
plt.xlabel("x")
plt.ylabel("density")
plt.show()

# Let's look at the help file for the stats.binom function
# stats.poisson?

lam = 2 # rate parameter lambda
n = np.arange(0, 11)
y = stats.poisson.pmf(n, lam)
print y

plt.plot(n, y, 'o-')
plt.title('Poisson: $\lambda$ =%i' % lam)
plt.xlabel('Number of accidents')
plt.ylabel('Probability of number of accidents')
plt.show()

# your turn

# try changing the rate parameters to 1. 
# Try changing the rate parameter to 8. 
# stats.poisson?

data = stats.poisson.rvs(mu = 2, loc = 0, size = 1000)
print "Mean: %g" % np.mean(data)
print "SD: %g" % np.std(data, ddof=1)

plt.figure()
plt.hist(data, bins = 9, normed = True)
plt.xlim(0, 10)
plt.xlabel("Number of accidents")
plt.title("Simulating Poisson Random Variables")
plt.show()

# your turn 
# plot the pdf of a normal distribution with mean = 0 and 
#    standard deviation = 1

mu = 0 # mean
sigma = 1 # standard deviation
x = np.arange(-5,5,0.1)

y = stats.norm.pdf(x, 0, 1)
plt.plot(x, y)
plt.title('Normal: $\mu$=%.1f, $\sigma^2$=%.1f' % (mu, sigma))
plt.xlabel('x')
plt.ylabel('Probability density')
plt.show()

a = 0.5
b = 0.5
x = np.arange(0.01, 1, 0.01)
y = stats.beta.pdf(x, a, b)
plt.plot(x, y)
plt.title('Beta: a=%.1f, b=%.1f' % (a,b))
plt.xlabel('x')
plt.ylabel('Probability density')
plt.show()

# your turn

# Try changing alpha = 0.8, beta = 0.5
# Try changing alpha = 2, beta = 6
# Try changing alpha = 4, beta = 2
# Try changing alpha = 1, beta = 1

# What do you notice when alpha and beta are both less than 1 
#    compared to both bigger than 1? 

lam = 0.5
x = np.arange(0, 15, 0.1)
y = lam * np.exp(-lam * x) # could also use stats.expon.pdf
plt.plot(x,y)
plt.title('Exponential: $\lambda$ =%.2f' % lam)
plt.xlabel('x')
plt.ylabel('Probability density')
plt.show()

data = stats.expon.rvs(scale = 2, size = 1000)
print "Mean: %g" % np.mean(data)
print "SD: %g" % np.std(data, ddof=1)

plt.figure()
plt.hist(data, bins = 20, normed = True)
plt.xlim(0, 15)
plt.title("Simulating Exponential Random Variables")
plt.show()

# Generate N = 1000 data sets of size 100 numbers drawn from an 
#    exponential distribution (lambda = 0.5)
data = stats.expon.rvs(scale=2, size = (1000, 100))

# Compute sample mean for each N datasets
sample_means = np.mean(data, axis = 1)
                       
print 'The mean of the means is: ', np.mean(sample_means)
print 'The standard deviation of the means is: ', np.std(sample_means, ddof=1) 
plt.figure()
plt.boxplot(sample_means)
plt.figure()
plt.hist(sample_means, normed=True)
plt.show()


n = 20 
for N in [100, 1000, 10000]:
    data = stats.expon.rvs(scale = 2, size=(N, n))
    sample_means = np.mean(data, axis=1) 

    mu = np.mean(sample_means)
    sig = np.std(sample_means, ddof=1)

    plt.figure()
    plt.hist(sample_means, bins=20, normed=True)
    x = np.linspace(0, 4, 100)
    y = stats.norm.pdf(x, loc=mu, scale=sig)
    plt.plot(x, y, color = 'r')
    plt.title('Distribution of sample means using N=%i datasets' % N)

n = 100

for N in [100, 1000, 10000]:
    data = stats.expon.rvs(scale = 2, size=(N, n))
    sample_means = np.mean(data, axis=1) 

    mu = np.mean(sample_means)
    sig = np.std(sample_means, ddof=1)

    print 'Standard deviation of simulated distribution: %g' % sig
    print 'Standard deviation using CLT: %g' % np.sqrt( 4 / float(n))
    print 

N = 1000 
for n in [10, 100, 1000]:
    data = stats.expon.rvs(scale = 2, size=(N, n))
    sample_means = np.mean(data, axis=1) 

    mu = np.mean(sample_means)
    sig = np.std(sample_means, ddof=1)

    plt.figure()
    plt.hist(sample_means, bins=20, normed=True)
    x = np.linspace(0, 4, 100)
    y = stats.norm.pdf(x, loc=mu, scale=sig)
    plt.plot(x, y, color = 'r')
    plt.title('Distribution of sample means using N=%i datasets of size n=%i datasets' % (N, n))

N = 1000

for n in [10, 100, 1000]:
    data = stats.expon.rvs(scale = 2, size=(N, n))
    sample_means = np.mean(data, axis=1) 

    mu = np.mean(sample_means)
    sig = np.std(sample_means, ddof=1)

    print 'Standard deviation of simulated distribution: %g' % sig
    print 'Standard deviation using CLT: %g' % np.sqrt( 4 / float(n))
    print