"""A symple Python program
Written by: Neil D. Lawrence
First written: 2013/10/11
Last written: 2013/10/11"""

print "Running a Python application",
print "... finished."


a = 2*24
print a

a = a + 10
print a

import numpy
x = numpy.random.uniform() # This gives us a random number between 0 and 1, uniformly distributed
print x

import numpy as np
x = np.random.uniform()
print x

x = np.random.uniform(size=10)
print x

np.random.uniform?

# sample 10 values from a normal (Gaussian) distribution with mean 0.0 (loc) and standard deviation 1.0 (scale) 
x=np.random.normal(loc=0.0, scale=1.0, size=10)

x = np.random.normal(size=4)
y = np.random.normal(size=4)
# let's see what we generated
print x
print y
# inner product (or dot product) in python can be computed with np.dot
ip = np.dot(x, y)
print "Inner product is ", ip
# the function np.inner also does the same thing
print "Inner product is also ", np.inner(x, y)
# Or we can do it explicitly, by multiplying all array elements together and summing
print "Inner product is also ", np.sum(x*y)
# which can also be written as
print "Finally, inner product is also ", (x*y).sum()

# here we create a long array for x and y
length = 1000000
x = np.random.normal(size=length)
y = np.random.normal(size=length)

# computing with a for loop for 1,000,000 values takes a noticeable amount of time
ip = 0
for i in xrange(len(x)):
    ip+=x[i]*y[i]
print ip

# inner product computed as a built in
np.dot(x, y)
# this takes no noticeable amount of time

x = np.random.normal(size=10)
mean = x.sum()/10
print mean

mean = x.sum()/x.size
print mean

mean = x.mean()
print mean

variance = (x*x).mean() - mean*mean
print variance

means = []
variances = []
samples = [10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000] 
for N in samples:
    x = np.random.normal(loc=0.0, scale=1.0, size=N)
    mean = x.mean()
    variance = (x**2).mean() - mean**2
    means.append(mean)
    variances.append(variance)


means = np.array(means)
variances = np.array(variances)
samples = np.array(samples)

import matplotlib as plt
# This is an IPython magic command to make plots appear inline. 
# These "magic" Commands start with % and are sent to the IPython 
# interpreter rather than the Python language interpreter.
%pylab inline

plt.plot(samples, means)
xlabel('samples')
ylabel('mean')


plt.semilogx(samples, means)
xlabel('log samples')
ylabel('mean')

scale = 1.0 # set the standard deviation
loc = 0.0 # set the mean

x_plot = np.linspace(loc - 4*scale, loc + 4*scale, 100) # the x-values to use in the plot

# compute the values of this density at the locations given by x_plot
py = 1/np.sqrt(2*np.pi*scale**2)*np.exp(-0.5*(x_plot-loc)**2/(scale**2))

# sample some random values from this density
x_samps = np.random.normal(loc=loc, scale=scale, size=100)

# Plot the density
plt.plot(x_plot, py)

# Plot crosses where the samples landed. Here we plot their x-location a
# gainst an array filled with 0.05, just to show them of the bottom of the plot. 
plt.plot(x_samps, 0.05*np.ones_like(x_samps), 'gx')
plt.title('The Normal Density')

plt.plot(x_plot, -np.log(py), 'b-')

# let's sample first from the positive class. To separate 
# it from the negative, we'll add 1.5 to the samples.
x_plus = np.random.normal(loc=1.5, size=(50, 2))
# And now the negative class, to separate from the positive
# we subtract 1.5 from the samples
x_minus = np.random.normal(loc=-1.5, size=(50, 2))

# now let's plot our data. We'll make the
# positive class red crosses and the negative
# class green circles
plt.plot(x_plus[:, 0], x_plus[:, 1], 'rx')
plt.plot(x_minus[:, 0], x_minus[:, 1], 'go')

# flip a coin (i.e. generate a random number and check if it is greater than 0.5)
choose_plus = np.random.rand(1)>0.5
if choose_plus:
    # generate a random point from the positives
    index = np.random.randint(0, x_plus.shape[1])
    w = x_plus[index, :] # set the normal vector to that point.
    b = 1
else:
    # generate a random point from the negatives
    index = np.random.randint(0, x_minus.shape[1])
    w = -x_minus[index, :] # set the normal vector to minus that point.
    b = -1

# Compute coordinates for the separating hyperplane
plot_limits = np.asarray([-5, 4])
if abs(w[1])>abs(w[0]):
    # If w[1]>[w[0] in absolute value, plane is likely to be leaving tops of plot.
    x0 = plot_limits
    x1 = -(b + x0*w[0])/w[1]
else:
    # otherwise plane is likely to be leaving sides of plot.
    x1 = plot_limits
    x0 = -(b + x1*w[1])/w[0]

# Plot the data again
plot(x_plus[:, 0], x_plus[:, 1], 'rx')
plot(x_minus[:, 0], x_minus[:, 1], 'go')
# plot a line to represent the separating 'hyperplane'
plot(x0, x1, 'b-')

def hyperplane_coordinates(w, b, plot_limits):
    
    if abs(w[1])>abs(w[0]):
        # If w[1]>w[0] in absolute value, plane is likely to be leaving tops of plot.
        x0 = plot_limits
        x1 = -(b + x0*w[0])/w[1]
    else:
        # otherwise plane is likely to be leaving sides of plot.
        x1 = plot_limits
        x0 = -(b + x1*w[1])/w[0]
    return x0, x1

# import the time model to allow python to pause.
import time
# import the IPython display module to clear the output.
from IPython.display import clear_output

learn_rate = 0.1
num_iterations = 1000
for i in xrange(num_iterations):
    w_updated = False
    # select a point at random from the data
    choose_plus = np.random.uniform(size=1)>0.5
    if choose_plus:
        # choose a point from the positive data
        index = np.random.random_integers(x_plus.shape[0]-1)
        if not np.sign(np.dot(w, x_plus[index, :])+b) == 1:
            # point is currently incorrectly classified
            w = w + learn_rate*x_plus[index, :]
            b = b + learn_rate
            w_updated = True
    else:
        # choose a point from the negative data
        index = np.random.random_integers(x_minus.shape[0]-1)
        if not np.sign(np.dot(w, x_minus[index, :])+b) == -1:
            # point is currently incorrectly classified
            w = w - learn_rate*x_minus[index, :]
            b = b - learn_rate
            w_updated = True
    if w_updated or i==0: 
        # Plot the data again
        plt.gca().clear()
        plt.plot(x_plus[:, 0], x_plus[:, 1], 'rx')
        plt.plot(x_minus[:, 0], x_minus[:, 1], 'go')
        # plot a line to represent the separating 'hyperplane'
        x0, x1 = hyperplane_coordinates(w, b, np.asarray([-5, 4]))
        plt.plot(x0, x1, 'b-')
        display(plt.gcf())
        time.sleep(1)
        clear_output()

x = np.random.normal(size=4)

m_true = 1.4
c_true = -3.1

y = m_true*x+c_true

plt.plot(x, y, 'rx') # plot data as red crosses

noise = np.random.normal(scale=0.5, size=4) # standard deviation of the noise is 0.5
y = m_true*x + c_true + noise
plt.plot(x, y, 'rx')

m_vals = np.linspace(m_true-3, m_true+3, 100) # create an array of linearly separated values around m_true
c_vals = np.linspace(c_true-3, c_true+3, 100) # create an array of linearly separated values around c_true

m_grid, c_grid = np.meshgrid(m_vals, c_vals) 
# we can see that this is now a 100x100 matrix of values with the print command
print "The dimensionality of m_grid is", m_grid.shape

E_grid = np.zeros((100, 100))
for i in xrange(100):
    for j in xrange(100):
        E_grid[i, j] = ((y - m_grid[i, j]*x - c_grid[i, j])**2).sum()

plt.contour(m_vals, c_vals, E_grid)
plt.xlabel('$m$')
plt.ylabel('$c$')

m_star = 3.0
c_star = -5.0

c_grad = -2*(y-m_star*x - c_star).sum()
print "Gradient with respect to c is ", c_grad

m_grad = -2*(x*(y-m_star*x - c_star)).sum()
print "Gradient with respect to m is ", m_grad

print "Original m was ", m_star, " and original c was ", c_star
learn_rate = 0.01
c_star = c_star - learn_rate*c_grad
m_star = m_star - learn_rate*m_grad
print "New m is ", m_star, " and new c is ", c_star


# first let's plot the error surface
f, (ax1, ax2) = plt.subplots(1, 2) # this is to create 'side by side axes'
ax1.contour(m_vals, c_vals, E_grid) # this makes the contour plot on axes 1.
plt.xlabel('$m$')
plt.ylabel('$c$')
m_star = 3.0
c_star = -5.0
for i in xrange(100): # do 100 iterations (parameter updates)
    # compute the gradients
    c_grad = -2*(y-m_star*x - c_star).sum()
    m_grad = -2*(x*(y-m_star*x - c_star)).sum()
    
    # update the parameters
    m_star = m_star - learn_rate*m_grad
    c_star = c_star - learn_rate*c_grad
    
    # update the location of our current best guess on the contour plot
    ax1.plot(m_star, c_star, 'g*')
    
    # show the current status on the plot of the data
    ax2.plot(x, y, 'rx')
    plt.ylim((-9, -1)) # set the y limits of the plot fixed
    x_plot = np.asarray(plt.xlim()) # get the x limits of the plot for plotting the current best line fit.
    y_plot = m_star*x_plot + c_star
    ax2.plot(x_plot, y_plot, 'b-')
    display(plt.gcf())
    time.sleep(0.25) # pause between iterations to see update
    ax2.cla()
    clear_output()

# choose a random point for the update
i = np.random.randint(x.shape[0]-1)
# update m
m_star = m_star + 2*learn_rate*(x[i]*(y[i]-m_star*x[i] - c_star))
# update c
c_star = c_star + 2*learn_rate*(y[i]-m_star*x[i] - c_star)

# first let's plot the error surface
f, (ax1, ax2) = plt.subplots(1, 2) # this is to create 'side by side axes'
ax1.contour(m_vals, c_vals, E_grid) # this makes the contour plot on axes 1.
plt.xlabel('$m$')
plt.ylabel('$c$')
m_star = 3.0
c_star = -5.0
for i in xrange(100): # do 100 iterations (parameter updates)
    # choose a random point
    i = np.random.randint(x.shape[0]-1)

    # update m
    m_star = m_star + 2*learn_rate*(x[i]*(y[i]-m_star*x[i] - c_star))
    # update c
    c_star = c_star + 2*learn_rate*(y[i]-m_star*x[i] - c_star)# compute the gradients
    
    # update the location of our current best guess on the contour plot
    ax1.plot(m_star, c_star, 'g*')
    
    # show the current status on the plot of the data
    ax2.plot(x, y, 'rx')
    plt.ylim((-9, -1)) # set the y limits of the plot fixed
    x_plot = np.asarray(plt.xlim()) # get the x limits of the plot for plotting the current best line fit.
    y_plot = m_star*x_plot + c_star
    ax2.plot(x_plot, y_plot, 'b-')
    display(plt.gcf())
    time.sleep(0.25)  # pause between iterations to see update
    ax2.cla()
    clear_output()

# Generate data from three Gaussian densities
# X0 is Gaussian centred at 2.5, 2.5
X0 = np.random.normal(loc=2.5, size=(30, 2))
# X1 is Gaussian centred at -2.5, -2.5
X1 = np.random.normal(loc=-2.5, size=(30, 2))
# X2 empty for moment
X2 = zeros((30, 2))

# put first column as Gaussian centred at 2.5
X2[:, 0] = np.random.normal(loc=2.5, size=30)

# put second column as Gaussian centred at -2.5
X2[:, 1] = np.random.normal(loc=-2.5, size=30)

plt.plot(X0[:, 0], X0[:, 1], 'rx')
plt.plot(X1[:, 0], X1[:, 1], 'go')
plt.plot(X2[:, 0], X2[:, 1], 'b+')

        
# Put all the data together in one matrix.
X=np.vstack((X0, X1, X2))
np.random.sample?
num_centres = 3
num_data = X.shape[0]
# choose three points to be the centres of the clusters
center_indices = np.random.random_integers(0, X.shape[0], num_centers)
centers = X[center_indices, :]
allocation = np.zeros(num_data)

for i in xrange(X.shape[0]):
    dists = ((centres - X[i, :])**2).sum(1)
    

np.random.random_sample(10)