%pylab inline

class LinearModel(object):
    """
    A generic linear regressor. Uses nonlinear basis functions and can fit
    with either the normal equations or gradient descent.
    """

    def __init__(self, basisfunc=None):
        """
        Instantiate a linear regressor. If you want to use a custom basis
        function, specify it here. It should accept an array and output an
        array. The default basis function is the identity function.
        """
        self.w = array([])
        self.basisfunc = basisfunc if basisfunc is not None else self.identity

    def identity(self, x):
        #identity basis function - for models linear in x
        return x

    def basismap(self, X):
        #return X in the new basis (the design matrix)
        Xn = zeros((X.shape[0], self.basisfunc(X[0,:]).shape[0]))
        for i, xi in enumerate(X):
            Xn[i,:] = self.basisfunc(xi)
        return Xn

    def fit_gd(self, X, y, itrs=100, learning_rate=0.1, regularization=0.1):
        """
        Fit using iterative gradient descent with least squares loss.
        itrs - iterations of gradient descent
        learning_rate - step size for the updates
        regularization - weight decay. Greater values -> more regularization
        """
        #first get a new basis by using our basis func
        Xn = self.basismap(X)
        #make sure y is a column vector so broadcasting in grad() works
        y = asarray(y).reshape((-1,1))
        #initial weights
        self.w = uniform(-0.1, 0.1, (Xn.shape[1],1))
        #now optimize in this new space, using gradient descent
        print 'initial loss:', self.loss(X,y)
        for i in range(itrs):
            grad = self.grad(Xn, y, regularization)
            self.w = self.w - learning_rate*grad
        print 'final loss:', self.loss(X,y)

    def grad(self, X, y, reg):
        """
        Returns the gradient of the loss function with respect to the weights.
        Used in gradient descent training.
        """
        return -mean((y - dot(X, self.w)) * X, axis=0).reshape(self.w.shape) + reg*self.w

    def fit_normal_eqns(self, X, y, reg=1e-5):
        """
        Solves for the weights in closed form with the regularized normal
        equations: w = (reg*I + Xn'Xn)^-1 Xn'y
        """
        Xn = self.basismap(X)
        y = asarray(y).reshape((-1,1))
        #self.w = dot(pinv(Xn), y)
        self.w = dot(dot(inv(eye(Xn.shape[1])*reg + dot(Xn.T, Xn)), Xn.T), y)

    def predict(self, X):
        """
        Makes predictions on a matrix of (observations x features)
        """
        Xn = self.basismap(X)
        return dot(Xn, self.w)

    def loss(self, X, y):
        #assumes that X is the data matrix (not the design matrix)
        yh = self.predict(X)
        return mean((yh - asarray(y).reshape(yh.shape))**2)
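#Quick sanity check (our addition, not in the original notebook): on clean
#linear data, the closed-form fit with a tiny ridge penalty should recover
#the true slope almost exactly. Variable names here are ours.
X_check = arange(0, 1, 0.1).reshape((-1,1))
y_check = 3*X_check
check_model = LinearModel()
check_model.fit_normal_eqns(X_check, y_check, reg=1e-10)
print 'recovered slope (expect ~3):', check_model.w.ravel()
print 'training loss (expect ~0):', check_model.loss(X_check, y_check)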
def fourier_basis(x):
    #sines and cosines at a range of frequencies
    sins = hstack(tuple(sin(pi*n*x) for n in arange(0,1,0.1)))
    coss = hstack(tuple(cos(pi*n*x) for n in arange(0,1,0.1)))
    return hstack((sins, coss))

def linear_basis(x):
    #includes a bias!
    return hstack((1,x))

def polynomial_basis(x):
    #powers 0 through 9
    return hstack(tuple(x**i for i in range(10)))

def sigmoid_basis(x):
    #offset sigmoids
    return hstack(tuple((1+exp(-x-mu))**-1 for mu in arange(-9,9,0.5)))

def heavyside_basis(x):
    #offset Heaviside steps
    return hstack(tuple(0.5*(sign(x-mu)+1) for mu in arange(-9,9,0.5)))

#generate some data
X = arange(-8, 8, 0.1).reshape((-1,1))
y = sin(X) + randn(X.shape[0], X.shape[1])*0.3
scatter(X, y)

model = LinearModel()
model.fit_normal_eqns(X, y, reg=0.1)
Xn = arange(-8, 8, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Linear basis, no bias')
show()

model = LinearModel(basisfunc=linear_basis)
model.fit_normal_eqns(X, y, 0.1)
Xn = arange(-8, 8, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Linear basis, with bias')
show()

model = LinearModel(basisfunc=fourier_basis)
model.fit_normal_eqns(X, y, 0.1)
Xn = arange(-8, 8, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Fourier basis')
show()

model = LinearModel(basisfunc=polynomial_basis)
model.fit_normal_eqns(X, y, 0.1)
Xn = arange(-8, 8, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Polynomial basis')
show()

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.1)
Xn = arange(-8, 8, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Sigmoidal basis')
show()

model = LinearModel(basisfunc=heavyside_basis)
model.fit_normal_eqns(X, y, 0.1)
Xn = arange(-8, 8, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Heaviside basis')
show()

#a harder target: a kinked, non-sinusoidal curve
X = arange(-10, 10, 0.5)
y = 0.8*abs(X)**0.3 + 2 + randn(X.shape[0])*0.1
scatter(X, y)
X = X.reshape((-1,1))

model = LinearModel()
model.fit_normal_eqns(X, y, reg=0.1)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Linear basis, no bias')
show()

model = LinearModel(basisfunc=linear_basis)
model.fit_normal_eqns(X, y)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Linear basis, with bias')
show()

model = LinearModel(basisfunc=fourier_basis)
model.fit_normal_eqns(X, y)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Fourier basis')
show()

model = LinearModel(basisfunc=polynomial_basis)
model.fit_normal_eqns(X, y)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Polynomial basis')
show()

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.1)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Sigmoid basis')
show()

model = LinearModel(basisfunc=heavyside_basis)
model.fit_normal_eqns(X, y, 0.2)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
ylim([2,4])
title('Heaviside step function basis')
show()

model = LinearModel(basisfunc=fourier_basis)
model.fit_gd(X, y, regularization=1e-5, itrs=100, learning_rate=0.1)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Fourier basis, fit with gradient descent')
show()
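#Cross-check (our addition, not from the original notebook): with the same
#small regularization, gradient descent should land near the closed-form
#training loss, since both minimize the same squared error.
m_ne = LinearModel(basisfunc=fourier_basis)
m_ne.fit_normal_eqns(X, y, reg=1e-5)
m_gd = LinearModel(basisfunc=fourier_basis)
m_gd.fit_gd(X, y, regularization=1e-5, itrs=1000, learning_rate=0.1)
print 'loss, normal equations:', m_ne.loss(X, y)
print 'loss, gradient descent:', m_gd.loss(X, y)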
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=100, n_features=2, n_informative=2,
                           n_redundant=0, n_repeated=0, n_classes=2,
                           n_clusters_per_class=1)
scatter(X[:,0], X[:,1], c=['r' if yi == 1 else 'b' for yi in y])
y = y.reshape((-1,1))

#basis functions for 2-d inputs
def heavyside_basis(x):
    return hstack(tuple(0.5*(sign(x-mu)+1) for mu in arange(-5,5,0.3)))

def linear_basis(x):
    return hstack((1,x)).ravel()

def sigmoid_basis(x):
    return hstack(tuple((1+exp(-x-mu))**-1 for mu in arange(-5,5,0.3)))

def polynomial_basis(x):
    return hstack(tuple(x**i for i in range(5)))

def rbf_basis(x):
    return hstack(tuple(exp(-(x-mu)**2) for mu in arange(-5,5,0.3)))

#some nice plotting code
def plot_contour_scatter(model, title_text):
    #sample from a lattice (for the nice visualization)
    x1, x2 = meshgrid(arange(-5,5,0.1), arange(-5,5,0.1))
    Xnew = vstack((x1.ravel(), x2.ravel())).T
    Z = model.predict(Xnew).reshape((-1,1))
    #plot - contour plot and scatter superimposed
    contourf(arange(-5,5,0.1), arange(-5,5,0.1), Z[:,0].reshape(x1.shape),
             cmap='cool', levels=[-1e10, 0.499, 0.5, 1e10])
    colorsToUse = ['r' if yi == 1 else 'b' for yi in y]
    scatter(X[:,0], X[:,1], c=colorsToUse)
    title(title_text)
    show()

model = LinearModel(basisfunc=linear_basis)
model.fit_normal_eqns(X, y, 0.1)
#model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Linear basis')

model = LinearModel(basisfunc=heavyside_basis)
#model.fit_normal_eqns(X, y, 0.0)
model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Heaviside Basis')

model = LinearModel(basisfunc=polynomial_basis)
model.fit_normal_eqns(X, y, 0.1)
#model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Polynomial Basis (degree 4)')

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.1)
#model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Sigmoid Basis')

model = LinearModel(basisfunc=rbf_basis)
#model.fit_normal_eqns(X, y, .5)
model.fit_gd(X, y, itrs=1000, regularization=0.01, learning_rate=.1)
plot_contour_scatter(model, 'RBF Basis')

#now pass the inputs through a random projection before the basis functions
A = uniform(-0.5, 0.5, (10,2))

def heavyside_basis(x):
    xn = dot(A, x)
    return hstack(tuple(0.5*(sign(xn-mu)+1) for mu in arange(-5,5,0.5)))

def linear_basis(x):
    xn = dot(A, x)
    return hstack((1,xn)).ravel()

def sigmoid_basis(x):
    xn = dot(A, x)
    return hstack(tuple((1+exp(-xn-mu))**-1 for mu in arange(-5,5,0.3)))

def polynomial_basis(x):
    xn = dot(A, x)
    return hstack(tuple(xn**i for i in range(5)))

def rbf_basis(x):
    xn = dot(A, x)
    return hstack(tuple(exp(-(xn-mu)**2) for mu in arange(-5,5,0.3)))

model = LinearModel(basisfunc=linear_basis)
model.fit_normal_eqns(X, y, 0.1)
#model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Linear basis')

model = LinearModel(basisfunc=heavyside_basis)
#model.fit_normal_eqns(X, y, 0.2)
model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Heaviside basis')

model = LinearModel(basisfunc=sigmoid_basis)
#model.fit_normal_eqns(X, y, 0.1)
model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Sigmoid basis')

model = LinearModel(basisfunc=polynomial_basis)
#model.fit_normal_eqns(X, y, 0.1)
model.fit_gd(X, y, itrs=1000, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Polynomial basis')

model = LinearModel(basisfunc=rbf_basis)
#model.fit_normal_eqns(X, y, 0.1)
model.fit_gd(X, y, itrs=200, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'RBF basis')
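#Our addition (the helper name train_accuracy is ours, not the notebook's):
#since we regress onto 0/1 labels, thresholding the output at 0.5 turns the
#fit into a classifier, so we can report training accuracy.
def train_accuracy(model, X, y):
    yh = (model.predict(X) > 0.5).ravel()
    return mean(yh == asarray(y).ravel())

print 'training accuracy, RBF basis on random projections:', train_accuracy(model, X, y)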
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=20, n_features=2, n_informative=1,
                           n_redundant=0, n_repeated=0, n_classes=2,
                           n_clusters_per_class=1)
scatter(X[:,0], X[:,1], c=['r' if yi == 1 else 'b' for yi in y])

#corrupt the labels: flip the first five
for i in range(5):
    y[i] = 1 - y[i]
scatter(X[:,0], X[:,1], c=['r' if yi == 1 else 'b' for yi in y])
y = y.reshape((-1,1))

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.0)
plot_contour_scatter(model, 'Sigmoid basis, no regularization')
print 'size of weight vector:', norm(model.w)

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.9)
plot_contour_scatter(model, 'Sigmoid basis, with regularization')
print 'size of weight vector:', norm(model.w)

#regression again, now with a few corrupted targets
X = arange(-5, 5, 0.5).reshape((-1,1))
y = sin(X)
#now add some corruption
y[2] += 1
y[-3] -= 1
y[5] += 2
y[7] -= 0.2
y[15] += 1
y[10] -= 1
#plot it
scatter(X, y)

def sigmoid_basis(x):
    return hstack(tuple((1+exp(-x-mu))**-1 for mu in arange(-7,7,0.5)))

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.0)
yh = model.predict(arange(-7,7,0.1).reshape((-1,1)))
plot(arange(-7,7,0.1), yh)
scatter(X, y)
ylim([-4,2])
print 'size of weight vector:', norm(model.w)

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.00001)
#model.fit_gd(X, y, itrs=100, regularization=0.001, learning_rate=0.1)
yh = model.predict(arange(-7,7,0.1).reshape((-1,1)))
plot(arange(-7,7,0.1), yh)
scatter(X, y)
ylim([-4,2])
print 'size of weight vector:', norm(model.w)

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.01)
#model.fit_gd(X, y, itrs=100, regularization=0.001, learning_rate=0.1)
yh = model.predict(arange(-7,7,0.1).reshape((-1,1)))
plot(arange(-7,7,0.1), yh)
scatter(X, y)
ylim([-4,2])
print 'size of weight vector:', norm(model.w)
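#A final sweep (our addition): as the ridge penalty grows, the weight norm
#should shrink - the same shrinkage effect the three fits above display.
#The penalty values here are illustrative choices of ours.
for reg in [1e-6, 1e-4, 1e-2, 1.0]:
    m = LinearModel(basisfunc=sigmoid_basis)
    m.fit_normal_eqns(X, y, reg)
    print 'reg = %g, ||w|| = %g' % (reg, norm(m.w))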