%pylab inline

class LinearModel(object):
    """
    A generic linear regressor. Uses nonlinear basis functions and can fit
    with either the normal equations or gradient descent.
    """

    def __init__(self, basisfunc=None):
        """
        Instantiate a linear regressor. If you want to use a custom basis
        function, specify it here. It should accept an array and output an
        array. The default basis function is the identity function.
        """
        self.w = array([])
        self.basisfunc = basisfunc if basisfunc is not None else self.identity

    def identity(self, x):
        #identity basis function - for models linear in x
        return x

    def basismap(self, X):
        #return X in the new basis (the design matrix)
        Xn = zeros((X.shape[0], self.basisfunc(X[0,:]).shape[0]))
        for i, xi in enumerate(X):
            Xn[i,:] = self.basisfunc(xi)
        return Xn

    def fit_gd(self, X, y, itrs=100, learning_rate=0.1, regularization=0.1):
        """
        Fit using iterative gradient descent with least squares loss.
        itrs - iterations of gradient descent
        learning_rate - step size for the updates
        regularization - weight decay. Greater values -> more regularization
        """
        #first get a new basis by using our basis func
        Xn = self.basismap(X)
        #make sure y is a column vector so broadcasting in grad() works
        y = asarray(y).reshape((-1,1))
        #initial weights
        self.w = uniform(-0.1, 0.1, (Xn.shape[1],1))
        #now optimize in this new space, using gradient descent
        print 'initial loss:', self.loss(X,y)
        for i in range(itrs):
            grad = self.grad(Xn, y, regularization)
            self.w = self.w - learning_rate*grad
        print 'final loss:', self.loss(X,y)

    def grad(self, X, y, reg):
        """
        Returns the gradient of the loss function with respect to the weights.
        Used in gradient descent training.
        """
        return -mean((y - dot(X, self.w)) * X, axis=0).reshape(self.w.shape) + reg*self.w

    def fit_normal_eqns(self, X, y, reg=1e-5):
        """
        Solves for the weights in closed form with the regularized normal
        equations: w = (reg*I + Xn'Xn)^-1 Xn'y
        """
        Xn = self.basismap(X)
        y = asarray(y).reshape((-1,1))
        #self.w = dot(pinv(Xn), y)
        self.w = dot(dot(inv(eye(Xn.shape[1])*reg + dot(Xn.T, Xn)), Xn.T), y)

    def predict(self, X):
        """
        Makes predictions on a matrix of (observations x features)
        """
        Xn = self.basismap(X)
        return dot(Xn, self.w)

    def loss(self, X, y):
        #assumes that X is the data matrix (not the design matrix)
        yh = self.predict(X)
        return mean((yh - asarray(y).reshape(yh.shape))**2)
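#Quick sanity check (our addition, not in the original notebook): on clean
#linear data, the closed-form fit with a tiny ridge penalty should recover
#the true slope almost exactly. Variable names here are ours.
X_check = arange(0, 1, 0.1).reshape((-1,1))
y_check = 3*X_check
check_model = LinearModel()
check_model.fit_normal_eqns(X_check, y_check, reg=1e-10)
print 'recovered slope (expect ~3):', check_model.w.ravel()
print 'training loss (expect ~0):', check_model.loss(X_check, y_check)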
def fourier_basis(x):
    #sines and cosines at a range of frequencies
    sins = hstack(tuple(sin(pi*n*x) for n in arange(0,1,0.1)))
    coss = hstack(tuple(cos(pi*n*x) for n in arange(0,1,0.1)))
    return hstack((sins, coss))

def linear_basis(x):
    #includes a bias!
    return hstack((1,x))

def polynomial_basis(x):
    #powers 0 through 9
    return hstack(tuple(x**i for i in range(10)))

def sigmoid_basis(x):
    #offset sigmoids
    return hstack(tuple((1+exp(-x-mu))**-1 for mu in arange(-9,9,0.5)))

def heavyside_basis(x):
    #offset Heaviside steps
    return hstack(tuple(0.5*(sign(x-mu)+1) for mu in arange(-9,9,0.5)))

#generate some data
X = arange(-8, 8, 0.1).reshape((-1,1))
y = sin(X) + randn(X.shape[0], X.shape[1])*0.3
scatter(X, y)

model = LinearModel()
model.fit_normal_eqns(X, y, reg=0.1)
Xn = arange(-8, 8, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Linear basis, no bias')
show()

model = LinearModel(basisfunc=linear_basis)
model.fit_normal_eqns(X, y, 0.1)
Xn = arange(-8, 8, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Linear basis, with bias')
show()

model = LinearModel(basisfunc=fourier_basis)
model.fit_normal_eqns(X, y, 0.1)
Xn = arange(-8, 8, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Fourier basis')
show()

model = LinearModel(basisfunc=polynomial_basis)
model.fit_normal_eqns(X, y, 0.1)
Xn = arange(-8, 8, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Polynomial basis')
show()

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.1)
Xn = arange(-8, 8, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Sigmoidal basis')
show()

model = LinearModel(basisfunc=heavyside_basis)
model.fit_normal_eqns(X, y, 0.1)
Xn = arange(-8, 8, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Heaviside basis')
show()

#a harder target: a kinked, non-sinusoidal curve
X = arange(-10, 10, 0.5)
y = 0.8*abs(X)**0.3 + 2 + randn(X.shape[0])*0.1
scatter(X, y)
X = X.reshape((-1,1))

model = LinearModel()
model.fit_normal_eqns(X, y, reg=0.1)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Linear basis, no bias')
show()

model = LinearModel(basisfunc=linear_basis)
model.fit_normal_eqns(X, y)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Linear basis, with bias')
show()

model = LinearModel(basisfunc=fourier_basis)
model.fit_normal_eqns(X, y)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Fourier basis')
show()

model = LinearModel(basisfunc=polynomial_basis)
model.fit_normal_eqns(X, y)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Polynomial basis')
show()

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.1)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Sigmoid basis')
show()

model = LinearModel(basisfunc=heavyside_basis)
model.fit_normal_eqns(X, y, 0.2)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
ylim([2,4])
title('Heaviside step function basis')
show()

model = LinearModel(basisfunc=fourier_basis)
model.fit_gd(X, y, regularization=1e-5, itrs=100, learning_rate=0.1)
Xn = arange(-10, 10, 0.05).reshape((-1,1))
yh = model.predict(Xn)
scatter(X, y)
plot(Xn, yh)
title('Fourier basis, fit with gradient descent')
show()
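#Cross-check (our addition, not from the original notebook): with the same
#small regularization, gradient descent should land near the closed-form
#training loss, since both minimize the same squared error.
m_ne = LinearModel(basisfunc=fourier_basis)
m_ne.fit_normal_eqns(X, y, reg=1e-5)
m_gd = LinearModel(basisfunc=fourier_basis)
m_gd.fit_gd(X, y, regularization=1e-5, itrs=1000, learning_rate=0.1)
print 'loss, normal equations:', m_ne.loss(X, y)
print 'loss, gradient descent:', m_gd.loss(X, y)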
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=100, n_features=2, n_informative=2,
                           n_redundant=0, n_repeated=0, n_classes=2,
                           n_clusters_per_class=1)
scatter(X[:,0], X[:,1], c=['r' if yi == 1 else 'b' for yi in y])
y = y.reshape((-1,1))

#basis functions for 2-d inputs
def heavyside_basis(x):
    return hstack(tuple(0.5*(sign(x-mu)+1) for mu in arange(-5,5,0.3)))

def linear_basis(x):
    return hstack((1,x)).ravel()

def sigmoid_basis(x):
    return hstack(tuple((1+exp(-x-mu))**-1 for mu in arange(-5,5,0.3)))

def polynomial_basis(x):
    return hstack(tuple(x**i for i in range(5)))

def rbf_basis(x):
    return hstack(tuple(exp(-(x-mu)**2) for mu in arange(-5,5,0.3)))

#some nice plotting code
def plot_contour_scatter(model, title_text):
    #sample from a lattice (for the nice visualization)
    x1, x2 = meshgrid(arange(-5,5,0.1), arange(-5,5,0.1))
    Xnew = vstack((x1.ravel(), x2.ravel())).T
    Z = model.predict(Xnew).reshape((-1,1))
    #plot - contour plot and scatter superimposed
    contourf(arange(-5,5,0.1), arange(-5,5,0.1), Z[:,0].reshape(x1.shape),
             cmap='cool', levels=[-1e10, 0.499, 0.5, 1e10])
    colorsToUse = ['r' if yi == 1 else 'b' for yi in y]
    scatter(X[:,0], X[:,1], c=colorsToUse)
    title(title_text)
    show()

model = LinearModel(basisfunc=linear_basis)
model.fit_normal_eqns(X, y, 0.1)
#model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Linear basis')

model = LinearModel(basisfunc=heavyside_basis)
#model.fit_normal_eqns(X, y, 0.0)
model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Heaviside Basis')

model = LinearModel(basisfunc=polynomial_basis)
model.fit_normal_eqns(X, y, 0.1)
#model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Polynomial Basis (degree 4)')

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.1)
#model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Sigmoid Basis')

model = LinearModel(basisfunc=rbf_basis)
#model.fit_normal_eqns(X, y, .5)
model.fit_gd(X, y, itrs=1000, regularization=0.01, learning_rate=.1)
plot_contour_scatter(model, 'RBF Basis')

#now pass the inputs through a random projection before the basis functions
A = uniform(-0.5, 0.5, (10,2))

def heavyside_basis(x):
    xn = dot(A, x)
    return hstack(tuple(0.5*(sign(xn-mu)+1) for mu in arange(-5,5,0.5)))

def linear_basis(x):
    xn = dot(A, x)
    return hstack((1,xn)).ravel()

def sigmoid_basis(x):
    xn = dot(A, x)
    return hstack(tuple((1+exp(-xn-mu))**-1 for mu in arange(-5,5,0.3)))

def polynomial_basis(x):
    xn = dot(A, x)
    return hstack(tuple(xn**i for i in range(5)))

def rbf_basis(x):
    xn = dot(A, x)
    return hstack(tuple(exp(-(xn-mu)**2) for mu in arange(-5,5,0.3)))

model = LinearModel(basisfunc=linear_basis)
model.fit_normal_eqns(X, y, 0.1)
#model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Linear basis')

model = LinearModel(basisfunc=heavyside_basis)
#model.fit_normal_eqns(X, y, 0.2)
model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Heaviside basis')

model = LinearModel(basisfunc=sigmoid_basis)
#model.fit_normal_eqns(X, y, 0.1)
model.fit_gd(X, y, itrs=100, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Sigmoid basis')

model = LinearModel(basisfunc=polynomial_basis)
#model.fit_normal_eqns(X, y, 0.1)
model.fit_gd(X, y, itrs=1000, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'Polynomial basis')

model = LinearModel(basisfunc=rbf_basis)
#model.fit_normal_eqns(X, y, 0.1)
model.fit_gd(X, y, itrs=200, regularization=0.1, learning_rate=0.01)
plot_contour_scatter(model, 'RBF basis')
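#Our addition (the helper name train_accuracy is ours, not the notebook's):
#since we regress onto 0/1 labels, thresholding the output at 0.5 turns the
#fit into a classifier, so we can report training accuracy.
def train_accuracy(model, X, y):
    yh = (model.predict(X) > 0.5).ravel()
    return mean(yh == asarray(y).ravel())

print 'training accuracy, RBF basis on random projections:', train_accuracy(model, X, y)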
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=20, n_features=2, n_informative=1,
                           n_redundant=0, n_repeated=0, n_classes=2,
                           n_clusters_per_class=1)
scatter(X[:,0], X[:,1], c=['r' if yi == 1 else 'b' for yi in y])

#corrupt the labels: flip the first five
for i in range(5):
    y[i] = 1 - y[i]
scatter(X[:,0], X[:,1], c=['r' if yi == 1 else 'b' for yi in y])
y = y.reshape((-1,1))

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.0)
plot_contour_scatter(model, 'Sigmoid basis, no regularization')
print 'size of weight vector:', norm(model.w)

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.9)
plot_contour_scatter(model, 'Sigmoid basis, with regularization')
print 'size of weight vector:', norm(model.w)

#regression again, now with a few corrupted targets
X = arange(-5, 5, 0.5).reshape((-1,1))
y = sin(X)
#now add some corruption
y[2] += 1
y[-3] -= 1
y[5] += 2
y[7] -= 0.2
y[15] += 1
y[10] -= 1
#plot it
scatter(X, y)

def sigmoid_basis(x):
    return hstack(tuple((1+exp(-x-mu))**-1 for mu in arange(-7,7,0.5)))

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.0)
yh = model.predict(arange(-7,7,0.1).reshape((-1,1)))
plot(arange(-7,7,0.1), yh)
scatter(X, y)
ylim([-4,2])
print 'size of weight vector:', norm(model.w)

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.00001)
#model.fit_gd(X, y, itrs=100, regularization=0.001, learning_rate=0.1)
yh = model.predict(arange(-7,7,0.1).reshape((-1,1)))
plot(arange(-7,7,0.1), yh)
scatter(X, y)
ylim([-4,2])
print 'size of weight vector:', norm(model.w)

model = LinearModel(basisfunc=sigmoid_basis)
model.fit_normal_eqns(X, y, 0.01)
#model.fit_gd(X, y, itrs=100, regularization=0.001, learning_rate=0.1)
yh = model.predict(arange(-7,7,0.1).reshape((-1,1)))
plot(arange(-7,7,0.1), yh)
scatter(X, y)
ylim([-4,2])
print 'size of weight vector:', norm(model.w)
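#A final sweep (our addition): as the ridge penalty grows, the weight norm
#should shrink - the same shrinkage effect the three fits above display.
#The penalty values here are illustrative choices of ours.
for reg in [1e-6, 1e-4, 1e-2, 1.0]:
    m = LinearModel(basisfunc=sigmoid_basis)
    m.fit_normal_eqns(X, y, reg)
    print 'reg = %g, ||w|| = %g' % (reg, norm(m.w))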