%matplotlib inline import matplotlib.pyplot as plt import seaborn as sns import numpy as np import time sns.set_context("talk") def score_fun(y_hat, y, clamp=True): """ Scoring function with special penalty for undercall. y_hat and y are 1-D arrays. """ if clamp: y_hat = np.clip(y_hat, 0.0, 600.0) err = y_hat - y score = ( np.sum(np.abs(err[err < 0.0])) * 10.0 + np.sum(np.abs(err[err >= 0.0])) ) / len(err) return score from scipy.optimize import minimize class RidgeRegressSpecialPenalty(object): """ Ridge regression model with special penalty function. """ opts = {'maxiter': 20000} def __init__(self, lam=1.0, tol=1e-3, solver='L-BFGS-B'): self.lam = lam self.tol = tol self.solver = solver def fit(self, X, y, theta_guess, w=None, bounds=None): self.X = X.copy() self.y = y.copy() if w is not None: self.w = w else: self.w = np.ones_like(y) self.m = len(y) ret = minimize( self.cost_fun, theta_guess, jac=True, method=self.solver, tol=self.tol, options=self.opts, bounds=bounds ) self.theta = ret.x return ret def predict(self, X): y_hat = X.dot(self.theta) return y_hat def cost_fun(self, theta): # Initialize penalty array k = np.ones_like(self.y) # Compute residuals and penalize undercalls h_t = self.X.dot(theta) res = h_t - self.y k[res < 0] *= -10.0 # Weights k = k * self.w # Compute cost J = ( (1.0 / self.m) * np.sum(k * res) + (self.lam / (2.0 * self.m)) * np.sum(theta**2) ) # Compute gradient grad = ( (1.0 / self.m) * np.dot(self.X.T, k) + (self.lam / self.m) * theta ) return (J, grad) with np.load("prelim_files.npz") as data: X = data['X'] Y = data['Y'] X_val = data['X_val'] Y_val = data['Y_val'] del data # Uncomment, augment these for grid search # rates = [1.0, .9999, .999, .99, .9, .8] # lambdas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1,] rates = [0.999,] lambdas = [0.001,] results = [] for rate in rates: for lam in lambdas: t0 = time.clock() # Initialize starting guess, set weights, set bounds guess = np.ones(X.shape[1]) weights = np.flipud(rate ** np.arange(len(Y))) bounds = [(0.0, 600.0)] * len(guess) # Initialize model, fit, predict ridge = RidgeRegressSpecialPenalty(lam=lam, tol=1e-6) ret = ridge.fit(X, Y, guess, weights, bounds) Y_pred_ridge = ridge.predict(X_val) # Score against validation set score = score_fun(Y_pred_ridge, Y_val, clamp=True) # Store results results.append((ret, score, rate, lam, ridge)) print "Status:", ret.status, "Sec:", (time.clock() - t0), "Score:", score, "Rate:", rate, "Lambda:", lam plt.plot(ridge.theta) plt.xlabel("Context Number $j$") _ = plt.ylabel(r"$\theta_j$") plt.plot(Y_pred_ridge) plt.xlabel("Validation Wafer") _ = plt.ylabel("Predicted Die Loss") fig = plt.figure() ax = fig.add_subplot(111, aspect='equal') plt.scatter(Y_val, Y_pred_ridge, marker='.', color=sns.color_palette()[0]) plt.axis([0, 600, 0, 600]) plt.xlabel("Actual Die Loss") _ = plt.ylabel("Predicted Die Loss") Qs = [5.99484250e-04,] Rs = [2.15443469e+02,] offsets = [1.0,] # Uncomment for grid search # Qs = np.logspace(-5.0, -3.0, 10) # Rs = np.logspace(1.0, 3, 10) # offsets = [0.0, 0.5, 1.0, 1.5, 2.0,] H = X results = [] for _, _Q in np.ndenumerate(Qs): for _, R in np.ndenumerate(Rs): for offset in offsets: # Initialize Q = _Q * np.ones(2000) theta_hats = np.zeros((9999, 2000)) theta_hat_minus = np.zeros(2000) K = np.zeros(2000) theta_hat_0 = np.zeros(2000) P = 1.0 * np.ones(2000) # Run the filter for each time step for k in range(len(Y)): # time update (predict) if k != 0: theta_hat_minus = theta_hats[k-1] else: theta_hat_minus = theta_hat_0 P_minus = P + Q # measurement update (correct) K = P_minus * H[k] / (P_minus.dot(H[k]) + R) res = Y[k] - theta_hat_minus.dot(H[k]) theta_hats[k] = np.clip(theta_hat_minus + K * res, 0.0, 600.0) P = (1.0 - K * H[k]) * P_minus theta_offset = offset + theta_hats[-1] Y_pred_kalman = X_val.dot(theta_offset) score = score_fun(Y_pred_kalman, Y_val) results.append([score, _Q, R, offset]) print "Score", score, "Q", _Q, "R", R, "Offset", offset results = np.array(results) plt.plot(theta_offset) plt.xlabel("Context Number $j$") _ = plt.ylabel(r"$\theta_j$") plt.plot(Y_pred_kalman) plt.xlabel("Validation Wafer") _ = plt.ylabel("Predicted Die Loss") fig = plt.figure() ax = fig.add_subplot(111, aspect='equal') plt.scatter(Y_val, Y_pred_kalman, marker='.', color=sns.color_palette()[0]) plt.axis([0, 600, 0, 600]) plt.xlabel("Actual Die Loss") _ = plt.ylabel("Predicted Die Loss") fig = plt.figure() ax = fig.add_subplot(111, aspect='equal') plt.scatter(Y_pred_ridge, Y_pred_kalman, marker='.', color=sns.color_palette()[0]) plt.axis([0, 600, 0, 600]) plt.xlabel("Ridge Predicted Loss") _ = plt.ylabel("Kalman Predicted Loss") fig = plt.figure() ax = fig.add_subplot(211) plt.plot(ridge.theta) plt.ylabel(r"Ridge $\theta_j$") ax = fig.add_subplot(212) plt.plot(theta_offset) plt.xlabel("Context Number $j$") _ = plt.ylabel(r"Kalman $\theta_j$") with np.load("final_files.npz") as data: X_final = data['X_final'] Y_final = data['Y_final'] X_val_final = data['X_val_final'] del data t0 = time.clock() # Initialize starting guess, set weights, set bounds guess = np.zeros(X_final.shape[1]) weights = np.flipud(0.4 ** np.arange(len(Y_final))) bounds = [(0.0, 600.0)] * len(guess) # Initialize model, fit, predict ridge_final = RidgeRegressSpecialPenalty(lam=0.01, tol=1e-6) ret = ridge_final.fit(X_final, Y_final, guess, weights, bounds) # Score against subset of training set Y_pred = ridge_final.predict(X_final[9500:, :]) score = score_fun(Y_pred, Y_final[9500:], clamp=True) print "Status:", ret.status, "Sec:", (time.clock() - t0), "Score:", score plt.plot(ridge.theta) plt.xlabel("Context Number $j$") _ = plt.ylabel(r"$\theta_j$") plt.plot(np.arange(9500, 9999), Y_final[9500:]) plt.plot(np.arange(9500, 9999), Y_pred, 'r-') plt.xlabel("Training Wafer") plt.ylabel("Die Loss") _ = plt.legend(("Actual", "Predicted")) Y_val_final = ridge_final.predict(X_val_final) Y_val_final[Y_val_final > 600.0] = 600.0 Y_val_final[Y_val_final < 0.0] = 0.0 plt.plot(Y_val_final) plt.xlabel("Validation Wafer") _ = plt.ylabel("Predicted Die Loss") Y_val_final = 180.0 * np.ones(10000) np.savetxt('Y_val_final.csv', np.int32(Y_val_final),'%0.0i')