%matplotlib inline

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import time

sns.set_context("talk")

def score_fun(y_hat, y, clamp=True):
    """
    Scoring function with special penalty for undercall.
    
    y_hat and y are 1-D arrays.

    """
    if clamp:
        y_hat = np.clip(y_hat, 0.0, 600.0)

    err = y_hat - y

    score = (
        np.sum(np.abs(err[err < 0.0])) * 10.0 +
        np.sum(np.abs(err[err >= 0.0]))
        ) / len(err)
    
    return score

from scipy.optimize import minimize

class RidgeRegressSpecialPenalty(object):
    """
    Ridge regression model with special penalty function.

    """
    opts = {'maxiter': 20000}

    def __init__(self, lam=1.0, tol=1e-3, solver='L-BFGS-B'):
        self.lam = lam
        self.tol = tol
        self.solver = solver

    def fit(self, X, y, theta_guess, w=None, bounds=None):
        self.X = X.copy()
        self.y = y.copy()
        if w is not None:
            self.w = w
        else:
            self.w = np.ones_like(y)
        self.m = len(y)

        ret = minimize(
            self.cost_fun, theta_guess, jac=True,
            method=self.solver, tol=self.tol, options=self.opts,
            bounds=bounds
            )
        self.theta = ret.x
        return ret

    def predict(self, X):
        y_hat = X.dot(self.theta)
        return y_hat

    def cost_fun(self, theta):
        # Initialize penalty array
        k = np.ones_like(self.y)

        # Compute residuals and penalize undercalls
        h_t = self.X.dot(theta)
        res = h_t - self.y
        k[res < 0] *= -10.0

        # Weights
        k = k * self.w

        # Compute cost
        J = (
            (1.0 / self.m) * np.sum(k * res) +
            (self.lam / (2.0 * self.m)) * np.sum(theta**2)
            )

        # Compute gradient
        grad = (
            (1.0 / self.m) * np.dot(self.X.T, k) +
            (self.lam / self.m) * theta
            )

        return (J, grad)

with np.load("prelim_files.npz") as data:
    X = data['X']
    Y = data['Y']
    X_val = data['X_val']
    Y_val = data['Y_val']
del data

# Uncomment, augment these for grid search
# rates = [1.0, .9999, .999, .99, .9, .8]
# lambdas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1,]

rates = [0.999,]
lambdas = [0.001,]
results = []
for rate in rates:
    for lam in lambdas:
        t0 = time.clock()
        
        # Initialize starting guess, set weights, set bounds
        guess = np.ones(X.shape[1])
        weights = np.flipud(rate ** np.arange(len(Y)))
        bounds = [(0.0, 600.0)] * len(guess)
        
        # Initialize model, fit, predict
        ridge = RidgeRegressSpecialPenalty(lam=lam, tol=1e-6)
        ret = ridge.fit(X, Y, guess, weights, bounds)
        Y_pred_ridge = ridge.predict(X_val)
        
        # Score against validation set
        score = score_fun(Y_pred_ridge, Y_val, clamp=True)
        
        # Store results
        results.append((ret, score, rate, lam, ridge))
        print "Status:", ret.status, "Sec:", (time.clock() - t0), "Score:", score, "Rate:", rate, "Lambda:", lam

plt.plot(ridge.theta)
plt.xlabel("Context Number $j$")
_ = plt.ylabel(r"$\theta_j$")

plt.plot(Y_pred_ridge)
plt.xlabel("Validation Wafer")
_ = plt.ylabel("Predicted Die Loss")

fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
plt.scatter(Y_val, Y_pred_ridge, marker='.', color=sns.color_palette()[0])
plt.axis([0, 600, 0, 600])
plt.xlabel("Actual Die Loss")
_ = plt.ylabel("Predicted Die Loss")

Qs = [5.99484250e-04,]
Rs = [2.15443469e+02,]
offsets = [1.0,]

# Uncomment for grid search
# Qs = np.logspace(-5.0, -3.0, 10)
# Rs = np.logspace(1.0, 3, 10)
# offsets = [0.0, 0.5, 1.0, 1.5, 2.0,]

H = X
results = []
for _, _Q in np.ndenumerate(Qs):
    for _, R in np.ndenumerate(Rs):
        for offset in offsets:
            
            # Initialize
            Q = _Q * np.ones(2000)
            theta_hats = np.zeros((9999, 2000))
            theta_hat_minus = np.zeros(2000)
            K = np.zeros(2000)
            theta_hat_0 = np.zeros(2000)
            P = 1.0 * np.ones(2000)
            
            # Run the filter for each time step
            for k in range(len(Y)):
                # time update (predict)
                if k != 0:
                    theta_hat_minus = theta_hats[k-1]
                else:
                    theta_hat_minus = theta_hat_0

                P_minus = P + Q

                # measurement update (correct)
                K = P_minus * H[k] / (P_minus.dot(H[k]) + R)
                res = Y[k] - theta_hat_minus.dot(H[k])
                theta_hats[k] = np.clip(theta_hat_minus + K * res, 0.0, 600.0)
                P = (1.0 - K * H[k]) * P_minus
            
            theta_offset = offset + theta_hats[-1]
            Y_pred_kalman = X_val.dot(theta_offset)
            score = score_fun(Y_pred_kalman, Y_val)
            results.append([score, _Q, R, offset])
            print "Score", score, "Q", _Q, "R", R, "Offset", offset

results = np.array(results)

plt.plot(theta_offset)
plt.xlabel("Context Number $j$")
_ = plt.ylabel(r"$\theta_j$")

plt.plot(Y_pred_kalman)
plt.xlabel("Validation Wafer")
_ = plt.ylabel("Predicted Die Loss")

fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
plt.scatter(Y_val, Y_pred_kalman, marker='.', color=sns.color_palette()[0])
plt.axis([0, 600, 0, 600])
plt.xlabel("Actual Die Loss")
_ = plt.ylabel("Predicted Die Loss")

fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
plt.scatter(Y_pred_ridge, Y_pred_kalman, marker='.', color=sns.color_palette()[0])
plt.axis([0, 600, 0, 600])
plt.xlabel("Ridge Predicted Loss")
_ = plt.ylabel("Kalman Predicted Loss")

fig = plt.figure()
ax = fig.add_subplot(211)
plt.plot(ridge.theta)
plt.ylabel(r"Ridge $\theta_j$")
ax = fig.add_subplot(212)
plt.plot(theta_offset)
plt.xlabel("Context Number $j$")
_ = plt.ylabel(r"Kalman $\theta_j$")

with np.load("final_files.npz") as data:
    X_final = data['X_final']
    Y_final = data['Y_final']
    X_val_final = data['X_val_final']
del data

t0 = time.clock()

# Initialize starting guess, set weights, set bounds
guess = np.zeros(X_final.shape[1])
weights = np.flipud(0.4 ** np.arange(len(Y_final)))
bounds = [(0.0, 600.0)] * len(guess)

# Initialize model, fit, predict
ridge_final = RidgeRegressSpecialPenalty(lam=0.01, tol=1e-6)
ret = ridge_final.fit(X_final, Y_final, guess, weights, bounds)

# Score against subset of training set
Y_pred = ridge_final.predict(X_final[9500:, :])
score = score_fun(Y_pred, Y_final[9500:], clamp=True)

print "Status:", ret.status, "Sec:", (time.clock() - t0), "Score:", score

plt.plot(ridge.theta)
plt.xlabel("Context Number $j$")
_ = plt.ylabel(r"$\theta_j$")

plt.plot(np.arange(9500, 9999), Y_final[9500:])
plt.plot(np.arange(9500, 9999), Y_pred, 'r-')
plt.xlabel("Training Wafer")
plt.ylabel("Die Loss")
_ = plt.legend(("Actual", "Predicted"))

Y_val_final = ridge_final.predict(X_val_final)
Y_val_final[Y_val_final > 600.0] = 600.0
Y_val_final[Y_val_final < 0.0] = 0.0
plt.plot(Y_val_final)
plt.xlabel("Validation Wafer")
_ = plt.ylabel("Predicted Die Loss")

Y_val_final = 180.0 * np.ones(10000)

np.savetxt('Y_val_final.csv', np.int32(Y_val_final),'%0.0i')