import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold  # sklearn.cross_validation was removed in scikit-learn 0.20

%matplotlib inline

# Generate two classes of points, with the y == 1 class shifted up and to the right
X = np.random.randn(20, 2)
y = np.concatenate((np.ones(10) * -1, np.ones(10)))
X[y == 1] = X[y == 1] + 1

plt.scatter(X[y == 1][:, 0], X[y == 1][:, 1], marker='o', color='red')
plt.scatter(X[y == -1][:, 0], X[y == -1][:, 1], marker='o', color='blue')
plt.xlabel("X[:,0]")
plt.ylabel("X[:,1]")

# Fit a Support Vector Regressor with a linear kernel. With +/-1 targets,
# the zero level set of the fitted function acts as a separating hyperplane.
reg = SVR(C=10.0, kernel="linear")
reg.fit(X, y)

# Plot the hyperplane on the scatterplot
plt.scatter(X[y == 1][:, 0], X[y == 1][:, 1], marker='o', color='red')
plt.scatter(X[y == -1][:, 0], X[y == -1][:, 1], marker='o', color='blue')
plt.xlabel("X[:,0]")
plt.ylabel("X[:,1]")

# Hyperplane: w0*x0 + w1*x1 + b = 0  =>  x1 = -(w0/w1)*x0 - b/w1
xs = np.linspace(np.min(X[:, 0]) - 1, np.max(X[:, 0]) + 1, 50)
m = -reg.coef_[0][0] / reg.coef_[0][1]
c = -reg.intercept_[0] / reg.coef_[0][1]
ys = m * xs + c
plt.plot(xs, ys, color='black', linewidth=2.5)

# Approximate margins: dashed lines parallel to the hyperplane through the
# first and last support vectors
sup = reg.support_vectors_[0]
ys_down = m * xs + (sup[1] - m * sup[0])
sup = reg.support_vectors_[-1]
ys_up = m * xs + (sup[1] - m * sup[0])
plt.plot(xs, ys_down, 'k--')
plt.plot(xs, ys_up, 'k--')

ypred = reg.predict(X)
mean_squared_error(y, ypred)

# We use 10-fold cross-validation to find a good value for C
cs = range(1, 20)
mses = []
for c in cs:
    kfold = KFold(n_splits=10)
    kmses = []
    for train, test in kfold.split(X):
        Xtrain, Xtest, ytrain, ytest = X[train], X[test], y[train], y[test]
        reg = SVR(C=c, kernel="linear")
        reg.fit(Xtrain, ytrain)
        ypred = reg.predict(Xtest)
        kmses.append(mean_squared_error(ytest, ypred))
    mses.append(np.mean(kmses))

plt.plot(cs, mses, linewidth=2.5)
plt.xlabel("C")
plt.ylabel("MSE")

# Data comes from http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/ESL.mixture.rda
# and was converted to CSV in R:
#   load(url("http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/ESL.mixture.rda"))
#   dat = data.frame(y=factor(y), x)
#   write.csv(dat, "ESLMixture.csv", row.names=FALSE)
eslmix_df = pd.read_csv("../data/ESLMixture.csv")
eslmix_df.head()

# Plot the data to visualize the relationship
x1 = eslmix_df[eslmix_df["y"] == 0]["X1"]
x2 = eslmix_df[eslmix_df["y"] == 0]["X2"]
plt.scatter(x1, x2, marker='o', color='red')
x1 = eslmix_df[eslmix_df["y"] == 1]["X1"]
x2 = eslmix_df[eslmix_df["y"] == 1]["X2"]
plt.scatter(x1, x2, marker='o', color='blue')
plt.xlabel("X1")
plt.ylabel("X2")

# Fit a non-linear SVM using a radial (RBF) kernel
X = eslmix_df[["X1", "X2"]].values
y = eslmix_df["y"].values
reg = SVR(C=5.0, kernel="rbf")
reg.fit(X, y)

# Plot the decision boundary on the scatterplot. SVR no longer exposes
# decision_function in current scikit-learn; predict() returns the same
# continuous output, and with 0/1 targets the boundary is the 0.5 level set.
xx, yy = np.meshgrid(np.linspace(-3, 5, 50), np.linspace(-2, 3, 50))
Z = reg.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contour(xx, yy, Z, levels=[0.5], linewidths=2, linestyles="--")

x1 = eslmix_df[eslmix_df["y"] == 0]["X1"]
x2 = eslmix_df[eslmix_df["y"] == 0]["X2"]
plt.scatter(x1, x2, marker='o', color='red')
x1 = eslmix_df[eslmix_df["y"] == 1]["X1"]
x2 = eslmix_df[eslmix_df["y"] == 1]["X2"]
plt.scatter(x1, x2, marker='o', color='blue')
plt.xlabel("X1")
plt.ylabel("X2")
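
# Since the mixture labels are binary, the same fit can be cross-checked with
# a proper classifier. A minimal sketch (not part of the original lab) using
# sklearn.svm.SVC, whose decision_function is zero exactly on the boundary;
# it reuses X and y from the mixture data above.
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

clf = SVC(C=5.0, kernel="rbf")
clf.fit(X, y)

xx, yy = np.meshgrid(np.linspace(-3, 5, 50), np.linspace(-2, 3, 50))
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
plt.contour(xx, yy, Z, levels=[0], linewidths=2, linestyles="--")
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
plt.xlabel("X1")
plt.ylabel("X2")

# Training confusion matrix for the classifier
confusion_matrix(y, clf.predict(X))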
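
# The manual cross-validation loop used earlier to pick C can be automated.
# A sketch (not in the original lab) using GridSearchCV over C and gamma for
# the radial-kernel SVR; scoring is negative MSE because scikit-learn
# maximizes scores, and the gamma grid is an arbitrary illustrative choice.
from sklearn.model_selection import GridSearchCV

param_grid = {"C": list(range(1, 20)), "gamma": [0.1, 0.5, 1.0, 2.0]}
search = GridSearchCV(SVR(kernel="rbf"), param_grid, cv=10,
                      scoring="neg_mean_squared_error")
search.fit(X, y)
search.best_params_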