#!/usr/bin/env python
# coding: utf-8

# In[1]:

get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn

# #Support Vector Machines
# ##Example Dataset 1

# In[2]:

import scipy.io

# In[3]:

# Load the first example dataset (features in 'X', 0/1 labels in 'y').
data1 = scipy.io.loadmat('ex6data1.mat')

# In[4]:

# Boolean masks selecting the positive / negative examples.
pos1 = data1['y'][:, 0] == 1
neg1 = data1['y'][:, 0] == 0

plt.figure(figsize=(8, 6))
plt.title('Example Dataset 1')
plt.scatter(data1['X'][:, 0][pos1], data1['X'][:, 1][pos1], c='b', marker='+')
plt.scatter(data1['X'][:, 0][neg1], data1['X'][:, 1][neg1], c='y', marker='o')
plt.show()

# ###$C=1$

# In[5]:

from sklearn import svm

# In[6]:

# Linear SVM with regularization parameter C = 1; score on the training set.
clf = svm.SVC(kernel='linear', C=1).fit(data1['X'], data1['y'].ravel())
clf.score(data1['X'], data1['y'].ravel())

# In[7]:

# Turn the learned hyperplane w0*x0 + w1*x1 + b = 0 into a line
# x1 = a*x0 - b/w1 so it can be drawn over the scatter plot.
w = clf.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(0, 5)
yy = a * xx - clf.intercept_[0] / w[1]

plt.figure(figsize=(8, 6))
plt.title('SVM Decision Boundary with C = 1 (Example Dataset 1)')
plt.scatter(data1['X'][:, 0][pos1], data1['X'][:, 1][pos1], c='b', marker='+')
plt.scatter(data1['X'][:, 0][neg1], data1['X'][:, 1][neg1], c='y', marker='o')
plt.plot(xx, yy, 'k-')
plt.show()

# ###$C=100$

# In[8]:

# Same linear SVM with a much larger misclassification penalty (C = 100).
clf2 = svm.SVC(kernel='linear', C=100).fit(data1['X'], data1['y'].ravel())
clf2.score(data1['X'], data1['y'].ravel())

# In[9]:

# Recompute and draw the decision boundary for the C = 100 model.
w = clf2.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(0, 5)
yy = a * xx - clf2.intercept_[0] / w[1]

plt.figure(figsize=(8, 6))
plt.title('SVM Decision Boundary with C = 100 (Example Dataset 1)')
plt.scatter(data1['X'][:, 0][pos1], data1['X'][:, 1][pos1], c='b', marker='+')
plt.scatter(data1['X'][:, 0][neg1], data1['X'][:, 1][neg1], c='y', marker='o')
plt.plot(xx, yy, 'k-')
plt.show()

# ##SVM with Gaussian Kernels

# In[10]:

data2 = scipy.io.loadmat('ex6data2.mat')

# In[11]:

plt.figure(figsize=(8, 6))
plt.title('Example Dataset 2')
# Scatter the second dataset (figure/title created in the cell above).
pos2 = data2['y'][:, 0] == 1
neg2 = data2['y'][:, 0] == 0
plt.scatter(data2['X'][:, 0][pos2], data2['X'][:, 1][pos2], c='b', marker='+')
plt.scatter(data2['X'][:, 0][neg2], data2['X'][:, 1][neg2], c='y', marker='o')
plt.xlim([0, 1])
plt.ylim([0.4, 1])
plt.show()

# In[12]:

# Gaussian (RBF) kernel SVM; gamma = 1/(2*sigma^2), so gamma = 50
# corresponds to sigma = 0.1. Score on the training set.
clf3 = svm.SVC(kernel='rbf', C=1, gamma=50).fit(data2['X'], data2['y'].ravel())
clf3.score(data2['X'], data2['y'].ravel())

# In[13]:

plt.figure(figsize=(8, 6))
plt.title('SVM (Gaussian Kernel) Decision Boundary (Example Dataset 2)')
plt.scatter(data2['X'][:, 0][pos2], data2['X'][:, 1][pos2], c='b', marker='+')
plt.scatter(data2['X'][:, 0][neg2], data2['X'][:, 1][neg2], c='y', marker='o')

# Evaluate the decision function over a dense grid and draw its zero-level
# contour: that curve is the non-linear decision boundary.
xx, yy = np.meshgrid(np.linspace(0, 1, 500), np.linspace(0.4, 1, 500))
Z = clf3.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contour(xx, yy, Z, cmap=plt.cm.Paired, levels=[0])
plt.xlim([0, 1])
plt.ylim([0.4, 1])
plt.show()

# ##Example Dataset 3

# In[14]:

data3 = scipy.io.loadmat('ex6data3.mat')

# In[15]:

pos3 = data3['y'][:, 0] == 1
neg3 = data3['y'][:, 0] == 0

plt.figure(figsize=(8, 6))
plt.title('Example Dataset 3')
plt.scatter(data3['X'][:, 0][pos3], data3['X'][:, 1][pos3], c='b', marker='+')
plt.scatter(data3['X'][:, 0][neg3], data3['X'][:, 1][neg3], c='y', marker='o')
plt.xlim([-0.6, 0.3])
plt.ylim([-0.6, 0.6])
plt.show()

# In[16]:

# FIX: sklearn.grid_search was deprecated in scikit-learn 0.18 and removed
# in 0.20; GridSearchCV now lives in sklearn.model_selection.
from sklearn.model_selection import GridSearchCV

# In[17]:

# 3-fold cross-validated grid search over C and the kernel width sigma
# (delta_range), with sigma expressed as gamma = 1/(2*sigma^2) for SVC.
C_range = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
delta_range = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
gamma_range = 1 / (2 * (delta_range ** 2))
params = dict(gamma=gamma_range, C=C_range)
clf4 = GridSearchCV(svm.SVC(), params, cv=3)
clf4.fit(data3['X'], data3['y'].ravel())
# FIX: Python 2 print statements converted to the print() function
# (the parenthesized single-argument form behaves the same under Python 2).
print('best training score : %s' % clf4.best_score_)
print(clf4.best_estimator_)

# In[18]:

plt.figure(figsize=(8, 6))
plt.title('SVM (Gaussian Kernel) Decision Boundary (Example Dataset 3)')
plt.scatter(data3['X'][:, 0][pos3], data3['X'][:, 1][pos3], c='b', marker='+')
plt.scatter(data3['X'][:, 0][neg3], data3['X'][:, 1][neg3], c='y', marker='o')

# Zero-level contour of the best estimator's decision function = boundary.
xx, yy = np.meshgrid(np.linspace(-0.6, 0.3, 500), np.linspace(-0.6, 0.6, 500))
Z = clf4.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contour(xx, yy, Z, cmap=plt.cm.Paired, levels=[0])
plt.xlim([-0.6, 0.3])
plt.ylim([-0.6, 0.6])
plt.show()

# In[ ]: