%pylab inline
from __future__ import print_function
from __future__ import division

rcParams['figure.figsize'] = (10, 4)  # wide graphs by default

# Most examples here are based on examples from the scikit-learn docs.
# Bare expressions below are notebook cells whose value is displayed as output.
from sklearn.datasets import load_iris

data = load_iris()
data
type(data)
print(data['DESCR'])

data['data'].shape
data.data.shape
data.feature_names
data.target
data.target.shape
data.target_names

# Indices of the samples belonging to each species.
virginicas = argwhere(data.target == list(data.target_names).index('virginica'))[:, 0]
print(virginicas)

feature = 1
data.feature_names[feature]
data.data[virginicas].shape
data.data[virginicas][:, 1].mean()
data.data[virginicas][:, 1].var()
plot(data.data[virginicas][:, 1])

setosas = argwhere(data.target == list(data.target_names).index('setosa'))[:, 0]
plot(data.data[virginicas][:, 1])
plot(data.data[setosas][:, 1])

versicolors = argwhere(data.target == list(data.target_names).index('versicolor'))[:, 0]
plot(data.data[virginicas][:, 1])
plot(data.data[setosas][:, 1])
plot(data.data[versicolors][:, 1])
title("Feature: " + data.feature_names[1])
legend(['virginica', 'setosa', 'versicolor'])

feature = 0
plot(data.data[virginicas][:, feature])
plot(data.data[setosas][:, feature])
plot(data.data[versicolors][:, feature])
title("Feature: " + data.feature_names[feature])
legend(['virginica', 'setosa', 'versicolor'])

# Scatter the first two features, colored by class.
from matplotlib.colors import ListedColormap
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
scatter(data.data[:, 0], data.data[:, 1], c=data.target, cmap=cmap_bold)
xlabel(data.feature_names[0])
ylabel(data.feature_names[1])

# Baseline: a dummy classifier.
from sklearn.dummy import DummyClassifier
clf = DummyClassifier(strategy='uniform')  # predicts classes uniformly at random

# Training set (all the samples, just the first two features).
X = data.data[:, :2]
y = data.target
clf.fit(X, y)
# Repeated calls can return different answers because the strategy is random.
clf.predict([[7.2, 2.5]])
clf.predict([[7.2, 2.5]])

# k-nearest neighbors on the same two features.
from sklearn import neighbors

X = data.data[:, :2]
y = data.target
n_neighbors = 15
clf = neighbors.KNeighborsClassifier(n_neighbors)
clf.fit(X, y)
clf.predict([[7.2, 2.5]])
clf.predict([[5.0, 3.5]])

scatter(data.data[:, 0], data.data[:, 1], c=data.target, cmap=cmap_bold)
xlabel(data.feature_names[0])
ylabel(data.feature_names[1])
# Mark the two query points.
scatter(*zip((7.2, 2.5), (5.0, 3.5)), c='purple', marker='x', lw=8)

# Decision regions: predict the class of every point on a fine grid.
h = .02  # step size in the mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
xx
Z = clf.predict(c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
pcolormesh(xx, yy, Z, cmap=cmap_bold)

# Compare decision regions for several values of k.
n_neighbors = [5, 10, 15, 50]
X = data.data[:, :2]  # we only take the first two features; we could
                      # avoid this slicing by using a two-dimensional dataset
y = data.target
h = .02  # step size in the mesh

sp = 1
for n in n_neighbors:
    # we create an instance of the neighbors classifier and fit the data
    clf = neighbors.KNeighborsClassifier(n, weights='distance')
    clf.fit(X, y)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    subplot(1, 4, sp); sp += 1
    pcolormesh(xx, yy, Z, cmap=cmap_bold)
    xlim(xx.min(), xx.max())
    ylim(yy.min(), yy.max())
    title("k = %i" % n)
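
# A minimal sketch of comparing the same values of k on a held-out split; the
# panels above only show how the decision regions change, while an accuracy
# number gives a more concrete comparison. This assumes a scikit-learn version
# where train_test_split lives in sklearn.model_selection (older releases had
# it in sklearn.cross_validation); the 0.3 test fraction and random_state are
# arbitrary choices, not part of the original notebook.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
for n in n_neighbors:
    clf = neighbors.KNeighborsClassifier(n, weights='distance')
    clf.fit(X_train, y_train)
    print("k = %2i  held-out accuracy = %.3f" % (n, clf.score(X_test, y_test)))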
# Compare uniform vs. distance weighting for a fixed k.
n_neighbors = 5
X = data.data[:, :2]  # we only take the first two features; we could
                      # avoid this slicing by using a two-dimensional dataset
y = data.target
h = .02  # step size in the mesh

for weights in ['uniform', 'distance']:
    # we create an instance of the neighbors classifier and fit the data.
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
    clf.fit(X, y)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    figure()
    pcolormesh(xx, yy, Z, cmap=cmap_bold)

    # Plot also the training points
    scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    xlim(xx.min(), xx.max())
    ylim(yy.min(), yy.max())
    title("3-Class classification (k = %i, weights = '%s')" % (n_neighbors, weights))

# k-nearest neighbors again, this time using all four features.
n_neighbors = 15
X = data.data[:, :]
y = data.target
clf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
clf.fit(X, y)
clf.predict([[7.2, 2.5, 3.0, 3.0]])
clf.predict([[7.2, 2.5, 5.0, 2.4]])
data.data[120, :]  # inspect one of the actual samples for comparison

# Back to the first two features for the remaining classifiers.
X = data.data[:, :2]

# Gaussian naive Bayes.
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X, y)
Z = clf.predict(c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
pcolormesh(xx, yy, Z, cmap=cmap_bold)
scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
xlim(xx.min(), xx.max())
ylim(yy.min(), yy.max())

# Gaussian mixture model. Note that it is fit without the labels: it is an
# unsupervised model, so its component indices need not line up with the true
# classes, and the region colors are arbitrary. (In newer scikit-learn releases
# GMM has been replaced by sklearn.mixture.GaussianMixture.)
from sklearn.mixture import GMM
clf = GMM(n_components=3)
clf.fit(X)
Z = clf.predict(c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
pcolormesh(xx, yy, Z, cmap=cmap_bold)
scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
xlim(xx.min(), xx.max())
ylim(yy.min(), yy.max())

# Support vector machine with a linear kernel.
from sklearn.svm import SVC
clf = SVC(kernel='linear')
clf.fit(X, y)
Z = clf.predict(c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
pcolormesh(xx, yy, Z, cmap=cmap_bold)
scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
xlim(xx.min(), xx.max())
ylim(yy.min(), yy.max())

# SVC with the default (RBF) kernel on all four features, predicting on the
# training samples of each class in turn.
clf = SVC()
clf.fit(data.data, data.target)
clf.predict(data.data[0:50])
clf.predict(data.data[50:100])
clf.predict(data.data[100:150])
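
# A minimal sketch of a held-out evaluation for the SVC, since predicting on
# the rows it was trained on says little about generalization. It assumes
# scikit-learn's train_test_split and accuracy_score (module paths as in
# recent releases); the split fraction and random_state are arbitrary choices.
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, random_state=0)
clf = SVC()
clf.fit(X_train, y_train)
print("held-out accuracy:", accuracy_score(y_test, clf.predict(X_test)))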