%pylab inline
from __future__ import print_function
from __future__ import division

rcParams['figure.figsize'] = (10, 4)  # wide graphs by default

# Most examples here are based on examples from the scikit-learn docs.
# Bare expressions below are notebook cells whose value is displayed as output.
from sklearn.datasets import load_iris

data = load_iris()
data
type(data)
print(data['DESCR'])

data['data'].shape
data.data.shape
data.feature_names
data.target
data.target.shape
data.target_names

# Indices of the samples belonging to each species.
virginicas = argwhere(data.target == list(data.target_names).index('virginica'))[:, 0]
print(virginicas)

feature = 1
data.feature_names[feature]
data.data[virginicas].shape
data.data[virginicas][:, 1].mean()
data.data[virginicas][:, 1].var()
plot(data.data[virginicas][:, 1])

setosas = argwhere(data.target == list(data.target_names).index('setosa'))[:, 0]
plot(data.data[virginicas][:, 1])
plot(data.data[setosas][:, 1])

versicolors = argwhere(data.target == list(data.target_names).index('versicolor'))[:, 0]
plot(data.data[virginicas][:, 1])
plot(data.data[setosas][:, 1])
plot(data.data[versicolors][:, 1])
title("Feature: " + data.feature_names[1])
legend(['virginica', 'setosa', 'versicolor'])

feature = 0
plot(data.data[virginicas][:, feature])
plot(data.data[setosas][:, feature])
plot(data.data[versicolors][:, feature])
title("Feature: " + data.feature_names[feature])
legend(['virginica', 'setosa', 'versicolor'])

# Scatter the first two features, colored by class.
from matplotlib.colors import ListedColormap
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
scatter(data.data[:, 0], data.data[:, 1], c=data.target, cmap=cmap_bold)
xlabel(data.feature_names[0])
ylabel(data.feature_names[1])

# Baseline: a dummy classifier.
from sklearn.dummy import DummyClassifier
clf = DummyClassifier(strategy='uniform')  # predicts classes uniformly at random

# Training set (all the samples, just the first two features).
X = data.data[:, :2]
y = data.target
clf.fit(X, y)
# Repeated calls can return different answers because the strategy is random.
clf.predict([[7.2, 2.5]])
clf.predict([[7.2, 2.5]])

# k-nearest neighbors on the same two features.
from sklearn import neighbors

X = data.data[:, :2]
y = data.target
n_neighbors = 15
clf = neighbors.KNeighborsClassifier(n_neighbors)
clf.fit(X, y)
clf.predict([[7.2, 2.5]])
clf.predict([[5.0, 3.5]])

scatter(data.data[:, 0], data.data[:, 1], c=data.target, cmap=cmap_bold)
xlabel(data.feature_names[0])
ylabel(data.feature_names[1])
# Mark the two query points.
scatter(*zip((7.2, 2.5), (5.0, 3.5)), c='purple', marker='x', lw=8)

# Decision regions: predict the class of every point on a fine grid.
h = .02  # step size in the mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
xx
Z = clf.predict(c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
pcolormesh(xx, yy, Z, cmap=cmap_bold)

# Compare decision regions for several values of k.
n_neighbors = [5, 10, 15, 50]
X = data.data[:, :2]  # we only take the first two features; we could
                      # avoid this slicing by using a two-dimensional dataset
y = data.target
h = .02  # step size in the mesh

sp = 1
for n in n_neighbors:
    # we create an instance of the neighbors classifier and fit the data
    clf = neighbors.KNeighborsClassifier(n, weights='distance')
    clf.fit(X, y)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    subplot(1, 4, sp); sp += 1
    pcolormesh(xx, yy, Z, cmap=cmap_bold)
    xlim(xx.min(), xx.max())
    ylim(yy.min(), yy.max())
    title("k = %i" % n)
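
# A minimal sketch of comparing the same values of k on a held-out split; the
# panels above only show how the decision regions change, while an accuracy
# number gives a more concrete comparison. This assumes a scikit-learn version
# where train_test_split lives in sklearn.model_selection (older releases had
# it in sklearn.cross_validation); the 0.3 test fraction and random_state are
# arbitrary choices, not part of the original notebook.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
for n in n_neighbors:
    clf = neighbors.KNeighborsClassifier(n, weights='distance')
    clf.fit(X_train, y_train)
    print("k = %2i  held-out accuracy = %.3f" % (n, clf.score(X_test, y_test)))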
# Compare uniform vs. distance weighting for a fixed k.
n_neighbors = 5
X = data.data[:, :2]  # we only take the first two features; we could
                      # avoid this slicing by using a two-dimensional dataset
y = data.target
h = .02  # step size in the mesh

for weights in ['uniform', 'distance']:
    # we create an instance of the neighbors classifier and fit the data.
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
    clf.fit(X, y)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    figure()
    pcolormesh(xx, yy, Z, cmap=cmap_bold)

    # Plot also the training points
    scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    xlim(xx.min(), xx.max())
    ylim(yy.min(), yy.max())
    title("3-Class classification (k = %i, weights = '%s')" % (n_neighbors, weights))

# k-nearest neighbors again, this time using all four features.
n_neighbors = 15
X = data.data[:, :]
y = data.target
clf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
clf.fit(X, y)
clf.predict([[7.2, 2.5, 3.0, 3.0]])
clf.predict([[7.2, 2.5, 5.0, 2.4]])
data.data[120, :]  # inspect one of the actual samples for comparison

# Back to the first two features for the remaining classifiers.
X = data.data[:, :2]

# Gaussian naive Bayes.
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X, y)
Z = clf.predict(c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
pcolormesh(xx, yy, Z, cmap=cmap_bold)
scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
xlim(xx.min(), xx.max())
ylim(yy.min(), yy.max())

# Gaussian mixture model. Note that it is fit without the labels: it is an
# unsupervised model, so its component indices need not line up with the true
# classes, and the region colors are arbitrary. (In newer scikit-learn releases
# GMM has been replaced by sklearn.mixture.GaussianMixture.)
from sklearn.mixture import GMM
clf = GMM(n_components=3)
clf.fit(X)
Z = clf.predict(c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
pcolormesh(xx, yy, Z, cmap=cmap_bold)
scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
xlim(xx.min(), xx.max())
ylim(yy.min(), yy.max())

# Support vector machine with a linear kernel.
from sklearn.svm import SVC
clf = SVC(kernel='linear')
clf.fit(X, y)
Z = clf.predict(c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
pcolormesh(xx, yy, Z, cmap=cmap_bold)
scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
xlim(xx.min(), xx.max())
ylim(yy.min(), yy.max())

# SVC with the default (RBF) kernel on all four features, predicting on the
# training samples of each class in turn.
clf = SVC()
clf.fit(data.data, data.target)
clf.predict(data.data[0:50])
clf.predict(data.data[50:100])
clf.predict(data.data[100:150])
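
# A minimal sketch of a held-out evaluation for the SVC, since predicting on
# the rows it was trained on says little about generalization. It assumes
# scikit-learn's train_test_split and accuracy_score (module paths as in
# recent releases); the split fraction and random_state are arbitrary choices.
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, random_state=0)
clf = SVC()
clf.fit(X_train, y_train)
print("held-out accuracy:", accuracy_score(y_test, clf.predict(X_test)))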