"""Classifier comparison demo.

Part 1: logistic regression vs. k-NN on the non-linearly-separable
``make_circles`` toy data.  Part 2: k-NN on the MNIST digits with a
train / validation / test split.
"""
# np and plt were used but never imported in the original script.
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_circles

# Two concentric noisy circles -- a data set that is NOT linearly separable.
X, y = make_circles(noise=.1, factor=.5)
print("X.shape:", X.shape)
print("unique labels: ", np.unique(y))

plt.prism()  # this sets a nice color map
plt.scatter(X[:, 0], X[:, 1], c=y)

# Simple holdout split: first 50 samples train, rest test
# (make_circles yields 100 samples by default, so this is 50/50).
X_train = X[:50]
y_train = y[:50]
X_test = X[50:]
y_test = y[50:]

from sklearn.linear_model import LogisticRegression

# A linear model cannot separate concentric circles -- expect poor accuracy.
logreg = LogisticRegression()
logreg.fit(X_train, y_train)

plt.prism()
from utility import plot_decision_boundary

# Test predictions as triangles, training points as dots, plus the
# (linear) decision boundary of the logistic regression.
y_pred_test = logreg.predict(X_test)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred_test, marker='^')
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train)
plot_decision_boundary(logreg, X)
plt.xlim(-1.5, 1.5)
plt.ylim(-1.5, 1.5)
print("Accuracy of logistic regression on test set:",
      logreg.score(X_test, y_test))

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5)  # we specify that this knn should always use 5 neighbors
knn.fit(X_train, y_train)
y_pred_test = knn.predict(X_test)

plt.prism()  # gives us a nice color map
plt.xlim(-1.5, 1.5)
plt.ylim(-1.5, 1.5)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred_test, marker='^')
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train)
print("Accuracy of KNN test set:", knn.score(X_test, y_test))

from sklearn.datasets import fetch_openml
from sklearn.utils import shuffle

# fetch_mldata was removed in scikit-learn 0.22 (mldata.org shut down);
# fetch_openml("mnist_784") is the replacement.  as_frame=False keeps the
# old plain-ndarray interface, and the OpenML targets come back as
# strings, so cast them to integers to preserve the original semantics.
mnist = fetch_openml("mnist_784", as_frame=False)
X_digits, y_digits = mnist.data, mnist.target.astype(np.int64)
# NOTE(review): unseeded shuffle -- the splits below differ on every run.
X_digits, y_digits = shuffle(X_digits, y_digits)

# 1000-sample train / validation / test splits from the shuffled data.
X_digits_train = X_digits[:1000]
y_digits_train = y_digits[:1000]
X_digits_valid = X_digits[1000:2000]
y_digits_valid = y_digits[1000:2000]
X_digits_test = X_digits[2000:3000]
y_digits_test = y_digits[2000:3000]

knn_digits = KNeighborsClassifier(n_neighbors=20)
knn_digits.fit(X_digits_train, y_digits_train)
print("KNN validation accuracy on MNIST digits: ",
      knn_digits.score(X_digits_valid, y_digits_valid))
print("KNN test accuracy on MNIST digits: ",
      knn_digits.score(X_digits_test, y_digits_test))

# Try fewer neighbors; the follow-up cell refits and inspects this model.
knn_digits = KNeighborsClassifier(n_neighbors=3)
# Refit the 3-NN model and inspect which training images are the nearest
# neighbors of individual validation digits.
import numpy as np
import matplotlib.pyplot as plt

knn_digits.fit(X_digits_train, y_digits_train)
y_digits_valid_pred = knn_digits.predict(X_digits_valid)

# Use the public kneighbors() API instead of the private `_tree`
# attribute: `_tree` is an implementation detail and is None when the
# brute-force algorithm is selected, so `_tree.query` can crash.
# return_distance=False returns only the neighbor indices, matching the
# original `query(...)[1]`.
neighbors = knn_digits.kneighbors(X_digits_valid, n_neighbors=3,
                                  return_distance=False)

plt.rc("image", cmap="binary")  # this sets a black on white colormap

# Plot the first validation digit ...
plt.subplot(1, 4, 1)
plt.imshow(X_digits_valid[0].reshape(28, 28))
plt.title("Query")
# ... and its three nearest neighbors from the training set.
for i in [0, 1, 2]:
    plt.subplot(1, 4, 2 + i)
    plt.title("%dth neighbor" % i)
    plt.imshow(X_digits_train[neighbors[0, i]].reshape(28, 28))

# Misclassified validation digits: "!=" gives a boolean mask, np.where
# turns the mask into indices.
wrong = np.where(y_digits_valid_pred != y_digits_valid)[0]
print("Wrong prediction on the following images: ", wrong)

# Guard against a perfect validation score: wrong[0] would raise
# IndexError on an empty array.
if wrong.size:
    index = wrong[0]
    plt.rc("image", cmap="binary")  # this sets a black on white colormap
    # Plot the first misclassified digit and its three nearest training
    # neighbors -- usually the neighbors visually explain the mistake.
    plt.subplot(1, 4, 1)
    plt.imshow(X_digits_valid[index].reshape(28, 28))
    plt.title("Query")
    for i in [0, 1, 2]:
        plt.subplot(1, 4, 2 + i)
        plt.title("%dth neighbor" % i)
        plt.imshow(X_digits_train[neighbors[index, i]].reshape(28, 28))