# Exploration and classification of the scikit-learn handwritten-digits data.
#
# The script
#   1. plots the first 64 digit images with their labels,
#   2. projects the data to 2-D with randomized PCA and with Isomap,
#   3. trains a Gaussian naive Bayes classifier on a train/test split and
#      prints a classification report and confusion matrix,
#   4. prints 5-fold cross-validation scores for the same classifier.
#
# NOTE: this code originated in an IPython notebook that relied on the
# `%pylab inline` magic to put `plt` in scope. Magics are not valid Python,
# so matplotlib is imported explicitly here. When running inside a notebook,
# execute `%matplotlib inline` first to get inline figures.

import matplotlib.pyplot as plt

from sklearn import metrics
from sklearn.datasets import load_digits
# PCA(svd_solver='randomized') replaces RandomizedPCA (removed in
# scikit-learn 0.20).
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
# sklearn.model_selection replaces sklearn.cross_validation (removed in
# scikit-learn 0.20).
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.naive_bayes import GaussianNB

digits = load_digits()

# --- Plot the first 64 digits -------------------------------------------
# copied from notebook 02_sklearn_data.ipynb
fig = plt.figure(figsize=(6, 6))  # figure size in inches
fig.subplots_adjust(left=0, right=1, bottom=0, top=1,
                    hspace=0.05, wspace=0.05)

# plot the digits: each image is 8x8 pixels
for i in range(64):
    ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[])
    ax.imshow(digits.images[i], cmap=plt.cm.binary)
    # label the image with the target value
    ax.text(0, 7, str(digits.target[i]))

# --- 2-D projection with randomized PCA ---------------------------------
pca = PCA(n_components=2, svd_solver='randomized')
proj = pca.fit_transform(digits.data)

# Start a new figure: in a plain script (no notebook cell boundaries) the
# scatter would otherwise be drawn onto the last subplot of the digit grid.
plt.figure()
plt.scatter(proj[:, 0], proj[:, 1], c=digits.target)
plt.colorbar()

# --- 2-D projection with Isomap -----------------------------------------
iso = Isomap(n_neighbors=5, n_components=2)
proj = iso.fit_transform(digits.data)

plt.figure()
plt.scatter(proj[:, 0], proj[:, 1], c=digits.target)
plt.colorbar()

# --- Gaussian naive Bayes on a single train/test split ------------------
# split the data into training and validation sets
# (no random_state is given, so the split differs from run to run)
data_train, data_test, target_train, target_test = train_test_split(
    digits.data, digits.target)

# train the model
clf = GaussianNB()
clf.fit(data_train, target_train)

# predict the labels of the test data
predicted = clf.predict(data_test)
expected = target_test

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

# --- 5-fold cross-validation --------------------------------------------
# The model_selection KFold takes the number of splits only; the number of
# samples is inferred from the data passed to cross_val_score.
cv = KFold(n_splits=5, shuffle=True, random_state=0)
clf = GaussianNB()
print(cross_val_score(clf, digits.data, digits.target, cv=cv))

# Display the figures when run as a script (harmless with an inline backend).
plt.show()