# Notebook-style walkthrough on the iris data set: fit a decision tree and
# render it, fit a Gaussian naive Bayes classifier, estimate its accuracy
# with 10-fold cross-validation, and plot its confusion matrix.
#
# A lot of code below borrowed from the scikits learn docs
#
# NOTE(review): rcParams, mean, imshow, cm, text, xticks, yticks and arange
# are not imported in this code; presumably they come from a pylab-style
# namespace (e.g. ``%pylab inline``) set up before it runs -- confirm.
# NOTE(review): ``StringIO`` and ``sklearn.cross_validation`` look like the
# legacy (Python 2 / older scikit-learn) module names -- confirm the target
# environment before porting them.
# Bare expressions (e.g. ``pc_folds``) are kept on purpose: in a notebook
# they display the value of the cell; in a plain script they are no-ops.

import StringIO  # needed to convert dot format to png
import pydot  # needed to convert dot format to png

from IPython.display import Image  # needed to render in notebook
from sklearn import cross_validation
from sklearn import metrics
from sklearn import tree
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB

rcParams['figure.figsize'] = (16, 4)

# --- Decision tree fitted on the full data set ---------------------------
data = load_iris()

clf = tree.DecisionTreeClassifier()
clf.fit(data.data, data.target)

# Slice instead of indexing so that a single sample is still handed to the
# estimator as a 2-D array with one row rather than as a 1-D vector.
clf.predict(data.data[0:1])
clf.predict(data.data[10:11])
clf.predict(data.data)

# Export the fitted tree as Graphviz dot text, then convert it to a PNG.
dot_data = StringIO.StringIO()
tree.export_graphviz(clf, out_file=dot_data)
graph = pydot.graph_from_dot_data(dot_data.getvalue())
if isinstance(graph, (list, tuple)):
    # Some pydot releases return a list of graphs rather than one graph.
    graph = graph[0]
Image(graph.create_png())

# --- Gaussian naive Bayes -------------------------------------------------
clf2 = GaussianNB()
clf2.fit(data.data, data.target)

# 10-fold cross-validation: refit on each training split and record the
# fraction of that fold's test samples that are predicted correctly.
kf = cross_validation.KFold(len(data.data), n_folds=10)
len(kf)

pc_folds = []
for train_index, test_index in kf:
    clf2 = GaussianNB()
    clf2.fit(data.data[train_index], data.target[train_index])
    predictions = clf2.predict(data.data[test_index])
    pc_right = (sum(predictions == data.target[test_index])
                / float(len(test_index)))
    pc_folds.append(pc_right)

pc_folds
mean(pc_folds)
min(pc_folds)

# --- Metrics ----------------------------------------------------------------
# NOTE: clf2 is now the model left over from the last fold (fitted on that
# fold's training split only); it is evaluated here on the whole data set.
predictions = clf2.predict(data.data)
predictions
metrics.accuracy_score(data.target, predictions)
# Single-argument call form: prints the same text under the Python 2 print
# statement and the Python 3 print function.
print(metrics.classification_report(data.target, predictions))
metrics.confusion_matrix(data.target, predictions)

# --- Confusion-matrix plots -------------------------------------------------
confmat = metrics.confusion_matrix(data.target, predictions)
imshow(confmat, interpolation='nearest', cmap=cm.gray_r)

# Same picture drawn as (max - count) with the non-reversed gray map, then
# annotated cell by cell.
imshow(confmat.max() - confmat, interpolation='nearest', cmap=cm.gray)

# Cells whose count reaches this threshold get white text, the rest black.
half_max = confmat.max() / 2
for rownum, row in enumerate(confmat):
    # Number of samples whose target equals this row's class index; dividing
    # by it turns the raw counts of the row into fractions of that class.
    row_total = sum(data.target == rownum).astype(float)
    for colnum, val in enumerate(row):
        text(colnum, rownum, str(val / row_total),
             fontsize=24,
             color='black' if val < half_max else 'white',
             ha='center', va='center')

xticks(arange(len(data.target_names)), data.target_names)
# The trailing semicolon suppresses the echoed return value in a notebook.
yticks(arange(len(data.target_names)), data.target_names);