rcParams['figure.figsize'] = (16, 4)
# Much of the code below is borrowed from the scikit-learn docs
from sklearn import tree
from sklearn.datasets import load_iris

# Load the iris dataset provided by sklearn.datasets.
data = load_iris()
features, labels = data.data, data.target

# Fit a decision tree classifier, built with default arguments, on every sample.
clf = tree.DecisionTreeClassifier()
clf.fit(features, labels)
DecisionTreeClassifier(compute_importances=None, criterion='gini', max_depth=None, max_features=None, min_density=None, min_samples_leaf=1, min_samples_split=2, random_state=None, splitter='best')
clf.predict(data.data[0])
array([0])
clf.predict(data.data[10])
array([0])
clf.predict(data.data)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
# Image is needed to render the PNG inside the notebook.
from IPython.display import Image
# StringIO and pydot are needed to convert the dot format to PNG.
import StringIO
import pydot

# Export the fitted tree in Graphviz dot format into an in-memory buffer.
dot_data = StringIO.StringIO()
tree.export_graphviz(clf, out_file=dot_data)

# Parse the dot text into a graph and display its PNG rendering.
dot_text = dot_data.getvalue()
graph = pydot.graph_from_dot_data(dot_text)
Image(graph.create_png())
# Fit a Gaussian naive Bayes classifier on every sample of the dataset.
from sklearn.naive_bayes import GaussianNB
clf2 = GaussianNB()
clf2.fit(data.data, data.target)
GaussianNB()
# Build a 10-fold cross-validation splitter over the sample indices.
from sklearn import cross_validation
# NOTE(review): no shuffle argument is passed here; if the samples are stored
# grouped by class, each test fold may be dominated by a single class --
# confirm that this is intended.
kf = cross_validation.KFold(len(data.data), n_folds=10)
# Number of folds produced by the splitter.
len(kf)
10
# Fraction of correctly classified test samples, one entry per fold.
pc_folds = []
for train_idx, test_idx in kf:
    # Train a fresh Gaussian naive Bayes model on this fold's training rows.
    clf2 = GaussianNB()
    clf2.fit(data.data[train_idx], data.target[train_idx])

    # Predict the rows held out for this fold and count the matches.
    predictions = clf2.predict(data.data[test_idx])
    n_correct = sum(predictions == data.target[test_idx])

    # float() forces true division so the ratio is not truncated to an integer.
    pc_folds.append(n_correct/float(len(test_idx)))
pc_folds
[1.0, 1.0, 1.0, 0.93333333333333335, 0.93333333333333335, 0.8666666666666667, 1.0, 0.8666666666666667, 0.8666666666666667, 1.0]
mean(pc_folds)
0.94666666666666688
min(pc_folds)
0.8666666666666667
from sklearn import metrics
# NOTE(review): clf2 was last reassigned inside the cross-validation loop, so
# this is the model fit on the final fold's training rows. It is applied here
# to every sample, including rows it was trained on -- confirm this is intended.
predictions = clf2.predict(data.data)
# Display the predicted labels.
predictions
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
metrics.accuracy_score(data.target, predictions)
0.96666666666666667
print metrics.classification_report(data.target, predictions)
precision recall f1-score support 0 1.00 1.00 1.00 50 1 0.94 0.96 0.95 50 2 0.96 0.94 0.95 50 avg / total 0.97 0.97 0.97 150
metrics.confusion_matrix(data.target, predictions)
array([[50, 0, 0], [ 0, 48, 2], [ 0, 3, 47]])
# Compute the confusion matrix of targets versus predictions and draw it as an
# image using nearest-neighbour interpolation and the gray_r colormap.
confmat = metrics.confusion_matrix(data.target, predictions)
imshow(confmat,interpolation='nearest', cmap=cm.gray_r)
<matplotlib.image.AxesImage at 0x3d88690>
confmat = metrics.confusion_matrix(data.target, predictions)
# Draw the inverted counts (max - count) with the gray colormap.
imshow(confmat.max() - confmat,interpolation='nearest', cmap=cm.gray)

# Cell labels switch from black to white once a count reaches this threshold.
half_max = confmat.max()/2
for rownum, row in enumerate(confmat):
    # Number of samples whose target equals this row index, as a float so the
    # division below is not truncated.
    row_total = sum(data.target == rownum).astype(float)
    for colnum, val in enumerate(row):
        if val < half_max:
            label_color = 'black'
        else:
            label_color = 'white'
        # Annotate the cell with its count as a fraction of the row total.
        text(colnum, rownum, str(val/row_total), fontsize=24,
             color=label_color, ha='center', va='center')

# Label both axes with the class names.
tick_positions = arange(len(data.target_names))
xticks(tick_positions, data.target_names)
yticks(tick_positions, data.target_names);
By Andrés Cabrera mantaraya36@gmail.com
For course MAT 240E at UCSB
This IPython notebook is licensed under the CC-BY-NC-SA license: http://creativecommons.org/licenses/by-nc-sa/4.0/