%pylab inline
rcParams['figure.figsize'] = (10, 4) #wide graphs by default
from __future__ import print_function
from __future__ import division
# A lot of code below borrowed from the scikits learn docs
Populating the interactive namespace from numpy and matplotlib
from sklearn.datasets import load_iris
# Load the classic iris dataset: 150 samples, 4 features, 3 classes.
data = load_iris()
from sklearn import tree
# Fit a CART decision tree on the full dataset (no train/test split here,
# so the resubstitution accuracy below is optimistic).
clf = tree.DecisionTreeClassifier()
clf.fit(data.data, data.target)
DecisionTreeClassifier(compute_importances=None, criterion='gini', max_depth=None, max_features=None, max_leaf_nodes=None, min_density=None, min_samples_leaf=1, min_samples_split=2, random_state=None, splitter='best')
clf.predict(data.data[0])
array([0])
clf.predict(data.data[10])
array([0])
clf.predict(data.data)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
from IPython.display import Image #needed to render in notebook
import StringIO, pydot #needed to convert dot format to png
# NOTE(review): Python 2 only — on Python 3 this would be io.StringIO and
# pydot.graph_from_dot_data returns a list; kept as-is for this notebook.
dot_data = StringIO.StringIO()
# Serialize the fitted tree to Graphviz "dot" text, then render to PNG.
tree.export_graphviz(clf, out_file=dot_data)
graph = pydot.graph_from_dot_data(dot_data.getvalue())
Image(graph.create_png())
from sklearn.naive_bayes import GaussianNB
# Gaussian Naive Bayes: assumes each feature is normally distributed
# per class; fit on the whole iris dataset.
clf2 = GaussianNB()
clf2.fit(data.data, data.target)
GaussianNB()
from sklearn import cross_validation
# 10-fold split over the 150 iris samples (15 test samples per fold).
# NOTE(review): cross_validation is the pre-0.18 sklearn module name;
# modern code would use sklearn.model_selection.KFold.
kf = cross_validation.KFold(len(data.data), n_folds=10)
len(kf)
10
# Per-fold fraction of correctly classified test samples.
pc_folds = []
for train_index, test_index in kf:
    # Re-instantiate the classifier each fold so no state leaks between folds.
    clf2 = GaussianNB()
    clf2.fit(data.data[train_index], data.target[train_index])
    predictions = clf2.predict(data.data[test_index])
    # (predictions == truth) is a boolean array; its sum counts the hits.
    pc_right = sum(predictions == data.target[test_index])/float(len(test_index))
    pc_folds.append(pc_right)
pc_folds
[1.0, 1.0, 1.0, 0.93333333333333335, 0.93333333333333335, 0.8666666666666667, 1.0, 0.8666666666666667, 0.8666666666666667, 1.0]
mean(pc_folds)
0.94666666666666688
min(pc_folds)
0.8666666666666667
from sklearn import metrics
predictions = clf2.predict(data.data)
predictions
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
metrics.accuracy_score(data.target, predictions)
0.96666666666666667
print(metrics.classification_report(data.target, predictions))
precision recall f1-score support 0 1.00 1.00 1.00 50 1 0.94 0.96 0.95 50 2 0.96 0.94 0.95 50 avg / total 0.97 0.97 0.97 150
metrics.confusion_matrix(data.target, predictions)
array([[50, 0, 0], [ 0, 48, 2], [ 0, 3, 47]])
confmat = metrics.confusion_matrix(data.target, predictions)
imshow(confmat,interpolation='nearest', cmap=cm.gray_r)
<matplotlib.image.AxesImage at 0x7ff03c61fa50>
confmat = metrics.confusion_matrix(data.target, predictions)
# Invert the values (max - count) so that larger counts render darker
# with the plain 'gray' colormap.
imshow(confmat.max() - confmat,interpolation='nearest', cmap=cm.gray)
# Overlay each cell with its count normalized by the true-class support,
# switching text color for legibility against the cell shade.
for rownum, row in enumerate(confmat):
    for colnum, val in enumerate(row):
        text(colnum, rownum, str(val/sum(data.target == rownum).astype(float)), fontsize=24, color='black' if val < confmat.max()/2 else 'white', ha='center', va='center' )
xticks(arange(len(data.target_names)), data.target_names)
yticks(arange(len(data.target_names)), data.target_names);
# Population covariance for a correlated 2-D Gaussian.
Cov = array([[2.9, -2.2], [-2.2, 6.5]])
# Draw 200 samples; the sample covariance printed below only approximates Cov.
X = random.multivariate_normal([1,2], Cov, size=200)
figure(figsize=(4,4))
scatter(X[:,0], X[:,1])
axis('equal') # equal scaling on both axes, so correlation is visible
print(cov(X,rowvar=False))
[[ 2.33164768 -1.78245345] [-1.78245345 5.34877268]]
from sklearn.decomposition import PCA
# Fit PCA on the 2-D sample and rotate it into the principal-axis frame;
# after the transform the coordinates are decorrelated (see var() below).
pca = PCA()
X_pca = pca.fit_transform(X)
# components_ are the principal directions (rows); mean_ is the centroid
# subtracted before projection.
pca.components_, pca.mean_
(array([[-0.42069867, 0.90720043], [-0.90720043, -0.42069867]]), array([ 1.27415321, 1.85932007]))
figure(figsize=(4,4))
scatter(X_pca[:,0], X_pca[:,1])
# axis('equal');
axis('equal') # equal scaling on both axis;
var(X_pca[:,0]), var(X_pca[:,1])
(6.1444781813112739, 1.4975400813297965)
# State labels for the 4-state chain below.
notes = ["A", "B", "C", "D"]
# Row-stochastic transition matrix: markov_model[i][j] is the probability
# of moving from state i to state j (each row sums to 1).
markov_model = [ [ 0.5, 0.3, 0.2, 0],
                 [0.2, 0.1, 0.6, 0.1],
                 [0.1, 0, 0.2, 0.7],
                 [0, 0, 0.9, 0.1] ]
# numpy's randint excludes the upper bound, so this draws from {0, 1, 2, 3}.
state = random.randint(0, 4)
print(state)
2
throw = random.random()
print(throw)
0.919680708341
probabilities = markov_model[state]
cumsum(probabilities)
array([ 0.1, 0.1, 0.3, 1. ])
cumsum(probabilities) > throw
array([False, False, False, True], dtype=bool)
argmax(cumsum(probabilities) > throw)
3
state = argmax(cumsum(probabilities) > throw)
print(state)
3
throw = random.random()
state = argmax(cumsum(probabilities) > throw)
probabilities = markov_model[state]
print(state)
2
throw = random.random()
state = argmax(cumsum(probabilities) > throw)
probabilities = markov_model[state]
print(state)
3
def markov(matrix, state, num_iter=10):
    """Simulate a first-order Markov chain.

    Parameters
    ----------
    matrix : sequence of sequences of float
        Row-stochastic transition matrix; matrix[i][j] is the probability
        of moving from state i to state j.
    state : int
        Initial state (row index into matrix).
    num_iter : int, optional
        Number of transitions to simulate (default 10).

    Returns
    -------
    list of int
        Visited states, starting with `state`; length num_iter + 1.
    """
    out = [state]
    # Bug fix: read transition rows from the `matrix` argument instead of
    # the global `markov_model`, so the parameter is actually honored.
    probabilities = matrix[state]
    for i in range(num_iter):
        throw = random.random()
        # Inverse-CDF sampling: the first index where the cumulative
        # probability exceeds the uniform draw is the next state.
        state = argmax(cumsum(probabilities) > throw)
        probabilities = matrix[state]
        out.append(state)
    return out
markov(markov_model, 0)
[0, 1, 2, 3, 2, 0, 0, 0, 0, 0, 0]
markov_model
[[0.5, 0.3, 0.2, 0], [0.2, 0.1, 0.6, 0.1], [0.1, 0, 0.2, 0.7], [0, 0, 0.9, 0.1]]
markov(markov_model, 3)
[3, 2, 3, 2, 0, 1, 2, 3, 2, 3, 2]
Hidden Markov Models have a "hidden" internal state, evolving as a Markov chain, that determines which output symbol is emitted at each step
# Observable symbols emitted by the hidden chain.
possible_outputs = ['M', 'N', 'O']
# Emission matrix: output_probabilites[state][k] is the probability that
# hidden state `state` emits possible_outputs[k] (each row sums to 1).
output_probabilites = [ [ 0.1, 0.9, 0],
                        [0.8, 0.1, 0.1],
                        [0.1, 0, 0.9],
                        [0.2, 0.3, 0.5]]
# Sample a hidden state sequence, then emit one symbol per state using
# the same inverse-CDF trick as the chain itself.
states = markov(markov_model, 3)
for state in states:
    throw = random.random()
    out_index = argmax(cumsum(output_probabilites[state]) > throw)
    print(possible_outputs[out_index])
N M O O O M N M N M O
def genHMM(markov_model, initial_state, possible_outputs, output_probabilites, num_iter=10):
    """Generate an observation sequence from a hidden Markov model.

    Parameters
    ----------
    markov_model : sequence of sequences of float
        Row-stochastic transition matrix for the hidden states.
    initial_state : int
        Starting hidden state.
    possible_outputs : sequence
        Observable symbols; index k is emitted with probability
        output_probabilites[state][k].
    output_probabilites : sequence of sequences of float
        Emission matrix, one row per hidden state.
    num_iter : int, optional
        Number of hidden-state transitions (default 10); the returned
        sequences have length num_iter + 1.

    Returns
    -------
    (list, list of int)
        The emitted symbols and the hidden state sequence that produced them.
    """
    out = []
    # Bug fix: forward num_iter to markov(); previously it was accepted
    # but ignored, so the sequence length was always the default 10.
    states = markov(markov_model, initial_state, num_iter)
    for state in states:
        throw = random.random()
        # Inverse-CDF sample of the emission distribution for this state.
        out_index = argmax(cumsum(output_probabilites[state]) > throw)
        out.append(possible_outputs[out_index])
    return out, states
genHMM(markov_model, 3, possible_outputs, output_probabilites)
(['N', 'O', 'N', 'O', 'N', 'O', 'M', 'N', 'O', 'M', 'O'], [3, 2, 3, 2, 3, 2, 3, 3, 2, 3, 2])
By Andrés Cabrera mantaraya36@gmail.com
For course MAT 201A at UCSB
This ipython notebook is licensed under the CC-BY-NC-SA license: http://creativecommons.org/licenses/by-nc-sa/4.0/