# load the ex3data1
# it's in mat format in the course, but can be convert to txt in octave:
# save('ex3data1.txt', 'X', '-ascii')
# save('ex3data1.y.txt', 'y', '-ascii')
import numpy as np
def load_dataset():
    """Load the ex3 digit dataset from the ASCII exports (see header).

    Returns:
        (X, y): X is an (m, n) array of float features, y an (m,) array of
        integer class labels.
    """
    data = []
    y = []
    # was: map(double, ...) -- `double` is not a Python builtin; use float.
    # `with` guarantees the files are closed even on a parse error.
    with open('data/ex3data1.txt') as fx:
        for line in fx:
            data.append([float(v) for v in line.split()])
    with open('data/ex3data1.y.txt') as fy:
        for line in fy:
            # labels are stored as floats (e.g. "10.0"); round-trip to int
            y.append(int(float(line.strip())))
    # np.int was a deprecated alias for the builtin int (removed in NumPy
    # 1.24); use int directly.
    return np.array(data, np.double), np.array(y, dtype=int)
# Materialize the full training set once, script-style, for the cells below.
X, y = load_dataset()
# %%time  (IPython cell magic from the original notebook; not valid in a plain .py file)
from sklearn.base import BaseEstimator
from lbfgs import LBFGS
import lbfgs
def sigmoid(X, theta):
    """Logistic function applied element-wise to the linear scores X.theta."""
    z = np.dot(X, theta)
    return 1.0 / (1.0 + np.exp(-z))
def f(theta, g, lr, X, y):
    """Objective for lbfgs: mean logistic negative log-likelihood.

    Side effects required by the LBFGS callback protocol:
      - writes the gradient into `g` in place,
      - copies the current iterate into lr.theta, so the estimator keeps
        the latest parameters even if the solver aborts mid-run.
    Returns the scalar loss value.
    """
    lr.theta[:] = theta
    m = X.shape[0]
    predicted = sigmoid(X, theta)
    g[:] = gradient(theta, X, y)
    # was: log(...) -- unqualified `log` is undefined here; use np.log.
    # 1.0 * y promotes a boolean label vector to float before multiplying.
    return -np.sum(1.0 * y * np.log(predicted) + (1 - y) * np.log(1 - predicted)) / m
def gradient(theta, X, y):
    """Gradient of the mean logistic loss with respect to theta."""
    residual = sigmoid(X, theta) - y
    n_samples = X.shape[0]
    return np.dot(X.T, residual) / n_samples
class LogisticRegression(BaseEstimator):
    """Binary logistic regression fitted with an external L-BFGS optimizer."""

    def fit(self, X, y):
        """Minimize the logistic loss over theta, starting from zeros.

        Returns self (sklearn convention).
        """
        n = X.shape[1]
        x0 = np.zeros(n)
        # f() keeps self.theta in sync with the current iterate, so a
        # partially converged model is still usable if the solver raises.
        self.theta = np.zeros(n)
        opt = LBFGS()
        try:
            opt.minimize(f, x0, args=[self, X, y])
        except Exception as e:  # was Python-2-only `except Exception, e`
            # Line-search failures from lbfgs are common near convergence;
            # report them and keep the last iterate stored in self.theta.
            print(repr(e))
        return self

    def predict_proba(self, X):
        """Return an (m, 2) array of [P(y=0), P(y=1)] per sample."""
        positive = sigmoid(X, self.theta).reshape(X.shape[0], 1)
        return np.hstack((1 - positive, positive))
# Sanity check: a single binary classifier, label 10 vs. the rest.
# (Presumably label 10 stands in for the digit 0 in the course data -- TODO confirm.)
lr = LogisticRegression()
lr.fit(X, y == 10)
# [notebook output] LBFGSError('The line-search routine reaches the maximum number of evaluations.',) CPU times: user 1.19 s, sys: 27.6 ms, total: 1.22 s Wall time: 438 ms
# %%time  (IPython cell magic from the original notebook; not valid in a plain .py file)
# train_test_split moved from sklearn.cross_validation to
# sklearn.model_selection in scikit-learn 0.18; fall back for old versions.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split
from sklearn.multiclass import OneVsRestClassifier

# One-vs-rest wrapper trains one binary LogisticRegression per class label.
ovr = OneVsRestClassifier(LogisticRegression())
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
ovr.fit(X_train, y_train)
print(ovr.estimators_)
# [notebook output] LBFGSError('The line-search routine reaches the maximum number of evaluations.',) LBFGSError('The line-search routine reaches the maximum number of evaluations.',) LBFGSError('The line-search routine reaches the maximum number of evaluations.',) LBFGSError('The line-search routine reaches the maximum number of evaluations.',) LBFGSError('A rounding error occurred; alternatively, no line-search step satisfies the sufficient decrease and curvature conditions.',) LBFGSError('The line-search routine reaches the maximum number of evaluations.',) LBFGSError('The line-search routine reaches the maximum number of evaluations.',) LBFGSError('The line-search routine reaches the maximum number of evaluations.',) [LogisticRegression(), LogisticRegression(), LogisticRegression(), LogisticRegression(), LogisticRegression(), LogisticRegression(), LogisticRegression(), LogisticRegression(), LogisticRegression(), LogisticRegression()] CPU times: user 12 s, sys: 400 ms, total: 12.4 s Wall time: 4.14 s
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score
predicted = ovr.predict(X_test)
print 'accuracy', accuracy_score(predicted, y_test)
print confusion_matrix(predicted, y_test)
# [notebook output] accuracy 0.8645 [[186 3 1 1 0 1 1 2 0 0] [ 2 164 13 4 3 3 4 5 1 0] [ 0 2 167 1 10 0 8 3 2 4] [ 0 1 2 162 0 1 1 4 12 0] [ 0 1 14 0 165 2 0 6 2 1] [ 0 2 0 3 2 195 1 2 0 1] [ 1 3 4 2 0 0 163 1 15 0] [ 6 11 10 8 24 3 3 157 2 1] [ 2 4 3 10 6 0 6 4 174 2] [ 0 3 0 2 0 0 1 1 1 196]]