import numpy as np
from sklearn import linear_model
import pylab as pl
import datetime
# callback for the benchmarks: store a (timestamp, loss) pair at each
# iteration, keyed by C = 1 / alpha
def callback(w, X, y, alpha):
    key = 1. / alpha
    if key not in bench:
        bench[key] = []
    bench[key].append(datetime.datetime.now())
    bench[key].append(linear_model.logistic._logistic_loss(w, X, y, alpha))
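# linear_model.logistic._logistic_loss is a private scikit-learn helper; as a
# rough sketch (an assumption about its behavior, not the library source), it
# computes the L2-regularized logistic loss for labels in {-1, +1}:
def _logistic_loss_sketch(w, X, y, alpha):
    z = y * np.dot(X, w)  # margins y_i * <x_i, w>
    return np.sum(np.log1p(np.exp(-z))) + .5 * alpha * np.dot(w, w)

# First benchmark: the Haxby fMRI dataset, with labels binarized to -1/+1.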
X = np.load('/Users/fabian/dev/scikit-learn/X_haxby.npy')
y = np.load('/Users/fabian/dev/scikit-learn/y_haxby.npy')
y = np.sign(y - np.asarray(y).mean())
alphas = np.logspace(0, 6, 30)
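# scikit-learn parameterizes regularization as C = 1 / alpha; fit the full
# path with a very tight tolerance to get reference solutions for each C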
bench = {}  # filled in by the callback
coefs, Cs = linear_model.logistic_regression_path(
    X, y, fit_intercept=False, gtol=1e-32, Cs=1. / alphas, method='lbfgs',
    callback=callback)
print('end')
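# loss at the tightly converged optimum: the reference value for each C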
true_loss = {}
for i, C in enumerate(Cs):
    true_loss[C] = linear_model.logistic._logistic_loss(coefs[i], X, y, 1. / C)
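# benchmark each solver: one subplot per method, curve color encodes C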
cm = pl.cm.coolwarm_r
f, axarr = pl.subplots(3, sharex=True, sharey=True)
methods = ('lbfgs', 'newton-cg', 'liblinear')
for i_m, m in enumerate(methods):
    bench = {}
    start = datetime.datetime.now()
    _, _ = linear_model.logistic_regression_path(
        X, y, fit_intercept=False, Cs=1. / alphas, method=m, callback=callback)
    for i, C in enumerate(Cs):
        prec_bench = np.array(bench[C][1::2]) - true_loss[C]
        time_bench = [(t - start).total_seconds() for t in bench[C][0::2]]
        color = cm((i + 5.) / alphas.size)
        axarr[i_m].plot(time_bench, prec_bench, c=color, lw=4)
    axarr[i_m].set_title('Method = %s' % m, fontsize=18)
    axarr[i_m].vlines(np.max(time_bench), *axarr[i_m].get_ylim())
    axarr[i_m].axis('tight')
    axarr[i_m].text(np.max(time_bench) - 2.5, 2.0,
                    'Total time: %.02f' % np.max(time_bench), fontsize=18)
fig = pl.gcf()
fig.set_size_inches(10.5, 8.5)
pl.ylabel(r'distance to optimal value of loss function', fontsize='xx-large', verticalalignment='bottom')
pl.xlabel(r'time (in seconds)', fontsize='xx-large')
#pl.savefig('logistic_path_convergence_%s.png' % int(corr))
pl.show()
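# Second benchmark: synthetic data (10**4 samples, 10**3 features) with
# added noise and a constant offset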
n_samples, n_features = 10 ** 4, 10 ** 3
corr = 1.
np.random.seed(0)
X = np.random.randn(n_samples, n_features)
w = np.random.randn(n_features)
y = np.sign(X.dot(w))
X += 0.8 * np.random.randn(n_samples, n_features) # add noise
X += corr  # shift by a constant term, which makes the features correlated
X = np.hstack((X, np.ones((X.shape[0], 1)))) # add a column of ones
y = np.sign(y - np.asarray(y).mean())
alphas = np.logspace(0, 6, 30)
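# reference solutions for each C, as above (gtol=1e-16 here)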
coefs, Cs = linear_model.logistic_regression_path(
    X, y, fit_intercept=False, gtol=1e-16, Cs=1. / alphas, method='lbfgs')
print('end')
true_loss = {}
for i, C in enumerate(Cs):
    true_loss[C] = linear_model.logistic._logistic_loss(coefs[i], X, y, 1. / C)
cm = pl.cm.coolwarm_r
f, axarr = pl.subplots(3, sharex=True, sharey=True)
methods = ('lbfgs', 'newton-cg', 'liblinear')
for i_m, m in enumerate(methods):
    bench = {}
    start = datetime.datetime.now()
    _, _ = linear_model.logistic_regression_path(
        X, y, fit_intercept=False, Cs=1. / alphas, method=m, callback=callback)
    for i, C in enumerate(Cs):
        prec_bench = np.array(bench[C][1::2]) - true_loss[C]
        time_bench = [(t - start).total_seconds() for t in bench[C][0::2]]
        color = cm((i + 5.) / alphas.size)
        axarr[i_m].plot(time_bench, prec_bench, c=color, lw=4)
    axarr[i_m].set_title('Method = %s' % m)
    axarr[i_m].vlines(np.max(time_bench), *axarr[i_m].get_ylim())
    axarr[i_m].axis('tight')
    axarr[i_m].text(np.max(time_bench) - 4.5, 2.0,
                    'Total time: %.02f' % np.max(time_bench), fontsize=18)
fig = pl.gcf()
fig.set_size_inches(10.5, 8.5)
pl.ylabel(r'distance to optimal value of loss function', fontsize='xx-large', verticalalignment='bottom')
pl.xlabel(r'time (in seconds)', fontsize='xx-large')
#pl.savefig('logistic_path_convergence_%s.png' % int(corr))
pl.show()