# Benchmark convergence of logistic regression solvers along a regularization
# path: a callback records (timestamp, loss) pairs per value of C, and the
# distance to the optimal loss is plotted against wall-clock time for the
# 'lbfgs', 'newton-cg' and 'liblinear' methods.
from sklearn import linear_model
import numpy as np
import pylab as pl
import datetime


# callback for the benchmarks: for each regularization strength, append the
# current timestamp and the current value of the logistic loss to `bench`
def callback(w, X, y, alpha):
    if 1. / alpha in bench.keys():
        bench[1. / alpha].append(datetime.datetime.now())
        bench[1. / alpha].append(
            linear_model.logistic._logistic_loss(w, X, y, alpha))
    else:
        bench[1. / alpha] = [datetime.datetime.now()]
        bench[1. / alpha].append(
            linear_model.logistic._logistic_loss(w, X, y, alpha))


# --- first benchmark: Haxby fMRI data ---
X = np.load('/Users/fabian/dev/scikit-learn/X_haxby.npy')
y = np.load('/Users/fabian/dev/scikit-learn/y_haxby.npy')
y = np.sign(y - np.asarray(y).mean())

# reference run with a very tight tolerance to get the "true" optimal loss
bench = {}
alphas = np.logspace(0, 6, 30)
coefs, Cs = linear_model.logistic_regression_path(
    X, y, fit_intercept=False, gtol=1e-32, Cs=1. / alphas,
    method='lbfgs', callback=callback)
print('end')

true_loss = {}
for i, C in enumerate(Cs):
    true_loss[C] = linear_model.logistic._logistic_loss(coefs[i], X, y, 1. / C)

cm = pl.cm.coolwarm_r
f, axarr = pl.subplots(3, sharex=True, sharey=True)
methods = ('lbfgs', 'newton-cg', 'liblinear')
for i_m, m in enumerate(methods):
    bench = {}
    start = datetime.datetime.now()
    _, _ = linear_model.logistic_regression_path(
        X, y, fit_intercept=False, Cs=1. / alphas, method=m,
        callback=callback)
    for i, C in enumerate(Cs):
        # even entries are timestamps, odd entries are loss values
        prec_bench = np.array(bench[C][1::2]) - true_loss[C]
        time_bench = [(x - start).total_seconds() for x in bench[C][0::2]]
        color = cm((i + 5.) / alphas.size)
        axarr[i_m].plot(time_bench, prec_bench, c=color, lw=4)
    axarr[i_m].set_title('Method = %s' % m, fontsize=18)
    axarr[i_m].vlines(np.max(time_bench), *axarr[i_m].get_ylim())
    axarr[i_m].axis('tight')
    axarr[i_m].text(np.max(time_bench) - 2.5, 2.0,
                    'Total time: %.02f' % np.max(time_bench), fontsize=18)

fig = pl.gcf()
fig.set_size_inches(10.5, 8.5)
pl.ylabel(r'distance to optimal value of loss function',
          fontsize='xx-large', verticalalignment='bottom')
pl.xlabel(r'time (in seconds)', fontsize='xx-large')
# pl.savefig('logistic_path_convergence_%s.png' % int(corr))
pl.show()


# --- second benchmark: synthetic correlated data ---
n_samples, n_features = 10 ** 4, 10 ** 3
corr = 1.
np.random.seed(0)
X = np.random.randn(n_samples, n_features)
w = np.random.randn(n_features)
y = np.sign(X.dot(w))
X += 0.8 * np.random.randn(n_samples, n_features)  # add noise
X += corr  # this makes it correlated by adding a constant term
X = np.hstack((X, np.ones((X.shape[0], 1))))  # add a column of ones
y = np.sign(y - np.asarray(y).mean())

# reference run with a tight tolerance to get the "true" optimal loss
alphas = np.logspace(0, 6, 30)
coefs, Cs = linear_model.logistic_regression_path(
    X, y, fit_intercept=False, gtol=1e-16, Cs=1. / alphas,
    method='lbfgs')
print('end')

true_loss = {}
for i, C in enumerate(Cs):
    true_loss[C] = linear_model.logistic._logistic_loss(coefs[i], X, y, 1. / C)

cm = pl.cm.coolwarm_r
f, axarr = pl.subplots(3, sharex=True, sharey=True)
methods = ('lbfgs', 'newton-cg', 'liblinear')
for i_m, m in enumerate(methods):
    bench = {}
    start = datetime.datetime.now()
    _, _ = linear_model.logistic_regression_path(
        X, y, fit_intercept=False, Cs=1. / alphas, method=m,
        callback=callback)
    for i, C in enumerate(Cs):
        prec_bench = np.array(bench[C][1::2]) - true_loss[C]
        time_bench = [(x - start).total_seconds() for x in bench[C][0::2]]
        color = cm((i + 5.) / alphas.size)
        axarr[i_m].plot(time_bench, prec_bench, c=color, lw=4)
    axarr[i_m].set_title('Method = %s' % m)
    axarr[i_m].vlines(np.max(time_bench), *axarr[i_m].get_ylim())
    axarr[i_m].axis('tight')
    axarr[i_m].text(np.max(time_bench) - 4.5, 2.0,
                    'Total time: %.02f' % np.max(time_bench), fontsize=18)

fig = pl.gcf()
fig.set_size_inches(10.5, 8.5)
pl.ylabel(r'distance to optimal value of loss function',
          fontsize='xx-large', verticalalignment='bottom')
pl.xlabel(r'time (in seconds)', fontsize='xx-large')
# pl.savefig('logistic_path_convergence_%s.png' % int(corr))
pl.show()