import numpy as np
from numpy import log, exp, hstack
from scipy.optimize import fmin_l_bfgs_b
from sklearn import linear_model


def logistic(x):
    """Elementwise logistic (sigmoid) function."""
    return 1 / (1 + exp(-x))


def log_loss(y_orig, n, k, eps=1e-15):
    """Binomial log loss for aggregated data: row i has n[i] trials and
    k[i] successes, with predicted success probability y_orig[i].
    Probabilities are clipped to [eps, 1 - eps] to avoid log(0)."""
    y = y_orig.copy()
    y[y < eps] = eps
    y[y > 1 - eps] = 1 - eps
    loss = -k * log(y) - (n - k) * log(1 - y)
    return loss.sum() / n.sum()


def lbfgs_func(x, X, n, k, lambd=1.0):
    """Objective and gradient for fmin_l_bfgs_b: L2-penalized binomial
    log loss. The first element of x is the (unpenalized) intercept;
    the remaining elements are the coefficients."""
    intercept, theta = x[0], x[1:]
    y = logistic(X.dot(theta) + intercept)
    penalty = lambd / 2 * (theta * theta).sum() / n.sum()
    obj = log_loss(y, n, k) + penalty
    # Residuals: expected minus observed success counts per row.
    r = n * y - k
    grad_intercept = r.sum() / n.sum()
    grad_coefs = (X.T.dot(r) + lambd * theta) / n.sum()
    grad = hstack([grad_intercept, grad_coefs])
    return obj, grad


# Reference fit on the disaggregated data with scikit-learn.
X = np.array([[1, 0]] * 4 + [[0, 1]] * 4)
y = np.array([1, 1, 1, 0, 1, 0, 0, 0])
C = 10
clf = linear_model.LogisticRegression(C=C)
clf.fit(X, y)
clf.intercept_, clf.coef_

# Equivalent fit on the aggregated data: two distinct rows, each observed
# four times, with 3 and 1 successes respectively.
X = np.array([[1, 0], [0, 1]])
n = np.array([4, 4])
k = np.array([3, 1])
lambd = 1 / C  # sklearn's C is the inverse of the regularization strength
x0 = np.zeros(X.shape[1] + 1)
x_optimal, *_ = fmin_l_bfgs_b(lbfgs_func, x0, args=(X, n, k, lambd))
intercept, theta = x_optimal[0], x_optimal[1:]
intercept, theta
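
# A quick sanity check (an added illustration, not part of the original
# script): both fits minimize the same penalized objective, so the
# aggregated L-BFGS solution should closely match scikit-learn's. The
# tolerance below is an assumption; solver defaults may leave tiny gaps.
print("sklearn:    intercept =", clf.intercept_[0], " coefs =", clf.coef_[0])
print("aggregated: intercept =", intercept, " coefs =", theta)
print("match:", np.allclose(clf.intercept_[0], intercept, atol=1e-2)
      and np.allclose(clf.coef_[0], theta, atol=1e-2))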