from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data
y = iris.target
print(X.shape)
print(y.shape)

from sklearn.svm import LinearSVC
#LinearSVC?
clf = LinearSVC(loss='squared_hinge')  # squared hinge loss (formerly spelled loss='l2')
print(clf)
clf = clf.fit(X, y)
clf.coef_       # shape (n_classes, n_features): the weights used in the inner product
clf.intercept_  # shape (n_classes,): the constants in the decision function
X_new = [[5.0, 3.6, 1.3, 0.25]]
print(clf.predict(X_new))

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

X = iris.data[:, 2:4]  # we take only the last two features (petal length and petal width)
y = iris.target
h = .02  # step size in the mesh

# We create an instance of SVM and fit our data. We do not scale our
# data since we want to plot the support vectors.
C = 1.0  # SVM regularization parameter
svc = svm.SVC(kernel='linear', C=C).fit(X, y)
rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C).fit(X, y)
poly_svc = svm.SVC(kernel='poly', degree=3, C=C).fit(X, y)
lin_svc = svm.LinearSVC(C=C).fit(X, y)

# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

# titles for the plots
titles = ['SVC with linear kernel',
          'LinearSVC (linear kernel)',
          'SVC with RBF kernel',
          'SVC with polynomial (degree 3) kernel']

plt.figure(figsize=(16, 10))
for i, clf in enumerate((svc, lin_svc, rbf_svc, poly_svc)):
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    plt.subplot(2, 2, i + 1)
    plt.subplots_adjust(wspace=0.1, hspace=0.25)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], s=80, c=y, cmap=plt.cm.Paired)
    plt.xlabel('Petal length (third feature)', fontsize=15)
    plt.ylabel('Petal width (fourth feature)', fontsize=15)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
    plt.title(titles[i], fontsize=15)
plt.show()
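# Added sketch (not in the original walkthrough): a quick sanity check comparing
# the training accuracy of the four classifiers fitted above. score() returns the
# mean accuracy on the data it is given, so these numbers describe fit to the
# training set, not generalization.
for name, model in zip(titles, (svc, lin_svc, rbf_svc, poly_svc)):
    print(name, '-- training accuracy:', model.score(X, y))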
from sklearn import linear_model
#linear_model.LogisticRegression?
clf2 = linear_model.LogisticRegression(C=1e5)
X = iris.data[:, 2:4]
y = iris.target
clf2.fit(X, y)
X_new = [[3.6, 0.25]]
print(clf2.predict_proba(X_new))

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max] x [y_min, y_max].
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
Z = clf2.predict(np.c_[xx.ravel(), yy.ravel()])
Zprob = clf2.predict_proba(np.c_[xx.ravel(), yy.ravel()])

# Put the results into color plots
Z = Z.reshape(xx.shape)
Zprob = Zprob.reshape(xx.shape[0], xx.shape[1], 3)

plt.figure(figsize=(16, 10))

# titles for the plots
titles = ['Logistic regression (LR)',
          'LR class 1 - probabilities',
          'LR class 2 - probabilities',
          'LR class 3 - probabilities']
labels = ['class 1', 'class 2', 'class 3']

# number of classes and plot colors
n_classes = np.amax(y) + 1
plot_colors = 'rgy'

for i, boundaries in enumerate((Z, Zprob[:, :, 0], Zprob[:, :, 1], Zprob[:, :, 2])):
    # Plot the decision boundary (or the per-class probabilities) by assigning
    # a color to each point in the mesh [x_min, x_max] x [y_min, y_max].
    plt.subplot(2, 2, i + 1)
    plt.subplots_adjust(wspace=0.1, hspace=0.25)
    plt.pcolormesh(xx, yy, boundaries, cmap=plt.cm.Paired)
    plt.colorbar()

    # Plot also the training points, one class at a time
    for j, color in zip(range(n_classes), plot_colors):
        idx = np.where(y == j)
        plt.scatter(X[idx, 0], X[idx, 1], s=100, c=color, label=labels[j])
    #plt.scatter(X[:, 0], X[:, 1], s=100, c=y, edgecolors='k', cmap=plt.cm.Paired)
    plt.xlabel('Petal length', fontsize=15)
    plt.ylabel('Petal width', fontsize=15)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
    plt.title(titles[i], fontsize=15)
    plt.legend(loc='upper left')
plt.show()

y_model = clf2.predict(X)
print(y_model == y)
print("Accuracy:", float(np.sum(y_model == y)) / len(y))

from sklearn.model_selection import cross_val_score  # sklearn.cross_validation was removed; use model_selection

# evaluate the model using 10-fold cross-validation
scores = cross_val_score(linear_model.LogisticRegression(C=1e2), X, y, scoring='accuracy', cv=10)
print(scores)
print(scores.mean())
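# Added sketch (an assumption, not part of the original notebook): the
# cross-validation above estimates generalization by averaging over folds; a
# simpler alternative is a single held-out split via
# sklearn.model_selection.train_test_split. The test_size and random_state
# values here are arbitrary illustrative choices.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
clf3 = linear_model.LogisticRegression(C=1e2).fit(X_train, y_train)
print('Held-out accuracy:', clf3.score(X_test, y_test))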