# Load the basic libraries...
import numpy as np
import scipy as sp
import sklearn
import matplotlib.pyplot as plt

try:
    # Under IPython/Jupyter keep the original "%pylab inline" setup so that
    # code relying on the pylab namespace (bare ``plot``, ``linspace``, ...)
    # keeps working.
    get_ipython().run_line_magic("pylab", "inline")
except NameError:
    # Plain Python interpreter: magics do not exist here. Everything in this
    # section uses the explicit ``np`` / ``plt`` imports above instead.
    pass

from sklearn.datasets import make_classification
from sklearn.linear_model import Perceptron, LogisticRegression
from sklearn.metrics import hinge_loss
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVC

try:
    from sklearn import model_selection
except ImportError:
    # Older scikit-learn releases only ship the legacy module.
    from sklearn import cross_validation as model_selection

# Names the original code bound via ``sklearn.cross_validation``.
cross_validation = model_selection
train_test_split = model_selection.train_test_split


# Toy dataset: seven 2-D points, four labelled +1 and three labelled -1.
seven_X = np.array([[2, 1], [2, 3], [1, 2], [3, 2], [5, 2], [5, 4], [6, 3]])
seven_y = np.array([1, 1, 1, 1, -1, -1, -1])


def plot_problem(X, y, h=None, surfaces=True):
    """Plot a two-dimensional labeled dataset and, optionally, a classifier.

    Parameters
    ----------
    X : array with two columns
        The examples; column 0 is drawn on the x-axis, column 1 on the y-axis.
    y : array-like
        Labels, used to colour the scattered points.
    h : callable, optional
        Hypothesis to visualise. It is first called on the whole mesh matrix;
        if that yields a scalar or raises ``ValueError`` it is applied to the
        mesh points one at a time instead.
    surfaces : bool
        ``True`` draws filled decision surfaces, ``False`` draws only the
        decision boundaries (contour lines).

    Raises
    ------
    AssertionError
        If ``X`` does not have exactly two columns.
    """
    assert X.shape[1] == 2, "Dataset is not two-dimensional"

    if h is not None:
        # Create a mesh to plot in, padded by one unit around the data.
        r = 0.02  # mesh resolution
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, r),
                             np.arange(y_min, y_max, r))
        XX = np.c_[xx.ravel(), yy.ravel()]

        try:
            Z = h(XX)
            if np.shape(Z) == ():
                # h returns a scalar when applied to a matrix; map explicitly.
                # A list comprehension (not ``map``) so that a real sequence
                # is handed to NumPy on Python 3 as well.
                Z = np.array([h(x) for x in XX])
        except ValueError:
            # h can't be applied to a matrix; map explicitly.
            Z = np.array([h(x) for x in XX])

        # Put the result into a color plot. ``asarray`` lets h return a plain
        # list without breaking the reshape.
        Z = np.asarray(Z).reshape(xx.shape)
        if surfaces:
            plt.contourf(xx, yy, Z, cmap=plt.cm.Pastel1)
        else:
            plt.contour(xx, yy, Z)

    # Plot the dataset (always, with or without a hypothesis).
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, marker='o', s=50)


plot_problem(seven_X, seven_y)

# The toy dataset with one additional negative example at (12, 8).
X = np.append(seven_X, [[12, 8]], axis=0)
y = np.append(seven_y, -1)

# Two more variants, each adding a single negative example.
X1, y1 = np.append(seven_X, [[3, 3]], axis=0), np.append(seven_y, -1)
X2, y2 = np.append(seven_X, [[2, 2]], axis=0), np.append(seven_y, -1)


def plot_error_surface(err, c_range, g_range):
    """Show an error grid over C and gamma as a labelled contour/grey image.

    Parameters
    ----------
    err : 2-D array
        Error values. Rows are drawn along the y-axis (labelled "C") and
        columns along the x-axis (labelled "gamma").
        NOTE(review): presumably one row per integer in ``c1..c2`` and one
        column per integer in ``g1..g2`` -- confirm against the caller.
    c_range : (c1, c2)
        Values used to label the first and last row.
    g_range : (g1, g2)
        Values used to label the first and last column.

    The two range arguments are ordinary parameters unpacked in the body;
    tuple parameters in the signature are Python 2-only syntax. Positional
    calls such as ``plot_error_surface(err, (c1, c2), (g1, g2))`` still work.
    """
    c1, c2 = c_range
    g1, g2 = g_range

    # Tick positions run over grid indices 0..(hi - lo); the labels must cover
    # lo..hi inclusive so both sequences always have the same length.
    plt.xticks(list(range(0, g2 - g1 + 1, 5)), list(range(g1, g2 + 1, 5)))
    plt.xlabel("gamma")
    plt.yticks(list(range(0, c2 - c1 + 1, 5)), list(range(c1, c2 + 1, 5)))
    plt.ylabel("C")

    contours = plt.contour(err)
    # 1 - err with a grey colormap: lower error is drawn brighter.
    plt.imshow(1 - err, interpolation='bilinear', origin='lower',
               cmap=plt.cm.gray)
    plt.clabel(contours, inline=1, fontsize=10)
    plt.show()


# Synthetic two-class problem (no random_state given, so it differs per run).
X, y = make_classification(n_samples=500, n_features=2, n_classes=2,
                           n_redundant=0, n_clusters_per_class=1)
# Rescale and shift the second feature, then overwrite one value with 3000
# (presumably to plant an outlier far from the rest).
X[:, 1] = X[:, 1] * 100 + 1000
X[0, 1] = 3000
plot_problem(X, y)

# NOTE(review): machine-specific absolute path; the file must exist locally.
data = np.loadtxt("/home/jan/Downloads/glass.data", delimiter=",")
# Columns 1..9 are used as features and column 10 as the label; column 0 is
# skipped.
glass_X, glass_y = data[:, 1:10], data[:, 10]

X_train, X_test, y_train, y_test = train_test_split(
    glass_X, glass_y, train_size=2.0 / 3, random_state=42)
# Single pre-formatted string so the output is the same on Python 2 and 3.
print("%s %s" % (X_train.shape, X_test.shape))