%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression

model = LinearRegression(fit_intercept=True)  # hyperparameters are set when the estimator is created
print(model.fit_intercept)                    # ...and stored as attributes on the instance
print(model)

x = np.array([0, 1, 2])
y = np.array([0, 1, 2])
plt.plot(x, y, marker='o');

X = x[:, np.newaxis]  # The input data for sklearn is 2D: (samples == 3 x features == 1)
X

model.fit(X, y)
model.coef_

from sklearn import neighbors, datasets

iris = datasets.load_iris()
X, y = iris.data, iris.target

knn = neighbors.KNeighborsClassifier(n_neighbors=1)
knn.fit(X, y)

# What kind of iris has 3cm x 5cm sepal and 4cm x 2cm petal?
print(iris.target_names[knn.predict([[3, 5, 4, 2]])])

# A plot of the sepal space and the prediction of the KNN
from fig_code import plot_iris_classification
plot_iris_classification(neighbors.KNeighborsClassifier, n_neighbors=3)

from sklearn.svm import SVC

unknown_iris = [[3, 5, 4, 2]]
# Use SVC to train and predict the species of the unknown iris...
# (one possible solution is sketched at the end of this section)

# Create some simple data
np.random.seed(0)
X = np.random.random(size=(20, 1))
y = 3 * X.squeeze() + 2 + np.random.normal(size=20)

# Fit a linear regression to it
model = LinearRegression(fit_intercept=True)
model.fit(X, y)
print("Model coefficient: %.5f, and intercept: %.5f" % (model.coef_[0], model.intercept_))

# Plot the data and the model prediction
X_test = np.linspace(0, 1, 100)[:, np.newaxis]
y_test = model.predict(X_test)

plt.plot(X.squeeze(), y, 'o')
plt.plot(X_test.squeeze(), y_test);

X, y = iris.data, iris.target

from sklearn.decomposition import PCA

pca = PCA(n_components=2)
pca.fit(X)
X_reduced = pca.transform(X)
print("Reduced dataset shape:", X_reduced.shape)

plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y);

print("Meaning of the 2 components:")
for component in pca.components_:
    print(" + ".join("%.2f x %s" % (value, name)
                     for value, name in zip(component, iris.feature_names)))

from sklearn.cluster import KMeans

k_means = KMeans(n_clusters=3, random_state=0)
k_means.fit(X_reduced)
y_pred = k_means.predict(X_reduced)

plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y_pred);

from sklearn.neighbors import KNeighborsClassifier

X, y = iris.data, iris.target
clf = KNeighborsClassifier(n_neighbors=1)
clf.fit(X, y)
y_pred = clf.predict(X)
print(np.all(y == y_pred))

from sklearn.metrics import confusion_matrix

print(confusion_matrix(y, y_pred))

from sklearn.model_selection import train_test_split

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)
clf.fit(Xtrain, ytrain)
ypred = clf.predict(Xtest)
print(confusion_matrix(ytest, ypred))

# %run fig_code/svm_gui.py
# or, if this doesn't work, try
# !python fig_code/svm_gui.py
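
# --- A possible solution to the SVC exercise above (a sketch, not the canonical answer) ---
# It mirrors the KNN cell: fit the classifier on the full iris data, then predict the
# unknown sample.  The default RBF kernel is assumed here; the kernel and C are
# hyperparameters you can experiment with.
from sklearn import datasets
from sklearn.svm import SVC

iris = datasets.load_iris()
X, y = iris.data, iris.target

unknown_iris = [[3, 5, 4, 2]]

svc = SVC()                 # all hyperparameters left at their defaults
svc.fit(X, y)               # train on the full iris dataset
print(iris.target_names[svc.predict(unknown_iris)])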