# NOTE(review): this file is a flattened Jupyter-notebook export. The original
# "%matplotlib inline" IPython magic is not valid Python outside a notebook,
# so it is kept below as a comment to make the module importable as a script.
# %matplotlib inline

import numpy as np
import scipy as sp
import pandas as pd
import sklearn
import sklearn.linear_model
import sklearn.metrics  # used by custom_f1; not guaranteed by "import sklearn" alone
import sklearn.svm
import seaborn as sns
from matplotlib import pyplot as plt

from sklearn.ensemble import RandomForestClassifier
# sklearn.cross_validation was removed in scikit-learn 0.20;
# sklearn.model_selection.cross_val_score is the drop-in replacement.
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

## your code here (exercise cells: data loading / exploration / model fitting)


def cutoff_predict(clf, X, cutoff):
    """Convert predicted probabilities into 0/1 predictions at a custom cutoff.

    Parameters
    ----------
    clf : a **trained** classifier object exposing ``predict_proba``
    X : 2D numpy array of features
    cutoff : float
        Probabilities of the positive class strictly greater than ``cutoff``
        map to 1, otherwise 0.

    Returns
    -------
    numpy array of 0/1 predictions.
    """
    # Column 1 of predict_proba is the positive-class probability
    # (binary-classification convention in scikit-learn).
    return (clf.predict_proba(X)[:, 1] > cutoff).astype(int)


def custom_f1(cutoff):
    """Build an F1 scorer that thresholds probabilities at ``cutoff``.

    Returns a callable with the ``(clf, X, y)`` scorer signature expected by
    ``cross_val_score``, so different cutoffs can be compared via CV.
    """
    def f1_cutoff(clf, X, y):
        ypred = cutoff_predict(clf, X, cutoff)
        return sklearn.metrics.f1_score(y, ypred)
    return f1_cutoff


def plot_decision_surface(clf, X_train, Y_train):
    """Fit ``clf`` on 2-D training data and plot its decision surface.

    Parameters
    ----------
    clf : classifier implementing ``fit`` and ``predict`` (``predict_proba``
        is used instead when available, for a smoother surface)
    X_train : 2D numpy array with exactly two feature columns
    Y_train : 1D array of class labels (also used to color the scatter)

    Raises
    ------
    ValueError
        If ``X_train`` does not have exactly 2 columns.
    """
    plot_step = 0.1

    if X_train.shape[1] != 2:
        raise ValueError("X_train should have exactly 2 columns!")

    # Pad the grid by one plot_step beyond the data range on every side.
    x_min, x_max = X_train[:, 0].min() - plot_step, X_train[:, 0].max() + plot_step
    y_min, y_max = X_train[:, 1].min() - plot_step, X_train[:, 1].max() + plot_step
    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))

    clf.fit(X_train, Y_train)

    # Prefer class-1 probabilities for a graded surface; fall back to hard
    # predictions for classifiers without predict_proba (e.g. plain SVC).
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    if hasattr(clf, 'predict_proba'):
        Z = clf.predict_proba(grid_points)[:, 1]
    else:
        Z = clf.predict(grid_points)
    Z = Z.reshape(xx.shape)

    plt.contourf(xx, yy, Z, cmap=plt.cm.Reds)
    plt.scatter(X_train[:, 0], X_train[:, 1], c=Y_train, cmap=plt.cm.Paired)
    plt.show()


## your code here (remaining exercise cells)