"""Boston housing regression demo.

Loads the Boston house-price dataset, explores it visually, fits a
LinearRegression and a DecisionTreeRegressor on the full data (in-sample
fit, no train/test split -- this is a teaching demo), and finally shows
how a linear model extrapolates on synthetic data outside its training
range.
"""
# Explicit imports replace the former `%pylab inline` IPython magic,
# which is not valid plain Python and was the only thing that defined
# `np` and `plt`.
import numpy as np
import matplotlib.pyplot as plt

# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and
# removed in 1.2 (ethical concerns about the dataset). On modern
# scikit-learn this import fails -- confirm the pinned sklearn version,
# or port the demo to e.g. `fetch_california_housing`.
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

# ---------------------------------------------------------------------
# Load and inspect the dataset.
# ---------------------------------------------------------------------
data = load_boston()
print(data.keys())
print(data.data.shape)    # (n_samples, n_features)
print(data.target.shape)  # (n_samples,) -- median house value in $1000s
print(data.DESCR)

# Distribution of the target (house prices).
plt.hist(data.target)
plt.xlabel('price ($1000s)')
plt.ylabel('count')
plt.show()

# Scatter the first n_chns features against the target, sharing the
# price axis so the panels are directly comparable.
n_chns = 6
fig, axs = plt.subplots(1, n_chns, sharey=True, figsize=(24, 8))
for ii in range(n_chns):
    axs[ii].scatter(data.data[:, ii], data.target)
    axs[ii].set_title(
        'Median value (Y) vs {0} (X)'.format(data.feature_names[ii]),
        fontsize=16)
plt.show()

# ---------------------------------------------------------------------
# Linear regression: fit on the full dataset and plot predicted vs true.
# ---------------------------------------------------------------------
clf = LinearRegression()
clf.fit(data.data, data.target)
predicted = clf.predict(data.data)

plt.figure(figsize=(10, 8))
plt.scatter(data.target, predicted)
# Diagonal y = x: a perfect model would put every point on this line.
plt.plot([0, 50], [0, 50], '--k', linewidth=3)
plt.axis('tight')
plt.xlabel('True price ($1000s)', fontsize=15)
plt.ylabel('Predicted price ($1000s)', fontsize=15)
plt.tick_params(axis='both', which='major', labelsize=15)
plt.grid(True)  # boolean arg; the old string form 'on' is deprecated
plt.show()

# What is the correlation between the observations and the property price?
# The coefficients (shows the correlation between the property price and
# property related information).
print('Attribute information and Model coefficients:')
for name, coef in zip(data.feature_names, clf.coef_):
    print('{0}: {1:.3f}'.format(name, coef))

# In-sample error. NOTE(review): despite the label, np.mean(...) is the
# *mean* squared error, not the residual sum of squares.
print("\nResidual sum of squares: %.2f"
      % np.mean((predicted - data.target) ** 2))
print("Residual mean deviation: %.2f (K USD)"
      % np.sqrt(np.mean((predicted - data.target) ** 2)))

# ---------------------------------------------------------------------
# Decision tree: same in-sample diagnostic plot. An unconstrained tree
# memorizes the training data, so expect a near-perfect diagonal.
# ---------------------------------------------------------------------
clf = DecisionTreeRegressor()
clf.fit(data.data, data.target)
predicted = clf.predict(data.data)

plt.figure(figsize=(10, 8))
plt.scatter(data.target, predicted)
plt.plot([0, 50], [0, 50], '--k', linewidth=3)
plt.axis('tight')
plt.xlabel('True price ($1000s)', fontsize=15)
plt.ylabel('Predicted price ($1000s)', fontsize=15)
plt.tick_params(axis='both', which='major', labelsize=15)
plt.grid(True)
plt.show()

# ---------------------------------------------------------------------
# Extrapolation demo on synthetic data: fit on x in [0, 30), predict on
# x in [20, 50] -- mostly outside the training range.
# ---------------------------------------------------------------------
# x from 0 to 30 (40 random samples)
x = 30 * np.random.random(40)
# y = a*x + b with Gaussian noise
y = 0.5 * x + 5.0 + np.random.normal(size=x.shape) * 3

# Fit a linear regression; x[:, None] reshapes to the (n_samples, 1)
# 2-D array that scikit-learn expects.
clf = LinearRegression()
clf.fit(x[:, None], y)

# Predict y at new x values beyond the training interval.
x_new = np.linspace(20, 50, 10)
y_new = clf.predict(x_new[:, None])

# Plot training data alongside the extrapolated predictions.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111)
ax.scatter(x, y, label='model fitting data')
ax.scatter(x_new, y_new, color='r', label='new observations & predictions')
ax.set_xlabel('x', fontsize=15)
ax.set_ylabel('y', fontsize=15)
ax.legend(loc='upper left', fontsize=15)
ax.tick_params(axis='both', which='major', labelsize=15)
ax.axis('tight')
ax.grid(True)
plt.show()  # was missing; required outside of `%pylab inline` notebooks