%matplotlib inline import numpy as np import matplotlib.pyplot as plt import pandas as pd from sklearn import datasets boston = datasets.load_boston() print boston.DESCR boston.keys() boston.data.shape boston.feature_names df = pd.DataFrame(boston.data) df.head() df.columns = boston.feature_names df['MEDV'] = boston.target df.head() df.describe() plt.hist(df.MEDV) plt.title('Housing prices') plt.xlabel('price') plt.ylabel('frequency') plt.show() plt.scatter(df.RM, df.MEDV) plt.title('Average Number of Rooms vs Housing prices') plt.xlabel('average number of rooms') plt.ylabel('price') plt.show() from pandas.tools.plotting import scatter_matrix scatter_matrix(df, figsize=(14, 14), diagonal='hist') corrs = df.corr() corrs plt.pcolor(corrs, cmap='bwr', vmin=-1.0, vmax=1.0) plt.yticks(np.arange(0.5, len(corrs.index), 1), corrs.index) plt.xticks(np.arange(0.5, len(corrs.columns), 1), corrs.columns) plt.colorbar() from sklearn.linear_model import LinearRegression lr = LinearRegression(normalize=True) lr.fit(boston.data, boston.target) predicted = lr.predict(boston.data) plt.hist(boston.target - predicted, bins=50) plt.show() zip(boston.feature_names, lr.coef_) from sklearn.linear_model import RidgeCV rcv = RidgeCV(alphas=np.array([0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0]), normalize=True) rcv.fit(boston.data, boston.target) rcv.alpha_ zip(boston.feature_names, rcv.coef_) from sklearn.linear_model import LassoCV lcv = LassoCV(alphas=np.array([0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0]), normalize=True) lcv.fit(boston.data, boston.target) lcv.alpha_ zip(boston.feature_names, lcv.coef_) plt.scatter(df.INDUS, df.MEDV) plt.scatter(df.AGE, df.MEDV) plt.scatter(df.RAD, df.MEDV) plt.scatter(df.TAX, df.MEDV) from sklearn.linear_model import ElasticNetCV encv = ElasticNetCV(alphas=np.array([0.0001, 0.0003, 0.01, 0.03, 0.1, 0.3, 1.0]), \ l1_ratio=np.array([0.5, 0.8, 0.9, 0.95, 0.99, 0.995, 1.0]), normalize=True) encv.fit(boston.data, boston.target) zip(boston.feature_names, encv.coef_) encv.alpha_, encv.l1_ratio_