%pylab inline
# Output: Populating the interactive namespace from numpy and matplotlib
# any other imports here
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from __future__ import division
# Raise pandas' display limits (rows, columns, line width) in one place.
_DISPLAY_OPTIONS = (
    ('display.max_rows', 500),
    ('display.max_columns', 50),
    ('display.width', 1000),
)
for _option_name, _option_value in _DISPLAY_OPTIONS:
    pd.set_option(_option_name, _option_value)
from sklearn import linear_model as lm
# Scratch cells: elementwise arithmetic on a 1x3 array.  The value each
# notebook cell displayed is kept as a trailing comment rather than as a live
# expression line (a bare `array(...)` only resolves through the pylab
# star-import).
a = np.array([(1, 2, 3)])
a      # array([[1, 2, 3]])
a * 2  # array([[2, 4, 6]])
a * a  # array([[1, 4, 9]])

# Hand checks on the practice-set x values used further down: their sum and
# the square of the first one.
3.385 + 0.48 + 1.35 + 465 + 36.33  # 506.54499999999996
3.385 ** 2                         # 11.458224999999999
import pandas as pd
import numpy as np
from sklearn import linear_model as lm
# Toy data set: five (x, y) observations for checking a straight-line fit.
practice_set = pd.DataFrame({
    'x': [3.385, 0.48, 1.35, 465, 36.33],
    'y': [44.5, 15.5, 8.1, 423, 119.5],
})

# Design matrix stored row-wise (2 x n): a row of ones for the intercept,
# then the x values.  The ones row is sized from the data so the block keeps
# working if observations are added or removed.
A = np.vstack([np.ones(len(practice_set)), practice_set['x'].values])

# Ordinary least squares through the normal equations (A A^T) beta = A y.
# Solving the linear system avoids forming the explicit inverse, which costs
# more and loses accuracy.  beta is [intercept, slope].
beta = np.linalg.solve(A.dot(A.T), A.dot(practice_set['y'].values))
# Call-style print with one argument behaves the same on Python 2 and 3.
print(beta)
# Cross-check: fit scikit-learn's LinearRegression on the same practice data
# so its intercept and coefficient can be compared with the normal-equation
# result printed just before.
model = lm.LinearRegression().fit(practice_set[['x']], practice_set['y'])
# Call-style print with one argument behaves the same on Python 2 and 3.
print(model.intercept_)
print(model.coef_)
# Output:
# [ 37.20089608 0.83821876]
# 37.2008960793
# [ 0.83821876]
from sklearn import linear_model as lm
from sklearn.datasets import load_boston
from sklearn import cross_validation as cv
from sklearn import metrics
# Load the data set returned by load_boston() and flatten it into a single
# DataFrame: the feature columns first, then the target appended as y_col.
# NOTE(review): load_boston was removed in scikit-learn 1.2 - confirm the
# pinned scikit-learn version still ships it.
y_col = 'MEDV'
boston = load_boston()
desc = boston.DESCR  # kept for reference; not used in the visible code
bostondf = pd.DataFrame(data=boston.data, columns=boston.feature_names)
bostondf[y_col] = boston.target
# Predictor columns passed to the models below (13 names).  The target column
# 'MEDV' is deliberately not part of this list.
x_cols = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX',
    'RM', 'AGE', 'DIS', 'RAD', 'TAX',
    'PTRATIO', 'B', 'LSTAT',
]
# Split predictors and target into train and test parts; test_size=0.333
# holds out about a third of the rows and the fixed random_state makes the
# split repeatable.
# NOTE(review): `cv` is sklearn.cross_validation, which newer scikit-learn
# releases replace with sklearn.model_selection - confirm the target version.
x_train, x_test, y_train, y_test = cv.train_test_split(
    bostondf[x_cols],
    bostondf[y_col],
    test_size=0.333,
    random_state=1234
)
# Sweep the ridge penalty from 1e-12 up to (but excluding) 0.1 in steps of
# 0.001 and record the mean squared error on the test split for each value.
# NOTE(review): the penalty is scored on the held-out test split itself; a
# separate validation split or cross-validation would avoid tuning to it.
alpha = np.arange(1e-12, 0.1, 0.001)
score = []
# The loop variable is deliberately not `a`, which is bound to a demo array
# earlier in the file and would otherwise be overwritten.
for ridge_alpha in alpha:
    model = lm.Ridge(alpha=ridge_alpha).fit(x_train, y_train)
    score.append(metrics.mean_squared_error(y_test, model.predict(x_test)))
# Plot the recorded error against the ridge penalty with a log-scaled y axis.
# The axes object is created here by plt.subplots, so `ax` no longer depends
# on a variable left over from some other notebook cell.
fig, ax = plt.subplots(figsize=(8, 6), dpi=80)
ax.set_yscale('log')
ax.plot(alpha, score)
# Output: [<matplotlib.lines.Line2D at 0x10d365b90>]