%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
# Read the first dataset. header=None tells pandas not to treat the first
# line as column labels, so the two columns are named explicitly.
data1 = pd.read_csv(
    'ex1data1.txt',
    names=['x', 'y'],
    header=None,
)
# Kept as the final expression so the notebook renders the preview.
data1.head()
| | x | y |
---|---|---|
0 | 6.1101 | 17.5920 |
1 | 5.5277 | 9.1302 |
2 | 8.5186 | 13.6620 |
3 | 7.0032 | 11.8540 |
4 | 5.8598 | 6.8233 |
# Draw the raw (x, y) points as red crosses on a fresh 8x6 figure.
ax = plt.figure(figsize=(8, 6)).gca()
ax.plot(data1['x'], data1['y'], 'rx')
plt.show()
from sklearn import linear_model

# Fit a linear regression of column 'y' on column 'x'.
# The double brackets keep 'x' two-dimensional (one column) for fit().
clf = linear_model.LinearRegression()
clf.fit(data1[['x']].values, data1['y'].values)

# Single-argument print(...) is valid on both Python 2 and Python 3,
# whereas a bare print statement is a SyntaxError on Python 3.
print(clf.coef_)
print(clf.intercept_)
[ 1.19303364] -3.89578087831
# Overlay the fitted regression line on the scatter of the raw points.
plt.figure(figsize=(8, 6))
plt.plot(data1['x'], data1['y'], 'rx')
# Predict from a plain ndarray, the same form clf was fitted on. Passing the
# DataFrame instead makes recent scikit-learn releases warn that X has
# feature names while the estimator was fitted without them.
plt.plot(data1['x'], clf.predict(data1[['x']].values))
plt.show()
# Read the second dataset. header=None tells pandas not to treat the first
# line as column labels; every column is parsed as float.
data2 = pd.read_csv(
    'ex1data2.txt',
    dtype=float,
    names=['size', 'number of bedrooms', 'price'],
    header=None,
)
# Kept as the final expression so the notebook renders the preview.
data2.head()
| | size | number of bedrooms | price |
---|---|---|---|
0 | 2104 | 3 | 399900 |
1 | 1600 | 3 | 329900 |
2 | 2400 | 3 | 369000 |
3 | 1416 | 2 | 232000 |
4 | 3000 | 4 | 539900 |
from sklearn import preprocessing

# Standardise the two input columns before fitting, reusing one ndarray for
# both fit() and transform() so the scaler sees identical data.
feature_values = data2[['size', 'number of bedrooms']].values
scaler = preprocessing.StandardScaler().fit(feature_values)
features = scaler.transform(feature_values)

# `std_` was deprecated and later removed from StandardScaler in favour of
# `scale_`; fall back to `std_` only on old releases that lack `scale_`.
feature_scale = scaler.scale_ if hasattr(scaler, 'scale_') else scaler.std_
# format() gives the same "a b" output as the Python 2 statement
# `print a, b`, and runs unchanged on Python 3.
print('{0} {1}'.format(scaler.mean_, feature_scale))
[ 2000.68085106 3.17021277] [ 7.86202619e+02 7.52842809e-01]
# Fit a linear regression of 'price' on the standardised feature matrix.
clf2 = linear_model.LinearRegression()
clf2.fit(features, data2['price'].values)

# Single-argument print(...) is valid on both Python 2 and Python 3,
# whereas a bare print statement is a SyntaxError on Python 3.
print(clf2.coef_)
print(clf2.intercept_)
[ 109447.79646964 -6578.35485416] 340412.659574