from sklearn import linear_model
import pandas as pd
# Load the training data and fill missing ages with the column mean so the
# Age feature has no NaN values when it is handed to the model.
train = pd.read_csv('train.csv')
mean_age = train['Age'].mean()
train['Age'] = train['Age'].fillna(mean_age)
# Encode Sex as a numeric feature: 1 for 'male', 0 for anything else
# (including missing values, matching the original else-branch).
# A single vectorized comparison replaces the per-row loop, which assigned
# through chained indexing (train.Sex[i] = ...): that is slow, depends on the
# index labels being 0..n-1, and does not reliably write back to the frame.
train['Sex'] = (train['Sex'] == 'male').astype(int)
# Logistic regression classifier with the library's default settings.
logiReg = linear_model.LogisticRegression()
# Target variable: the Survived column (a pandas Series).
y = train['Survived']
# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the print statement is a SyntaxError on Python 3.
print(type(y))
<class 'pandas.core.series.Series'>
# Feature matrix: a two-column DataFrame (double brackets select a frame,
# not a Series) holding the Age and Sex predictors.
X = train[['Age', 'Sex']]
# Single-argument print(...) works on both Python 2 and Python 3.
print(type(X))
<class 'pandas.core.frame.DataFrame'>
# Fit the classifier on the Age/Sex features against the Survived target.
# fit() returns the estimator itself, which is why its repr is echoed as the
# cell output.
logiReg.fit(X, y)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, penalty='l2', random_state=None, tol=0.0001)
# Single-argument print(...) works on both Python 2 and Python 3.
print(logiReg.coef_)  # regression coefficients, in feature order (Age, Sex)
print(logiReg.intercept_)  # intercept
# For a classifier, score() is the mean accuracy on the given data -- not the
# coefficient of determination (R^2). Here it is the training-set accuracy.
print(logiReg.score(X, y))
[[-0.0042936 -2.41865573]] [ 1.11913633] 0.786756453423
# In-sample predictions: the fitted model's class label for every training row.
py = logiReg.predict(X)
# Cross-tabulate actual outcomes (rows) against predicted labels (columns).
table = pd.crosstab(index=y, columns=py)
table
col_0 | 0 | 1 |
---|---|---|
Survived | ||
0 | 468 | 81 |
1 | 109 | 233 |
# Accuracy from the cross-tabulation: correctly classified rows (the diagonal
# cells, actual == predicted) divided by the total number of rows. Reading the
# counts from `table` keeps this in sync with the data instead of relying on
# numbers copied by hand from the printed output. float() forces true division
# on Python 2 as well. Assumes both classes 0 and 1 occur among the predictions.
float(table.loc[0, 0] + table.loc[1, 1]) / float(table.values.sum())
0.7867564534231201