%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model, datasets
# Generate random regression data
np.random.seed(0)
regdata = datasets.make_regression(n_samples=100, n_features=1, noise=20.0)
# Fit the model and display its parameters
lin = linear_model.LinearRegression()
lin.fit(regdata[0], regdata[1])
print("coef and intercept:", lin.coef_, lin.intercept_)
print("socre :",lin.score(regdata[0],regdata[1]))
xr = [-2.5 , 2.5]
plt.plot(xr, lin.coef_ * xr + lin.intercept_)
plt.scatter(regdata[0],regdata[1])
plt.show()
coef and intercept: [ 42.85335573] -1.62836365406
score : 0.803335728656
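As a quick sanity check, predict is just the affine map x ↦ coef_ · x + intercept_; this minimal sketch (reusing regdata and lin from above) reproduces the predictions and the R^2 score by hand.

X, y = regdata
manual = X.dot(lin.coef_) + lin.intercept_   # same values as lin.predict(X)
print(np.allclose(manual, lin.predict(X)))   # True
r2 = 1 - ((y - manual) ** 2).sum() / ((y - y.mean()) ** 2).sum()
print(r2)                                    # matches lin.score(X, y) above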
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model, datasets
# Load the dataset
diabetes = datasets.load_diabetes()
# Split the data into training and evaluation sets
data_train = diabetes.data[:300]
target_train = diabetes.target[:300]
data_test = diabetes.data[-300:]
target_test = diabetes.target[-300:]
# Train the model
lin = linear_model.LinearRegression()
lin.fit(data_train, target_train)
#print("coef and intercept:", lin.coef_, lin.intercept_)
# Display the goodness of fit on the evaluation data
print("score :", lin.score(data_test, target_test))
# Predict the first evaluation sample and show it next to the actual value
print("prediction :", lin.predict(data_test[0].reshape(1, -1)))  # a single sample must be 2-D
print("Actual value:", target_test[0])
score : 0.509135026149
prediction : [ 167.3982978]
Actual value: 235.0
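One caveat on the split above: the diabetes set has only 442 samples, so the slices [:300] and [-300:] overlap by 158 rows, which makes the test score optimistic. A minimal sketch of a disjoint random split follows; train_test_split lives in sklearn.model_selection from 0.18 on (in older releases such as 0.17 it is in sklearn.cross_validation).

from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split  # sklearn < 0.18: sklearn.cross_validation

diabetes = datasets.load_diabetes()
# Hold out a disjoint 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.2, random_state=0)
lin = linear_model.LinearRegression().fit(X_train, y_train)
print("score :", lin.score(X_test, y_test))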
diabetes.data.shape
(442, 10)
diabetes.target.shape
(442,)
diabetes.data[1]
array([-0.00188202, -0.04464164, -0.05147406, -0.02632783, -0.00844872, -0.01916334, 0.07441156, -0.03949338, -0.06832974, -0.09220405])
diabetes.target[1]
75.0
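For orientation, each of the ten columns is a standardized patient measurement; on recent scikit-learn releases the column names can be listed directly (feature_names may be missing on very old versions such as the 0.17 used here).

# Names of the ten features: age, sex, bmi, bp and six blood serum measurements
print(diabetes.feature_names)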
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
# Load the diabetes dataset
diabetes = datasets.load_diabetes()
# Use only one feature
diabetes_X = diabetes.data[:, np.newaxis, 2]
# Split the data into training/testing sets
diabetes_X_train = diabetes_X[:-20]
diabetes_X_test = diabetes_X[-20:]
# Split the targets into training/testing sets
diabetes_y_train = diabetes.target[:-20]
diabetes_y_test = diabetes.target[-20:]
# Create linear regression object
regr = linear_model.LinearRegression()
# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)
# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print("Mean squared error: %.2f"
      % np.mean((regr.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % regr.score(diabetes_X_test, diabetes_y_test))
# Plot outputs
plt.scatter(diabetes_X_test, diabetes_y_test, color='black')
plt.plot(diabetes_X_test, regr.predict(diabetes_X_test), color='blue',
linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()
Coefficients:
 [ 938.23786125]
Mean squared error: 2548.07
Variance score: 0.47
diabetes_X_train[0]
array([ 0.06169621])
diabetes_y_train[0]
151.0
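The two reported numbers can also be cross-checked against sklearn.metrics; a minimal sketch, reusing regr and the test arrays from above:

from sklearn.metrics import mean_squared_error, r2_score

y_pred = regr.predict(diabetes_X_test)
print("MSE:", mean_squared_error(diabetes_y_test, y_pred))  # ~2548.07
print("R^2:", r2_score(diabetes_y_test, y_pred))            # ~0.47, same as regr.score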
# -*- coding: utf-8 -*-
import sklearn.datasets as datasets
from sklearn.linear_model import LogisticRegression
from sklearn import cross_validation  # in scikit-learn >= 0.18 this lives in sklearn.model_selection
# Load the dataset
iris = datasets.load_iris()
# Drop the samples of class 2, leaving a binary problem
data = iris.data[iris.target != 2]
target = iris.target[iris.target != 2]
# Fit a logistic regression and evaluate it with 5-fold cross-validation
logi = LogisticRegression()
scores = cross_validation.cross_val_score(logi, data, target, cv=5)
# Display the per-fold accuracies
print(scores)
[ 1. 1. 1. 1. 1.]
data.shape
(100, 4)
target.shape
(100,)
data[0]
array([ 5.1, 3.5, 1.4, 0.2])
target[0]
0
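Since every fold is classified perfectly, the class probabilities are worth a look as well; a minimal sketch that refits logi on the full binary set (names reused from above):

logi.fit(data, target)
# Estimated probability of class 0 and class 1 for the first sample
print(logi.predict_proba(data[:1]))
print(logi.predict(data[:1]))  # the predicted label itself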
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # 3-D plotting toolkit (only needed for the next example)
from sklearn import datasets
# from sklearn.decomposition import PCA  # PCA (needed for the 3-D example below)
# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2] # we only take the first two features.
Y = iris.target
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
plt.figure(2, figsize=(8, 6))
plt.clf()
# Plot the training points
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
plt.show()
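To tell the three species apart by name, the same scatter can be drawn class by class with a legend; a minimal sketch reusing X, Y, and iris from above:

for label, name in enumerate(iris.target_names):
    plt.scatter(X[Y == label, 0], X[Y == label, 1], label=name)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.legend()
plt.show()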
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # 3-D plotting toolkit
from sklearn import datasets
from sklearn.decomposition import PCA  # PCA is used below, so the import must be active
# import some data to play with
iris = datasets.load_iris()
Y = iris.target
# plot the first three PCA dimensions
fig = plt.figure(1, figsize=(8, 6))
ax = Axes3D(fig, elev=-150, azim=110)
X_reduced = PCA(n_components=3).fit_transform(iris.data)
ax.scatter(X_reduced[:, 0], X_reduced[:, 1], X_reduced[:, 2], c=Y,
cmap=plt.cm.Paired)
ax.set_title("First three PCA directions")
ax.set_xlabel("1st eigenvector")
ax.w_xaxis.set_ticklabels([])
ax.set_ylabel("2nd eigenvector")
ax.w_yaxis.set_ticklabels([])
ax.set_zlabel("3rd eigenvector")
ax.w_zaxis.set_ticklabels([])
plt.show()
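How much structure the three directions actually capture can be read off the fitted model; a minimal sketch that refits the PCA on iris.data as above:

from sklearn.decomposition import PCA

pca = PCA(n_components=3).fit(iris.data)
print(pca.explained_variance_ratio_)        # variance fraction per component
print(pca.explained_variance_ratio_.sum())  # total variance explained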