import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
def create_toy_dataset(n=100, random_state=None):
    """Generate a toy regression dataset containing a feature interaction.

    The design matrix has 4 columns:
      - cols 0-1: standalone features (col 0 is pure noise, col 1 enters
        the target additively),
      - cols 2-3: features whose *product* drives the target.

    The target is ``y = X[:, 1] + 2 * X[:, 2] * X[:, 3]``, so a purely
    additive model (e.g. plain linear regression) cannot fit the
    interaction term.

    Parameters
    ----------
    n : int, default 100
        Number of samples to generate.
    random_state : int | None, default None
        Seed for the random generator; pass an int for reproducible data.

    Returns
    -------
    (X, y) : tuple of np.ndarray
        ``X`` has shape ``(n, 4)``; ``y`` has shape ``(n,)``.
    """
    rng = np.random.default_rng(random_state)
    # Features without any interaction effect (col 0 is unused noise).
    ind_vars = rng.standard_normal((n, 2))
    # Features that form the interaction term.
    int_var_src = rng.standard_normal((n, 2))
    X = np.hstack([ind_vars, int_var_src])
    y = ind_vars[:, 1] + 2 * int_var_src[:, 0] * int_var_src[:, 1]
    return X, y
# --- Experiment: linear regression vs. random forest on interaction data ---
X, y = create_toy_dataset(2000)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Linear regression is additive, so it cannot represent the
# multiplicative interaction term in y.
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

# A random forest can approximate the interaction via recursive splits.
rf = RandomForestRegressor().fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Accuracy check: predicted-vs-actual scatter plot.
# Points on the diagonal indicate perfect predictions.
fig, ax = plt.subplots(figsize=(5.0, 5.0))
ax.plot([-6, 6], [-6, 6])  # identity line (perfect prediction)
ax.set_xlim([-6, 6])
ax.set_ylim([-6, 6])  # match x-limits so the identity line is the 45-degree diagonal
ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
ax.scatter(y_test, y_pred_lr, alpha=0.5, label='Linear Regression')
ax.scatter(y_test, y_pred_rf, alpha=0.5, label='Random Forest')
ax.legend()
plt.show()

# Feature importances: the interaction-source features (cols 2 and 3)
# should dominate, col 1 contributes additively, and col 0 (pure noise)
# should be near zero.
print(rf.feature_importances_)
# Example output from one run:
# [0.03665598 0.24377962 0.35179661 0.36776779]