import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

# scatter_matrix lives in pandas.plotting on current pandas releases; the
# older pandas.tools.plotting location is kept as a fallback for legacy installs.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix
%matplotlib inline
# Load the headerless CSV and give every column a positional name: c0, c1, ...
df = pd.read_csv("data/word2vec_pos_neg.csv", header=None)
columns = ["c{}".format(i) for i in range(df.shape[1])]
df.columns = columns
# Preview the first two rows to confirm the renamed layout.
df.head(2)
| | c0 | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c9 | ... | c16 | c17 | c18 | c19 | c20 | c21 | c22 | c23 | c24 | c25 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 4 | 0.20660 | 0.41592 | 0.45056 | -0.23147 | 0.58613 | -0.05117 | 0.17879 | 0.28899 | 0.19298 | ... | 0.77105 | -0.74493 | -0.20409 | 0.14887 | 0.76356 | 1.09495 | -0.24309 | -0.11910 | -0.09759 | -0.66048 |
1 | 4 | 0.10013 | 0.03716 | 0.25556 | -0.34029 | 0.26409 | -0.05785 | 0.21658 | 0.35261 | 0.26524 | ... | 0.23682 | -0.43798 | -0.09044 | 0.22246 | 0.38293 | 0.59845 | -0.04738 | 0.06381 | -0.01929 | -0.53391 |
2 rows × 26 columns
# Scatter c1 against c2, drawing rows with c0 == 0 in red and c0 == 4 in blue.
for label, colour in ((0, 'red'), (4, 'blue')):
    members = df["c0"] == label  # boolean mask for the current c0 value
    x1 = df.loc[members, "c1"]
    x2 = df.loc[members, "c2"]
    plt.scatter(x1, x2, marker='o', color=colour)
plt.xlabel("c1")
plt.ylabel("c2")
<matplotlib.text.Text at 0x150f36c10>
# Scatter c1 against c3, drawing rows with c0 == 0 in red and c0 == 4 in blue.
for label, colour in ((0, 'red'), (4, 'blue')):
    members = df["c0"] == label  # boolean mask for the current c0 value
    x1 = df.loc[members, "c1"]
    x2 = df.loc[members, "c3"]
    plt.scatter(x1, x2, marker='o', color=colour)
plt.xlabel("c1")
plt.ylabel("c3")
<matplotlib.text.Text at 0x159de7f50>
regressor = 'c4'
# Scatter c1 against the column named by `regressor`, one colour per c0 value
# (0 -> red, 4 -> blue).
for label, colour in ((0, 'red'), (4, 'blue')):
    members = df["c0"] == label  # boolean mask for the current c0 value
    x1 = df.loc[members, "c1"]
    x2 = df.loc[members, regressor]
    plt.scatter(x1, x2, marker='o', color=colour)
plt.xlabel("c1")
plt.ylabel(regressor)
<matplotlib.text.Text at 0x15b38fd90>
regressor = 'c5'
# Scatter c1 against the column named by `regressor`, one colour per c0 value
# (0 -> red, 4 -> blue).
for label, colour in ((0, 'red'), (4, 'blue')):
    members = df["c0"] == label  # boolean mask for the current c0 value
    x1 = df.loc[members, "c1"]
    x2 = df.loc[members, regressor]
    plt.scatter(x1, x2, marker='o', color=colour)
plt.xlabel("c1")
plt.ylabel(regressor)
<matplotlib.text.Text at 0x15b467690>
regressor = 'c6'
# Scatter c1 against the column named by `regressor`, one colour per c0 value
# (0 -> red, 4 -> blue).
for label, colour in ((0, 'red'), (4, 'blue')):
    members = df["c0"] == label  # boolean mask for the current c0 value
    x1 = df.loc[members, "c1"]
    x2 = df.loc[members, regressor]
    plt.scatter(x1, x2, marker='o', color=colour)
plt.xlabel("c1")
plt.ylabel(regressor)
<matplotlib.text.Text at 0x15b6bbbd0>
regressor = 'c7'
# Scatter c1 against the column named by `regressor`, one colour per c0 value
# (0 -> red, 4 -> blue).
for label, colour in ((0, 'red'), (4, 'blue')):
    members = df["c0"] == label  # boolean mask for the current c0 value
    x1 = df.loc[members, "c1"]
    x2 = df.loc[members, regressor]
    plt.scatter(x1, x2, marker='o', color=colour)
plt.xlabel("c1")
plt.ylabel(regressor)
<matplotlib.text.Text at 0x15b34ac10>
regressor = 'c8'
# Scatter c1 against the column named by `regressor`, one colour per c0 value
# (0 -> red, 4 -> blue).
for label, colour in ((0, 'red'), (4, 'blue')):
    members = df["c0"] == label  # boolean mask for the current c0 value
    x1 = df.loc[members, "c1"]
    x2 = df.loc[members, regressor]
    plt.scatter(x1, x2, marker='o', color=colour)
plt.xlabel("c1")
plt.ylabel(regressor)
<matplotlib.text.Text at 0x15b847510>
regressor = 'c9'
# Scatter c1 against the column named by `regressor`, one colour per c0 value
# (0 -> red, 4 -> blue).
for label, colour in ((0, 'red'), (4, 'blue')):
    members = df["c0"] == label  # boolean mask for the current c0 value
    x1 = df.loc[members, "c1"]
    x2 = df.loc[members, regressor]
    plt.scatter(x1, x2, marker='o', color=colour)
plt.xlabel("c1")
plt.ylabel(regressor)
<matplotlib.text.Text at 0x15b96c810>
# Pairwise scatter plots for every column of df, with a kernel density
# estimate on the diagonal.
# Reference: http://pandas.pydata.org/pandas-docs/version/0.15.0/visualization.html#scatter-matrix-plot
matrix_options = dict(alpha=0.2, figsize=(14, 14), diagonal='kde')
# The trailing semicolon keeps the notebook from echoing the returned axes array.
scatter_matrix(df, **matrix_options);