# Load the dataset
import pandas as pd
# The path is relative, so it resolves against the current working directory.
filename = '../data/cars93.csv'
df = pd.read_csv(filepath_or_buffer=filename)

# Structural overview: dimensions, column labels, then the first 8 rows.
df.shape
df.columns
df.head(n=8)

# Look at a single column, then at the dtype pandas inferred for each column.
df.loc[:, 'MPG.city']
df.dtypes
import matplotlib.pyplot as plt
%matplotlib inline
# Histogram of the Horsepower column, bucketed into 10 bins.
df.Horsepower.hist(bins=10)

# Bracket access and attribute access return the same column.
df['Manufacturer'].head()
df.Manufacturer.head()

# Summary statistics of the MPG.city column.
df['MPG.city'].describe()

# Pairwise correlation between numeric columns only. The frame also holds
# text columns (e.g. Origin), and since pandas 2.0 DataFrame.corr() no longer
# drops those silently -- it raises instead. Selecting the numeric columns
# explicitly behaves the same on old and new pandas versions.
df.select_dtypes(include='number').corr()
import seaborn as sns
# Correlation matrix of the numeric columns only. The frame also holds text
# columns (e.g. Origin), and since pandas 2.0 DataFrame.corr() raises on them
# instead of dropping them silently; select_dtypes works on every version.
corr = df.select_dtypes(include='number').corr()

# Render the matrix as a heatmap on a 9x9-inch figure, labelling both axes
# with the column names. Passing ax explicitly ties the plot to the axes
# created here rather than to whatever the "current" axes happens to be.
fig, ax = plt.subplots(1, 1, figsize=(9, 9))
sns.heatmap(
    corr,
    xticklabels=corr.columns.values,
    yticklabels=corr.columns.values,
    ax=ax,
)
# Summary statistics and the dtype of every column.
df.describe()
df.dtypes

# Number of rows per distinct value in Type and in Origin.
df['Type'].value_counts()
df['Origin'].value_counts()

# Boolean mask selecting rows whose Origin is 'non-USA' and Type is 'Midsize';
# the shape of the filtered frame gives the number of matching rows.
condition = df['Origin'].eq('non-USA') & df['Type'].eq('Midsize')
condition
df.loc[condition].shape

# Same pattern for missing values: rows where the chosen column is null.
col = 'Origin'
condition = df[col].isnull()
df.loc[condition].shape
import numpy as np
# log(1 + Weight) for every row. np.log1p is applied to the whole column in
# one vectorized call instead of invoking a Python lambda per element.
df['log_weight'] = np.log1p(df['Weight'])

# Compare the distribution of the transformed column with the raw one.
df['log_weight'].hist(bins=100)
df['Weight'].hist(bins=100)