%pylab inline

from pandas import *

# Subplotting example
# Three independent random walks: each is the running sum of 50 draws
# from randn.
a, b, c = (randn(50).cumsum() for _ in range(3))

# One column per walk, labelled 'a', 'b' and 'c'.
df = DataFrame(dict(a=a, b=b, c=c))

# subplots=True gives every column its own axes ...
df.plot(subplots=True)

# ... while the default draws all columns on one shared axes.
df.plot()

# Adding options
# Generate a random walk (cumulative sum of 50 randn draws)
a = np.cumsum(randn(50))

# Wrap the array in a single-column DataFrame so it gains .plot()
df = DataFrame(a)

# Options are passed as keyword arguments to plot()
df.plot(kind='line', title='Random Plot')

# Pandas data frames have a built-in plot function with sensible defaults.
# Build four columns (A-D) of 10 cumulative random values each, indexed
# by 0, 10, ..., 90.
df = DataFrame(
    data=np.random.randn(10, 4).cumsum(axis=0),
    index=np.arange(0, 100, 10),
    columns=list('ABCD'),
)

df

# The index becomes the x axis and the column names become legend labels.
df.plot(title='Plot automatically finds Indexes and Places Labels')

# Load the Titanic passenger table from the local data directory.
titanic = read_csv("data/titanic.csv")

titanic

# Sum of the `survived` column within each passenger class
# (a survivor count, assuming the column is 0/1 coded).
SurvivedByClass = titanic.groupby('pclass')['survived'].sum()

SurvivedByClass

SurvivedByClass.plot(title='Titanic Survivors by Class', kind='bar')

# Combine multiple groups and plot
titanic.groupby(['sex', 'pclass'])['survived'].sum().plot(
    title='Titanic Survivors by Class and Gender',
    kind='barh',
)

# Plot CrossTabs
# Rows: (pclass, sex) pairs; columns: survived cast to False/True.
death_counts = crosstab(
    [titanic['pclass'], titanic['sex']],
    titanic['survived'].astype(bool),
)

death_counts.plot(
    kind='bar',
    stacked=True,
    grid=False,
    color=['white', 'blue'],
    title='Passenger Outcomes by Gender and Class',
)


# Histogram of a particular column
titanic.fare.hist(grid=False, bins=25, range=(0, 100))

# Density plot -- Estimate the 'true' distribution
# Missing fares are dropped first so the kernel density estimate only
# sees real values.
titanic.fare.dropna().plot(kind='kde', xlim=(0, 100))

# density=True rescales the bars so the histogram integrates to 1.
# (The older `normed` keyword was removed from matplotlib's hist.)
titanic.fare.hist(bins=25, density=True, color='lightseagreen')

# Same KDE over a wider x range, drawn as a red dashed line.
titanic.fare.dropna().plot(kind='kde', xlim=(0, 600), style='r--')

# One box per passenger class.
titanic.boxplot(column='fare', by='pclass', grid=False)

# Combine pandas plots by running them together -- if pandas thinks they can be overlapped, it will
# density=True (the replacement for the removed `normed` keyword) puts the
# histogram on the same probability-density scale as the KDE curve.
titanic.fare.hist(bins=25, density=True, color='lightseagreen')
# Drop missing fares before estimating the density.
titanic.fare.dropna().plot(kind='kde', xlim=(0, 100), style='r--')
titanic.boxplot(column='fare', by='pclass', grid=False)


import matplotlib.pyplot as plt

# Scatter 50 random values against their positions 0..49.
x, y = np.arange(50), np.random.randn(50)
plt.scatter(x=x, y=y)


%pylab inline

# Single plot: a 50-step random walk drawn in red
a = np.cumsum(randn(50))

plt.plot(a, color='red')


# Start a new, empty figure to hold several axes
fig = plt.figure()

# Two rows, one column, first plot (211 is shorthand for 2, 1, 1)
ax1 = fig.add_subplot(211)
ax1.plot(a, color='red')

# Two rows, one column, second plot
ax2 = fig.add_subplot(212)
ax2.scatter(np.arange(50), randn(50))

# Exercise: Try 1 row, two columns
# Exercise: Try 1 row, one column

# Create the figure and a 2x3 array of axes in one call;
# ax[row][col] selects an individual panel.
fig, ax = plt.subplots(nrows=2, ncols=3)

ax[0][1].plot(randn(50), linestyle='-', color='green')
ax[1][2].scatter(np.arange(50), randn(50), color='red')
ax[1][0].hist(randn(1000))
plt.show()  # Similar to print()


# Exercise: Swap the top row and bottom row plots
# Exercise: Change the line style to dotted and add circles as markers
# Exercise: change the bins to 100 for the histogram

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# Three 1000-step random walks, all black: solid, dashed and dotted.
ax.plot(randn(1000).cumsum(), 'k', label='one')
ax.plot(randn(1000).cumsum(), 'k--', label='two')
ax.plot(randn(1000).cumsum(), 'k.', label='three')
ax.set_title('Three random lines')
ax.legend(loc='best')

# Save the file
# Save through the figure object and do it *before* plt.show(): once the
# figure has been shown, plt.savefig() would act on a new, empty current
# figure and write a blank PDF.
fig.savefig('ThreeRandomLines.pdf')
plt.show()

#Exercise: Change the color and line style of each line using the minimum amount of characters

%pylab inline

import pandas as pd

### ScatterPlots from Data Frames

# Combine pandas with matplotlib for scatterplots
# Offensive Statistics from MLB -- 2008.  Glossary: http://www.baseballprospectus.com/glossary/index.php?context=all&category=true
baseball = pd.read_csv("data/baseball.csv")

baseball.head()


# A figure with a single axes (one row, one column)
fig, ax = plt.subplots()
ax.set(title="Hits vs. At Bats", xlabel="At Bats", ylabel="Hits")
ax.scatter(baseball['ab'], baseball['h'])


# A figure with a single axes (one row, one column)
fig, ax = plt.subplots()
ax.set(
    title="At Bats vs. Hits.  Size = Home Runs",
    xlabel="At Bats",
    ylabel="Hits",
)
# Marker area scales with home runs; alpha keeps overlapping points visible.
plt.scatter(baseball['ab'], baseball['h'], s=baseball['hr'] * 10, alpha=0.5)
plt.xlim(0, 700)
plt.ylim(0, 200)

baseball

# Use xkcd sketch styling as a context manager so it applies only to this
# figure; a bare xkcd() call changes rcParams for every later plot.
with plt.xkcd():
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)  # one row, one column, first plot
    ax.set_title("At Bats vs. Hits, Size and Color = Home Runs")
    ax.set_xlabel("At Bats")
    ax.set_ylabel("Hits")
    # Map home runs to both marker size and colour, as the title states
    # (size uses the same hr*10 scaling as the previous plot).
    ax.scatter(baseball.ab, baseball.h, s=baseball.hr * 10,
               c=baseball.hr, cmap='hot')
    ax.set_xlim(0, 700)
    ax.set_ylim(0, 200)


# With Titanic Data
import seaborn as sns
sns.set(style="darkgrid")

# seaborn's bundled example dataset, not the local CSV
df = sns.load_dataset("titanic")

# Fixed colour per value of the `sex` column
pal = dict(male="#6495ED", female="#F08080")
# x and y are passed by keyword: current seaborn releases no longer accept
# them positionally. logistic=True fits a logistic regression, and
# y_jitter spreads the survived values vertically so points do not
# overplot.
g = sns.lmplot(x="age", y="survived", col="sex", hue="sex", data=df,
               palette=pal, y_jitter=.02, logistic=True)
g.set(xlim=(0, 80), ylim=(-.05, 1.05))

# Seaborn with Anscombe Data
import seaborn as sns
sns.set(style="ticks")

df = sns.load_dataset("anscombe")
# One panel per dataset, wrapped two to a row, with no confidence band.
# x and y are passed by keyword (required by current seaborn), and the
# panel-size argument is `height` (named `size` before seaborn 0.9).
sns.lmplot(x="x", y="y", col="dataset", hue="dataset", data=df,
           col_wrap=2, ci=None, palette="muted", height=4,
           scatter_kws={"s": 50, "alpha": 1})