from ggplot import ggplot
import ggplot as gg
from IPython.html.widgets import interact
import matplotlib.pyplot as plt
import pandas as pd
import qgrid
import seaborn as sns

# Render matplotlib figures inline in the notebook. This is the explicit
# call form of the `%matplotlib inline` magic: the magic syntax is not valid
# Python, so the call keeps the file parseable as a regular module. It still
# has to run under IPython, which provides get_ipython().
get_ipython().run_line_magic('matplotlib', 'inline')
# Run qgrid's notebook install step before any grid is shown.
qgrid.nbinstall()

# Read the CSV: column names come from row index 7 of the file and the
# 'Date' column is parsed into datetimes. Rows with any missing value are
# then discarded.
snotel_csv = 'mthood_snotel.csv'
df = pd.read_csv(snotel_csv, parse_dates=['Date'], header=7)
df = df.dropna()
# Browse the cleaned frame in a qgrid table.
qgrid.show_grid(df, remote_js=True)

# Begin with the distributions of the key dimensions.
hist_rc = {"figure.figsize": (15, 7)}
sns.set_context(rc=hist_rc)
sns.distplot(df['Precipitation Accumulation (in)'], bins=50)

sns.distplot(df['Snow Water Equivalent (in)'], bins=100)

# Shaded kernel density estimates for the three air-temperature columns.
for temp_col in ('Air Temperature Maximum (degF)',
                 'Air Temperature Minimum (degF)',
                 'Air Temperature Average (degF)'):
    sns.kdeplot(df[temp_col], shade=True)

# Seaborn plus IPython's interact widgets give a quick way to compare any
# two dimensions against each other.
subset = df.drop('Date', axis=1)  # every column except the timestamp
dims = list(subset.columns)


@interact
def linear_comp(x=dims, y=dims):
    """Draw a seaborn joint plot of column `x` against column `y` of `subset`.

    Both parameters default to the full list of column names, which is what
    `interact` uses to offer a choice for each axis.
    """
    sns.jointplot(x=x, y=y, data=subset, size=9)

# How closely do the minimum and maximum temperatures follow one another?
sns.lmplot(x="Air Temperature Minimum (degF)",
           y="Air Temperature Maximum (degF)",
           data=df, size=10)

# Now to use some Pandas timeseries magic to look at monthly trends

# First we need to set the Date column as the Index
indexed = df.set_index('Date')
# Resample to month-start ('MS') frequency, then drop rows containing NaN.
# NOTE(review): this relies on resample() itself handing back an aggregated
# frame; newer pandas releases presumably need an explicit aggregation
# (e.g. .mean()) before .dropna() -- confirm against the pinned pandas version.
resampled = indexed.resample('MS').dropna()
# Browse the resampled frame in a qgrid table.
qgrid.show_grid(resampled, remote_js=True)

# Quick exploratory look: let pandas plot every resampled column at once.
# Seaborn's plotting context is used to set the figure size first.
wide_rc = {"figure.figsize": (18, 9)}
sns.set_context(rc=wide_rc)
resampled.plot()

# ggplot copes well with timeseries, so use it for the long-term trends.
# The plots map 'Date' as a column, so copy the index into one.
resampled['Date'] = resampled.index


def _monthly_trend(column):
    """Return a ggplot line chart of `column` over 'Date' with a smoother."""
    mapping = gg.aes(x='Date', y=column)
    return ggplot(mapping, data=resampled) + gg.geom_line() + gg.stat_smooth()


_monthly_trend('Snow Water Equivalent (in)')

# What about temperatures?
_monthly_trend('Air Temperature Average (degF)')

# For monthly statistics, tag every row with the month taken from its
# index, then average each column within a month.
resampled['Month'] = resampled.index.month
by_month = resampled.groupby('Month')
monthly_grouped = by_month.mean()
# Matplotlib styles can be applied temporarily through a context manager;
# try the 'bmh' style for this plot.
monthly_rc = {"figure.figsize": (18, 9)}
with plt.style.context('bmh'):
    sns.set_context(rc=monthly_rc)
    monthly_grouped.plot()

res_dims = list(resampled.columns)


@interact
def res_comp(x=res_dims, y=res_dims):
    """Draw a seaborn joint plot of column `x` against column `y` of `resampled`.

    Both parameters default to the full list of column names, which is what
    `interact` uses to offer a choice for each axis.
    """
    sns.jointplot(x=x, y=y, data=resampled, size=9)

# Browse the per-month means in a qgrid table. remote_js=True is passed to
# match the other show_grid calls in this file, so this grid is produced the
# same way as the earlier ones.
qgrid.show_grid(monthly_grouped, remote_js=True)

# Back to ggplot: copy the index of the grouped frame into a 'Month' column
# so it can be mapped to the x axis.
monthly_grouped['Month'] = monthly_grouped.index
swe_by_month = gg.aes(x='Month', y='Snow Water Equivalent (in)')
ggplot(swe_by_month, data=monthly_grouped) + gg.geom_line()

# Facet by month to compare the monthly distributions of a measure.
def _monthly_density(column):
    """Return a density plot of `column` from `resampled`, faceted on 'Month'.

    The x axis is labelled with the column name and the y axis with "Freq".
    """
    return (ggplot(gg.aes(x=column), data=resampled)
            + gg.geom_density(alpha=0.25)
            + gg.facet_wrap('Month')
            + gg.labs(column, "Freq"))


_monthly_density('Air Temperature Average (degF)')

_monthly_density('Snow Water Equivalent (in)')

# Seaborn's faceting is powerful too: revisit the monthly average
# temperatures, this time as a FacetGrid with one row per month.
months = resampled['Month'].unique()
months.sort()  # in place; the sorted values fix the row and hue order below
months
facet_options = dict(row="Month", hue="Month", palette="deep",
                     size=1.8, aspect=4,
                     row_order=months, hue_order=months)
g = sns.FacetGrid(resampled, **facet_options)
g.map(sns.distplot, 'Air Temperature Average (degF)');

# Pairwise scatter plots of the main measures, colored by month. The
# datetime index is dropped so the grid only sees the selected columns.
pair_fields = ['Snow Water Equivalent (in)', 'Precipitation Accumulation (in)',
               'Air Temperature Average (degF)', 'Month']
pair_cols = resampled[pair_fields].reset_index(drop=True)
pair_cols.head()
pair = sns.PairGrid(pair_cols, hue="Month", palette="GnBu_d")
pair.map(plt.scatter)
pair.add_legend()

from IPython.core.display import HTML

# Read the notebook's custom stylesheet and hand it to IPython as HTML.
# The file is opened in a `with` block so the handle is closed as soon as
# the contents have been read, instead of being left open.
#
# Use the following path instead if running locally:
#     "styles/custom.css"
#
# This path is for nbviewer:
with open("custom.css", "r") as css_file:
    styles = css_file.read()

HTML(styles)