from IPython.display import VimeoVideo

VimeoVideo('59324550',width=900,height=768)

%matplotlib inline

import os

from pandas import *
import pandas as pd
import pandas.io.data as web
import datetime
import matplotlib
import matplotlib.pyplot as plt
import statsmodels as sm
import seaborn
seaborn.set()

pd.__version__

startdate = datetime.datetime(2000,1,1)
enddate = datetime.datetime.today()
df = web.DataReader(['AAPL','GOOGL','TSLA','YNDX'],'yahoo',start=startdate,end=enddate)

normvol = df.Volume/df.Volume.YNDX.max()

normvol.plot()

goog = df.Close.GOOGL.dropna()
yndx = df.Close.YNDX.dropna()

googret = goog.pct_change()
yndxret = yndx.pct_change()

pd.rolling_corr(googret,yndxret,20).dropna().plot()
pd.rolling_corr(googret,yndxret,250).dropna().plot()

from matplotlib.pyplot import *

goog.plot(alpha=0.45)
pd.rolling_mean(goog,50).plot(color='k')
pd.rolling_mean(goog,250).plot(color='k',linestyle='--')
ax = twinx()
pd.rolling_kurt(goog,250).plot(color='r')

%matplotlib inline
matplotlib.rcParams['figure.dpi'] = 300

data_dir = './data/sicherung_eex_daten/energiespot/'
for filename in os.listdir(data_dir):
    print filename

df = pd.read_excel(data_dir+'energy_intraday_history_2012.xls',sheetname='Intraday-Spot')

df.head()

df = pd.read_excel(data_dir+'energy_intraday_history_2012.xls',sheetname='Intraday-Spot',header=1,index_col=0)        

df.head()

df.columns

df.columns = [column.replace(' ','').replace('\n','') for column in df.columns]

df.columns

df = pd.read_excel(data_dir+'energy_intraday_history_2012.xls',sheetname='Intraday-Spot',\
                   header=1, parse_dates = [['Delivery Day','Hour\nfrom']],index_col=0)        

try:
    del df['Hour\nto']
except:
    pass
df.columns = [column.replace(' ','').replace('\n','') for column in df.columns]

df.head()

df.VolumeMW.plot()

df.index.get_duplicates()

df.ix[df.index.get_duplicates()]

dfgby = df.groupby(df.index).first()
dfgby.ix['2012-10-28 02:00']

def wavg(group):
    w = group['VolumeMW']*group['AveragePriceEUR']
    d = group
    return (d*w).sum()/w.sum()
    
grouped = df.groupby(df.index).apply(wavg)
grouped.ix['2012-10-28 02:00']

df = grouped
df.VolumeMW.plot()

df.AveragePriceEUR.plot()

ts = df.VolumeMW

ts['10/2012'].plot()

ts['10-2012'].plot()

ts['09-09-2012'].plot()

df.sort_index(inplace=True)

df.VolumeMW.plot()

ts = df.VolumeMW
with plt.xkcd():
    ts.asfreq(freq='W').plot()

ts.asfreq(freq='W').plot()

ts.describe()

ts.resample('W-FRI').plot()

ts.resample('W-SUN',how=['mean','max','min']).plot()

resampled = ts.resample('30t')
resampled

resampled.interpolate()

df.resample('D').AveragePriceEUR.plot(style='b')
df.resample('D').VolumeMW.plot(secondary_y=True, style='r', alpha = 0.4)

ax1 = df.resample('D').AveragePriceEUR.plot(style='b')
ax2 = df.resample('D').VolumeMW.plot(secondary_y=True, style='r', alpha = 0.4)
ax1.set_ylabel('Average Price (EUR)')
ax2.set_ylabel('Volume (MW)')

rolling_std(df.AveragePriceEUR.resample('1D'), window = 20).plot()

corr_vol_avgp20 = rolling_corr(df.VolumeMW, df.AveragePriceEUR, window=20)
corr_vol_avgp200 = rolling_corr(df.VolumeMW, df.AveragePriceEUR, window=200)
corr_vol_avgp20.plot(alpha=0.7,label='20day correlation')
corr_vol_avgp200.plot(style='red',label='200day correlation')

rolling_mean(corr_vol_avgp20,window=100).plot()

!ls $data_dir

dseries = {}
for filename in os.listdir(data_dir):
    if 'Konflikt' not in filename and 'energy_intraday' in filename:
        dseries[filename.split('_')[-1][:4]] = pd.read_excel(data_dir+filename,sheetname='Intraday-Spot',\
                                                             header=1, parse_dates = [['Delivery Day','Hour\nfrom']],index_col=0)      

df = concat(dseries.values())

list(df.index.get_duplicates())

df = df.groupby(df.index).first()
df.columns = [column.replace(' ','').replace('\n','') for column in df.columns]

from matplotlib.pyplot import *

df.VolumeMW.asfreq('W-FRI').plot()
df.VolumeMW.asfreq('W-MON').plot()
df.VolumeMW.asfreq('W-SUN').plot()
legend(['FRI','MON','SUN'],loc='best')

df.ix[:,['VolumeMW','HighPriceEUR']].plot(subplots=True)

!head './data/kl_10400_00_akt_txt.txt'

from IPython.display import display, HTML 
HTML('http://www.dwd.de/bvbw/generator/DWDWWW/Content/Oeffentlichkeit/KU/KU2/KU21/klimadaten/german/format__kl,templateId=raw,property=publicationFile.html/format_kl.html')

table_description = pd.read_html('http://www.dwd.de/bvbw/generator/DWDWWW/Content/Oeffentlichkeit/KU/KU2/KU21/klimadaten/german/format__kl,templateId=raw,property=publicationFile.html/format_kl.html',skiprows=1,header=1)[0]

table_description.head()

widths = table_description.iloc[:,8].diff().values
widths

col_names = table_description.ix[:,3]
widths=[2,5,4,2,2,4,5,1,5,1,5,1,5,1,4,1,4,1,3,1,4,1,1,4,1,4,1,4,1,4,1,4,1,1,4,1,1,4,1,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1]
df_temp = pd.read_fwf('./data/kl_10400_00_akt_txt.txt',widths=widths)

df_temp.head()

df_temp = pd.read_fwf('./data/kl_10400_00_akt_txt.txt',widths=widths,header=None,parse_dates = [[2,3,4]], index_col = 0)
df_temp.ix[:5,[14,16,29]]

df_temp = df_temp[[14,16,29]].apply(lambda x: x/10.)
df_temp.head()

df_temp.columns = ['HighTemp','LowTemp','MeanTemp']
df_temp.head()

df_temp.index.name = 'Date'
df_temp.head()

df2 = df.join(df_temp, how='left')

from statsmodels.tsa.api import *

df2['AmpTemp'] = df2.HighTemp-df2.LowTemp
data = df2[['AveragePriceEUR','VolumeMW','AmpTemp']].asfreq('D')
model = VAR(data,missing='drop')     # NaN will produce LinalgError, hence the missing='drop'

results= model.fit()
results.plot()

results.plot_acorr()

lag_order = results.k_ar
results.forecast(data.values[-lag_order:],2)

results.plot_forecast(2)
legend(loc='best')

model = pd.ols(y=df2.AveragePriceEUR, x = df2[['AmpTemp','VolumeMW']])

print(model)

model.y_fitted.plot()
model.y.plot(style='k')
model.y_predict.plot(style='red')

%reload_ext version_information

%version_information numpy, scipy, matplotlib, pandas, statsmodels

df = pd.read_html('lista.html')[6]
df.head()

from IPython.core.display import HTML
def css_styling():
    styles = open("./styles/custom.css", "r").read()
    return HTML(styles)
css_styling()