from IPython.display import VimeoVideo VimeoVideo('59324550',width=900,height=768) %matplotlib inline import os from pandas import * import pandas as pd import pandas.io.data as web import datetime import matplotlib import matplotlib.pyplot as plt import statsmodels as sm import seaborn seaborn.set() pd.__version__ startdate = datetime.datetime(2000,1,1) enddate = datetime.datetime.today() df = web.DataReader(['AAPL','GOOGL','TSLA','YNDX'],'yahoo',start=startdate,end=enddate) normvol = df.Volume/df.Volume.YNDX.max() normvol.plot() goog = df.Close.GOOGL.dropna() yndx = df.Close.YNDX.dropna() googret = goog.pct_change() yndxret = yndx.pct_change() pd.rolling_corr(googret,yndxret,20).dropna().plot() pd.rolling_corr(googret,yndxret,250).dropna().plot() from matplotlib.pyplot import * goog.plot(alpha=0.45) pd.rolling_mean(goog,50).plot(color='k') pd.rolling_mean(goog,250).plot(color='k',linestyle='--') ax = twinx() pd.rolling_kurt(goog,250).plot(color='r') %matplotlib inline matplotlib.rcParams['figure.dpi'] = 300 data_dir = './data/sicherung_eex_daten/energiespot/' for filename in os.listdir(data_dir): print filename df = pd.read_excel(data_dir+'energy_intraday_history_2012.xls',sheetname='Intraday-Spot') df.head() df = pd.read_excel(data_dir+'energy_intraday_history_2012.xls',sheetname='Intraday-Spot',header=1,index_col=0) df.head() df.columns df.columns = [column.replace(' ','').replace('\n','') for column in df.columns] df.columns df = pd.read_excel(data_dir+'energy_intraday_history_2012.xls',sheetname='Intraday-Spot',\ header=1, parse_dates = [['Delivery Day','Hour\nfrom']],index_col=0) try: del df['Hour\nto'] except: pass df.columns = [column.replace(' ','').replace('\n','') for column in df.columns] df.head() df.VolumeMW.plot() df.index.get_duplicates() df.ix[df.index.get_duplicates()] dfgby = df.groupby(df.index).first() dfgby.ix['2012-10-28 02:00'] def wavg(group): w = group['VolumeMW']*group['AveragePriceEUR'] d = group return (d*w).sum()/w.sum() grouped = df.groupby(df.index).apply(wavg) grouped.ix['2012-10-28 02:00'] df = grouped df.VolumeMW.plot() df.AveragePriceEUR.plot() ts = df.VolumeMW ts['10/2012'].plot() ts['10-2012'].plot() ts['09-09-2012'].plot() df.sort_index(inplace=True) df.VolumeMW.plot() ts = df.VolumeMW with plt.xkcd(): ts.asfreq(freq='W').plot() ts.asfreq(freq='W').plot() ts.describe() ts.resample('W-FRI').plot() ts.resample('W-SUN',how=['mean','max','min']).plot() resampled = ts.resample('30t') resampled resampled.interpolate() df.resample('D').AveragePriceEUR.plot(style='b') df.resample('D').VolumeMW.plot(secondary_y=True, style='r', alpha = 0.4) ax1 = df.resample('D').AveragePriceEUR.plot(style='b') ax2 = df.resample('D').VolumeMW.plot(secondary_y=True, style='r', alpha = 0.4) ax1.set_ylabel('Average Price (EUR)') ax2.set_ylabel('Volume (MW)') rolling_std(df.AveragePriceEUR.resample('1D'), window = 20).plot() corr_vol_avgp20 = rolling_corr(df.VolumeMW, df.AveragePriceEUR, window=20) corr_vol_avgp200 = rolling_corr(df.VolumeMW, df.AveragePriceEUR, window=200) corr_vol_avgp20.plot(alpha=0.7,label='20day correlation') corr_vol_avgp200.plot(style='red',label='200day correlation') rolling_mean(corr_vol_avgp20,window=100).plot() !ls $data_dir dseries = {} for filename in os.listdir(data_dir): if 'Konflikt' not in filename and 'energy_intraday' in filename: dseries[filename.split('_')[-1][:4]] = pd.read_excel(data_dir+filename,sheetname='Intraday-Spot',\ header=1, parse_dates = [['Delivery Day','Hour\nfrom']],index_col=0) df = concat(dseries.values()) list(df.index.get_duplicates()) df = df.groupby(df.index).first() df.columns = [column.replace(' ','').replace('\n','') for column in df.columns] from matplotlib.pyplot import * df.VolumeMW.asfreq('W-FRI').plot() df.VolumeMW.asfreq('W-MON').plot() df.VolumeMW.asfreq('W-SUN').plot() legend(['FRI','MON','SUN'],loc='best') df.ix[:,['VolumeMW','HighPriceEUR']].plot(subplots=True) !head './data/kl_10400_00_akt_txt.txt' from IPython.display import display, HTML HTML('http://www.dwd.de/bvbw/generator/DWDWWW/Content/Oeffentlichkeit/KU/KU2/KU21/klimadaten/german/format__kl,templateId=raw,property=publicationFile.html/format_kl.html') table_description = pd.read_html('http://www.dwd.de/bvbw/generator/DWDWWW/Content/Oeffentlichkeit/KU/KU2/KU21/klimadaten/german/format__kl,templateId=raw,property=publicationFile.html/format_kl.html',skiprows=1,header=1)[0] table_description.head() widths = table_description.iloc[:,8].diff().values widths col_names = table_description.ix[:,3] widths=[2,5,4,2,2,4,5,1,5,1,5,1,5,1,4,1,4,1,3,1,4,1,1,4,1,4,1,4,1,4,1,4,1,1,4,1,1,4,1,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1] df_temp = pd.read_fwf('./data/kl_10400_00_akt_txt.txt',widths=widths) df_temp.head() df_temp = pd.read_fwf('./data/kl_10400_00_akt_txt.txt',widths=widths,header=None,parse_dates = [[2,3,4]], index_col = 0) df_temp.ix[:5,[14,16,29]] df_temp = df_temp[[14,16,29]].apply(lambda x: x/10.) df_temp.head() df_temp.columns = ['HighTemp','LowTemp','MeanTemp'] df_temp.head() df_temp.index.name = 'Date' df_temp.head() df2 = df.join(df_temp, how='left') from statsmodels.tsa.api import * df2['AmpTemp'] = df2.HighTemp-df2.LowTemp data = df2[['AveragePriceEUR','VolumeMW','AmpTemp']].asfreq('D') model = VAR(data,missing='drop') # NaN will produce LinalgError, hence the missing='drop' results= model.fit() results.plot() results.plot_acorr() lag_order = results.k_ar results.forecast(data.values[-lag_order:],2) results.plot_forecast(2) legend(loc='best') model = pd.ols(y=df2.AveragePriceEUR, x = df2[['AmpTemp','VolumeMW']]) print(model) model.y_fitted.plot() model.y.plot(style='k') model.y_predict.plot(style='red') %reload_ext version_information %version_information numpy, scipy, matplotlib, pandas, statsmodels df = pd.read_html('lista.html')[6] df.head() from IPython.core.display import HTML def css_styling(): styles = open("./styles/custom.css", "r").read() return HTML(styles) css_styling()