%autosave 10

import numpy as np  # this imports the NumPy library

data = np.random.standard_normal((5, 1000))  # generate 5 sets with 1000 random numbers each
data[:, :5].round(3)  # print the first five values of each set, rounded to 3 digits

import matplotlib as mpl  # this imports matplotlib
import matplotlib.pyplot as plt  # this imports matplotlib.pyplot
# inline plotting
%matplotlib inline

plt.hist([data[0], data[1], data[2]], label=['Set 0', 'Set 1', 'Set 2'])
plt.grid(True)  # grid for better readability
plt.legend()

plt.figure()  # initialize figure object
plt.grid(True)
for data_set in enumerate(data):  # iterate over all rows
    # plot the running cumulative sum for each row
    plt.plot(data_set[1].cumsum(), label='Set %s' % data_set[0])
plt.legend(loc=0)  # write legend with labels

data.mean(axis=1)  # average value of the 5 sets
data.std(axis=1)  # standard deviation of the 5 sets
np.corrcoef(data).round(3)  # correlation matrix of the 5 data sets

import pandas as pd
import pandas.io.data as pdd
from urllib import urlretrieve

index = pdd.DataReader('^GDAXI', data_source='yahoo', start='2007/3/30')
# e.g. the EURO STOXX 50 ticker symbol would be '^SX5E'
index.head(n=5)
index.info()
index.tail()

index['Returns'] = np.log(index['Close'] / index['Close'].shift(1))
index[['Close', 'Returns']].plot(subplots=True, style='b', figsize=(8, 5))
index['Mov_Vol'] = pd.rolling_std(index['Returns'], window=252) * np.sqrt(252)
index[['Close', 'Returns', 'Mov_Vol']].plot(subplots=True, style='b', figsize=(8, 5))

index["42d"] = pd.rolling_mean(index["Close"], window=42)
index["252d"] = pd.rolling_mean(index["Close"], window=252)
index[["Close", "42d", "252d"]].plot(figsize=(8, 5))
index["diff"] = index["42d"] - index["252d"]
index[["Close", "diff"]].plot(subplots=True, figsize=(8, 5))

sigdiff = 100.0
index["Signal"] = np.where(index["diff"] > sigdiff, 1, 0)
index["Signal"] = np.where(index["diff"] < -sigdiff, -1, index["Signal"])
index[["Close", "diff", "Signal"]].plot(subplots=True, figsize=(8, 5))

# !!AI when writing this up, maybe drop the log -- simple returns are easier to explain
index["Returns"] = np.log(index["Close"] / index["Close"].shift(1))
index["Strategy"] = index["Signal"] * index["Returns"]
index["Earnings"] = index["Strategy"].cumsum()
index[["Close", "Signal", "Earnings"]].plot(subplots=True, figsize=(10, 8))

import pandas as pd
import datetime as dt
from urllib import urlretrieve

es_url = 'http://www.stoxx.com/download/historical_values/hbrbcpe.txt'
vs_url = 'http://www.stoxx.com/download/historical_values/h_vstoxx.txt'
urlretrieve(es_url, 'es.txt')
urlretrieve(vs_url, 'vs.txt')

lines = open('es.txt').readlines()  # reads the whole file line-by-line
lines[:5]  # header is not well formatted
lines[3883:3890]  # from 27.12.2001 there is an additional semicolon
# Note: the format changes half-way through the data set -- an additional
# semicolon at the end of each row, which would throw off pandas.
# We therefore add an extra "DEL" column name so that, after reading the file
# in, we can simply delete that helper column again. Don't forget to delete it!
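# Before rewriting the file, a quick diagnostic can confirm where the extra
# delimiter shows up. This is only a sketch -- counting semicolons per row is
# an illustration of the problem, not part of the original workflow:
import collections
delim_counts = collections.Counter(line.count(';') for line in open('es.txt'))
print delim_counts  # differing counts reflect the header rows and the format change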
lines = open('es.txt').readlines()  # reads the whole file line-by-line
new_file = open('es50.txt', 'w')  # opens a new file
# writes the cleaned-up header line (lines[3]) of the original file, plus the
# additional 'DEL' column name, as the first line of the new file
new_file.writelines('date' + lines[3][:-1].replace(' ', '') + ';DEL' + lines[3][-1])
new_file.writelines(lines[4:-1])  # writes the remaining lines of the original file
new_file.close()  # close (and flush) the file before reading it back
list(open('es50.txt'))[:5]  # opens the new file for inspection

es = pd.read_csv('es50.txt', index_col=0, parse_dates=True, sep=';', dayfirst=True)
del es['DEL']  # delete the helper column
es.info()
vs = pd.read_csv('vs.txt', index_col=0, header=2, parse_dates=True, sep=',', dayfirst=True)
# you can alternatively read from the Web source directly
# without saving the csv file to disk:
# vs = pd.read_csv(vs_url, index_col=0, header=2,
#                  parse_dates=True, sep=',', dayfirst=True)

# Discard the EURO STOXX data from before the VSTOXX series starts -- there is
# no point in keeping it, i.e. drop everything before 2000-01-01.
import datetime as dt
data = pd.DataFrame({'EUROSTOXX': es['SX5E'][es.index > dt.datetime(1999, 12, 31)]})
data = data.join(pd.DataFrame({'VSTOXX': vs['V2TX'][vs.index > dt.datetime(1999, 12, 31)]}))
data.info()
data.head()
# Confirms the stylized fact: when the index falls, volatility spikes.
data.plot(subplots=True, grid=True, style='b', figsize=(10, 5))

# Log returns make two different time series comparable in a mathematically
# consistent way; this is a common pattern.
rets = np.log(data / data.shift(1))
rets.head()
xdat = rets['EUROSTOXX']
ydat = rets['VSTOXX']
model = pd.ols(y=ydat, x=xdat)
model
# Again, confirms the stylized fact: a highly negative correlation.
plt.plot(xdat, ydat, 'r.')
ax = plt.axis()  # grab axis values
x = np.linspace(ax[0], ax[1] + 0.01)
plt.plot(x, model.beta[1] + model.beta[0] * x, 'b', lw=2)
plt.grid(True)
plt.axis('tight')

mpl_dates = mpl.dates.date2num(rets.index)
plt.figure(figsize=(8, 4))
plt.scatter(rets['EUROSTOXX'], rets['VSTOXX'], c=mpl_dates, marker='o')
plt.grid(True)
plt.xlabel('EUROSTOXX')
plt.ylabel('VSTOXX')
plt.colorbar(ticks=mpl.dates.DayLocator(interval=250),
             format=mpl.dates.DateFormatter('%d %b %y'))

import statsmodels.api as sma
import scipy.stats

rets.head()
r1 = rets["EUROSTOXX"]
print r1.head()
r1.values
rets = rets.dropna()

# This is a benchmark; normally distributed data looks like this.
sma.qqplot(np.random.standard_normal(1000), line='s')
pass
# qqplot for the EURO STOXX returns -- classic fat tails.
sma.qqplot(rets["EUROSTOXX"].values, line='s')
pass
# qqplot for the VSTOXX returns -- again classic fat tails.
sma.qqplot(rets["VSTOXX"].values, line='s')
pass

scipy.stats.normaltest(rets["EUROSTOXX"].values)
scipy.stats.normaltest(rets["VSTOXX"].values)
scipy.stats.shapiro(rets["VSTOXX"].values)

def normality_tests(array):
    print "Skew: %s" % (scipy.stats.skew(array), )
    print "Skew test: %s" % (scipy.stats.skewtest(array), )
    print "Kurt: %s" % (scipy.stats.kurtosis(array), )
    print "Kurt test: %s" % (scipy.stats.kurtosistest(array), )
    print "Normal test: %s" % (scipy.stats.normaltest(array), )

normality_tests(np.random.standard_normal(10000))
normality_tests(rets["VSTOXX"].values)
rets.hist(bins=20, figsize=(10, 5))

data = data.dropna()
# Rebase both series to start at 100 so that we compare like with like.
data = data / data.ix[0] * 100
data.head()

invest = 100
cratio = 0.3
data['Equity'] = (1 - cratio) * invest / data['EUROSTOXX'][0]
data['Volatility'] = cratio * invest / data['VSTOXX'][0]
data['Static'] = (data['Equity'] * data['EUROSTOXX']
                  + data['Volatility'] * data['VSTOXX'])
# Not amazing, but it shows how to start. It wouldn't impress an investor.
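# A quick sanity check on the initial sizing above (only a sketch; it relies
# on both series having been rebased to start at 100): with invest = 100 and
# cratio = 0.3, 70 currency units go into the equity index and 30 into
# volatility, i.e. 0.7 and 0.3 "units" of the respective rebased index.
print (1 - cratio) * invest / 100.0  # initial equity units: 0.7
print cratio * invest / 100.0        # initial volatility units: 0.3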
data[['EUROSTOXX', 'Static']].plot(figsize=(10, 5))

for i in xrange(1, len(data)):
    evalue = data['Equity'][i - 1] * data['EUROSTOXX'][i]  # value of the equity position
    vvalue = data['Volatility'][i - 1] * data['VSTOXX'][i]  # value of the volatility position
    tvalue = evalue + vvalue  # total wealth
    data['Equity'][i] = (1 - cratio) * tvalue / data['EUROSTOXX'][i]  # re-allocation of total wealth to the equity ...
    data['Volatility'][i] = cratio * tvalue / data['VSTOXX'][i]  # ... and the volatility position

data['Dynamic'] = (data['Equity'] * data['EUROSTOXX']
                   + data['Volatility'] * data['VSTOXX'])
data.head()
(data['Volatility'] * data['VSTOXX'] / data['Dynamic'])[:5]
(data['Equity'] * data['EUROSTOXX'] / data['Dynamic'])[:5]
data[['EUROSTOXX', 'Dynamic']].plot(figsize=(10, 5))

np.linspace(0, 1, num=20)
import scipy.optimize

def my_investment(cratio):
    invest = 100
    data['Equity'] = (1 - cratio) * invest / data['EUROSTOXX'][0]
    data['Volatility'] = cratio * invest / data['VSTOXX'][0]
    for i in xrange(1, len(data)):
        evalue = data['Equity'][i - 1] * data['EUROSTOXX'][i]  # value of the equity position
        vvalue = data['Volatility'][i - 1] * data['VSTOXX'][i]  # value of the volatility position
        tvalue = evalue + vvalue  # total wealth
        data['Equity'][i] = (1 - cratio) * tvalue / data['EUROSTOXX'][i]  # re-allocation of total wealth to the equity ...
        data['Volatility'][i] = cratio * tvalue / data['VSTOXX'][i]  # ... and the volatility position
    data['Dynamic'] = (data['Equity'] * data['EUROSTOXX']
                       + data['Volatility'] * data['VSTOXX'])
    return -data["Dynamic"][-1]

# reference: http://scipy-lectures.github.io/advanced/mathematical_optimization/
# scipy.optimize.brent(my_investment)  # gives -512.953971939 for cratio = 0.488
print my_investment(0.488)

url = 'http://hopey.netfonds.no/posdump.php?'
url += 'date=%s%s%s&paper=AAPL.O&csv_format=csv' % ('2014', '02', '19')
# you may have to adjust the date since only recent dates are available
urlretrieve(url, 'aapl.csv')

AAPL = pd.read_csv('aapl.csv', index_col=0, header=0, parse_dates=True)
AAPL.info()
AAPL['bid'].plot()
AAPL = AAPL[AAPL.index > dt.datetime(2014, 2, 19, 10, 0, 0)]  # only data later than 10am on that day
# this resamples the record frequency to 5 minutes, using the mean as aggregation rule;
# fillna(method='ffill') is a "forward fill", i.e. use the last valid value.
AAPL_5min = AAPL.resample(rule='5min', how='mean').fillna(method='ffill')
AAPL_5min.head()
AAPL_5min['bid'].plot()
# !!AI how does numexpr factor in here?
AAPL_5min['bid'].apply(lambda x: 2 * 540 - x).plot()  # this mirrors the stock price development around the 540 level
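# The "mirroring" above is simply a reflection about the horizontal level
# m = 540: f(x) = 2 * m - x, so a value 5 above the level maps to 5 below it
# and vice versa. A small numerical check (the sample bid values are made up):
m = 540.0
for x in (535.0, 540.0, 545.0):
    print x, '->', 2 * m - x  # prints 535 -> 545, 540 -> 540, 545 -> 535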