%autosave 10

import numpy as np  # this imports the NumPy library

data = np.random.standard_normal((5, 1000))  # generate 5 sets with 1000 random numbers each
data[:, :5].round(3)  # print the first five values of each set, rounded to 3 digits

import matplotlib as mpl  # this imports matplotlib
import matplotlib.pyplot as plt  # this imports matplotlib.pyplot
# inline plotting
%matplotlib inline

plt.hist([data[0], data[1], data[2]], label=['Set 0', 'Set 1', 'Set 2'])
plt.grid(True)  # grid for better readability
plt.legend()

plt.figure()  # initialize figure object
plt.grid(True)
for data_set in enumerate(data):  # iterate over all rows
    # plot the running cumulative sum for each row
    plt.plot(data_set[1].cumsum(), label='Set %s' % data_set[0])
plt.legend(loc=0)  # write legend with labels

data.mean(axis=1)  # average value of the 5 sets
data.std(axis=1)  # standard deviation of the 5 sets
np.corrcoef(data).round(3)  # correlation matrix of the 5 data sets

import pandas as pd
import pandas.io.data as pdd
from urllib import urlretrieve

index = pdd.DataReader('^GDAXI', data_source='yahoo', start='2007/3/30')
# e.g. the EURO STOXX 50 ticker symbol would be '^SX5E'
index.head(n=5)
index.info()
index.tail()

index['Returns'] = np.log(index['Close'] / index['Close'].shift(1))
index[['Close', 'Returns']].plot(subplots=True, style='b', figsize=(8, 5))
index['Mov_Vol'] = pd.rolling_std(index['Returns'], window=252) * np.sqrt(252)
index[['Close', 'Returns', 'Mov_Vol']].plot(subplots=True, style='b', figsize=(8, 5))

index["42d"] = pd.rolling_mean(index["Close"], window=42)
index["252d"] = pd.rolling_mean(index["Close"], window=252)
index[["Close", "42d", "252d"]].plot(figsize=(8, 5))
index["diff"] = index["42d"] - index["252d"]
index[["Close", "diff"]].plot(subplots=True, figsize=(8, 5))

sigdiff = 100.0
index["Signal"] = np.where(index["diff"] > sigdiff, 1, 0)
index["Signal"] = np.where(index["diff"] < -sigdiff, -1, index["Signal"])
index[["Close", "diff", "Signal"]].plot(subplots=True, figsize=(8, 5))

# !!AI when writing this up, maybe drop the log -- simple returns are easier to explain
index["Returns"] = np.log(index["Close"] / index["Close"].shift(1))
index["Strategy"] = index["Signal"] * index["Returns"]
index["Earnings"] = index["Strategy"].cumsum()
index[["Close", "Signal", "Earnings"]].plot(subplots=True, figsize=(10, 8))

import pandas as pd
import datetime as dt
from urllib import urlretrieve

es_url = 'http://www.stoxx.com/download/historical_values/hbrbcpe.txt'
vs_url = 'http://www.stoxx.com/download/historical_values/h_vstoxx.txt'
urlretrieve(es_url, 'es.txt')
urlretrieve(vs_url, 'vs.txt')

lines = open('es.txt').readlines()  # reads the whole file line-by-line
lines[:5]  # header is not well formatted
lines[3883:3890]  # from 27.12.2001 there is an additional semicolon
# Note: the format changes half-way through the data set -- an additional
# semicolon at the end of each row, which would throw off pandas.
# We therefore add an extra "DEL" column name so that, after reading the file
# in, we can simply delete that helper column again. Don't forget to delete it!
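# Before rewriting the file, a quick diagnostic can confirm where the extra
# delimiter shows up. This is only a sketch -- counting semicolons per row is
# an illustration of the problem, not part of the original workflow:
import collections
delim_counts = collections.Counter(line.count(';') for line in open('es.txt'))
print delim_counts  # differing counts reflect the header rows and the format change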
lines = open('es.txt').readlines()  # reads the whole file line-by-line
new_file = open('es50.txt', 'w')  # opens a new file
# writes the cleaned-up header line (lines[3]) of the original file, plus the
# additional 'DEL' column name, as the first line of the new file
new_file.writelines('date' + lines[3][:-1].replace(' ', '') + ';DEL' + lines[3][-1])
new_file.writelines(lines[4:-1])  # writes the remaining lines of the original file
new_file.close()  # close (and flush) the file before reading it back
list(open('es50.txt'))[:5]  # opens the new file for inspection

es = pd.read_csv('es50.txt', index_col=0, parse_dates=True, sep=';', dayfirst=True)
del es['DEL']  # delete the helper column
es.info()
vs = pd.read_csv('vs.txt', index_col=0, header=2, parse_dates=True, sep=',', dayfirst=True)
# you can alternatively read from the Web source directly
# without saving the csv file to disk:
# vs = pd.read_csv(vs_url, index_col=0, header=2,
#                  parse_dates=True, sep=',', dayfirst=True)

# Discard the EURO STOXX data from before the VSTOXX series starts -- there is
# no point in keeping it, i.e. drop everything before 2000-01-01.
import datetime as dt
data = pd.DataFrame({'EUROSTOXX': es['SX5E'][es.index > dt.datetime(1999, 12, 31)]})
data = data.join(pd.DataFrame({'VSTOXX': vs['V2TX'][vs.index > dt.datetime(1999, 12, 31)]}))
data.info()
data.head()
# Confirms the stylized fact: when the index falls, volatility spikes.
data.plot(subplots=True, grid=True, style='b', figsize=(10, 5))

# Log returns make two different time series comparable in a mathematically
# consistent way; this is a common pattern.
rets = np.log(data / data.shift(1))
rets.head()
xdat = rets['EUROSTOXX']
ydat = rets['VSTOXX']
model = pd.ols(y=ydat, x=xdat)
model
# Again, confirms the stylized fact: a highly negative correlation.
plt.plot(xdat, ydat, 'r.')
ax = plt.axis()  # grab axis values
x = np.linspace(ax[0], ax[1] + 0.01)
plt.plot(x, model.beta[1] + model.beta[0] * x, 'b', lw=2)
plt.grid(True)
plt.axis('tight')

mpl_dates = mpl.dates.date2num(rets.index)
plt.figure(figsize=(8, 4))
plt.scatter(rets['EUROSTOXX'], rets['VSTOXX'], c=mpl_dates, marker='o')
plt.grid(True)
plt.xlabel('EUROSTOXX')
plt.ylabel('VSTOXX')
plt.colorbar(ticks=mpl.dates.DayLocator(interval=250),
             format=mpl.dates.DateFormatter('%d %b %y'))

import statsmodels.api as sma
import scipy.stats

rets.head()
r1 = rets["EUROSTOXX"]
print r1.head()
r1.values
rets = rets.dropna()

# This is a benchmark; normally distributed data looks like this.
sma.qqplot(np.random.standard_normal(1000), line='s')
pass
# qqplot for the EURO STOXX returns -- classic fat tails.
sma.qqplot(rets["EUROSTOXX"].values, line='s')
pass
# qqplot for the VSTOXX returns -- again classic fat tails.
sma.qqplot(rets["VSTOXX"].values, line='s')
pass

scipy.stats.normaltest(rets["EUROSTOXX"].values)
scipy.stats.normaltest(rets["VSTOXX"].values)
scipy.stats.shapiro(rets["VSTOXX"].values)

def normality_tests(array):
    print "Skew: %s" % (scipy.stats.skew(array), )
    print "Skew test: %s" % (scipy.stats.skewtest(array), )
    print "Kurt: %s" % (scipy.stats.kurtosis(array), )
    print "Kurt test: %s" % (scipy.stats.kurtosistest(array), )
    print "Normal test: %s" % (scipy.stats.normaltest(array), )

normality_tests(np.random.standard_normal(10000))
normality_tests(rets["VSTOXX"].values)
rets.hist(bins=20, figsize=(10, 5))

data = data.dropna()
# Rebase both series to start at 100 so that we compare like with like.
data = data / data.ix[0] * 100
data.head()

invest = 100
cratio = 0.3
data['Equity'] = (1 - cratio) * invest / data['EUROSTOXX'][0]
data['Volatility'] = cratio * invest / data['VSTOXX'][0]
data['Static'] = (data['Equity'] * data['EUROSTOXX']
                  + data['Volatility'] * data['VSTOXX'])
# Not amazing, but it shows how to start. It wouldn't impress an investor.
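# A quick sanity check on the initial sizing above (only a sketch; it relies
# on both series having been rebased to start at 100): with invest = 100 and
# cratio = 0.3, 70 currency units go into the equity index and 30 into
# volatility, i.e. 0.7 and 0.3 "units" of the respective rebased index.
print (1 - cratio) * invest / 100.0  # initial equity units: 0.7
print cratio * invest / 100.0        # initial volatility units: 0.3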
data[['EUROSTOXX', 'Static']].plot(figsize=(10, 5))

for i in xrange(1, len(data)):
    evalue = data['Equity'][i - 1] * data['EUROSTOXX'][i]  # value of the equity position
    vvalue = data['Volatility'][i - 1] * data['VSTOXX'][i]  # value of the volatility position
    tvalue = evalue + vvalue  # total wealth
    data['Equity'][i] = (1 - cratio) * tvalue / data['EUROSTOXX'][i]  # re-allocation of total wealth to the equity ...
    data['Volatility'][i] = cratio * tvalue / data['VSTOXX'][i]  # ... and the volatility position

data['Dynamic'] = (data['Equity'] * data['EUROSTOXX']
                   + data['Volatility'] * data['VSTOXX'])
data.head()
(data['Volatility'] * data['VSTOXX'] / data['Dynamic'])[:5]
(data['Equity'] * data['EUROSTOXX'] / data['Dynamic'])[:5]
data[['EUROSTOXX', 'Dynamic']].plot(figsize=(10, 5))

np.linspace(0, 1, num=20)
import scipy.optimize

def my_investment(cratio):
    invest = 100
    data['Equity'] = (1 - cratio) * invest / data['EUROSTOXX'][0]
    data['Volatility'] = cratio * invest / data['VSTOXX'][0]
    for i in xrange(1, len(data)):
        evalue = data['Equity'][i - 1] * data['EUROSTOXX'][i]  # value of the equity position
        vvalue = data['Volatility'][i - 1] * data['VSTOXX'][i]  # value of the volatility position
        tvalue = evalue + vvalue  # total wealth
        data['Equity'][i] = (1 - cratio) * tvalue / data['EUROSTOXX'][i]  # re-allocation of total wealth to the equity ...
        data['Volatility'][i] = cratio * tvalue / data['VSTOXX'][i]  # ... and the volatility position
    data['Dynamic'] = (data['Equity'] * data['EUROSTOXX']
                       + data['Volatility'] * data['VSTOXX'])
    return -data["Dynamic"][-1]

# reference: http://scipy-lectures.github.io/advanced/mathematical_optimization/
# scipy.optimize.brent(my_investment)  # gives -512.953971939 for cratio = 0.488
print my_investment(0.488)

url = 'http://hopey.netfonds.no/posdump.php?'
url += 'date=%s%s%s&paper=AAPL.O&csv_format=csv' % ('2014', '02', '19')
# you may have to adjust the date since only recent dates are available
urlretrieve(url, 'aapl.csv')

AAPL = pd.read_csv('aapl.csv', index_col=0, header=0, parse_dates=True)
AAPL.info()
AAPL['bid'].plot()
AAPL = AAPL[AAPL.index > dt.datetime(2014, 2, 19, 10, 0, 0)]  # only data later than 10am on that day
# this resamples the record frequency to 5 minutes, using the mean as aggregation rule;
# fillna(method='ffill') is a "forward fill", i.e. use the last valid value.
AAPL_5min = AAPL.resample(rule='5min', how='mean').fillna(method='ffill')
AAPL_5min.head()
AAPL_5min['bid'].plot()
# !!AI how does numexpr factor in here?
AAPL_5min['bid'].apply(lambda x: 2 * 540 - x).plot()  # this mirrors the stock price development around the 540 level
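# The "mirroring" above is simply a reflection about the horizontal level
# m = 540: f(x) = 2 * m - x, so a value 5 above the level maps to 5 below it
# and vice versa. A small numerical check (the sample bid values are made up):
m = 540.0
for x in (535.0, 540.0, 545.0):
    print x, '->', 2 * m - x  # prints 535 -> 545, 540 -> 540, 545 -> 535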