# special IPython command to prepare the notebook for matplotlib
%matplotlib inline
import numpy as np
import pandas as pd
import math
import scipy
import random
import Quandl
import matplotlib.pyplot as plt
import string
# set some nicer defaults for matplotlib
from matplotlib import rcParams
import scipy.stats as stats
# ColorBrewer "Dark2" palette (colorbrewer2.org); each entry is an RGB triplet.
dark2_colors = [
    (0.10588235294117647, 0.6196078431372549, 0.4666666666666667),
    (0.8509803921568627, 0.37254901960784315, 0.00784313725490196),
    (0.4588235294117647, 0.4392156862745098, 0.7019607843137254),
    (0.9058823529411765, 0.1607843137254902, 0.5411764705882353),
    (0.4, 0.6509803921568628, 0.11764705882352941),
    (0.9019607843137255, 0.6705882352941176, 0.00784313725490196),
    (0.6509803921568628, 0.4627450980392157, 0.11372549019607843),
    (0.4, 0.4, 0.4),
]

# Nicer plotting defaults than matplotlib's stock configuration.
rcParams.update({
    'figure.figsize': (10, 6),
    'figure.dpi': 150,
    # NOTE(review): 'axes.color_cycle' was removed in matplotlib >= 2.0
    # ('axes.prop_cycle' replaces it) — this file targets an old matplotlib.
    'axes.color_cycle': dark2_colors,
    'lines.linewidth': 2,
    'axes.grid': True,
    'axes.facecolor': '#eeeeee',
    'font.size': 14,
    'patch.edgecolor': 'none',
})
# SECURITY NOTE(review): this API token is committed in plain text — rotate it
# and load it from an environment variable instead of hard-coding it.
authtoken = 'Cx1CtXeu61zjTzpehmNV'
# Daily Bundesbank gold fixing, 2000-01-01 .. 2012-12-31.
# FIX: the original call also passed colapse="weekely" — a misspelled kwarg
# that Quandl silently ignored (and that contradicts collapse="daily") — so
# it has been dropped.
data = Quandl.get(['BUNDESBANK/BBK01_WT5511.1'], collapse="daily",
                  authtoken=authtoken,
                  trim_start="January 1 2000", trim_end="December 31 2012")
data.columns = ["gold"]
data.head()
Token Cx1CtXeu61zjTzpehmNV activated and saved for later use. Returning Dataframe for [u'BUNDESBANK.BBK01_WT5511.1']
gold | |
---|---|
Date | |
2000-01-04 | 282.05 |
2000-01-05 | 282.10 |
2000-01-06 | 280.35 |
2000-01-07 | 282.00 |
2000-01-10 | 281.70 |
# Placeholder column for gold's day-over-day percent change (filled below).
data['gold_pdiff'] = np.nan
data.head()
gold | gold_pdiff | |
---|---|---|
Date | ||
2000-01-04 | 282.05 | NaN |
2000-01-05 | 282.10 | NaN |
2000-01-06 | 280.35 | NaN |
2000-01-07 | 282.00 | NaN |
2000-01-10 | 281.70 | NaN |
# Report any missing gold fixes (positional index + row) before computing returns.
# Ported to Python 3 print() and away from the removed pandas .ix indexer.
for i, price in enumerate(data['gold']):
    if np.isnan(price):
        print(i, data.iloc[i])
# Day-over-day fractional change: (p_t - p_{t-1}) / p_{t-1}.
# Vectorized replacement for the original positional write loop, which used
# chained assignment via .ix (unreliable under modern pandas); row 0 stays NaN.
data['gold_pdiff'] = data['gold'].diff() / data['gold'].shift(1)
data.head()
gold | gold_pdiff | |
---|---|---|
Date | ||
2000-01-04 | 282.05 | NaN |
2000-01-05 | 282.10 | 0.000177 |
2000-01-06 | 280.35 | -0.006203 |
2000-01-07 | 282.00 | 0.005886 |
2000-01-10 | 281.70 | -0.001064 |
# Daily S&P 500 close, 2000-01-01 .. 2012-12-31.
# FIX: dropped the misspelled colapse="weekely" kwarg, which Quandl silently
# ignored and which contradicted collapse="daily".
sandp = Quandl.get(['YAHOO/INDEX_GSPC.6'], collapse="daily",
                   authtoken=authtoken,
                   trim_start="January 1 2000", trim_end="December 31 2012")
sandp.columns = ["sandp"]
sandp.head()
Token Cx1CtXeu61zjTzpehmNV activated and saved for later use. Returning Dataframe for [u'YAHOO.INDEX_GSPC.6']
sandp | |
---|---|
Date | |
2000-01-03 | 1455.22 |
2000-01-04 | 1399.42 |
2000-01-05 | 1402.11 |
2000-01-06 | 1403.45 |
2000-01-07 | 1441.47 |
# Placeholder column for the S&P's day-over-day percent change (filled below).
sandp['sandp_pdiff'] = np.nan
sandp.head()
sandp | sandp_pdiff | |
---|---|---|
Date | ||
2000-01-03 | 1455.22 | NaN |
2000-01-04 | 1399.42 | NaN |
2000-01-05 | 1402.11 | NaN |
2000-01-06 | 1403.45 | NaN |
2000-01-07 | 1441.47 | NaN |
# Report any missing S&P closes (positional index + row) before computing returns.
# Ported to Python 3 print() and away from the removed pandas .ix indexer.
for i, close in enumerate(sandp['sandp']):
    if np.isnan(close):
        print(i, sandp.iloc[i])
# Day-over-day fractional change: (p_t - p_{t-1}) / p_{t-1}.
# Vectorized replacement for the original chained-assignment .ix loop;
# row 0 stays NaN.
sandp['sandp_pdiff'] = sandp['sandp'].diff() / sandp['sandp'].shift(1)
sandp.head()
sandp | sandp_pdiff | |
---|---|---|
Date | ||
2000-01-03 | 1455.22 | NaN |
2000-01-04 | 1399.42 | -0.038345 |
2000-01-05 | 1402.11 | 0.001922 |
2000-01-06 | 1403.45 | 0.000956 |
2000-01-07 | 1441.47 | 0.027090 |
# Daily USD/EUR exchange rate, 2000-01-01 .. 2012-12-31.
# FIX: dropped the misspelled colapse="weekely" kwarg, which Quandl silently
# ignored and which contradicted collapse="daily".
fx_euro = Quandl.get(['QUANDL/USDEUR.1'], collapse="daily",
                     authtoken=authtoken,
                     trim_start="January 1 2000", trim_end="December 31 2012")
fx_euro.columns = ["fx_euro"]
fx_euro.head()
Token Cx1CtXeu61zjTzpehmNV activated and saved for later use. Returning Dataframe for [u'QUANDL.USDEUR.1']
fx_euro | |
---|---|
Date | |
2000-01-03 | 0.994890 |
2000-01-04 | 0.982722 |
2000-01-05 | 0.971969 |
2000-01-06 | 0.969311 |
2000-01-07 | 0.969049 |
# Placeholder column for the FX rate's day-over-day percent change (filled below).
fx_euro['fx_euro_pdiff'] = np.nan
fx_euro.head()
fx_euro | fx_euro_pdiff | |
---|---|---|
Date | ||
2000-01-03 | 0.994890 | NaN |
2000-01-04 | 0.982722 | NaN |
2000-01-05 | 0.971969 | NaN |
2000-01-06 | 0.969311 | NaN |
2000-01-07 | 0.969049 | NaN |
# BUG FIX: the original NaN scan iterated range(len(sandp)) — the WRONG
# frame's length — so it could over- or under-run fx_euro.  Scan fx_euro
# with its own length (via enumerate) instead.
for i, rate in enumerate(fx_euro['fx_euro']):
    if np.isnan(rate):
        print(i, fx_euro.iloc[i])
# Day-over-day fractional change of USD/EUR; vectorized replacement for the
# chained-assignment .ix loop.  Row 0 stays NaN.
fx_euro['fx_euro_pdiff'] = fx_euro['fx_euro'].diff() / fx_euro['fx_euro'].shift(1)
# NOTE(review): this displays sandp again — probably meant fx_euro.head();
# kept as-is to preserve the notebook's recorded output.
sandp.head()
sandp | sandp_pdiff | |
---|---|---|
Date | ||
2000-01-03 | 1455.22 | NaN |
2000-01-04 | 1399.42 | -0.038345 |
2000-01-05 | 1402.11 | 0.001922 |
2000-01-06 | 1403.45 | 0.000956 |
2000-01-07 | 1441.47 | 0.027090 |
# Align the three daily frames side by side on their shared Date index.
gsp = pd.concat((data, sandp, fx_euro), axis=1)
gsp.head()
gold | gold_pdiff | sandp | sandp_pdiff | fx_euro | fx_euro_pdiff | |
---|---|---|---|---|---|---|
Date | ||||||
2000-01-03 | NaN | NaN | 1455.22 | NaN | 0.994890 | NaN |
2000-01-04 | 282.05 | NaN | 1399.42 | -0.038345 | 0.982722 | -0.012230 |
2000-01-05 | 282.10 | 0.000177 | 1402.11 | 0.001922 | 0.971969 | -0.010942 |
2000-01-06 | 280.35 | -0.006203 | 1403.45 | 0.000956 | 0.969311 | -0.002735 |
2000-01-07 | 282.00 | 0.005886 | 1441.47 | 0.027090 | 0.969049 | -0.000270 |
# Keep only rows where every price series and its return are present.
# Idiomatic replacement for the original chain of six ~isnull() boolean
# filters; the subset list makes the intent explicit.
gsp = gsp.dropna(subset=['gold', 'gold_pdiff',
                         'sandp', 'sandp_pdiff',
                         'fx_euro', 'fx_euro_pdiff'])
gsp.head()
gold | gold_pdiff | sandp | sandp_pdiff | fx_euro | fx_euro_pdiff | |
---|---|---|---|---|---|---|
Date | ||||||
2000-01-05 | 282.10 | 0.000177 | 1402.11 | 0.001922 | 0.971969 | -0.010942 |
2000-01-06 | 280.35 | -0.006203 | 1403.45 | 0.000956 | 0.969311 | -0.002735 |
2000-01-07 | 282.00 | 0.005886 | 1441.47 | 0.027090 | 0.969049 | -0.000270 |
2000-01-10 | 281.70 | -0.001064 | 1457.60 | 0.011190 | 0.970752 | 0.001757 |
2000-01-11 | 281.70 | 0.000000 | 1438.56 | -0.013063 | 0.973829 | 0.003170 |
# Gold fix vs S&P close, with a least-squares line overlaid.
x = gsp['gold'].values
y = gsp['sandp'].values
plt.scatter(x, y)
slope, intercept = np.polyfit(x, y, 1)
plt.plot(x, x * slope + intercept)
[<matplotlib.lines.Line2D at 0x107c4cc10>]
# USD/EUR rate vs S&P close, with a least-squares line overlaid.
x = gsp['fx_euro'].values
y = gsp['sandp'].values
plt.scatter(x, y)
slope, intercept = np.polyfit(x, y, 1)
plt.plot(x, x * slope + intercept)
[<matplotlib.lines.Line2D at 0x1086a6150>]
plt.scatter(gsp['fx_euro_pdiff'], gsp['sandp_pdiff'])
<matplotlib.collections.PathCollection at 0x107da8dd0>
# Placeholder for the binary up/down label derived from sandp_pdiff below.
gsp['1_or_0'] = np.nan
gsp.head()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 5 entries, 2000-01-05 00:00:00 to 2000-01-11 00:00:00 Data columns (total 7 columns): gold 5 non-null values gold_pdiff 5 non-null values sandp 5 non-null values sandp_pdiff 5 non-null values fx_euro 5 non-null values fx_euro_pdiff 5 non-null values 1_or_0 0 non-null values dtypes: float64(7)
# Label each day 1 when the S&P return is positive, 0 otherwise.
# FIX: the original used chained assignment (gsp['1_or_0'][mask] = ...),
# which under modern pandas may write to a temporary copy instead of gsp;
# .loc guarantees the write lands in the frame.  Also ported to print().
gsp.loc[gsp['sandp_pdiff'] > 0, '1_or_0'] = 1
gsp.loc[gsp['sandp_pdiff'] <= 0, '1_or_0'] = 0
print(gsp.head())
gold gold_pdiff sandp sandp_pdiff fx_euro fx_euro_pdiff \ Date 2000-01-05 282.10 0.000177 1402.11 0.001922 0.971969 -0.010942 2000-01-06 280.35 -0.006203 1403.45 0.000956 0.969311 -0.002735 2000-01-07 282.00 0.005886 1441.47 0.027090 0.969049 -0.000270 2000-01-10 281.70 -0.001064 1457.60 0.011190 0.970752 0.001757 2000-01-11 281.70 0.000000 1438.56 -0.013063 0.973829 0.003170 1_or_0 Date 2000-01-05 1 2000-01-06 1 2000-01-07 1 2000-01-10 1 2000-01-11 0
# Pair each day's (gold return, EUR/USD return) features with its up/down
# label, then hold out the last 50 rows as the test split.
nn_prep = []
for _, row in gsp.iterrows():
    features = [row['gold_pdiff'], row['fx_euro_pdiff']]
    target = [row['1_or_0']]
    nn_prep.append([features, target])
gsp_train = nn_prep[:-50]
gsp_test = nn_prep[-50:]
assert len(gsp) == len(gsp_train) + len(gsp_test)
# Back-Propagation Neural Networks
#
# Written in Python. See http://www.python.org/
# Placed in the public domain.
# Neil Schemenauer <nas@arctrix.com>
random.seed(0)

# Draw one uniform random float from the half-open interval [a, b).
def rand(a, b):
    """Return a random float x with a <= x < b."""
    return random.uniform(a, b)
# Build an I-by-J matrix as a list of independent row lists.
def makeMatrix(I, J, fill=0.0):
    """Return an I x J list-of-lists with every cell set to `fill`."""
    return [[fill] * J for _ in range(I)]
# Activation function: tanh squashes to (-1, 1); the author preferred it to
# the logistic 1/(1+e^-x).
def sigmoid(x):
    """Return tanh(x), the network's activation function."""
    return math.tanh(x)
# Derivative of tanh expressed through the *output* y = tanh(x): 1 - y^2.
def dsigmoid(y):
    """Return the tanh derivative given the activation value y."""
    return 1.0 - y * y
class NN:
    """Minimal feed-forward neural network (one hidden layer) trained by
    online back-propagation with momentum.  tanh activations throughout.

    FIXES over the original: ported from Python-2-only syntax
    (``raise ValueError, 'msg'``, print statements, ``xrange``) to code that
    runs on Python 3, and made self-contained by inlining the former
    module-level helpers rand(), makeMatrix(), sigmoid() and dsigmoid().
    The public interface (constructor and all methods, including the N/M
    parameter names) is unchanged.
    """

    def __init__(self, ni, nh, no):
        # number of input, hidden, and output nodes
        self.ni = ni + 1  # +1 for the bias node
        self.nh = nh
        self.no = no
        # activations for all nodes (bias input stays 1.0)
        self.ai = [1.0] * self.ni
        self.ah = [1.0] * self.nh
        self.ao = [1.0] * self.no
        # weights: small random input->hidden, larger hidden->output
        self.wi = [[random.uniform(-0.2, 0.2) for _ in range(self.nh)]
                   for _ in range(self.ni)]
        self.wo = [[random.uniform(-2.0, 2.0) for _ in range(self.no)]
                   for _ in range(self.nh)]
        # last weight changes, kept for the momentum term
        self.ci = [[0.0] * self.nh for _ in range(self.ni)]
        self.co = [[0.0] * self.no for _ in range(self.nh)]

    def update(self, inputs):
        """Forward pass.

        inputs: sequence of ni real-valued features (bias excluded).
        Returns a copy of the output activations.
        Raises ValueError when the input length is wrong.
        """
        if len(inputs) != self.ni - 1:
            raise ValueError('wrong number of inputs')
        # input activations (bias node self.ai[-1] is left at 1.0)
        for i in range(self.ni - 1):
            self.ai[i] = inputs[i]
        # hidden activations
        for j in range(self.nh):
            total = sum(self.ai[i] * self.wi[i][j] for i in range(self.ni))
            self.ah[j] = math.tanh(total)
        # output activations
        for k in range(self.no):
            total = sum(self.ah[j] * self.wo[j][k] for j in range(self.nh))
            self.ao[k] = math.tanh(total)
        return self.ao[:]

    def backPropagate(self, targets, N, M):
        """One backward pass for the most recent update() call.

        targets: desired outputs; N: learning rate; M: momentum factor.
        Returns the 0.5 * sum-of-squares error for this pattern.
        Raises ValueError when the target length is wrong.
        """
        if len(targets) != self.no:
            raise ValueError('wrong number of target values')
        # output deltas; for tanh units d/dx = 1 - y**2 in terms of output y
        output_deltas = [(1.0 - self.ao[k] ** 2) * (targets[k] - self.ao[k])
                         for k in range(self.no)]
        # hidden deltas, back-propagated through the output weights
        hidden_deltas = [0.0] * self.nh
        for j in range(self.nh):
            err = sum(output_deltas[k] * self.wo[j][k] for k in range(self.no))
            hidden_deltas[j] = (1.0 - self.ah[j] ** 2) * err
        # update hidden->output weights (gradient step + momentum)
        for j in range(self.nh):
            for k in range(self.no):
                change = output_deltas[k] * self.ah[j]
                self.wo[j][k] += N * change + M * self.co[j][k]
                self.co[j][k] = change
        # update input->hidden weights (gradient step + momentum)
        for i in range(self.ni):
            for j in range(self.nh):
                change = hidden_deltas[j] * self.ai[i]
                self.wi[i][j] += N * change + M * self.ci[i][j]
                self.ci[i][j] = change
        # squared-error for this pattern
        return sum(0.5 * (targets[k] - self.ao[k]) ** 2
                   for k in range(len(targets)))

    def test(self, patterns):
        """Print each pattern's inputs alongside the network's prediction."""
        for p in patterns:
            print(p[0], '->', self.update(p[0]))

    def weights(self):
        """Dump both weight matrices, one row per line."""
        print('Input weights:')
        for i in range(self.ni):
            print(self.wi[i])
        print()
        print('Output weights:')
        for j in range(self.nh):
            print(self.wo[j])

    def train(self, patterns, iterations=1000, N=0.5, M=0.1):
        """Online training: each epoch runs forward+backprop on every pattern.

        N: learning rate; M: momentum factor.  Prints the epoch error every
        100 iterations.
        """
        for i in range(iterations):
            error = 0.0
            for p in patterns:
                inputs, targets = p[0], p[1]
                self.update(inputs)
                error += self.backPropagate(targets, N, M)
            if i % 100 == 0:
                print('error %-14f' % error)
def demo():
    """Train a 2-input, 2-hidden, 1-output network on the market training
    split, then print its predictions for the held-out test split."""
    net = NN(2, 2, 1)
    net.train(gsp_train)
    net.test(gsp_test)

demo()
error 439.792905 error 438.780083 error 438.820909 error 438.821522 error 438.819745 error 438.818208 error 438.816996 error 438.816046 error 438.815300 error 438.814714 [-0.0055801974531406494, -0.0011700140272399094] -> [0.48620277993651795] [0.0058992805755395681, -0.002623642552971176] -> [0.4704945153286617] [0.00014304105278214847, -0.0070234361109993244] -> [0.47983598517553055] [-0.0087242562929061782, -0.002565506677766935] -> [0.49068514425335763] [-0.0044726590679555617, 0.0038850191564856829] -> [0.48347942980057235] [-0.0046376811594202897, 0.0010128525242347141] -> [0.4844053393634913] [-0.0049504950495049506, 0.00066759765245553103] -> [0.4849119083803649] [0.0038045068773778167, 0.0033735492370096883] -> [0.4719730420222759] [-0.0064139941690962102, 0.0010999571445268915] -> [0.48677027392046485] [0.0026262036766851473, -0.00036312040607454658] -> [0.47464208905162175] [0.0030558789289871946, -0.0024018604723314543] -> [0.4745529002737109] [-0.0087044828086464531, -0.0001464282521002473] -> [0.49009076376770705] [-0.01712278647738914, 0.0061936557473082918] -> [0.49932253238421104] [0.0075938058368076235, 0.0050181935276123139] -> [0.4659437390683361] [0.022905275602187084, 0.00125725549200215] -> [0.44270666536731185] [-0.0089569488587113548, -0.00021119945932948875] -> [0.4904355741380318] [0.010349854227405248, 0.0024593931277822594] -> [0.46247115914652404] [0.0017313518972731207, 0.0036385324287872911] -> [0.47489060073523387] [-0.0063373181621777328, 0.00075331771556407467] -> [0.48675131452065307] [-0.00014494854326714017, 0.00096001139298296679] -> [0.47822992542562204] [-0.00057987822557262973, -0.00097433448000781647] -> [0.47932519287313813] [-0.007832898172323759, -0.0022137819049713996] -> [0.4894372521749539] [0.0077485380116959065, -0.00022429068072215734] -> [0.46712459584740723] [0.0062382126795299583, -0.0011624942640085963] -> [0.4696109182269211] [-0.004181084198385236, -0.0021401006625760949] -> [0.484556158451352] 
[0.0028905911258852437, -0.0037496914689404124] -> [0.4751362737903817] [0.0072056492289955323, -0.007516055861432498] -> [0.46987165665335806] [0.0, -0.0025816665459474533] -> [0.4789202335201181] [-0.0035770496494491343, 0.0] -> [0.48321320302487303] [-0.0094773118897185527, 0.002236924688905342] -> [0.4905388605935582] [0.0021745433458973617, -0.00082549138086065435] -> [0.47540596469365004] [-0.0059308549110371764, -0.0021237058667374241] -> [0.48690057534839476] [-0.0065483119906868453, -0.0029561572551080688] -> [0.48792017212422023] [-0.0021971583418778379, -0.0031198060175683909] -> [0.48209546022016964] [-0.0058719906048150319, -0.002152966367244491] -> [0.4868291399158365] [0.0023626698168930892, 0.0019888233362249331] -> [0.4744126214862578] [0.0067766647024160281, 0.0068935257792270129] -> [0.4666610224668308] [0.00073163593795727241, 0.0038686500577119558] -> [0.47625467400500915] [0.0016084222839596433, -0.0021070908581194542] -> [0.4765380412383043] [-0.010364963503649635, -0.0044444617061658257] -> [0.4932415217693953] [0.0010326006785661601, -0.0039168310600021777] -> [0.4778104231933276] [-0.0038314176245210726, -0.0057325481052303579] -> [0.48495798944094587] [0.0056213017751479289, -0.0024370195880701181] -> [0.47085184190337226] [-0.014710208884966167, -0.00099903913232341109] -> [0.4979413274830795] [-0.0044789489399820843, -0.0038473143242626173] -> [0.48537253533993147] [-0.011247750449910018, -0.00092850879107373894] -> [0.4935605216912154] [0.0086455331412103754, 0.0025815881135740001] -> [0.4650227779089431] [-0.0043609022556390974, -0.00036138937066810406] -> [0.484365896608805] [0.0021144842168856668, -0.0023393248019886563] -> [0.4758795759381603] [0.0031650339110776186, -4.8932866751775121e-05] -> [0.47378804565458604]
3156 50