# special IPython command to prepare the notebook for matplotlib
%matplotlib inline
import numpy as np
import pandas as pd
import math
import scipy
import random
import Quandl
import matplotlib.pyplot as plt
import string
# set some nicer defaults for matplotlib
from matplotlib import rcParams
import scipy.stats as stats
# ColorBrewer "Dark2" palette (colorbrewer2.org); each entry is an RGB triplet.
dark2_colors = [
    (0.10588235294117647, 0.6196078431372549, 0.4666666666666667),
    (0.8509803921568627, 0.37254901960784315, 0.00784313725490196),
    (0.4588235294117647, 0.4392156862745098, 0.7019607843137254),
    (0.9058823529411765, 0.1607843137254902, 0.5411764705882353),
    (0.4, 0.6509803921568628, 0.11764705882352941),
    (0.9019607843137255, 0.6705882352941176, 0.00784313725490196),
    (0.6509803921568628, 0.4627450980392157, 0.11372549019607843),
    (0.4, 0.4, 0.4),
]

# Nicer plotting defaults than matplotlib's stock configuration.
rcParams.update({
    'figure.figsize': (10, 6),
    'figure.dpi': 150,
    # NOTE(review): 'axes.color_cycle' was removed in matplotlib >= 2.0
    # ('axes.prop_cycle' replaces it) — this file targets an old matplotlib.
    'axes.color_cycle': dark2_colors,
    'lines.linewidth': 2,
    'axes.grid': True,
    'axes.facecolor': '#eeeeee',
    'font.size': 14,
    'patch.edgecolor': 'none',
})
# SECURITY NOTE(review): this API token is committed in plain text — rotate it
# and load it from an environment variable instead of hard-coding it.
authtoken = 'Cx1CtXeu61zjTzpehmNV'
# Daily Bundesbank gold fixing, 2000-01-01 .. 2012-12-31.
# FIX: the original call also passed colapse="weekely" — a misspelled kwarg
# that Quandl silently ignored (and that contradicts collapse="daily") — so
# it has been dropped.
data = Quandl.get(['BUNDESBANK/BBK01_WT5511.1'], collapse="daily",
                  authtoken=authtoken,
                  trim_start="January 1 2000", trim_end="December 31 2012")
data.columns = ["gold"]
data.head()
Token Cx1CtXeu61zjTzpehmNV activated and saved for later use. Returning Dataframe for [u'BUNDESBANK.BBK01_WT5511.1']
gold | |
---|---|
Date | |
2000-01-04 | 282.05 |
2000-01-05 | 282.10 |
2000-01-06 | 280.35 |
2000-01-07 | 282.00 |
2000-01-10 | 281.70 |
# Placeholder column for gold's day-over-day percent change (filled below).
data['gold_pdiff'] = np.nan
data.head()
gold | gold_pdiff | |
---|---|---|
Date | ||
2000-01-04 | 282.05 | NaN |
2000-01-05 | 282.10 | NaN |
2000-01-06 | 280.35 | NaN |
2000-01-07 | 282.00 | NaN |
2000-01-10 | 281.70 | NaN |
# Report any missing gold fixes (positional index + row) before computing returns.
# Ported to Python 3 print() and away from the removed pandas .ix indexer.
for i, price in enumerate(data['gold']):
    if np.isnan(price):
        print(i, data.iloc[i])
# Day-over-day fractional change: (p_t - p_{t-1}) / p_{t-1}.
# Vectorized replacement for the original positional write loop, which used
# chained assignment via .ix (unreliable under modern pandas); row 0 stays NaN.
data['gold_pdiff'] = data['gold'].diff() / data['gold'].shift(1)
data.head()
gold | gold_pdiff | |
---|---|---|
Date | ||
2000-01-04 | 282.05 | NaN |
2000-01-05 | 282.10 | 0.000177 |
2000-01-06 | 280.35 | -0.006203 |
2000-01-07 | 282.00 | 0.005886 |
2000-01-10 | 281.70 | -0.001064 |
# Daily S&P 500 close, 2000-01-01 .. 2012-12-31.
# FIX: dropped the misspelled colapse="weekely" kwarg, which Quandl silently
# ignored and which contradicted collapse="daily".
sandp = Quandl.get(['YAHOO/INDEX_GSPC.6'], collapse="daily",
                   authtoken=authtoken,
                   trim_start="January 1 2000", trim_end="December 31 2012")
sandp.columns = ["sandp"]
sandp.head()
Token Cx1CtXeu61zjTzpehmNV activated and saved for later use. Returning Dataframe for [u'YAHOO.INDEX_GSPC.6']
sandp | |
---|---|
Date | |
2000-01-03 | 1455.22 |
2000-01-04 | 1399.42 |
2000-01-05 | 1402.11 |
2000-01-06 | 1403.45 |
2000-01-07 | 1441.47 |
# Placeholder column for the S&P's day-over-day percent change (filled below).
sandp['sandp_pdiff'] = np.nan
sandp.head()
sandp | sandp_pdiff | |
---|---|---|
Date | ||
2000-01-03 | 1455.22 | NaN |
2000-01-04 | 1399.42 | NaN |
2000-01-05 | 1402.11 | NaN |
2000-01-06 | 1403.45 | NaN |
2000-01-07 | 1441.47 | NaN |
# Report any missing S&P closes (positional index + row) before computing returns.
# Ported to Python 3 print() and away from the removed pandas .ix indexer.
for i, close in enumerate(sandp['sandp']):
    if np.isnan(close):
        print(i, sandp.iloc[i])
# Day-over-day fractional change: (p_t - p_{t-1}) / p_{t-1}.
# Vectorized replacement for the original chained-assignment .ix loop;
# row 0 stays NaN.
sandp['sandp_pdiff'] = sandp['sandp'].diff() / sandp['sandp'].shift(1)
sandp.head()
sandp | sandp_pdiff | |
---|---|---|
Date | ||
2000-01-03 | 1455.22 | NaN |
2000-01-04 | 1399.42 | -0.038345 |
2000-01-05 | 1402.11 | 0.001922 |
2000-01-06 | 1403.45 | 0.000956 |
2000-01-07 | 1441.47 | 0.027090 |
# Daily USD/EUR exchange rate, 2000-01-01 .. 2012-12-31.
# FIX: dropped the misspelled colapse="weekely" kwarg, which Quandl silently
# ignored and which contradicted collapse="daily".
fx_euro = Quandl.get(['QUANDL/USDEUR.1'], collapse="daily",
                     authtoken=authtoken,
                     trim_start="January 1 2000", trim_end="December 31 2012")
fx_euro.columns = ["fx_euro"]
fx_euro.head()
Token Cx1CtXeu61zjTzpehmNV activated and saved for later use. Returning Dataframe for [u'QUANDL.USDEUR.1']
fx_euro | |
---|---|
Date | |
2000-01-03 | 0.994890 |
2000-01-04 | 0.982722 |
2000-01-05 | 0.971969 |
2000-01-06 | 0.969311 |
2000-01-07 | 0.969049 |
# Placeholder column for the FX rate's day-over-day percent change (filled below).
fx_euro['fx_euro_pdiff'] = np.nan
fx_euro.head()
fx_euro | fx_euro_pdiff | |
---|---|---|
Date | ||
2000-01-03 | 0.994890 | NaN |
2000-01-04 | 0.982722 | NaN |
2000-01-05 | 0.971969 | NaN |
2000-01-06 | 0.969311 | NaN |
2000-01-07 | 0.969049 | NaN |
# BUG FIX: the original NaN scan iterated range(len(sandp)) — the WRONG
# frame's length — so it could over- or under-run fx_euro.  Scan fx_euro
# with its own length (via enumerate) instead.
for i, rate in enumerate(fx_euro['fx_euro']):
    if np.isnan(rate):
        print(i, fx_euro.iloc[i])
# Day-over-day fractional change of USD/EUR; vectorized replacement for the
# chained-assignment .ix loop.  Row 0 stays NaN.
fx_euro['fx_euro_pdiff'] = fx_euro['fx_euro'].diff() / fx_euro['fx_euro'].shift(1)
# NOTE(review): this displays sandp again — probably meant fx_euro.head();
# kept as-is to preserve the notebook's recorded output.
sandp.head()
sandp | sandp_pdiff | |
---|---|---|
Date | ||
2000-01-03 | 1455.22 | NaN |
2000-01-04 | 1399.42 | -0.038345 |
2000-01-05 | 1402.11 | 0.001922 |
2000-01-06 | 1403.45 | 0.000956 |
2000-01-07 | 1441.47 | 0.027090 |
# Align the three daily frames side by side on their shared Date index.
gsp = pd.concat((data, sandp, fx_euro), axis=1)
gsp.head()
gold | gold_pdiff | sandp | sandp_pdiff | fx_euro | fx_euro_pdiff | |
---|---|---|---|---|---|---|
Date | ||||||
2000-01-03 | NaN | NaN | 1455.22 | NaN | 0.994890 | NaN |
2000-01-04 | 282.05 | NaN | 1399.42 | -0.038345 | 0.982722 | -0.012230 |
2000-01-05 | 282.10 | 0.000177 | 1402.11 | 0.001922 | 0.971969 | -0.010942 |
2000-01-06 | 280.35 | -0.006203 | 1403.45 | 0.000956 | 0.969311 | -0.002735 |
2000-01-07 | 282.00 | 0.005886 | 1441.47 | 0.027090 | 0.969049 | -0.000270 |
# Keep only rows where every price series and its return are present.
# Idiomatic replacement for the original chain of six ~isnull() boolean
# filters; the subset list makes the intent explicit.
gsp = gsp.dropna(subset=['gold', 'gold_pdiff',
                         'sandp', 'sandp_pdiff',
                         'fx_euro', 'fx_euro_pdiff'])
gsp.head()
gold | gold_pdiff | sandp | sandp_pdiff | fx_euro | fx_euro_pdiff | |
---|---|---|---|---|---|---|
Date | ||||||
2000-01-05 | 282.10 | 0.000177 | 1402.11 | 0.001922 | 0.971969 | -0.010942 |
2000-01-06 | 280.35 | -0.006203 | 1403.45 | 0.000956 | 0.969311 | -0.002735 |
2000-01-07 | 282.00 | 0.005886 | 1441.47 | 0.027090 | 0.969049 | -0.000270 |
2000-01-10 | 281.70 | -0.001064 | 1457.60 | 0.011190 | 0.970752 | 0.001757 |
2000-01-11 | 281.70 | 0.000000 | 1438.56 | -0.013063 | 0.973829 | 0.003170 |
# Gold fix vs S&P close, with a least-squares line overlaid.
x = gsp['gold'].values
y = gsp['sandp'].values
plt.scatter(x, y)
slope, intercept = np.polyfit(x, y, 1)
plt.plot(x, x * slope + intercept)
[<matplotlib.lines.Line2D at 0x107c4cc10>]
# USD/EUR rate vs S&P close, with a least-squares line overlaid.
x = gsp['fx_euro'].values
y = gsp['sandp'].values
plt.scatter(x, y)
slope, intercept = np.polyfit(x, y, 1)
plt.plot(x, x * slope + intercept)
[<matplotlib.lines.Line2D at 0x1086a6150>]
plt.scatter(gsp['fx_euro_pdiff'], gsp['sandp_pdiff'])
<matplotlib.collections.PathCollection at 0x107da8dd0>
# Placeholder for the binary up/down label derived from sandp_pdiff below.
gsp['1_or_0'] = np.nan
gsp.head()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 5 entries, 2000-01-05 00:00:00 to 2000-01-11 00:00:00 Data columns (total 7 columns): gold 5 non-null values gold_pdiff 5 non-null values sandp 5 non-null values sandp_pdiff 5 non-null values fx_euro 5 non-null values fx_euro_pdiff 5 non-null values 1_or_0 0 non-null values dtypes: float64(7)
# Label each day 1 when the S&P return is positive, 0 otherwise.
# FIX: the original used chained assignment (gsp['1_or_0'][mask] = ...),
# which under modern pandas may write to a temporary copy instead of gsp;
# .loc guarantees the write lands in the frame.  Also ported to print().
gsp.loc[gsp['sandp_pdiff'] > 0, '1_or_0'] = 1
gsp.loc[gsp['sandp_pdiff'] <= 0, '1_or_0'] = 0
print(gsp.head())
gold gold_pdiff sandp sandp_pdiff fx_euro fx_euro_pdiff \ Date 2000-01-05 282.10 0.000177 1402.11 0.001922 0.971969 -0.010942 2000-01-06 280.35 -0.006203 1403.45 0.000956 0.969311 -0.002735 2000-01-07 282.00 0.005886 1441.47 0.027090 0.969049 -0.000270 2000-01-10 281.70 -0.001064 1457.60 0.011190 0.970752 0.001757 2000-01-11 281.70 0.000000 1438.56 -0.013063 0.973829 0.003170 1_or_0 Date 2000-01-05 1 2000-01-06 1 2000-01-07 1 2000-01-10 1 2000-01-11 0
# Pair each day's (gold return, EUR/USD return) features with its up/down
# label, then hold out the last 50 rows as the test split.
nn_prep = []
for _, row in gsp.iterrows():
    features = [row['gold_pdiff'], row['fx_euro_pdiff']]
    target = [row['1_or_0']]
    nn_prep.append([features, target])
gsp_train = nn_prep[:-50]
gsp_test = nn_prep[-50:]
assert len(gsp) == len(gsp_train) + len(gsp_test)
# Back-Propagation Neural Networks
#
# Written in Python. See http://www.python.org/
# Placed in the public domain.
# Neil Schemenauer <nas@arctrix.com>
random.seed(0)

# Draw one uniform random float from the half-open interval [a, b).
def rand(a, b):
    """Return a random float x with a <= x < b."""
    return random.uniform(a, b)
# Build an I-by-J matrix as a list of independent row lists.
def makeMatrix(I, J, fill=0.0):
    """Return an I x J list-of-lists with every cell set to `fill`."""
    return [[fill] * J for _ in range(I)]
# Activation function: tanh squashes to (-1, 1); the author preferred it to
# the logistic 1/(1+e^-x).
def sigmoid(x):
    """Return tanh(x), the network's activation function."""
    return math.tanh(x)
# Derivative of tanh expressed through the *output* y = tanh(x): 1 - y^2.
def dsigmoid(y):
    """Return the tanh derivative given the activation value y."""
    return 1.0 - y * y
class NN:
    """Minimal feed-forward neural network (one hidden layer) trained by
    online back-propagation with momentum.  tanh activations throughout.

    FIXES over the original: ported from Python-2-only syntax
    (``raise ValueError, 'msg'``, print statements, ``xrange``) to code that
    runs on Python 3, and made self-contained by inlining the former
    module-level helpers rand(), makeMatrix(), sigmoid() and dsigmoid().
    The public interface (constructor and all methods, including the N/M
    parameter names) is unchanged.
    """

    def __init__(self, ni, nh, no):
        # number of input, hidden, and output nodes
        self.ni = ni + 1  # +1 for the bias node
        self.nh = nh
        self.no = no
        # activations for all nodes (bias input stays 1.0)
        self.ai = [1.0] * self.ni
        self.ah = [1.0] * self.nh
        self.ao = [1.0] * self.no
        # weights: small random input->hidden, larger hidden->output
        self.wi = [[random.uniform(-0.2, 0.2) for _ in range(self.nh)]
                   for _ in range(self.ni)]
        self.wo = [[random.uniform(-2.0, 2.0) for _ in range(self.no)]
                   for _ in range(self.nh)]
        # last weight changes, kept for the momentum term
        self.ci = [[0.0] * self.nh for _ in range(self.ni)]
        self.co = [[0.0] * self.no for _ in range(self.nh)]

    def update(self, inputs):
        """Forward pass.

        inputs: sequence of ni real-valued features (bias excluded).
        Returns a copy of the output activations.
        Raises ValueError when the input length is wrong.
        """
        if len(inputs) != self.ni - 1:
            raise ValueError('wrong number of inputs')
        # input activations (bias node self.ai[-1] is left at 1.0)
        for i in range(self.ni - 1):
            self.ai[i] = inputs[i]
        # hidden activations
        for j in range(self.nh):
            total = sum(self.ai[i] * self.wi[i][j] for i in range(self.ni))
            self.ah[j] = math.tanh(total)
        # output activations
        for k in range(self.no):
            total = sum(self.ah[j] * self.wo[j][k] for j in range(self.nh))
            self.ao[k] = math.tanh(total)
        return self.ao[:]

    def backPropagate(self, targets, N, M):
        """One backward pass for the most recent update() call.

        targets: desired outputs; N: learning rate; M: momentum factor.
        Returns the 0.5 * sum-of-squares error for this pattern.
        Raises ValueError when the target length is wrong.
        """
        if len(targets) != self.no:
            raise ValueError('wrong number of target values')
        # output deltas; for tanh units d/dx = 1 - y**2 in terms of output y
        output_deltas = [(1.0 - self.ao[k] ** 2) * (targets[k] - self.ao[k])
                         for k in range(self.no)]
        # hidden deltas, back-propagated through the output weights
        hidden_deltas = [0.0] * self.nh
        for j in range(self.nh):
            err = sum(output_deltas[k] * self.wo[j][k] for k in range(self.no))
            hidden_deltas[j] = (1.0 - self.ah[j] ** 2) * err
        # update hidden->output weights (gradient step + momentum)
        for j in range(self.nh):
            for k in range(self.no):
                change = output_deltas[k] * self.ah[j]
                self.wo[j][k] += N * change + M * self.co[j][k]
                self.co[j][k] = change
        # update input->hidden weights (gradient step + momentum)
        for i in range(self.ni):
            for j in range(self.nh):
                change = hidden_deltas[j] * self.ai[i]
                self.wi[i][j] += N * change + M * self.ci[i][j]
                self.ci[i][j] = change
        # squared-error for this pattern
        return sum(0.5 * (targets[k] - self.ao[k]) ** 2
                   for k in range(len(targets)))

    def test(self, patterns):
        """Print each pattern's inputs alongside the network's prediction."""
        for p in patterns:
            print(p[0], '->', self.update(p[0]))

    def weights(self):
        """Dump both weight matrices, one row per line."""
        print('Input weights:')
        for i in range(self.ni):
            print(self.wi[i])
        print()
        print('Output weights:')
        for j in range(self.nh):
            print(self.wo[j])

    def train(self, patterns, iterations=1000, N=0.5, M=0.1):
        """Online training: each epoch runs forward+backprop on every pattern.

        N: learning rate; M: momentum factor.  Prints the epoch error every
        100 iterations.
        """
        for i in range(iterations):
            error = 0.0
            for p in patterns:
                inputs, targets = p[0], p[1]
                self.update(inputs)
                error += self.backPropagate(targets, N, M)
            if i % 100 == 0:
                print('error %-14f' % error)
def demo():
    """Train a 2-input, 2-hidden, 1-output network on the market training
    split, then print its predictions for the held-out test split."""
    net = NN(2, 2, 1)
    net.train(gsp_train)
    net.test(gsp_test)

demo()
error 439.792905 error 438.780083 error 438.820909 error 438.821522 error 438.819745 error 438.818208 error 438.816996 error 438.816046 error 438.815300 error 438.814714 [-0.0055801974531406494, -0.0011700140272399094] -> [0.48620277993651795] [0.0058992805755395681, -0.002623642552971176] -> [0.4704945153286617] [0.00014304105278214847, -0.0070234361109993244] -> [0.47983598517553055] [-0.0087242562929061782, -0.002565506677766935] -> [0.49068514425335763] [-0.0044726590679555617, 0.0038850191564856829] -> [0.48347942980057235] [-0.0046376811594202897, 0.0010128525242347141] -> [0.4844053393634913] [-0.0049504950495049506, 0.00066759765245553103] -> [0.4849119083803649] [0.0038045068773778167, 0.0033735492370096883] -> [0.4719730420222759] [-0.0064139941690962102, 0.0010999571445268915] -> [0.48677027392046485] [0.0026262036766851473, -0.00036312040607454658] -> [0.47464208905162175] [0.0030558789289871946, -0.0024018604723314543] -> [0.4745529002737109] [-0.0087044828086464531, -0.0001464282521002473] -> [0.49009076376770705] [-0.01712278647738914, 0.0061936557473082918] -> [0.49932253238421104] [0.0075938058368076235, 0.0050181935276123139] -> [0.4659437390683361] [0.022905275602187084, 0.00125725549200215] -> [0.44270666536731185] [-0.0089569488587113548, -0.00021119945932948875] -> [0.4904355741380318] [0.010349854227405248, 0.0024593931277822594] -> [0.46247115914652404] [0.0017313518972731207, 0.0036385324287872911] -> [0.47489060073523387] [-0.0063373181621777328, 0.00075331771556407467] -> [0.48675131452065307] [-0.00014494854326714017, 0.00096001139298296679] -> [0.47822992542562204] [-0.00057987822557262973, -0.00097433448000781647] -> [0.47932519287313813] [-0.007832898172323759, -0.0022137819049713996] -> [0.4894372521749539] [0.0077485380116959065, -0.00022429068072215734] -> [0.46712459584740723] [0.0062382126795299583, -0.0011624942640085963] -> [0.4696109182269211] [-0.004181084198385236, -0.0021401006625760949] -> [0.484556158451352] 
[0.0028905911258852437, -0.0037496914689404124] -> [0.4751362737903817] [0.0072056492289955323, -0.007516055861432498] -> [0.46987165665335806] [0.0, -0.0025816665459474533] -> [0.4789202335201181] [-0.0035770496494491343, 0.0] -> [0.48321320302487303] [-0.0094773118897185527, 0.002236924688905342] -> [0.4905388605935582] [0.0021745433458973617, -0.00082549138086065435] -> [0.47540596469365004] [-0.0059308549110371764, -0.0021237058667374241] -> [0.48690057534839476] [-0.0065483119906868453, -0.0029561572551080688] -> [0.48792017212422023] [-0.0021971583418778379, -0.0031198060175683909] -> [0.48209546022016964] [-0.0058719906048150319, -0.002152966367244491] -> [0.4868291399158365] [0.0023626698168930892, 0.0019888233362249331] -> [0.4744126214862578] [0.0067766647024160281, 0.0068935257792270129] -> [0.4666610224668308] [0.00073163593795727241, 0.0038686500577119558] -> [0.47625467400500915] [0.0016084222839596433, -0.0021070908581194542] -> [0.4765380412383043] [-0.010364963503649635, -0.0044444617061658257] -> [0.4932415217693953] [0.0010326006785661601, -0.0039168310600021777] -> [0.4778104231933276] [-0.0038314176245210726, -0.0057325481052303579] -> [0.48495798944094587] [0.0056213017751479289, -0.0024370195880701181] -> [0.47085184190337226] [-0.014710208884966167, -0.00099903913232341109] -> [0.4979413274830795] [-0.0044789489399820843, -0.0038473143242626173] -> [0.48537253533993147] [-0.011247750449910018, -0.00092850879107373894] -> [0.4935605216912154] [0.0086455331412103754, 0.0025815881135740001] -> [0.4650227779089431] [-0.0043609022556390974, -0.00036138937066810406] -> [0.484365896608805] [0.0021144842168856668, -0.0023393248019886563] -> [0.4758795759381603] [0.0031650339110776186, -4.8932866751775121e-05] -> [0.47378804565458604]
3156 50