Notebook

In [281]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn
%pylab inline
from matplotlib import interactive
interactive(False)
from pylab import rcParams

Populating the interactive namespace from numpy and matplotlib

WARNING: pylab import has clobbered these variables: ['intc']
`%matplotlib` prevents importing * from pylab and numpy

In [282]:

#Defining the t-test function to be used later
def print_ttest(x,mu=0,ci=95):
    #performs one sample t-test to check if population mean
    from scipy import stats
    print "one sample t test"
    t,prob = stats.ttest_1samp(x,popmean=mu)
    
    values = {90:1.645,95:1.960,98:2.326,99:2.576}
    
    print "t = ",t, "p value = ",prob
    print "alternative hypothesis: true mean is not equal to 0"
    print "95% confidence interval"
    cl = np.mean(x) - values[ci] * np.std(x)/np.sqrt(len(x))
    cu = np.mean(x) + values[ci] * np.std(x)/np.sqrt(len(x))
    
    print(cl,cu)
    
    print "sample estimate \nmean of data"
    print(np.mean(x))

In [283]:

#intel corporation
data_intel = np.genfromtxt('m-intcsp7309.txt',names=True,dtype=None) #Store the columns into an array 
data_fx = np.genfromtxt('d-useu9910.txt',names=True,dtype=None)
from statsmodels.stats.diagnostic import acorr_ljungbox
#testing for ARCH effects in intc data
intc = np.log(data_intel['intc']+1) #scaling the returns by 1 because otherwise NANs produced
print_ttest(intc) #test to see if the mean of the returns is 0

one sample t test
t =  2.37880962857 p value =  0.0177915101286
alternative hypothesis: true mean is not equal to 0
95% confidence interval
(0.0025357440207990703, 0.026118855765791728)
sample estimate 
mean of data
0.0143272998933

In [284]:

y = intc - np.mean(intc)
acorr_ljungbox(y**2,lags=12)

Out[284]:

(array([  9.41337385,  28.71057885,  45.88699648,  53.79034501,
         59.83982017,  63.32033414,  69.90506298,  72.50820325,
         75.67456962,  77.30833263,  77.34260767,  92.93888403]),
 array([  2.15408510e-03,   5.82877130e-07,   5.99408675e-10,
          5.82242687e-11,   1.31171008e-11,   9.49681345e-12,
          1.54415607e-12,   1.55370752e-12,   1.16276895e-12,
          1.68796711e-12,   4.80635603e-12,   1.32884693e-14]))

In [285]:

from statsmodels.stats.diagnostic import het_arch
het_arch(y,maxlag=12)[2:] #matches the example  on pg 183

Out[285]:

(4.9776070635960048, 9.7421404769149504e-08)

In [286]:

import statsmodels.graphics.tsaplots as tsaplots
rcParams['figure.figsize']=12,4
fig,ax = plt.subplots(1,2)
tsaplots.plot_acf(y**2,lags=20,ax=ax[0]);
tsaplots.plot_pacf(y**2,lags=20,ax=ax[1]);

In [287]:

#Daily Exchange Rate
fxeu = np.log(data_fx['rate'])
eu = np.diff(fxeu)
acorr_ljungbox(eu,lags=20)

Out[287]:

(array([  0.22573787,   1.31682029,   1.3548676 ,   8.82478993,
         12.31810785,  12.80393347,  21.132594  ,  21.89194041,
         22.0978777 ,  23.2623724 ,  23.35866345,  24.07549911,
         28.55037701,  28.88587438,  29.06597768,  29.07089314,
         29.34791341,  29.68705092,  30.16725933,  30.58500374]),
 array([ 0.6347023 ,  0.51767371,  0.71614726,  0.06563125,  0.03067961,
         0.04625734,  0.00357878,  0.00512003,  0.0085739 ,  0.00981847,
         0.01572533,  0.01986534,  0.00757728,  0.01082814,  0.01577281,
         0.02346245,  0.03144965,  0.0406024 ,  0.049708  ,  0.06091293]))

In [288]:

#we can see that the X-squared test statistic is 30.585 for lag 20 with a p-value of 0.0609.  
#Hence we accept that there is no correlation at the 95%ci for just the return series.  Likewise, 
#t.test for sample mean yields that sample mean is 0.  
print_ttest(eu) #Hence we don't subtract sample mean since its 0

one sample t test
t =  0.20216631618 p value =  0.839800750154
alternative hypothesis: true mean is not equal to 0
95% confidence interval
(-0.00021210035327715916, 0.0002608964497447487)
sample estimate 
mean of data
2.43980482338e-05

In [289]:

acorr_ljungbox(eu**2,lags=20) #significant arch effects, p-val = 3.12e-127

Out[289]:

(array([   2.70191354,   81.3425976 ,   88.21001495,  137.28155212,
         150.38872345,  204.79189158,  243.15102464,  256.95741931,
         305.27189578,  326.24785765,  380.23524017,  404.41714103,
         446.31426632,  468.4943095 ,  509.61901733,  546.5464821 ,
         589.00710624,  606.60681459,  615.47946711,  661.45446938]),
 array([  1.00227886e-001,   2.17109766e-018,   5.30859926e-019,
          1.07783759e-028,   1.10351850e-030,   1.81143778e-041,
          7.94169656e-049,   5.76669922e-051,   1.98706379e-060,
          4.33263136e-064,   9.55119354e-075,   4.39105316e-079,
          3.57286156e-087,   4.36200462e-091,   5.21293258e-099,
          4.82964947e-106,   3.02608378e-114,   3.45200110e-117,
          2.72522646e-118,   3.12175039e-127]))

There exists significant serial correlations (ARCH) effects in the return squared. Plot of ACF and PACF of squared returns confirms our tests:

In [290]:

fig,ax = plt.subplots(1,2)
tsaplots.plot_acf(eu**2,lags=20,ax=ax[0]);
tsaplots.plot_pacf(eu**2,lags=20,ax=ax[1]);

In [291]:

het_arch(eu,maxlag=20)[2:] #matches the example  on pg 185 for F-stat only

Out[291]:

(14.726785405690586, 5.1524090137047448e-48)

In [292]:

#model determination (intel corporation)
#looking at the PACF graph of intel, we will choose a ARCH(3) model. Hence:
from arch import arch_model
m1 = arch_model(intc,vol='ARCH',mean='Constant',p=3,q=0).fit()
print(m1.summary())

  NIT    FC           OBJFUN            GNORM
    1     7    -3.025907E+02     2.405864E+02
    2    19    -3.026282E+02     2.493080E+02
    3    27    -3.033069E+02     2.507896E+02
    4    36    -3.033561E+02     1.266148E+02
    5    44    -3.034578E+02     1.889575E+02
    6    52    -3.035684E+02     6.527600E+01
    7    60    -3.036168E+02     4.711018E+01
    8    68    -3.036503E+02     4.028544E+01
    9    75    -3.036637E+02     1.199831E+01
   10    82    -3.036644E+02     6.283745E+00
   11    89    -3.036647E+02     5.394031E+00
   12    96    -3.036647E+02     1.180318E+00
   13   103    -3.036647E+02     5.090194E-02
Optimization terminated successfully.    (Exit mode 0)
            Current function value: -303.66474971
            Iterations: 13
            Function evaluations: 103
            Gradient evaluations: 13
                      Constant Mean - ARCH Model Results                      
==============================================================================
Dep. Variable:                      y   R-squared:                      -0.000
Mean Model:             Constant Mean   Adj. R-squared:                 -0.000
Vol Model:                       ARCH   Log-Likelihood:                303.665
Distribution:                  Normal   AIC:                          -597.329
Method:            Maximum Likelihood   BIC:                          -576.850
                                        No. Observations:                  444
Date:                Fri, Jul 24 2015   Df Residuals:                      439
Time:                        11:50:05   Df Model:                            5
                                  Mean Model                                  
==============================================================================
                 coef    std err          t      P>|t|        95.0% Conf. Int.
------------------------------------------------------------------------------
mu             0.0125  3.560e-05    351.921      0.000   [1.246e-02,1.260e-02]
                               Volatility Model                               
==============================================================================
                 coef    std err          t      P>|t|        95.0% Conf. Int.
------------------------------------------------------------------------------
omega          0.0105  2.916e-06   3601.634      0.000   [1.050e-02,1.051e-02]
alpha[1]       0.2210  2.867e-02      7.707  1.288e-14       [  0.165,  0.277]
alpha[2]       0.0779  2.638e-03     29.522 1.498e-191   [7.271e-02,8.305e-02]
alpha[3]       0.0520  1.189e-03     43.706      0.000   [4.965e-02,5.431e-02]
==============================================================================

Covariance estimator: robust

In [293]:

#same as above.  But here, we separate out model and do it in components.  The anssers from the fit are the same as abve
from arch.univariate import ConstantMean
ar = ConstantMean(intc) # Hence we have the model r_t = mu + a_t
from arch.univariate import ARCH
ar.volatility = ARCH(p=3) #var = alpha_0 + alpha_1 a_{t-1}^2 + alpha_2 a_{t-2}^2 + alpha_3 a_{t-3}^3
print(ar.fit().summary())

  NIT    FC           OBJFUN            GNORM
    1     7    -3.025907E+02     2.405864E+02
    2    19    -3.026282E+02     2.493080E+02
    3    27    -3.033069E+02     2.507896E+02
    4    36    -3.033561E+02     1.266148E+02
    5    44    -3.034578E+02     1.889575E+02
    6    52    -3.035684E+02     6.527600E+01
    7    60    -3.036168E+02     4.711018E+01
    8    68    -3.036503E+02     4.028544E+01
    9    75    -3.036637E+02     1.199831E+01
   10    82    -3.036644E+02     6.283745E+00
   11    89    -3.036647E+02     5.394031E+00
   12    96    -3.036647E+02     1.180318E+00
   13   103    -3.036647E+02     5.090194E-02
Optimization terminated successfully.    (Exit mode 0)
            Current function value: -303.66474971
            Iterations: 13
            Function evaluations: 103
            Gradient evaluations: 13
                      Constant Mean - ARCH Model Results                      
==============================================================================
Dep. Variable:                      y   R-squared:                      -0.000
Mean Model:             Constant Mean   Adj. R-squared:                 -0.000
Vol Model:                       ARCH   Log-Likelihood:                303.665
Distribution:                  Normal   AIC:                          -597.329
Method:            Maximum Likelihood   BIC:                          -576.850
                                        No. Observations:                  444
Date:                Fri, Jul 24 2015   Df Residuals:                      439
Time:                        11:50:05   Df Model:                            5
                                  Mean Model                                  
==============================================================================
                 coef    std err          t      P>|t|        95.0% Conf. Int.
------------------------------------------------------------------------------
mu             0.0125  3.560e-05    351.921      0.000   [1.246e-02,1.260e-02]
                               Volatility Model                               
==============================================================================
                 coef    std err          t      P>|t|        95.0% Conf. Int.
------------------------------------------------------------------------------
omega          0.0105  2.916e-06   3601.634      0.000   [1.050e-02,1.051e-02]
alpha[1]       0.2210  2.867e-02      7.707  1.288e-14       [  0.165,  0.277]
alpha[2]       0.0779  2.638e-03     29.522 1.498e-191   [7.271e-02,8.305e-02]
alpha[3]       0.0520  1.189e-03     43.706      0.000   [4.965e-02,5.431e-02]
==============================================================================

Covariance estimator: robust

In [301]:

#comparing to R's output
fGarch = importr('fGarch')
%load_ext rmagic
import rpy2.robjects as R
import pandas.rpy.common as com
from rpy2.robjects.packages import importr
stats=importr('stats')
TSA = importr('TSA')
r_df = com.convert_to_r_dataframe(pd.DataFrame(intc))
%Rpush r_df
%R intc = ts(r_df)
%R m_check = garchFit(~1+garch(3,0),data=intc,trace=F)
%R summary(m_check)

The rmagic extension is already loaded. To reload it, use:
  %reload_ext rmagic

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~1 + garch(3, 0), data = intc, trace = F) 

Mean and Variance Equation:
 data ~ 1 + garch(3, 0)
<environment: 0x9bc87c0>
 [data = intc]

Conditional Distribution:
 norm 

Coefficient(s):
      mu     omega    alpha1    alpha2    alpha3  
0.012567  0.010421  0.232889  0.075069  0.051994  

Std. Errors:
 based on Hessian 

Error Analysis:
        Estimate  Std. Error  t value Pr(>|t|)    
mu      0.012567    0.005515    2.279   0.0227 *  
omega   0.010421    0.001238    8.418   <2e-16 ***
alpha1  0.232889    0.111541    2.088   0.0368 *  
alpha2  0.075069    0.047305    1.587   0.1125    
alpha3  0.051994    0.045139    1.152   0.2494    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Log Likelihood:
 303.9607    normalized:  0.6845963 

Description:
 Fri Jul 24 12:03:49 2015 by user:  


Standardised Residuals Tests:
                                Statistic p-Value     
 Jarque-Bera Test   R    Chi^2  203.362   0           
 Shapiro-Wilk Test  R    W      0.9635971 4.898647e-09
 Ljung-Box Test     R    Q(10)  9.260782  0.5075463   
 Ljung-Box Test     R    Q(15)  19.36748  0.1975619   
 Ljung-Box Test     R    Q(20)  20.46983  0.4289059   
 Ljung-Box Test     R^2  Q(10)  7.322136  0.6947234   
 Ljung-Box Test     R^2  Q(15)  27.41532  0.02552908  
 Ljung-Box Test     R^2  Q(20)  28.15113  0.1058698   
 LM Arch Test       R    TR^2   25.23347  0.01375447  

Information Criterion Statistics:
      AIC       BIC       SIC      HQIC 
-1.346670 -1.300546 -1.346920 -1.328481

In [294]:

#the parameters of the fitted model are:
m1.std_err

Out[294]:

mu          0.000036
omega       0.000003
alpha[1]    0.028669
alpha[2]    0.002638
alpha[3]    0.001189
Name: std_err, dtype: float64

In [295]:

m2 = arch_model(intc,vol='ARCH',mean='Constant',p=1,q=0).fit()
print(m2.summary())

  NIT    FC           OBJFUN            GNORM
    1     5    -2.993856E+02     4.212538E+02
    2    15    -2.994116E+02     4.071644E+02
    3    23    -2.994913E+02     6.670403E+01
    4    29    -2.996932E+02     1.932478E+01
    5    34    -2.996935E+02     3.827238E+00
    6    39    -2.996935E+02     1.166247E-01
Optimization terminated successfully.    (Exit mode 0)
            Current function value: -299.693511326
            Iterations: 6
            Function evaluations: 39
            Gradient evaluations: 6
                      Constant Mean - ARCH Model Results                      
==============================================================================
Dep. Variable:                      y   R-squared:                      -0.000
Mean Model:             Constant Mean   Adj. R-squared:                 -0.000
Vol Model:                       ARCH   Log-Likelihood:                299.694
Distribution:                  Normal   AIC:                          -593.387
Method:            Maximum Likelihood   BIC:                          -581.100
                                        No. Observations:                  444
Date:                Fri, Jul 24 2015   Df Residuals:                      441
Time:                        11:50:06   Df Model:                            3
                                  Mean Model                                  
==============================================================================
                 coef    std err          t      P>|t|        95.0% Conf. Int.
------------------------------------------------------------------------------
mu             0.0131  3.311e-05    396.717      0.000   [1.307e-02,1.320e-02]
                               Volatility Model                               
==============================================================================
                 coef    std err          t      P>|t|        95.0% Conf. Int.
------------------------------------------------------------------------------
omega          0.0111  2.272e-06   4880.529      0.000   [1.108e-02,1.109e-02]
alpha[1]       0.3700  2.835e-02     13.051  6.299e-39       [  0.314,  0.426]
==============================================================================

Covariance estimator: robust

In [300]:

f=m2.plot(annualize='M'); #plotting the standardized residuals and conditional volatility
f;

In [ ]:

In [185]:

'''%load_ext rmagic
import rpy2.robjects as R
import pandas.rpy.common as com
from rpy2.robjects.packages import importr
stats = importr('stats')
tsa = importr('TSA')
r_df = com.convert_to_r_dataframe(pd.DataFrame(y))
%Rpush r_df
%R y_intel = ts(r_df)
%R pacf(y_intel^2)'''

Out[185]:

"%load_ext rmagic\nimport rpy2.robjects as R\nimport pandas.rpy.common as com\nfrom rpy2.robjects.packages import importr\nstats = importr('stats')\ntsa = importr('TSA')\nr_df = com.convert_to_r_dataframe(pd.DataFrame(y))\n%Rpush r_df\n%R y_intel = ts(r_df)\n%R pacf(y_intel^2)"

In [180]:

Error in withVisible({ : object 'y_intel' not found

In [ ]: