import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn
%pylab inline
from matplotlib import interactive
interactive(False)
from pylab import rcParams
Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['intc'] `%matplotlib` prevents importing * from pylab and numpy
#Defining the t-test function to be used later
def print_ttest(x,mu=0,ci=95):
#performs one sample t-test to check if population mean
from scipy import stats
print "one sample t test"
t,prob = stats.ttest_1samp(x,popmean=mu)
values = {90:1.645,95:1.960,98:2.326,99:2.576}
print "t = ",t, "p value = ",prob
print "alternative hypothesis: true mean is not equal to 0"
print "95% confidence interval"
cl = np.mean(x) - values[ci] * np.std(x)/np.sqrt(len(x))
cu = np.mean(x) + values[ci] * np.std(x)/np.sqrt(len(x))
print(cl,cu)
print "sample estimate \nmean of data"
print(np.mean(x))
#intel corporation
data_intel = np.genfromtxt('m-intcsp7309.txt',names=True,dtype=None) #Store the columns into an array
data_fx = np.genfromtxt('d-useu9910.txt',names=True,dtype=None)
from statsmodels.stats.diagnostic import acorr_ljungbox
#testing for ARCH effects in intc data
intc = np.log(data_intel['intc']+1) #scaling the returns by 1 because otherwise NANs produced
print_ttest(intc) #test to see if the mean of the returns is 0
one sample t test t = 2.37880962857 p value = 0.0177915101286 alternative hypothesis: true mean is not equal to 0 95% confidence interval (0.0025357440207990703, 0.026118855765791728) sample estimate mean of data 0.0143272998933
y = intc - np.mean(intc)
acorr_ljungbox(y**2,lags=12)
(array([ 9.41337385, 28.71057885, 45.88699648, 53.79034501, 59.83982017, 63.32033414, 69.90506298, 72.50820325, 75.67456962, 77.30833263, 77.34260767, 92.93888403]), array([ 2.15408510e-03, 5.82877130e-07, 5.99408675e-10, 5.82242687e-11, 1.31171008e-11, 9.49681345e-12, 1.54415607e-12, 1.55370752e-12, 1.16276895e-12, 1.68796711e-12, 4.80635603e-12, 1.32884693e-14]))
from statsmodels.stats.diagnostic import het_arch
het_arch(y,maxlag=12)[2:] #matches the example on pg 183
(4.9776070635960048, 9.7421404769149504e-08)
import statsmodels.graphics.tsaplots as tsaplots
rcParams['figure.figsize']=12,4
fig,ax = plt.subplots(1,2)
tsaplots.plot_acf(y**2,lags=20,ax=ax[0]);
tsaplots.plot_pacf(y**2,lags=20,ax=ax[1]);
#Daily Exchange Rate
fxeu = np.log(data_fx['rate'])
eu = np.diff(fxeu)
acorr_ljungbox(eu,lags=20)
(array([ 0.22573787, 1.31682029, 1.3548676 , 8.82478993, 12.31810785, 12.80393347, 21.132594 , 21.89194041, 22.0978777 , 23.2623724 , 23.35866345, 24.07549911, 28.55037701, 28.88587438, 29.06597768, 29.07089314, 29.34791341, 29.68705092, 30.16725933, 30.58500374]), array([ 0.6347023 , 0.51767371, 0.71614726, 0.06563125, 0.03067961, 0.04625734, 0.00357878, 0.00512003, 0.0085739 , 0.00981847, 0.01572533, 0.01986534, 0.00757728, 0.01082814, 0.01577281, 0.02346245, 0.03144965, 0.0406024 , 0.049708 , 0.06091293]))
#we can see that the X-squared test statistic is 30.585 for lag 20 with a p-value of 0.0609.
#Hence we accept that there is no correlation at the 95%ci for just the return series. Likewise,
#t.test for sample mean yields that sample mean is 0.
print_ttest(eu) #Hence we don't subtract sample mean since its 0
one sample t test t = 0.20216631618 p value = 0.839800750154 alternative hypothesis: true mean is not equal to 0 95% confidence interval (-0.00021210035327715916, 0.0002608964497447487) sample estimate mean of data 2.43980482338e-05
acorr_ljungbox(eu**2,lags=20) #significant arch effects, p-val = 3.12e-127
(array([ 2.70191354, 81.3425976 , 88.21001495, 137.28155212, 150.38872345, 204.79189158, 243.15102464, 256.95741931, 305.27189578, 326.24785765, 380.23524017, 404.41714103, 446.31426632, 468.4943095 , 509.61901733, 546.5464821 , 589.00710624, 606.60681459, 615.47946711, 661.45446938]), array([ 1.00227886e-001, 2.17109766e-018, 5.30859926e-019, 1.07783759e-028, 1.10351850e-030, 1.81143778e-041, 7.94169656e-049, 5.76669922e-051, 1.98706379e-060, 4.33263136e-064, 9.55119354e-075, 4.39105316e-079, 3.57286156e-087, 4.36200462e-091, 5.21293258e-099, 4.82964947e-106, 3.02608378e-114, 3.45200110e-117, 2.72522646e-118, 3.12175039e-127]))
There exists significant serial correlations (ARCH) effects in the return squared. Plot of ACF and PACF of squared returns confirms our tests:
fig,ax = plt.subplots(1,2)
tsaplots.plot_acf(eu**2,lags=20,ax=ax[0]);
tsaplots.plot_pacf(eu**2,lags=20,ax=ax[1]);
het_arch(eu,maxlag=20)[2:] #matches the example on pg 185 for F-stat only
(14.726785405690586, 5.1524090137047448e-48)
#model determination (intel corporation)
#looking at the PACF graph of intel, we will choose a ARCH(3) model. Hence:
from arch import arch_model
m1 = arch_model(intc,vol='ARCH',mean='Constant',p=3,q=0).fit()
print(m1.summary())
NIT FC OBJFUN GNORM 1 7 -3.025907E+02 2.405864E+02 2 19 -3.026282E+02 2.493080E+02 3 27 -3.033069E+02 2.507896E+02 4 36 -3.033561E+02 1.266148E+02 5 44 -3.034578E+02 1.889575E+02 6 52 -3.035684E+02 6.527600E+01 7 60 -3.036168E+02 4.711018E+01 8 68 -3.036503E+02 4.028544E+01 9 75 -3.036637E+02 1.199831E+01 10 82 -3.036644E+02 6.283745E+00 11 89 -3.036647E+02 5.394031E+00 12 96 -3.036647E+02 1.180318E+00 13 103 -3.036647E+02 5.090194E-02 Optimization terminated successfully. (Exit mode 0) Current function value: -303.66474971 Iterations: 13 Function evaluations: 103 Gradient evaluations: 13 Constant Mean - ARCH Model Results ============================================================================== Dep. Variable: y R-squared: -0.000 Mean Model: Constant Mean Adj. R-squared: -0.000 Vol Model: ARCH Log-Likelihood: 303.665 Distribution: Normal AIC: -597.329 Method: Maximum Likelihood BIC: -576.850 No. Observations: 444 Date: Fri, Jul 24 2015 Df Residuals: 439 Time: 11:50:05 Df Model: 5 Mean Model ============================================================================== coef std err t P>|t| 95.0% Conf. Int. ------------------------------------------------------------------------------ mu 0.0125 3.560e-05 351.921 0.000 [1.246e-02,1.260e-02] Volatility Model ============================================================================== coef std err t P>|t| 95.0% Conf. Int. ------------------------------------------------------------------------------ omega 0.0105 2.916e-06 3601.634 0.000 [1.050e-02,1.051e-02] alpha[1] 0.2210 2.867e-02 7.707 1.288e-14 [ 0.165, 0.277] alpha[2] 0.0779 2.638e-03 29.522 1.498e-191 [7.271e-02,8.305e-02] alpha[3] 0.0520 1.189e-03 43.706 0.000 [4.965e-02,5.431e-02] ============================================================================== Covariance estimator: robust
#same as above. But here, we separate out model and do it in components. The anssers from the fit are the same as abve
from arch.univariate import ConstantMean
ar = ConstantMean(intc) # Hence we have the model r_t = mu + a_t
from arch.univariate import ARCH
ar.volatility = ARCH(p=3) #var = alpha_0 + alpha_1 a_{t-1}^2 + alpha_2 a_{t-2}^2 + alpha_3 a_{t-3}^3
print(ar.fit().summary())
NIT FC OBJFUN GNORM 1 7 -3.025907E+02 2.405864E+02 2 19 -3.026282E+02 2.493080E+02 3 27 -3.033069E+02 2.507896E+02 4 36 -3.033561E+02 1.266148E+02 5 44 -3.034578E+02 1.889575E+02 6 52 -3.035684E+02 6.527600E+01 7 60 -3.036168E+02 4.711018E+01 8 68 -3.036503E+02 4.028544E+01 9 75 -3.036637E+02 1.199831E+01 10 82 -3.036644E+02 6.283745E+00 11 89 -3.036647E+02 5.394031E+00 12 96 -3.036647E+02 1.180318E+00 13 103 -3.036647E+02 5.090194E-02 Optimization terminated successfully. (Exit mode 0) Current function value: -303.66474971 Iterations: 13 Function evaluations: 103 Gradient evaluations: 13 Constant Mean - ARCH Model Results ============================================================================== Dep. Variable: y R-squared: -0.000 Mean Model: Constant Mean Adj. R-squared: -0.000 Vol Model: ARCH Log-Likelihood: 303.665 Distribution: Normal AIC: -597.329 Method: Maximum Likelihood BIC: -576.850 No. Observations: 444 Date: Fri, Jul 24 2015 Df Residuals: 439 Time: 11:50:05 Df Model: 5 Mean Model ============================================================================== coef std err t P>|t| 95.0% Conf. Int. ------------------------------------------------------------------------------ mu 0.0125 3.560e-05 351.921 0.000 [1.246e-02,1.260e-02] Volatility Model ============================================================================== coef std err t P>|t| 95.0% Conf. Int. ------------------------------------------------------------------------------ omega 0.0105 2.916e-06 3601.634 0.000 [1.050e-02,1.051e-02] alpha[1] 0.2210 2.867e-02 7.707 1.288e-14 [ 0.165, 0.277] alpha[2] 0.0779 2.638e-03 29.522 1.498e-191 [7.271e-02,8.305e-02] alpha[3] 0.0520 1.189e-03 43.706 0.000 [4.965e-02,5.431e-02] ============================================================================== Covariance estimator: robust
#comparing to R's output
fGarch = importr('fGarch')
%load_ext rmagic
import rpy2.robjects as R
import pandas.rpy.common as com
from rpy2.robjects.packages import importr
stats=importr('stats')
TSA = importr('TSA')
r_df = com.convert_to_r_dataframe(pd.DataFrame(intc))
%Rpush r_df
%R intc = ts(r_df)
%R m_check = garchFit(~1+garch(3,0),data=intc,trace=F)
%R summary(m_check)
The rmagic extension is already loaded. To reload it, use: %reload_ext rmagic
Title: GARCH Modelling Call: garchFit(formula = ~1 + garch(3, 0), data = intc, trace = F) Mean and Variance Equation: data ~ 1 + garch(3, 0) <environment: 0x9bc87c0> [data = intc] Conditional Distribution: norm Coefficient(s): mu omega alpha1 alpha2 alpha3 0.012567 0.010421 0.232889 0.075069 0.051994 Std. Errors: based on Hessian Error Analysis: Estimate Std. Error t value Pr(>|t|) mu 0.012567 0.005515 2.279 0.0227 * omega 0.010421 0.001238 8.418 <2e-16 *** alpha1 0.232889 0.111541 2.088 0.0368 * alpha2 0.075069 0.047305 1.587 0.1125 alpha3 0.051994 0.045139 1.152 0.2494 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Log Likelihood: 303.9607 normalized: 0.6845963 Description: Fri Jul 24 12:03:49 2015 by user: Standardised Residuals Tests: Statistic p-Value Jarque-Bera Test R Chi^2 203.362 0 Shapiro-Wilk Test R W 0.9635971 4.898647e-09 Ljung-Box Test R Q(10) 9.260782 0.5075463 Ljung-Box Test R Q(15) 19.36748 0.1975619 Ljung-Box Test R Q(20) 20.46983 0.4289059 Ljung-Box Test R^2 Q(10) 7.322136 0.6947234 Ljung-Box Test R^2 Q(15) 27.41532 0.02552908 Ljung-Box Test R^2 Q(20) 28.15113 0.1058698 LM Arch Test R TR^2 25.23347 0.01375447 Information Criterion Statistics: AIC BIC SIC HQIC -1.346670 -1.300546 -1.346920 -1.328481
#the parameters of the fitted model are:
m1.std_err
mu 0.000036 omega 0.000003 alpha[1] 0.028669 alpha[2] 0.002638 alpha[3] 0.001189 Name: std_err, dtype: float64
m2 = arch_model(intc,vol='ARCH',mean='Constant',p=1,q=0).fit()
print(m2.summary())
NIT FC OBJFUN GNORM 1 5 -2.993856E+02 4.212538E+02 2 15 -2.994116E+02 4.071644E+02 3 23 -2.994913E+02 6.670403E+01 4 29 -2.996932E+02 1.932478E+01 5 34 -2.996935E+02 3.827238E+00 6 39 -2.996935E+02 1.166247E-01 Optimization terminated successfully. (Exit mode 0) Current function value: -299.693511326 Iterations: 6 Function evaluations: 39 Gradient evaluations: 6 Constant Mean - ARCH Model Results ============================================================================== Dep. Variable: y R-squared: -0.000 Mean Model: Constant Mean Adj. R-squared: -0.000 Vol Model: ARCH Log-Likelihood: 299.694 Distribution: Normal AIC: -593.387 Method: Maximum Likelihood BIC: -581.100 No. Observations: 444 Date: Fri, Jul 24 2015 Df Residuals: 441 Time: 11:50:06 Df Model: 3 Mean Model ============================================================================== coef std err t P>|t| 95.0% Conf. Int. ------------------------------------------------------------------------------ mu 0.0131 3.311e-05 396.717 0.000 [1.307e-02,1.320e-02] Volatility Model ============================================================================== coef std err t P>|t| 95.0% Conf. Int. ------------------------------------------------------------------------------ omega 0.0111 2.272e-06 4880.529 0.000 [1.108e-02,1.109e-02] alpha[1] 0.3700 2.835e-02 13.051 6.299e-39 [ 0.314, 0.426] ============================================================================== Covariance estimator: robust
f=m2.plot(annualize='M'); #plotting the standardized residuals and conditional volatility
f;
'''%load_ext rmagic
import rpy2.robjects as R
import pandas.rpy.common as com
from rpy2.robjects.packages import importr
stats = importr('stats')
tsa = importr('TSA')
r_df = com.convert_to_r_dataframe(pd.DataFrame(y))
%Rpush r_df
%R y_intel = ts(r_df)
%R pacf(y_intel^2)'''
"%load_ext rmagic\nimport rpy2.robjects as R\nimport pandas.rpy.common as com\nfrom rpy2.robjects.packages import importr\nstats = importr('stats')\ntsa = importr('TSA')\nr_df = com.convert_to_r_dataframe(pd.DataFrame(y))\n%Rpush r_df\n%R y_intel = ts(r_df)\n%R pacf(y_intel^2)"
Error in withVisible({ : object 'y_intel' not found