%matplotlib inline
import matplotlib.pyplot as plt
import zipline
from zipline.api import (
add_history,
history,
symbol,
record,
order_target_percent
)
from zipline.utils.factory import load_bars_from_yahoo
from zipline import TradingAlgorithm
import pytz
import numpy as np
import pandas as pd
import statsmodels.api as sm
Where $\epsilon{_t}$ is a normally distributed series with a mean of 0. After a single time step,
So predicting the spread return at time $t$ is the same as predicting the direction of change in a stationary series of mean 0.
EOG Resources (EOG) and Pioneer Natural Resources Company (PXD) are two Texas based independent producers of oil and natural gas.
Due to their similar market risk profiles (oil price sensitivity), PXD and EOG trend with similar trajectories.
# Historical Data from Yahoo
data = load_bars_from_yahoo(
stocks=('EOG', 'PXD', 'SPY'),
start=pd.Timestamp('2011-01-01', tz='utc'),
end=pd.Timestamp.utcnow(),
indexes={}
)
prices = data.minor_xs('price')
log_returns = np.log(prices).diff()
compound_returns = (1 + log_returns).cumprod()
compound_returns.plot(figsize=(14, 6))
EOG PXD SPY
<matplotlib.axes.AxesSubplot at 0x110eeecd0>
Intuition is not enough, so it's good to test for cointegration. The most commonly used test is the Dicky-Fuller cointegration test.
def adf_test(y, X, intercept=True, window=None):
'''
Dicky-Fuller cointegration test
returns the ols obj and the adf test. e.g. (ols, adf)
'''
model = pd.ols(y=y, x=X, intercept=intercept, window=window)
return model, sm.tsa.adfuller(model.resid)
model, adf = adf_test(prices.EOG, prices.PXD, window=20)
spread = prices.EOG - model.beta['x'] * prices.PXD
t_stat = adf[0]
critical_values = adf[4]
print 'Test statistic: ', t_stat
print 'Critical Values:', critical_values
spread.plot(figsize=(14,4))
Test statistic: -11.3864409753 Critical Values: {'5%': -2.8644316780522177, '1%': -3.4368995989062348, '10%': -2.5683096665272789}
<matplotlib.axes.AxesSubplot at 0x113072a10>
# Trading Algorithm
import numpy as np
import statsmodels.api as sm
def initialize(context):
context.x = symbol('EOG')
context.y = symbol('PXD')
context.nobs = 20
add_history(context.nobs, '1d', 'price')
context.tick = 0
def handle_data(context, data):
# Allow the history frame to accumulate data
context.tick += 1
if context.tick < context.nobs:
return
prices = np.log(history(context.nobs, '1d', 'price'))
y = prices[context.y]
X = prices[context.x]
model = sm.OLS(y, sm.add_constant(X)).fit()
hedge_ratio = model.params[context.x]
spread = y - hedge_ratio * X
zscore = (spread.iloc[-1] - spread.mean()) / spread.std()
record(
hedge_ratio=hedge_ratio,
spread=spread.iloc[-1],
zscore=zscore
)
# Get direction of the trades, and place orders
# Re-hedge to a dollar-neutral position daily in the direction of anticipated reversion.
# More traditionally an entry is made when the magnitude of the z-score is large.
side = np.copysign(0.5, zscore)
order_target_percent(context.y, -side)
order_target_percent(context.x, side * model.params[context.x])
algo = TradingAlgorithm(
initialize=initialize,
handle_data=handle_data
)
perf = algo.run(data)
perf.index = perf.index.tz_localize(pytz.utc)
[2015-01-27 04:37:14.000959] INFO: Performance: Simulated 1022 trading days out of 1022. [2015-01-27 04:37:14.001827] INFO: Performance: first open: 2011-01-03 14:31:00+00:00 [2015-01-27 04:37:14.002566] INFO: Performance: last close: 2015-01-26 21:00:00+00:00
perf.spread.plot(figsize=(14,7))
<matplotlib.axes.AxesSubplot at 0x115064390>
compound_pair_returns = (1 + perf.returns).cumprod()
results = compound_returns.to_dict()
results['EOG ~ PXD'] = compound_pair_returns
results = pd.DataFrame(results).ffill()
results[['PXD', 'EOG', 'EOG ~ PXD']].plot(figsize=(14,7))
results[['SPY', 'EOG ~ PXD']].plot(figsize=(14,7))
<matplotlib.axes.AxesSubplot at 0x102990310>
perf[['long_exposure', 'short_exposure', 'net_leverage']].plot(figsize=(14,7))
<matplotlib.axes.AxesSubplot at 0x119ba6990>
risk_report = pd.Panel({
period: pd.DataFrame(algo.risk_report[period])
for period in algo.risk_report
})
print risk_report.minor_axis
risk_report.minor_xs('sharpe')
Index([u'algo_volatility', u'algorithm_period_return', u'alpha', u'benchmark_period_return', u'benchmark_volatility', u'beta', u'excess_return', u'information', u'max_drawdown', u'period_label', u'sharpe', u'sortino', u'trading_days', u'treasury_period_return'], dtype='object')
one_month | six_month | three_month | twelve_month | |
---|---|---|---|---|
0 | 0 | 0.399064 | 0.1751205 | 1.891121 |
1 | -0.2303534 | 0.06056268 | 0.4051947 | 2.081803 |
2 | 0.6720155 | 1.415842 | 1.463952 | 2.175454 |
3 | 0.4959671 | 1.185801 | 0.3694682 | 1.768673 |
4 | 1.17063 | 1.540741 | -0.2464215 | 1.233226 |
5 | -1.118294 | 1.678988 | 0.6653211 | 1.02713 |
6 | -0.5931036 | 1.99515 | 1.174621 | 1.862708 |
7 | 2.033294 | 2.716927 | 2.285227 | 2.53893 |
8 | -0.08176557 | 1.479004 | 1.767422 | 2.25566 |
9 | 1.268055 | 1.181162 | 1.543545 | 2.183615 |
10 | 1.42794 | 0.08121069 | 1.242962 | 1.824025 |
11 | -0.444197 | -0.4190961 | 0.1417493 | 1.130034 |
12 | 0.9053201 | 0.4090032 | 0.04302299 | 1.734644 |
13 | -0.08242315 | 0.5739339 | -0.9432394 | 1.176115 |
14 | -0.4266107 | 1.508455 | -0.6499409 | 1.495482 |
15 | -1.108201 | 1.763178 | 0.5388966 | 1.851867 |
16 | 0.6875907 | 2.659631 | 2.269167 | 2.180863 |
17 | 1.635488 | 2.132488 | 3.292805 | 1.773212 |
18 | 1.369677 | 2.053531 | 1.999118 | 1.333311 |
19 | 2.325417 | 1.059135 | 1.363741 | 1.103552 |
20 | -0.2270935 | 0.5245296 | -0.08113302 | 1.096163 |
21 | 0.395192 | 0.7399767 | 0.8561812 | 1.047849 |
22 | -0.4168844 | 0.2779619 | 0.04189832 | 1.139825 |
23 | 1.6534 | 0.3620893 | 0.8247335 | 1.425947 |
24 | -1.046189 | 0.008985382 | 0.1675946 | 0.9222007 |
25 | 0.7705466 | 0.4913982 | 0.349238 | 1.10515 |
26 | 0.3631654 | 0.9482038 | -0.2755495 | 1.136325 |
27 | -0.9101462 | 0.7065436 | -0.1099011 | 0.9122471 |
28 | -0.05254148 | 1.155817 | 0.3470793 | 1.331254 |
29 | 0.3586327 | 1.499308 | 1.460123 | 1.507445 |
30 | 0.6268109 | 1.255211 | 1.179911 | 1.642905 |
31 | 1.367809 | 0.9888387 | 1.187544 | 1.493357 |
32 | -0.7228139 | 0.5936103 | 0.5557084 | 0.5932982 |
33 | 0.5704613 | 0.5453151 | 0.5665684 | 0.8791801 |
34 | 0.5775891 | 0.6057297 | 0.02198625 | 0.8407119 |
35 | -0.5182931 | 0.481271 | 0.2531073 | 0.4785539 |
36 | -0.1662931 | 0.938908 | 0.164244 | 0.7670049 |
37 | 0.9211245 | 1.052431 | 0.7901755 | 0.5383184 |
38 | -0.7683404 | 0.203883 | 0.4149857 | NaN |
39 | 0.7212907 | 0.6930835 | 1.123166 | NaN |
40 | 0.2959122 | 0.57153 | 0.6666565 | NaN |
41 | 1.000188 | 0.2394808 | -0.2446549 | NaN |
42 | -0.2450061 | 0.2787647 | -0.4324159 | NaN |
43 | -1.004149 | -6.329155e-05 | 0.1321612 | NaN |
44 | 0.807929 | NaN | 0.3495203 | NaN |
45 | 0.7595933 | NaN | 0.4474621 | NaN |
46 | 0.0335916 | NaN | -0.0445592 | NaN |
47 | 0.6829041 | NaN | NaN | NaN |
48 | -1.275649 | NaN | NaN | NaN |
import statsmodels.formula.api as smf
model = smf.ols(formula="EOG ~ PXD", data=np.log(prices)).fit()
model.summary()
Dep. Variable: | EOG | R-squared: | 0.941 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.941 |
Method: | Least Squares | F-statistic: | 1.631e+04 |
Date: | Mon, 26 Jan 2015 | Prob (F-statistic): | 0.00 |
Time: | 20:37:21 | Log-Likelihood: | 1238.5 |
No. Observations: | 1022 | AIC: | -2473. |
Df Residuals: | 1020 | BIC: | -2463. |
Df Model: | 1 |
coef | std err | t | P>|t| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
Intercept | 0.0261 | 0.033 | 0.799 | 0.424 | -0.038 0.090 |
PXD | 0.8583 | 0.007 | 127.723 | 0.000 | 0.845 0.872 |
Omnibus: | 71.465 | Durbin-Watson: | 0.034 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 94.314 |
Skew: | 0.599 | Prob(JB): | 3.31e-21 |
Kurtosis: | 3.883 | Cond. No. | 73.2 |