from datetime import datetime, time
from os.path import join as pjoin
import pandas as pd
import numpy as np
import os
import matplotlib as mpl
mpl.rc('figure', figsize=(10, 6))
stocks = ['AAPL', 'AMZN', 'COKE', 'GOOG', 'IBM',
'INTC', 'PEP', 'TIVO', 'TRCR', 'WMT']
stocks = ['AAPL']
bar_directory = os.path.expanduser('~/Downloads/minutebars')
def load_bars(stocks):
bars = {}
for stock in stocks:
print stock
df = pd.read_csv('%s/%s.csv' % (bar_directory, stock))
df.index = pd.to_datetime(df.pop('dt'))
bars[stock] = df
return pd.Panel(bars)
data = load_bars(stocks)
# data.major_axis = data.major_axis.tz_localize('utc').tz_convert('US/Eastern')
AAPL
data.major_axis = data.major_axis.shift(1, freq='-5h')
print data
close_px = data.minor_xs('close_price')
<class 'pandas.core.panel.Panel'> Dimensions: 1 (items) x 489597 (major_axis) x 5 (minor_axis) Items axis: AAPL to AAPL Major_axis axis: 2008-01-07 09:31:00 to 2013-01-07 16:00:00 Minor_axis axis: volume to open_price
data
<class 'pandas.core.panel.Panel'> Dimensions: 1 (items) x 489597 (major_axis) x 5 (minor_axis) Items axis: AAPL to AAPL Major_axis axis: 2008-01-07 09:31:00 to 2013-01-07 16:00:00 Minor_axis axis: volume to open_price
pd.value_counts(data.major_axis.hour, sort=False)
9 36540 10 75599 11 75597 12 75600 13 75012 14 75001 15 74998 16 1250
data.AAPL.at_time(time(16, 0)).to_period('D').plot()
<matplotlib.axes.AxesSubplot at 0x112c8a810>
data.ix['AAPL', '2013-01-07 00:00':'2013-01-07 23:59', :].tail()
volume | high | low | close_price | open_price | |
---|---|---|---|---|---|
2013-01-07 15:56:00 | 53724 | 524.72 | 524.13 | 524.143 | 524.47 |
2013-01-07 15:57:00 | 68580 | 524.30 | 523.69 | 523.750 | 524.14 |
2013-01-07 15:58:00 | 62140 | 524.04 | 523.66 | 524.040 | 523.76 |
2013-01-07 15:59:00 | 94058 | 524.09 | 523.60 | 523.780 | 524.04 |
2013-01-07 16:00:00 | 149290 | 524.19 | 523.74 | 523.970 | 523.77 |
data
<class 'pandas.core.panel.Panel'> Dimensions: 1 (items) x 489597 (major_axis) x 5 (minor_axis) Items axis: AAPL to AAPL Major_axis axis: 2008-01-07 09:31:00 to 2013-01-07 16:00:00 Minor_axis axis: volume to open_price
close_px.AAPL.at_time(time(10, 0))
2008-01-07 10:00:00 182.640 2008-01-08 10:00:00 179.400 2008-01-09 10:00:00 172.800 2008-01-10 10:00:00 176.190 2008-01-11 10:00:00 176.430 2008-01-14 10:00:00 176.530 2008-01-15 10:00:00 177.720 2008-01-16 10:00:00 164.760 2008-01-17 10:00:00 163.290 2008-01-18 10:00:00 163.750 2008-01-22 10:00:00 155.280 2008-01-23 10:00:00 137.610 2008-01-24 10:00:00 140.000 2008-01-25 10:00:00 137.340 2008-01-28 10:00:00 129.585 ... 2012-12-14 10:00:00 513.175 2012-12-17 10:00:00 506.150 2012-12-18 10:00:00 521.900 2012-12-19 10:00:00 529.580 2012-12-20 10:00:00 523.980 2012-12-21 10:00:00 512.350 2012-12-24 10:00:00 522.889 2012-12-26 10:00:00 518.330 2012-12-27 10:00:00 511.410 2012-12-28 10:00:00 510.760 2012-12-31 10:00:00 523.930 2013-01-02 10:00:00 552.000 2013-01-03 10:00:00 548.250 2013-01-04 10:00:00 535.890 2013-01-07 10:00:00 516.350 Name: AAPL, Length: 1260
df = close_px.resample('T')
df.resample('D', how=['count', 'mean', 'std']).head()
AAPL | |||
---|---|---|---|
count | mean | std | |
2008-01-07 | 390 | 177.267823 | 2.651974 |
2008-01-08 | 390 | 179.307018 | 2.224237 |
2008-01-09 | 390 | 172.023926 | 2.016931 |
2008-01-10 | 390 | 177.632200 | 1.256401 |
2008-01-11 | 390 | 173.757151 | 2.003267 |
close_px = data.minor_xs('close_price')
open_px = data.minor_xs('open_price')
volume = data.minor_xs('volume')
close_px.ix['2013-01-07'].head()
AAPL | |
---|---|
2013-01-07 09:31:00 | 520.680 |
2013-01-07 09:32:00 | 520.710 |
2013-01-07 09:33:00 | 522.000 |
2013-01-07 09:34:00 | 520.238 |
2013-01-07 09:35:00 | 522.340 |
volume.between_time(time(9, 31), time(10, 0))
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 37800 entries, 2008-01-07 09:31:00 to 2013-01-07 10:00:00 Data columns: AAPL 37800 non-null values dtypes: float64(1)
# Volume by time of day
morning = volume.between_time(time(9, 31), time(12, 30))
afternoon = volume.between_time(time(12, 31), time(15, 50))
last_10 = volume.between_time(time(15, 51), time(16))
volume_hod = volume.ix['2013-01-07'].resample('H', label='right', closed='right')
volume_hod.rename(lambda x: x.strftime('%H:%M %p') % x).plot(kind='bar', rot=0)
<matplotlib.axes.AxesSubplot at 0x1124a1990>
morning.resample('D', how='mean')
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 1828 entries, 2008-01-07 00:00:00 to 2013-01-07 00:00:00 Freq: D Data columns: AAPL 1260 non-null values dtypes: float64(1)
def _agg(df):
return df.resample('D', how='mean').AAPL
volume_tod = pd.DataFrame({
'morning': _agg(morning),
'afternoon': _agg(afternoon),
'last_10': _agg(last_10)
})
volume_tod.resample('M', how='mean').plot()
<matplotlib.axes.AxesSubplot at 0x112730050>
def _normalize(ts):
# inplace
ts.index = ts.index.normalize()
return ts
# Compute overnight returns
bar_open = time(9, 31)
bar_10am = time(10, 0)
bar_last10 = time(15, 51)
bar_close = time(16)
price_open = _normalize(open_px.at_time(bar_open))
price_10am = _normalize(close_px.at_time(bar_10am))
price_last10 = _normalize(open_px.at_time(bar_last10))
price_close = _normalize(close_px.at_time(bar_close))
price_open[-50:]
AAPL | |
---|---|
2012-10-23 | 631.000 |
2012-10-24 | 621.440 |
2012-10-25 | 620.000 |
2012-10-26 | 609.430 |
2012-10-31 | 594.880 |
2012-11-01 | 598.220 |
2012-11-02 | 595.890 |
2012-11-05 | 583.515 |
2012-11-06 | 590.230 |
2012-11-07 | 573.835 |
2012-11-08 | 560.630 |
2012-11-09 | 540.420 |
2012-11-12 | 554.150 |
2012-11-13 | 538.910 |
2012-11-14 | 545.500 |
2012-11-15 | 537.530 |
2012-11-16 | 525.200 |
2012-11-19 | 540.710 |
2012-11-20 | 571.910 |
2012-11-21 | 564.250 |
2012-11-23 | 567.170 |
2012-11-26 | 575.900 |
2012-11-27 | 589.550 |
2012-11-28 | 577.270 |
2012-11-29 | 590.215 |
2012-11-30 | 586.790 |
2012-12-03 | 593.650 |
2012-12-04 | 581.800 |
2012-12-05 | 568.910 |
2012-12-06 | 528.940 |
2012-12-07 | 553.400 |
2012-12-10 | 525.000 |
2012-12-11 | 539.770 |
2012-12-12 | 547.770 |
2012-12-13 | 531.150 |
2012-12-14 | 514.750 |
2012-12-17 | 508.930 |
2012-12-18 | 525.000 |
2012-12-19 | 531.470 |
2012-12-20 | 530.000 |
2012-12-21 | 512.470 |
2012-12-24 | 520.350 |
2012-12-26 | 519.000 |
2012-12-27 | 513.540 |
2012-12-28 | 510.290 |
2012-12-31 | 510.530 |
2013-01-02 | 553.820 |
2013-01-03 | 547.880 |
2013-01-04 | 536.965 |
2013-01-07 | 522.000 |
overnight_returns = price_open / price_close.shift(1) - 1
morning_returns = price_10am / price_open - 1
last10_returns = price_close / price_last10 - 1
overnight_returns.corrwith(morning_returns)
AAPL -0.20188
pd.ols(y=morning_returns.AAPL,
x=overnight_returns)
-------------------------Summary of Regression Analysis------------------------- Formula: Y ~ <AAPL> + <intercept> Number of Observations: 1249 Number of Degrees of Freedom: 2 R-squared: 0.0408 Adj R-squared: 0.0400 Rmse: 0.0093 F-stat (1, 1247): 52.9817, p-value: 0.0000 Degrees of Freedom: model 1, resid 1247 -----------------------Summary of Estimated Coefficients------------------------ Variable Coef Std Err t-stat p-value CI 2.5% CI 97.5% -------------------------------------------------------------------------------- AAPL -0.1341 0.0184 -7.28 0.0000 -0.1702 -0.0980 intercept 0.0003 0.0003 1.14 0.2545 -0.0002 0.0008 ---------------------------------End of Summary---------------------------------
model = pd.ols(y=morning_returns.AAPL, x=overnight_returns, window=60,
min_periods=40)
model.beta['AAPL'].plot()
<matplotlib.axes.AxesSubplot at 0x11229c850>
X = pd.DataFrame({'overnight': overnight_returns.AAPL,
'last10': last10_returns.shift(1).AAPL})
model = pd.ols(y=morning_returns.AAPL, x=X, window=60,
min_periods=40)
model.beta.plot()
<matplotlib.axes.AxesSubplot at 0x11506c750>
Some ideas
def sharpe(rets, annualizer=250):
return rets.mean() / rets.std() * np.sqrt(annualizer)
# Overnight reversal?
buysell = -np.sign(last10_returns.shift(1))
position = price_close.shift(1) * 100 * buysell
# Sell at open
pl = (position * overnight_returns).sum(1)
pl.cumsum().plot()
<matplotlib.axes.AxesSubplot at 0x112671850>
pl.groupby(pl.index.year).agg(sharpe)
2008 1.898333 2009 0.515809 2010 -0.020921 2011 -0.142998 2012 0.240910 2013 3.598240
# Reversal to 10AM?
buysell = -np.sign(last10_returns.shift(1))
position = price_close.shift(1) * 100 * buysell
# Sell at open
pl = (position * (overnight_returns + morning_returns)).sum(1)
pl.cumsum().plot()
<matplotlib.axes.AxesSubplot at 0x112623450>
pl[np.abs(pl) > 1000]
2008-01-23 -1966.780894 2008-07-22 1445.207517 2008-09-29 -1698.251881 2009-12-28 -1057.093661 2010-04-21 1510.990690 2010-05-10 -1460.904416 2010-10-19 1022.378378 2011-01-18 1354.686453 2011-03-15 -1216.277930 2011-04-21 1006.586405 2011-07-20 -1268.663637 2011-08-18 1159.482931 2011-10-10 1198.511786 2011-10-19 -2199.110178 2012-01-25 2890.325227 2012-03-06 1187.964022 2012-03-14 -1311.480754 2012-04-03 -1014.870078 2012-04-16 -1716.270154 2012-04-18 -1012.362462 2012-04-25 5677.879381 2012-04-30 1381.428571 2012-07-05 -1243.921923 2012-07-23 -1242.712651 2012-07-25 2730.147617 2012-07-30 -1156.292425 2012-07-31 -1171.412632 2012-08-27 1126.066222 2012-09-14 1017.735059 2012-10-24 -1137.612127 2012-10-31 1253.180053 2012-11-07 -1371.265917 2012-11-19 -2143.259862 2012-11-26 -1738.918180 2012-12-05 1687.942891 2012-12-11 1262.571401 2012-12-14 1647.043711 2012-12-31 -1447.112804 2013-01-02 1908.848579 2013-01-07 -1071.422701
# Use our model to predict
X = pd.DataFrame({'overnight': overnight_returns.AAPL,
'last10': last10_returns.shift(1).AAPL})
model = pd.ols(y=morning_returns.AAPL, x=X, window=120,
min_periods=40)
pred = model.y_predict
THRESHOLD = 0.001
signal = np.where(np.abs(pred) > THRESHOLD, np.sign(pred), 0)
CASH = 50000
shares = CASH // price_open.AAPL
position = signal * shares * price_open.AAPL
returns = position * morning_returns.AAPL
returns.cumsum().plot()
<matplotlib.axes.AxesSubplot at 0x112291f50>
returns.groupby(returns.index.year).agg(sharpe)
2008 3.157617 2009 0.609950 2010 0.178774 2011 2.009353 2012 0.387223 2013 12.408097