"""Exploratory analysis of intraday minute bars.

Loads per-symbol minute-bar CSV files, looks at volume by time of day,
relates overnight / last-10-minute returns to the following morning's
return with rolling OLS, and sketches a few simple reversal strategies.

The script targets the legacy pandas API (``pd.Panel``, ``pd.ols``, ``.ix``
and ``resample(how=...)``).  Bare expressions such as ``data`` are display
statements (presumably left over from an interactive session); they have no
effect when the file is run as a script.
"""
from datetime import datetime, time
from os.path import join as pjoin
import os

import matplotlib as mpl
import numpy as np
import pandas as pd

mpl.rc('figure', figsize=(10, 6))

stocks = ['AAPL', 'AMZN', 'COKE', 'GOOG', 'IBM', 'INTC', 'PEP', 'TIVO',
          'TRCR', 'WMT']
# Overrides the list above: only AAPL is loaded and analysed below.
stocks = ['AAPL']

bar_directory = os.path.expanduser('~/Downloads/minutebars')


def load_bars(stocks):
    """Load minute bars for each symbol into a single ``pd.Panel``.

    Parameters
    ----------
    stocks : iterable of str
        Symbols to load.  Each one is read from
        ``<bar_directory>/<symbol>.csv``.

    Returns
    -------
    pd.Panel
        One item per symbol.  Each frame is indexed by the parsed ``dt``
        column of its CSV; the remaining CSV columns become the minor axis.
    """
    bars = {}
    for stock in stocks:
        print(stock)  # progress indicator
        df = pd.read_csv(pjoin(bar_directory, '%s.csv' % stock))
        # 'dt' is removed from the columns and becomes the datetime index.
        df.index = pd.to_datetime(df.pop('dt'))
        bars[stock] = df
    return pd.Panel(bars)


data = load_bars(stocks)

# data.major_axis = data.major_axis.tz_localize('utc').tz_convert('US/Eastern')
# NOTE(review): the fixed -5h shift looks like a stand-in for the timezone
# conversion commented out above; a constant offset ignores daylight saving
# time -- confirm this is acceptable for the date range being analysed.
data.major_axis = data.major_axis.shift(1, freq='-5h')

print(data)

close_px = data.minor_xs('close_price')

data

# Number of bars per hour of day (sanity check on the time shift above).
pd.value_counts(data.major_axis.hour, sort=False)

data.AAPL.at_time(time(16, 0)).to_period('D').plot()

data.ix['AAPL', '2013-01-07 00:00':'2013-01-07 23:59', :].tail()

data

close_px.AAPL.at_time(time(10, 0))

df = close_px.resample('T')
df.resample('D', how=['count', 'mean', 'std']).head()

# Per-field frames (rows: timestamps, columns: symbols).
close_px = data.minor_xs('close_price')
open_px = data.minor_xs('open_price')
volume = data.minor_xs('volume')

close_px.ix['2013-01-07'].head()

volume.between_time(time(9, 31), time(10, 0))

# Volume by time of day
morning = volume.between_time(time(9, 31), time(12, 30))
afternoon = volume.between_time(time(12, 31), time(15, 50))
last_10 = volume.between_time(time(15, 51), time(16))

volume_hod = volume.ix['2013-01-07'].resample('H', label='right',
                                              closed='right')
# Relabel the hourly index with formatted times for the bar chart.  The
# strftime result is used as-is: it contains no %-conversion specifiers, so
# applying a further ``% x`` to it raises TypeError.
volume_hod.rename(lambda x: x.strftime('%H:%M %p')).plot(kind='bar', rot=0)

morning.resample('D', how='mean')


def _agg(df):
    """Return the AAPL column of the daily mean of ``df``."""
    return df.resample('D', how='mean').AAPL


volume_tod = pd.DataFrame({
    'morning': _agg(morning),
    'afternoon': _agg(afternoon),
    'last_10': _agg(last_10)
})

volume_tod.resample('M', how='mean').plot()


def _normalize(ts):
    """Replace the index of ``ts`` with its normalized (midnight) dates.

    The index is reassigned in place on the object passed in, and that same
    object is returned for convenience.
    """
    # inplace
    ts.index = ts.index.normalize()
    return ts


# Compute overnight returns
bar_open = time(9, 31)
bar_10am = time(10, 0)
bar_last10 = time(15, 51)
bar_close = time(16)

# One row per day after normalisation, so the frames below align by date.
price_open = _normalize(open_px.at_time(bar_open))
price_10am = _normalize(close_px.at_time(bar_10am))
price_last10 = _normalize(open_px.at_time(bar_last10))
price_close = _normalize(close_px.at_time(bar_close))

price_open[-50:]

# Previous row's close -> this row's open.
overnight_returns = price_open / price_close.shift(1) - 1
# Open -> 10:00 bar close.
morning_returns = price_10am / price_open - 1
# 15:51 bar open -> 16:00 bar close.
last10_returns = price_close / price_last10 - 1

overnight_returns.corrwith(morning_returns)

pd.ols(y=morning_returns.AAPL, x=overnight_returns)

model = pd.ols(y=morning_returns.AAPL, x=overnight_returns,
               window=60, min_periods=40)
model.beta['AAPL'].plot()

X = pd.DataFrame({'overnight': overnight_returns.AAPL,
                  'last10': last10_returns.shift(1).AAPL})
model = pd.ols(y=morning_returns.AAPL, x=X, window=60, min_periods=40)
model.beta.plot()


def sharpe(rets, annualizer=250):
    """Return the annualized ratio of mean to standard deviation.

    Parameters
    ----------
    rets : object exposing ``mean()`` and ``std()``
        Per-period returns or P&L.
    annualizer : number, default 250
        Periods per year; the ratio is scaled by its square root.
    """
    return rets.mean() / rets.std() * np.sqrt(annualizer)


# Overnight reversal?
# Trade against the sign of the previous row's last-10-minute return, with a
# notional of 100 shares valued at the previous row's close.
buysell = -np.sign(last10_returns.shift(1))
position = price_close.shift(1) * 100 * buysell

# Sell at open
pl = (position * overnight_returns).sum(1)
pl.cumsum().plot()

pl.groupby(pl.index.year).agg(sharpe)

# Reversal to 10AM?
buysell = -np.sign(last10_returns.shift(1))
position = price_close.shift(1) * 100 * buysell

# Hold through the 10:00 bar: P&L uses the overnight plus the morning return
# (the two returns are summed, not compounded).
pl = (position * (overnight_returns + morning_returns)).sum(1)
pl.cumsum().plot()

# Days with an absolute P&L above 1000.
pl[np.abs(pl) > 1000]

# Use our model to predict
X = pd.DataFrame({'overnight': overnight_returns.AAPL,
                  'last10': last10_returns.shift(1).AAPL})
model = pd.ols(y=morning_returns.AAPL, x=X, window=120, min_periods=40)
pred = model.y_predict

THRESHOLD = 0.001
# +1 / -1 in the direction of the prediction when it exceeds the threshold
# in magnitude, otherwise 0 (no trade).
signal = np.where(np.abs(pred) > THRESHOLD, np.sign(pred), 0)

CASH = 50000
# Whole number of shares affordable at the open with CASH.
shares = CASH // price_open.AAPL
position = signal * shares * price_open.AAPL

# Despite the name, this is P&L in currency units: position value times the
# morning return.
returns = position * morning_returns.AAPL
returns.cumsum().plot()

returns.groupby(returns.index.year).agg(sharpe)