Backtesting with Zipline:

  • Zipline is the engine that powers both backtesting and live trading on Quantopian.
  • The open source portion of Zipline supports nearly all the features that are available on the Quantopian platform.
    • The major exceptions are features that depend on proprietary data.
  • Zipline supplies functions similar to pandas.io.data.get_data_yahoo to create datasources in the format that it expects. (Main differences are slightly different key names, and dates are in UTC.)
In [1]:
import pandas as pd
import zipline
from zipline import TradingAlgorithm
from zipline.data.loader import load_bars_from_yahoo
In [12]:
# Uncomment and run these lines to create a cache file of benchmarks 
# and historical treasury rates in your ~/.zipline directory.
# You should only have to do this once unless you delete the cache.

# zipline.data.loader.dump_treasury_curves()
# zipline.data.loader.dump_benchmarks('SPY')
In [3]:
# Five-year backtest window, expressed as timezone-aware UTC timestamps.
start, end = (
    pd.Timestamp(day, tz='UTC') for day in ('2008-01-01', '2013-01-01')
)

# Download daily bars for both tickers over that window; the trailing bare
# expression makes the notebook display a summary of the result.
input_data = load_bars_from_yahoo(stocks=['AAPL', 'MSFT'], start=start, end=end)
input_data
AAPL
MSFT

Out[3]:
<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 1259 (major_axis) x 6 (minor_axis)
Items axis: AAPL to MSFT
Major_axis axis: 2008-01-02 00:00:00+00:00 to 2012-12-31 00:00:00+00:00
Minor_axis axis: open to price
In [4]:
# Select the 'price' field for every ticker in the panel and plot it.
input_data.loc[:,:,'price'].plot()
Out[4]:
<matplotlib.axes.AxesSubplot at 0x1074ba110>
In [20]:
# Select the 'volume' field for every ticker in the panel, keep it around
# for the resampling example in the next cell, and plot it.
volumes = input_data.loc[:,:,'volume']
volumes.plot()
Out[20]:
<matplotlib.axes.AxesSubplot at 0x107526350>
In [24]:
# Quarterly volumes.  Resampling is awesome!
# Sum the daily volumes into quarterly buckets and draw them as stacked bars.
# NOTE(review): `how='sum'` is the older pandas resample spelling; newer
# pandas releases write this as `.resample('1Q').sum()` -- confirm against
# the pandas version this notebook is run with before changing it.
volumes.resample('1Q', how='sum').plot(kind='bar', stacked=True)
Out[24]:
<matplotlib.axes.AxesSubplot at 0x1080b4ed0>
In [5]:
# A very simple example algo, using the TradingAlgorithm subclass interface.
class BuyAndHoldAlgorithm(TradingAlgorithm):
    """Minimal example strategy: place one round of orders, then hold."""

    def initialize(self):
        # Flipped to True once the one-time orders have been submitted.
        self.has_ordered = False

    def handle_data(self, data):
        """
        Order 100 shares of each stock found in ``data`` the first time
        this is called; every later call is a no-op.
        """
        if self.has_ordered:
            return

        for stock in data:
            self.order(stock, 100)
        self.has_ordered = True
In [6]:
# Instantiate the algorithm and run it over the bars loaded earlier;
# the value returned by run() is kept in `results` for the cells below.
my_algo = BuyAndHoldAlgorithm()
results = my_algo.run(input_data)
[2014-08-01 18:56] INFO: Performance: Simulated 1259 trading days out of 1259.
[2014-08-01 18:56] INFO: Performance: first open: 2008-01-02 14:31:00+00:00
[2014-08-01 18:56] INFO: Performance: last close: 2012-12-31 21:00:00+00:00

In [13]:
# `results` has very fine-grained info on what your algorithm did;
# listing its columns shows which values are available.
# These are the raw values that we used to create our displays
# on Quantopian.
results.columns
Out[13]:
Index([u'capital_used', u'ending_cash', u'ending_value', u'orders', u'period_close', u'period_open', u'pnl', u'portfolio_value', u'positions', u'returns', u'starting_cash', u'starting_value', u'transactions'], dtype='object')
In [16]:
# My algo's positions, on days 0 and 1.
# NOTE(review): day 0 comes back empty in the output below; presumably the
# orders submitted on the first bar are only filled on the next one -- confirm.
list(results.positions[[0,1]])
Out[16]:
[[],
 [{'amount': 100,
   'cost_basis': 26.6500000000006,
   'last_sale_price': 26.62,
   'sid': 'AAPL'},
  {'amount': 100,
   'cost_basis': 30.160000000012246,
   'last_sale_price': 30.13,
   'sid': 'MSFT'}]]
In [9]:
# Plot the `portfolio_value` column of the backtest results.
results.portfolio_value.plot()
Out[9]:
<matplotlib.axes.AxesSubplot at 0x10b9744d0>
In [25]:
%%zipline --symbols=AAPL --start=2009-01-01 --end=2013-01-01 -o outvar
# This is an IPython cell magic.  It's essentially a way to pass the contents
# of a cell into another program.  The %%zipline cell magic runs a simulation using
# the initialize and handle_data functions defined in the cell, binding its output
# to the name passed to the -o flag.

# Unlike on Quantopian, you need to import magic functions into your namespace.
from zipline.api import (
    add_history,
    history,
    order_target,
    record,
    symbol,
)

def initialize(context):
    """Register the price histories and reset the bar counter on ``context``."""
    # Register 2 histories that track daily prices,
    # one with a 20 day window and one with an 80 day window
    add_history(20, '1d', 'price')
    add_history(80, '1d', 'price')

    # Counts how many times handle_data has been called so far.
    context.i = 0


def handle_data(context, data):
    """
    Dual moving average crossover on AAPL.

    Targets a 100 share position while the 20 day average price is above
    the 80 day average, targets a flat position while it is below, and
    records the current price and both averages on every bar that is
    not skipped.
    """
    # Skip bars until 80 have been seen, so that the longest
    # (80 day) window is full
    context.i += 1
    if context.i < 80:
        return

    # Compute averages
    # history() has to be called with the same params that were
    # registered with add_history() and returns a pandas dataframe with
    # a DatetimeIndex and columns given by the securities in the backtest.
    # Only the AAPL column is used below, so pick it out once here.
    short_mavg = history(20, '1d', 'price').mean()['AAPL']
    long_mavg = history(80, '1d', 'price').mean()['AAPL']

    # Trading logic
    if short_mavg > long_mavg:
        # order_target orders as many shares as needed to
        # achieve the desired number of shares.
        order_target('AAPL', 100)
    elif short_mavg < long_mavg:
        order_target('AAPL', 0)
    # When the two averages are equal, the current position is left as is.

    # Save values for later inspection
    record(AAPL=data['AAPL'].price,
           short_mavg=short_mavg,
           long_mavg=long_mavg)
[2014-08-01 19:08] INFO: Performance: Simulated 1006 trading days out of 1006.
[2014-08-01 19:08] INFO: Performance: first open: 2009-01-02 14:31:00+00:00
[2014-08-01 19:08] INFO: Performance: last close: 2012-12-31 21:00:00+00:00

AAPL

In [26]:
import matplotlib.pyplot as plt

# Keep only the rows that have no missing values in any column
# (presumably this drops the early bars on which nothing was recorded).
output = outvar.dropna(how='any')

fig = plt.figure()
# Two stacked panels.  The subplot position is given as an integer
# (211 = 2 rows, 1 column, first panel) rather than as a string, because
# newer matplotlib releases reject string position specifications.
aapl_subplot = fig.add_subplot(211, xlabel='Date', ylabel='Price')
position_value_subplot = fig.add_subplot(212, xlabel='Date', ylabel='Value')

# Top panel: the recorded AAPL price; bottom panel: portfolio value.
output['AAPL'].plot(ax=aapl_subplot)
output['portfolio_value'].plot(ax=position_value_subplot)

# Resize the figure created above (it is also the current figure).
fig.set_size_inches(14, 10)