A numerical integration recipe as discussed here.

In [103]:
from scipy import integrate
import pandas as pd
import numpy as np

def integrate_method(self, how='trapz', unit='s'):
'''Numerically integrate the time series.

@param how: the method to use (trapz by default)
@return

Available methods:
* trapz - trapezoidal
* cumtrapz - cumulative trapezoidal
* simps - Simpson's rule
* romb - Romberger's rule

See http://docs.scipy.org/doc/scipy/reference/integrate.html for the method details.
or the source code
'''
available_rules = set(['trapz', 'cumtrapz', 'simps', 'romb'])
if how in available_rules:
rule = integrate.__getattribute__(how)
else:
print('Unsupported integration rule: %s' % (how))
print('Expecting one of these sample-based integration rules: %s' % (str(list(available_rules))))
raise AttributeError

result = rule(self.values, self.index.astype(np.int64) / 10**9)
#result = rule(self.values)
return result

pd.TimeSeries.integrate = integrate_method

# Example of usage¶

Sample data

In [96]:
x = np.abs(np.random.randn(10000))
ts = pd.Series(x, pd.date_range(start='2013-05-03', periods=len(x), freq='s'))

Default args

In [97]:
ts.integrate()
Out[97]:
8023.3962287465547

Different methods:

• Cumulative trapezoidal rule - TODO: maybe convert output to pandas timespans in this case
In [65]:
ts.integrate('cumtrapz')
Out[65]:
array([  1.50979200e+00,   2.36680665e+00,   2.91839588e+00, ...,
7.94300517e+03,   7.94440820e+03,   7.94510764e+03])
• Simpson's rule
In [64]:
ts.integrate('simps')
Out[64]:
7945.0431120912626
• Romberger's rule - number of samples must be one plus a non-negative power of 2.
In [72]:
ts[:1+2**3].integrate('romb')
Out[72]:
array([  8.43726871e+09,   8.43726872e+09,   8.43726873e+09,
8.43726873e+09,   8.43726874e+09,   8.43726875e+09,
8.43726875e+09,   8.43726876e+09,   8.43726876e+09])

## Testing for correctness on predictable series¶

A straight line covering an area of 12 * 2 = 24

In [104]:
twos = pd.Series(2, pd.date_range(start='2012-01-23', periods=13, freq='1s'))
twos.plot()
print(twos.integrate('simps'))
24.0

We should get the same result even if we sampled the data e.g. every 3 seconds

In [105]:
twos_sparse = pd.Series(2, pd.date_range(start='2012-01-23 00:00', end='2012-01-23 00:12', freq='3s'))
twos_sparse.plot()
print(twos_sparse.integrate('simps'))
1440.0

A diagonal, closing a triangle 12 * 12 / 2 = 72

In [89]:
lin = pd.Series(np.arange(13), pd.date_range(start='2012-01-23', periods=13, freq='1s'))
lin.plot()
print(lin.integrate())
72.0

## Benchmarking¶

Overcomes the bottleneck illustrated here

In [61]:
%timeit ts.integrate()
1000 loops, best of 3: 237 µs per loop

# Tests¶

Non-existing method

In [73]:
ts.integrate('homers')
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-73-9795db05ffcd> in <module>()
----> 1 ts.integrate('homers')

<ipython-input-63-e2ac6ebfcf13> in integrate_method(self, how, unit)
24         print('Unsupported integration rule: %s' % (how))
25         print('Expecting one of these sample-based integration rules: %s' % (str(list(available_rules))))
---> 26         raise AttributeError
27
28     #result = integrate.trapz(ts.values, ts.index.astype(np.int64) / 10**9)

AttributeError:
Unsupported integration rule: homers
Expecting one of these sample-based integration rules: ['romb', 'simps', 'trapz', 'cumtrapz']