from __future__ import division

import subprocess
from glob import glob
from timeit import timeit

import numpy as np
import pandas as pd
from biom import load_table
# Load the full BIOM table that the benchmark sub-tables are cut from.
t = load_table('table_mc5038.biom')
# Display the table's shape (the recorded output for this run follows).
t.shape
(52663, 396)
# Write progressively larger sub-tables of `t` to test-tables/, one series
# per axis, so rarefaction run time can be measured against table size.
# NOTE: the test-tables/ directory is assumed to exist already.
axes = ['observation', 'sample']
shape = t.shape
for i, axis in enumerate(axes):
    # Roughly ten evenly spaced sizes along this axis.  The step must be an
    # int: it is used both as a slice bound and in the '%d' file name.
    step_size = int(np.ceil(shape[i] / 10))
    # The id list does not change between sizes, so fetch it once per axis.
    axis_ids = t.ids(axis=axis)
    for n in range(step_size, shape[i], step_size):
        ids = axis_ids[:n]
        # Keep only the first n ids on this axis, leaving `t` untouched.
        subt = t.filter(ids, axis=axis, inplace=False)
        out_path = 'test-tables/t.%d.%s.biom' % (n, axis)
        # Use a context manager so each file is flushed and closed before
        # the benchmark reads it back.
        with open(out_path, 'w') as out_f:
            subt.to_json('me', out_f)
# Time single_rarefaction.py on every sub-table found in test-tables/.
# Each entry appended to data[axis] is (n, mean seconds over three runs),
# where n is the number parsed from the file name.
data = {'observation': [], 'sample': []}
for fp in glob('test-tables/t.*.biom'):
    # File names have the form 'test-tables/t.<n>.<axis>.biom'.
    fields = fp.split('.')
    n = fields[1]
    axis = fields[2]

    def sr():
        """Run one rarefaction (depth 500) of the table at `fp`."""
        # check_call raises CalledProcessError on a non-zero exit status, so
        # a failed run is never silently recorded as a (fast) timing.  It
        # also keeps this cell valid outside IPython, unlike `!` magic.
        subprocess.check_call(
            ['single_rarefaction.py', '-i', fp, '-o', 'x.biom', '-d', '500'])

    # `sr` is called immediately, so it sees the current value of `fp`.
    data[axis].append((int(n), timeit(sr, number=3) / 3))
# Mean run time versus n for the observation-axis sub-tables.
df = pd.DataFrame(data['observation'], columns=['n', 'mean run time'],
                  dtype=float)
# glob() returns files in arbitrary order; sort by n so the line plot is
# drawn left to right.  DataFrame.sort was removed from pandas, so use
# sort_values instead.
df.sort_values('n', inplace=True)
df.plot(x='n', y='mean run time')
<matplotlib.axes.AxesSubplot at 0x117c26290>
# Mean run time versus n for the sample-axis sub-tables.
df = pd.DataFrame(data['sample'], columns=['n', 'mean run time'],
                  dtype=float)
# glob() returns files in arbitrary order; sort by n so the line plot is
# drawn left to right.  DataFrame.sort was removed from pandas, so use
# sort_values instead.
df.sort_values('n', inplace=True)
df.plot(x='n', y='mean run time', legend=False)
<matplotlib.axes.AxesSubplot at 0x10de7e5d0>
# IPython-only syntax: the trailing `?` displays the help for `df.plot`.
df.plot?