from __future__ import division from timeit import timeit from biom import load_table import pandas as pd from glob import glob t = load_table('table_mc5038.biom') t.shape axes = ['observation', 'sample'] shape = t.shape for i, axis in enumerate(axes): data[axis] = [] step_size = np.ceil(shape[i] / 10) for n in arange(step_size, shape[i], step_size): ids = t.ids(axis=axis)[:n] subt = t.filter(ids, axis=axis, inplace=False) out_path = 'test-tables/t.%d.%s.biom' % (n, axis) subt.to_json('me', open(out_path, 'w')) data = {'observation': [], 'sample': []} for fp in glob('test-tables/t.*.biom'): fields = fp.split('.') n = fields[1] axis = fields[2] def sr(): !single_rarefaction.py -i $fp -o x.biom -d 500 data[axis].append((int(n), timeit(sr, number=3) / 3)) df = pd.DataFrame(data['observation'], columns=['n', 'mean run time'], dtype=float) df.sort('n', inplace=True) df.plot(x='n', y='mean run time') df = pd.DataFrame(data['sample'], columns=['n', 'mean run time'], dtype=float) df.sort('n', inplace=True) df.plot(x='n', y='mean run time', legend=False) df.plot?