import pandas as pd def getstore_and_print_table(fname): import pprint store = pd.HDFStore(fname) pprint.pprint(store.get_storer('df').group.table) return store df = pd.DataFrame(randn(1e6,2),columns=list('AB')) %%timeit df.to_hdf('test.h5','df',data_columns=['A','B'],mode='w',table=True, index=True) %%timeit df.to_hdf('test.h5','df',data_columns=['A','B'],mode='w',table=True, index=False) store = getstore_and_print_table('test.h5') store %timeit store.select('df',['B > 0.5', 'B < 1.6']) %timeit store.select('df',['A<0.5','A>0.0']) %timeit store.create_table_index('df',columns=['B'],kind='full') store.get_storer('df').group.table %timeit store.select('df',['B > 0.5', 'B < 1.6']) %timeit store.select('df',['A<0.5','A>0.0']) store.close() !ptdump -v test.h5 %timeit !ptrepack --chunkshape=auto --sortby=B -o test.h5 test_sorted_noprop.h5 !ptdump -v test_sorted_noprop.h5 store = getstore_and_print_table('test_sorted_noprop.h5') store %timeit store.select('df',['B > 0.5', 'B < 1.6']) try: %timeit store.select('df',['A<0.5','A>0.0']) except ValueError as e: print "ValueError:",e store.close() %timeit !ptrepack --chunkshape=auto --sortby=B --propindexes -o test.h5 test_sorted.h5 !ptdump -v test_sorted.h5 store = getstore_and_print_table('test_sorted.h5') %timeit store.select('df',['B > 0.5','B < 1.6']) try: %timeit store.select('df',['A<0.5','A>0.0']) except ValueError as e: print "ValueError:",e store.close() %timeit !ptrepack --chunkshape=auto --sortby=B --propindexes --complib=blosc --complevel=5 -o test.h5 test_sorted_compressed.h5 !ptdump -v test_sorted_compressed.h5 store = getstore_and_print_table('test_sorted_compressed.h5') %timeit store.select('df',['B > 0.5','B < 1.6']) store.close()