"""Benchmark astropy ``Table`` indexing engines against an unindexed baseline.

The script times a handful of table operations (index creation, grouping,
single-value lookup, range lookup, row insertion, in-place modification) for
each entry in ``implementations`` and prints the raw timings together with a
percentage relative to the unindexed ("None") baseline.  It then compares
index setup and lookups on a plain float column against a ``Time`` column.
"""
from random import randint
from time import time

import numpy as np

from astropy.table import Table, BST, FastRBT, SortedArray
from astropy.table.sorted_array import _searchsorted
from astropy.time import Time

# Number of rows in every benchmark table.
N = 100000


class IndexProfiling:
    """Hold one shuffled-integer table and the timed operations run on it.

    Parameters
    ----------
    engine : class or None
        Index engine class passed to ``Table.add_index``.  ``None`` means
        "no index": the lookup benchmarks then fall back to a linear scan of
        the column so that there is a baseline to compare against.
    """

    def __init__(self, engine):
        # initialize N rows with shuffled integer elements
        idx = np.arange(N)
        np.random.shuffle(idx)
        self.t = Table([idx])
        self.engine = engine
        # Integer division: a float is not a valid row position in Python 3.
        self.val = self.t['col0'][N // 2]

    def time_init(self):
        """Add an index on ``col0`` (does nothing when there is no engine)."""
        if self.engine is not None:
            self.t.add_index('col0', engine=self.engine)

    def time_group(self):
        """Group the table by ``col0``."""
        self.t.group_by('col0')

    def time_loc(self):
        """Look up the row whose ``col0`` equals the stored probe value."""
        if self.engine is not None:
            self.t.loc[self.val]
        else:
            # linear search
            for val in self.t['col0']:
                if val == self.val:
                    break

    def time_loc_range(self):
        """Select the rows whose ``col0`` lies in [N/4, 3N/4], inclusive."""
        low, high = N // 4, 3 * N // 4
        if self.engine is not None:
            self.t.loc[low:high]
        else:
            # Baseline: scan the whole column and collect the matches.
            range_vals = []
            for val in self.t['col0']:
                if low <= val <= high:
                    range_vals.append(val)

    def time_add_row(self):
        """Append one row holding a random integer."""
        self.t.add_row((randint(0, N * 10),))

    def time_modify(self):
        """Overwrite the first element of ``col0`` with a random integer."""
        self.t['col0'][0] = randint(0, N * 10)


def get_time(func):
    """Call ``func()`` once and return the elapsed wall-clock seconds."""
    start = time()
    func()
    return time() - start


implementations = ['None', 'FastRBT', 'SortedArray']
methods = ['init', 'group', 'loc', 'loc_range', 'add_row', 'modify']

# Explicit name -> engine lookup; avoids calling eval() on the labels.
engines = {'None': None, 'FastRBT': FastRBT, 'SortedArray': SortedArray}

# (implementation, method) -> seconds; ``times2`` holds the timings taken
# inside the table's 'discard_on_copy' index mode.
times = {}
times2 = {}


def print_report(results):
    """Print the timings in ``results`` grouped by method.

    Parameters
    ----------
    results : dict
        Maps ``(implementation, method)`` to a running time in seconds.

    Each indexed implementation also gets its time as a percentage of the
    'None' baseline, except for 'init' (the baseline does no work there) and
    whenever the baseline measured as zero, which would divide by zero.
    """
    for method in methods:
        print('\n' + method + '\n**********')
        baseline = results[('None', method)]
        for impl in implementations:
            t = results[(impl, method)]
            msg = '{0}: {1}'.format(impl, t)
            if impl != 'None' and method != 'init' and baseline > 0:
                msg += ' ({0:.4}%)'.format(t / baseline * 100)
            print(msg)


for impl in implementations:
    profile = IndexProfiling(engines[impl])
    for method in methods:
        func = getattr(profile, 'time_{0}'.format(method))
        times[(impl, method)] = get_time(func)
        # Repeat the same operation with the 'discard_on_copy' index mode on.
        with profile.t.index_mode('discard_on_copy'):
            times2[(impl, method)] = get_time(func)

print_report(times)
print_report(times2)

# Compare a plain float column with the same values wrapped in a Time column.
t1 = Table([[randint(0, N * 2) * 1000. / N for _ in range(N)]])
t2 = Table([Time(t1['col0'], format='mjd')])

print('Index setup\n**********')
print('Regular columns: {0}'.format(get_time(lambda: t1.add_index('col0'))))
print('Time columns: {0}'.format(get_time(lambda: t2.add_index('col0'))))

# Probe values and range bounds; positions must be integers (hence ``//``).
val = t1['col0'][N // 2]
tval = t2['col0'][N // 2]
low = t1.iloc[N // 4]['col0']
high = t1.iloc[3 * N // 4]['col0']
tlow = t2.iloc[N // 4]['col0']
thigh = t2.iloc[3 * N // 4]['col0']

print('\nValue search\n************')
print('Regular column: {0}'.format(get_time(lambda: t1.loc[val])))
print('Time column: {0}'.format(get_time(lambda: t2.loc[tval])))

print('\nRange search\n************')
print('Regular column: {0}'.format(get_time(lambda: t1.loc[low:high])))
print('Time column: {0}'.format(get_time(lambda: t2.loc[tlow:thigh])))

print('\nsearchsorted\n***********')
print('Regular column: {0}'.format(
    get_time(lambda: np.searchsorted(t1['col0'], val))))
print('Time column: {0}'.format(
    get_time(lambda: _searchsorted(t2['col0'], tval))))