from random import randint
from time import time

import numpy as np
from astropy.table import Table, BST, FastRBT, SortedArray
from astropy.table.sorted_array import _searchsorted
from astropy.time import Time
# Number of rows in every benchmark table built below.
N = 100000
class IndexProfiling:
    """Benchmark fixture exercising table operations with or without an index.

    A single-column table (``col0``) holding the integers ``0 .. N-1`` in
    shuffled order is built once.  Each ``time_*`` method performs one
    operation on that table so that a caller can time it.

    Parameters
    ----------
    engine : class or None
        Index engine handed to ``Table.add_index``.  ``None`` means no index
        is created and the lookups fall back to a pure-Python linear scan,
        which serves as the baseline.
    """

    def __init__(self, engine):
        # initialize N rows with shuffled integer elements
        idx = np.arange(N)
        np.random.shuffle(idx)
        self.t = Table([idx])
        self.engine = engine
        # Floor division: a row index must be an integer, and ``N / 2`` is a
        # float under true division.
        self.val = self.t['col0'][N // 2]

    def time_init(self):
        """Create the index on ``col0`` (no-op for the baseline)."""
        if self.engine is not None:
            self.t.add_index('col0', engine=self.engine)

    def time_group(self):
        """Group the table by ``col0``."""
        self.t.group_by('col0')

    def time_loc(self):
        """Look up the single value stored in ``self.val``."""
        if self.engine is not None:
            self.t.loc[self.val]
        else:  # linear search
            for val in self.t['col0']:
                if val == self.val:
                    break

    def time_loc_range(self):
        """Retrieve every value between N/4 and 3N/4."""
        # from N/4 to 3N/4, inclusive
        low = N // 4
        high = 3 * N // 4
        if self.engine is not None:
            self.t.loc[low:high]
        else:
            # Deliberately a plain Python loop: this is the un-indexed
            # baseline the indexed lookups are compared against.
            range_vals = []
            for val in self.t['col0']:
                if low <= val <= high:
                    range_vals.append(val)

    def time_add_row(self):
        """Append one row holding a random integer."""
        self.t.add_row((randint(0, N * 10),))

    def time_modify(self):
        """Overwrite the first element of ``col0`` with a random integer."""
        self.t['col0'][0] = randint(0, N * 10)
def get_time(func):
    """Call ``func()`` once and return the elapsed time in seconds.

    Parameters
    ----------
    func : callable
        Zero-argument callable to time.  Its return value is discarded.

    Returns
    -------
    float
        Elapsed time of the single call, in seconds.
    """
    # ``default_timer`` is the clock the standard library itself uses for
    # benchmarking; it is available on both Python 2 and Python 3.  The
    # import is done before the clock starts so it is not measured.
    from timeit import default_timer

    start = default_timer()
    func()
    return default_timer() - start
# Display names of the benchmarked configurations, in report order.
implementations = ['None', 'FastRBT', 'SortedArray']
# Suffixes of the IndexProfiling.time_* methods to run, in report order.
methods = ['init', 'group', 'loc', 'loc_range', 'add_row', 'modify']
# Explicit name -> engine lookup, so the display name never has to be
# passed through eval().
engines = {'None': None, 'FastRBT': FastRBT, 'SortedArray': SortedArray}
# (implementation, method) -> seconds, in the table's default index mode.
times = {}
# (implementation, method) -> seconds, inside index_mode('discard_on_copy').
times2 = {}
for impl in implementations:
    profile = IndexProfiling(engines[impl])
    for method in methods:
        func = getattr(profile, 'time_{0}'.format(method))
        running_time = get_time(func)
        times[(impl, method)] = running_time
        # Second measurement of the same operation on the same table, this
        # time with the 'discard_on_copy' index mode active.
        with profile.t.index_mode('discard_on_copy'):
            time2 = get_time(func)
            times2[(impl, method)] = time2
# Report the first set of measurements, one section per method.
for method in methods:
    print('\n' + method + '\n**********')
    # Every implementation is reported relative to the un-indexed run.
    baseline = times[('None', method)]
    for impl in implementations:
        t = times[(impl, method)]
        msg = '{0}: {1}'.format(impl, t)
        # 'init' is a no-op for the baseline, so a ratio is meaningless
        # there.  A baseline that measured as zero is skipped as well to
        # avoid a ZeroDivisionError.
        if method != 'init' and baseline > 0:
            msg += ' ({0:.4}%)'.format(t / baseline * 100)
        print(msg)
init ********** None: 9.53674316406e-07 FastRBT: 1.51089882851 SortedArray: 0.0071108341217 group ********** None: 0.0517690181732 (100.0%) FastRBT: 0.0276219844818 (53.36%) SortedArray: 0.00372791290283 (7.201%) loc ********** None: 0.00727581977844 (100.0%) FastRBT: 6.48498535156e-05 (0.8913%) SortedArray: 0.000172138214111 (2.366%) loc_range ********** None: 0.0341680049896 (100.0%) FastRBT: 1.58422899246 (4.637e+03%) SortedArray: 0.00239109992981 (6.998%) add_row ********** None: 0.000385046005249 (100.0%) FastRBT: 0.00041389465332 (107.5%) SortedArray: 0.0014979839325 (389.0%) modify ********** None: 1.12056732178e-05 (100.0%) FastRBT: 8.9168548584e-05 (795.7%) SortedArray: 0.00265407562256 (2.369e+04%)
# Report the measurements taken inside index_mode('discard_on_copy'),
# one section per method.
for method in methods:
    print('\n' + method + '\n**********')
    # Every implementation is reported relative to the un-indexed run.
    baseline = times2[('None', method)]
    for impl in implementations:
        t = times2[(impl, method)]
        msg = '{0}: {1}'.format(impl, t)
        # 'init' is a no-op for the baseline, so a ratio is meaningless
        # there.  A baseline that measured as zero is skipped as well to
        # avoid a ZeroDivisionError.
        if method != 'init' and baseline > 0:
            msg += ' ({0:.4}%)'.format(t / baseline * 100)
        print(msg)
init ********** None: 0.0 FastRBT: 1.52415585518 SortedArray: 0.00721597671509 group ********** None: 0.0459520816803 (100.0%) FastRBT: 0.0275840759277 (60.03%) SortedArray: 0.00373697280884 (8.132%) loc ********** None: 0.00726699829102 (100.0%) FastRBT: 2.38418579102e-05 (0.3281%) SortedArray: 0.000133037567139 (1.831%) loc_range ********** None: 0.0328350067139 (100.0%) FastRBT: 0.0731410980225 (222.8%) SortedArray: 0.000694036483765 (2.114%) add_row ********** None: 0.000391960144043 (100.0%) FastRBT: 0.000457048416138 (116.6%) SortedArray: 0.00168395042419 (429.6%) modify ********** None: 6.91413879395e-06 (100.0%) FastRBT: 5.98430633545e-05 (865.5%) SortedArray: 0.00289297103882 (4.184e+04%)
# Compare indexing a plain float column (t1) with indexing the same values
# wrapped in a Time column (t2, interpreted as MJD).
t1 = Table([[randint(0, N * 2) * 1000. / N for _ in range(N)]])
t2 = Table([Time(t1['col0'], format='mjd')])

print('Index setup\n**********')
print('Regular columns: {0}'.format(get_time(lambda: t1.add_index('col0'))))
print('Time columns: {0}'.format(get_time(lambda: t2.add_index('col0'))))

# Row positions must be integers: use floor division so the script also
# runs where ``/`` is true division.
mid_row = N // 2
low_row = N // 4
high_row = 3 * N // 4

val = t1['col0'][mid_row]
tval = t2['col0'][mid_row]
# These iloc lookups come after the add_index calls above.
low = t1.iloc[low_row]['col0']
high = t1.iloc[high_row]['col0']
tlow = t2.iloc[low_row]['col0']
thigh = t2.iloc[high_row]['col0']

print('\nValue search\n************')
print('Regular column: {0}'.format(get_time(lambda: t1.loc[val])))
print('Time column: {0}'.format(get_time(lambda: t2.loc[tval])))

print('\nRange search\n************')
print('Regular column: {0}'.format(get_time(lambda: t1.loc[low:high])))
print('Time column: {0}'.format(get_time(lambda: t2.loc[tlow:thigh])))

print('\nsearchsorted\n***********')
print('Regular column: {0}'.format(
    get_time(lambda: np.searchsorted(t1['col0'], val))))
print('Time column: {0}'.format(
    get_time(lambda: _searchsorted(t2['col0'], tval))))
WARNING: ErfaWarning: ERFA function "utctai" yielded 100000 of "dubious year (Note 3)" [astropy._erfa.core] WARNING:astropy:ErfaWarning: ERFA function "utctai" yielded 100000 of "dubious year (Note 3)"
Index setup ********** Regular columns: 0.00827789306641 Time columns: 0.0804588794708
WARNING: ErfaWarning: ERFA function "utctai" yielded 50000 of "dubious year (Note 3)" [astropy._erfa.core] WARNING:astropy:ErfaWarning: ERFA function "utctai" yielded 50000 of "dubious year (Note 3)"
Value search ************ Regular column: 0.000174045562744 Time column: 0.0105710029602 Range search ************ Regular column: 0.00186204910278 Time column: 0.0573830604553 searchsorted *********** Regular column: 1.19209289551e-05 Time column: 0.0693500041962