Setting up tests for profiling

In [1]:
from random import randint
from time import time

import numpy as np
from astropy.table import Table, BST, FastRBT, SortedArray
from astropy.table.sorted_array import _searchsorted
from astropy.time import Time

# Number of rows in each benchmark table; lookup positions and range bounds
# used by the benchmarks are derived from it as well.
N = 100000

class IndexProfiling:
    """Benchmark fixture built around a one-column table of N shuffled integers.

    Every ``time_*`` method performs a single table operation so that a caller
    can time it. ``engine`` is passed to ``Table.add_index``; when it is
    ``None`` no index is built and the lookup methods fall back to a plain
    Python scan over the column.
    """

    def __init__(self, engine):
        """Create the table and pick the value that ``time_loc`` searches for."""
        # initialize N rows with shuffled integer elements
        idx = np.arange(N)
        np.random.shuffle(idx)
        self.t = Table([idx])
        self.engine = engine
        # Floor division keeps the row position an integer; ``N / 2`` is a
        # float under true division and cannot be used as an index.
        self.val = self.t['col0'][N // 2]

    def time_init(self):
        """Build the index on ``col0``; does nothing when no engine is set."""
        if self.engine is not None:
            self.t.add_index('col0', engine=self.engine)

    def time_group(self):
        """Group the table by ``col0``."""
        self.t.group_by('col0')

    def time_loc(self):
        """Look up the row whose ``col0`` equals the stored search value."""
        if self.engine is not None:
            self.t.loc[self.val]
        else: # linear search
            for val in self.t['col0']:
                if val == self.val:
                    break

    def time_loc_range(self):
        """Retrieve every row whose ``col0`` lies in the middle half of the values."""
        # from N/4 to 3N/4, inclusive
        if self.engine is not None:
            self.t.loc[N // 4 : 3 * N // 4]
        else:
            # The matches are collected only so the scan does work comparable
            # to the indexed lookup; the list itself is discarded.
            range_vals = []
            for val in self.t['col0']:
                if N // 4 <= val <= 3 * N // 4:
                    range_vals.append(val)

    def time_add_row(self):
        """Append one row holding a random integer between 0 and 10 * N."""
        self.t.add_row((randint(0, N * 10),))

    def time_modify(self):
        """Overwrite the first ``col0`` entry with a random integer between 0 and 10 * N."""
        self.t['col0'][0] = randint(0, N * 10)

def get_time(func):
    """Call ``func`` once with no arguments and return the elapsed time in seconds.

    Parameters
    ----------
    func : callable
        Zero-argument callable to be timed. Its return value is discarded.

    Returns
    -------
    float
        Seconds elapsed between just before and just after the call.
    """
    # timeit.default_timer selects the best clock the interpreter offers for
    # short intervals (perf_counter on Python 3.3+), whereas wall-clock
    # time() can be too coarse for the sub-millisecond operations measured
    # here. Imported before the clock starts so it is not part of the timing.
    from timeit import default_timer

    start = default_timer()
    func()
    return default_timer() - start

Run tests for FastRBT, SortedArray, and a non-indexed Table

In [2]:
# Labels double as keys of the timing dictionaries; this mapping resolves each
# label to the engine it stands for without evaluating strings as code.
engines = {'None': None, 'FastRBT': FastRBT, 'SortedArray': SortedArray}
implementations = ['None', 'FastRBT', 'SortedArray']
methods = ['init', 'group', 'loc', 'loc_range', 'add_row', 'modify']
times = {}   # (implementation, method) -> seconds, default index mode
times2 = {}  # (implementation, method) -> seconds, inside 'discard_on_copy' mode

for impl in implementations:
    profile = IndexProfiling(engines[impl])
    for method in methods:
        func = getattr(profile, 'time_{0}'.format(method))
        times[(impl, method)] = get_time(func)
        # The same bound method is run a second time on the same table, so
        # this measurement starts from whatever state the first run left.
        with profile.t.index_mode('discard_on_copy'):
            times2[(impl, method)] = get_time(func)
In [3]:
# Print the first-pass timings grouped by benchmarked method, each followed by
# its percentage of the un-indexed ('None') timing for the same method.
for method in methods:
    print('\n' + method + '\n**********')
    baseline = times[('None', method)]
    for impl in implementations:
        elapsed = times[(impl, method)]
        msg = '{0}: {1}'.format(impl, elapsed)
        # `impl` is always a string label, so an identity test against None
        # can never filter anything; only 'init' is excluded. Its un-indexed
        # variant does no work, so its time is not a usable baseline.
        if method != 'init':
            msg += ' ({0:.4}%)'.format(elapsed / baseline * 100)
        print(msg)
init
**********
None: 9.53674316406e-07
FastRBT: 1.51089882851
SortedArray: 0.0071108341217

group
**********
None: 0.0517690181732 (100.0%)
FastRBT: 0.0276219844818 (53.36%)
SortedArray: 0.00372791290283 (7.201%)

loc
**********
None: 0.00727581977844 (100.0%)
FastRBT: 6.48498535156e-05 (0.8913%)
SortedArray: 0.000172138214111 (2.366%)

loc_range
**********
None: 0.0341680049896 (100.0%)
FastRBT: 1.58422899246 (4.637e+03%)
SortedArray: 0.00239109992981 (6.998%)

add_row
**********
None: 0.000385046005249 (100.0%)
FastRBT: 0.00041389465332 (107.5%)
SortedArray: 0.0014979839325 (389.0%)

modify
**********
None: 1.12056732178e-05 (100.0%)
FastRBT: 8.9168548584e-05 (795.7%)
SortedArray: 0.00265407562256 (2.369e+04%)

Profiling without index copies

In [4]:
# Print the timings taken inside 'discard_on_copy' index mode, grouped by
# benchmarked method, each followed by its percentage of the un-indexed
# ('None') timing for the same method.
for method in methods:
    print('\n' + method + '\n**********')
    baseline = times2[('None', method)]
    for impl in implementations:
        elapsed = times2[(impl, method)]
        msg = '{0}: {1}'.format(impl, elapsed)
        # `impl` is always a string label, so an identity test against None
        # can never filter anything; only 'init' is excluded. Its un-indexed
        # variant does no work, so its time can be 0.0 and would make the
        # ratio a division by zero.
        if method != 'init':
            msg += ' ({0:.4}%)'.format(elapsed / baseline * 100)
        print(msg)
init
**********
None: 0.0
FastRBT: 1.52415585518
SortedArray: 0.00721597671509

group
**********
None: 0.0459520816803 (100.0%)
FastRBT: 0.0275840759277 (60.03%)
SortedArray: 0.00373697280884 (8.132%)

loc
**********
None: 0.00726699829102 (100.0%)
FastRBT: 2.38418579102e-05 (0.3281%)
SortedArray: 0.000133037567139 (1.831%)

loc_range
**********
None: 0.0328350067139 (100.0%)
FastRBT: 0.0731410980225 (222.8%)
SortedArray: 0.000694036483765 (2.114%)

add_row
**********
None: 0.000391960144043 (100.0%)
FastRBT: 0.000457048416138 (116.6%)
SortedArray: 0.00168395042419 (429.6%)

modify
**********
None: 6.91413879395e-06 (100.0%)
FastRBT: 5.98430633545e-05 (865.5%)
SortedArray: 0.00289297103882 (4.184e+04%)

Test mixin performance

In [5]:
# Compare an index on a plain float column with an index on a Time mixin
# column built from the same values (interpreted as MJD).
t1 = Table([[randint(0, N * 2) * 1000. / N for _ in range(N)]])
t2 = Table([Time(t1['col0'], format='mjd')])
print('Index setup\n**********')
print('Regular columns: {0}'.format(get_time(lambda: t1.add_index('col0'))))
print('Time columns:    {0}'.format(get_time(lambda: t2.add_index('col0'))))

# Row positions must be integers: floor division keeps them so, whereas
# ``N / 2`` is a float under true division and is rejected as an index.
val = t1['col0'][N // 2]
tval = t2['col0'][N // 2]
low = t1.iloc[N // 4]['col0']
high = t1.iloc[3 * N // 4]['col0']
tlow = t2.iloc[N // 4]['col0']
thigh = t2.iloc[3 * N // 4]['col0']

print('\nValue search\n************')
print('Regular column: {0}'.format(get_time(lambda: t1.loc[val])))
print('Time column:    {0}'.format(get_time(lambda: t2.loc[tval])))
print('\nRange search\n************')
print('Regular column: {0}'.format(get_time(lambda: t1.loc[low:high])))
print('Time column:    {0}'.format(get_time(lambda: t2.loc[tlow:thigh])))

# Only the cost of the call is of interest here; the columns are passed in
# table order and the returned positions are discarded.
print('\nsearchsorted\n***********')
print('Regular column: {0}'.format(
        get_time(lambda: np.searchsorted(t1['col0'], val))))
print('Time column:    {0}'.format(
        get_time(lambda: _searchsorted(t2['col0'], tval))))
WARNING: ErfaWarning: ERFA function "utctai" yielded 100000 of "dubious year (Note 3)" [astropy._erfa.core]
WARNING:astropy:ErfaWarning: ERFA function "utctai" yielded 100000 of "dubious year (Note 3)"
Index setup
**********
Regular columns: 0.00827789306641
Time columns:    0.0804588794708
WARNING: ErfaWarning: ERFA function "utctai" yielded 50000 of "dubious year (Note 3)" [astropy._erfa.core]
WARNING:astropy:ErfaWarning: ERFA function "utctai" yielded 50000 of "dubious year (Note 3)"
Value search
************
Regular column: 0.000174045562744
Time column:    0.0105710029602

Range search
************
Regular column: 0.00186204910278
Time column:    0.0573830604553

searchsorted
***********
Regular column: 1.19209289551e-05
Time column:    0.0693500041962