I am really looking forward to your comments and suggestions to improve and extend this collection! Just send me a quick note
via Twitter: @rasbt
or Email: [email protected]


Python benchmarks via timeit

  • Code was executed in Python 3.4.0



String operations



String formatting: .format() vs. binary operator %s

We expect the string .format() method to perform slower than %, because it is doing the formatting for each object itself, where formatting via the binary % is hard-coded for known types. But let's see how big the difference really is...

In [131]:
import timeit

n = 10000

def test_format(n):
    return ['{}'.format(i) for i in range(n)]

def test_binaryop(n):
    return ['%s' %i for i in range(n)]

%timeit test_format(n)
%timeit test_binaryop(n)
100 loops, best of 3: 4.01 ms per loop
1000 loops, best of 3: 1.82 ms per loop
In [132]:
funcs = ['test_format', 'test_binaryop']

orders_n = [10**n for n in range(1, 6)]
times_n = {f:[] for f in funcs}

for n in orders_n:
    for f in funcs:
        times_n[f].append(min(timeit.Timer('%s(n)' %f, 
                      'from __main__ import %s, n' %f)
                              .repeat(repeat=3, number=1000)))
In [7]:
%pylab inline
In [135]:
import matplotlib.pyplot as plt

labels = [('test_format', '.format() method'), 
          ('test_binaryop', 'binary operator %')] 

matplotlib.rcParams.update({'font.size': 12})

fig = plt.figure(figsize=(10,8))
for lb in labels:
    plt.plot(orders_n, times_n[lb[0]], alpha=0.5, label=lb[1], marker='o', lw=3)
plt.xlabel('sample size n')
plt.ylabel('time per computation in milliseconds [ms]')
plt.xlim([1,max(orders_n) + max(orders_n) * 10])
plt.legend(loc=2)
plt.grid()
plt.xscale('log')
plt.yscale('log')
plt.title('Performance of different string formatting methods')
max_perf = max( f/b for f,b in zip(times_n['test_format'],
                                   times_n['test_binaryop']) )
min_perf = min( f/b for f,b in zip(times_n['test_format'],
                                   times_n['test_binaryop']) )
    
ftext = 'The binary op. % is {:.2f}x to {:.2f}x faster than .format()'\
        .format(min_perf, max_perf)    
plt.figtext(.14,.75, ftext, fontsize=11, ha='left')


plt.show()



String reversing: [::-1] vs. ''.join(reversed())

In [10]:
import timeit

def reverse_join(my_str):
    return ''.join(reversed(my_str))
    
def reverse_slizing(my_str):
    return my_str[::-1]

test_str = 'abcdefg'

# Test to show that both work
a = reverse_join(test_str)
b = reverse_slizing(test_str)
assert(a == b and a == 'gfedcba')

%timeit reverse_join(test_str)
%timeit reverse_slizing(test_str)
1000000 loops, best of 3: 1.33 µs per loop
1000000 loops, best of 3: 268 ns per loop
In [11]:
funcs = ['reverse_join', 'reverse_slizing']

orders_n = [10**n for n in range(1, 6)]
test_strings = (test_str*n for n in orders_n)
times_n = {f:[] for f in funcs}

for st,n in zip(test_strings, orders_n):
    for f in funcs:
        times_n[f].append(min(timeit.Timer('%s(st)' %f, 
                      'from __main__ import %s, st' %f)
                              .repeat(repeat=3, number=1000)))
In [4]:
%pylab inline
In [13]:
import matplotlib.pyplot as plt

labels = [('reverse_join', '"".join(reversed(my_str))'), 
          ('reverse_slizing', 'my_str[::-1]')] 

matplotlib.rcParams.update({'font.size': 12})

fig = plt.figure(figsize=(10,8))
for lb in labels:
    plt.plot([n*len(test_str) for n in orders_n], 
             times_n[lb[0]], alpha=0.5, label=lb[1], marker='o', lw=3)
plt.xlabel('sample size n')
plt.ylabel('time per computation in milliseconds [ms]')
plt.xlim([1,max(orders_n) + max(orders_n) * 10])
plt.legend(loc=2)
plt.grid()
plt.xscale('log')
plt.yscale('log')
plt.title('Performance of different string reversing methods')
max_perf = max( j/s for j,s in zip(times_n['reverse_join'],
                                   times_n['reverse_slizing']) )
min_perf = min( j/s for j,s in zip(times_n['reverse_join'],
                                   times_n['reverse_slizing']) )
    
ftext = 'my_str[::-1] is {:.2f}x to {:.2f}x faster than "".join(reversed(my_str))'\
        .format(min_perf, max_perf)
plt.figtext(.14,.75, ftext, fontsize=11, ha='left')
plt.show()



String concatenation: += vs. ''.join()

Strings in Python are immutable objects. So, each time we append a character to a string, it has to be created “from scratch” in memory. Thus, the answer to the question “What is the most efficient way to concatenate strings?” is a quite obvious, but the relative numbers of performance gains are nonetheless interesting.

In [15]:
import timeit

def string_add(in_chars):
    new_str = ''
    for char in in_chars:
        new_str += char
    return new_str

def string_join(in_chars):
    return ''.join(in_chars)

test_chars = ['a', 'b', 'c', 'd', 'e', 'f']

%timeit string_add(test_chars)
%timeit string_join(test_chars)
1000000 loops, best of 3: 764 ns per loop
1000000 loops, best of 3: 321 ns per loop
In [16]:
funcs = ['string_add', 'string_join']

orders_n = [10**n for n in range(1, 6)]
test_chars_n = (test_chars*n for n in orders_n)
times_n = {f:[] for f in funcs}

for st,n in zip(test_chars_n, orders_n):
    for f in funcs:
        times_n[f].append(min(timeit.Timer('%s(st)' %f, 
                      'from __main__ import %s, st' %f)
                              .repeat(repeat=3, number=1000)))
In [4]:
%pylab inline
In [18]:
import matplotlib.pyplot as plt

labels = [('string_add', 'new_str += char'), 
          ('string_join', '"".join(chars)')] 

matplotlib.rcParams.update({'font.size': 12})

fig = plt.figure(figsize=(10,8))
for lb in labels:
    plt.plot([len(test_chars)*n for n in orders_n], 
             times_n[lb[0]], alpha=0.5, label=lb[1], marker='o', lw=3)
plt.xlabel('sample size n')
plt.ylabel('time per computation in milliseconds [ms]')
#plt.xlim([1,max(orders_n) + max(orders_n) * 10])
plt.legend(loc=2)
plt.grid()
plt.xscale('log')
plt.yscale('log')
plt.title('Performance of different string concatenation methods')
max_perf = max( a/j for a,j in zip(times_n['string_add'],
                                   times_n['string_join']) )
min_perf = min( a/j for a,j in zip(times_n['string_add'],
                                   times_n['string_join']) )

ftext = '"".join(chars) is {:.2f}x to {:.2f}x faster than new_str += char'\
        .format(min_perf, max_perf)
plt.figtext(.14,.75, ftext, fontsize=11, ha='left')
plt.show()



Assembling strings

Next, I wanted to compare different methods string “assembly.” This is different from simple string concatenation, which we have seen in the previous section, since we insert values into a string, e.g., from a variable.

In [21]:
import timeit

n = 1000

def plus_operator(n):
    my_str = 'a'
    for i in range(n):
        my_str = my_str + str(1) + str(2)
    return my_str 
    
def format_method(n):
    my_str = 'a'
    for i in range(n):
        my_str = '{}{}{}'.format(my_str,1,2)
    
def binary_operator(n):
    my_str = 'a'
    for i in range(n):
        my_str = '%s%s%s' %(my_str,1,2)
    return my_str

%timeit plus_operator(n)
%timeit format_method(n)
%timeit binary_operator(n)
1000 loops, best of 3: 869 µs per loop
1000 loops, best of 3: 686 µs per loop
1000 loops, best of 3: 445 µs per loop
In [23]:
funcs = ['plus_operator', 'format_method', 'binary_operator']

orders_n = [10**n for n in range(1, 5)]
times_n = {f:[] for f in funcs}

for n in orders_n:
    for f in funcs:
        times_n[f].append(min(timeit.Timer('%s(n)' %f, 
                      'from __main__ import %s, n' %f)
                              .repeat(repeat=3, number=1000)))
In [ ]:
%pylab inline
In [27]:
import matplotlib.pyplot as plt

labels = [('plus_operator', 'my_str + str(1) + str(2)'), 
          ('format_method', '"{}{}{}".format(my_str,1,2)'),
          ('binary_operator', '"%s%s%s" %(my_str,1,2)'),
          ] 

matplotlib.rcParams.update({'font.size': 12})

fig = plt.figure(figsize=(10,8))
for lb in labels:
    plt.plot(orders_n, times_n[lb[0]], 
             alpha=0.5, label=lb[1], marker='o', lw=3)
plt.xlabel('sample size n')
plt.ylabel('time per computation in milliseconds [ms]')
#plt.xlim([1,max(orders_n) + max(orders_n) * 10])
plt.legend(loc=2)
plt.grid()
plt.xscale('log')
plt.yscale('log')
plt.title('Performance of different string assembly methods')

max_perf = max( p/b for p,b in zip(times_n['plus_operator'],
                                   times_n['binary_operator']) )
min_perf = min( p/b for p,b in zip(times_n['plus_operator'],
                                   times_n['binary_operator']) )

ftext = '"%s%s%s" %(my_str,1,2) is {:.2f}x to'\
        '{:.2f}x faster than my_str + str(1) + str(2)'\
        .format(min_perf, max_perf)
plt.figtext(.14,.75, ftext, fontsize=11, ha='left')
plt.show()



Testing if a string is an integer

In [28]:
import timeit

def string_is_int(a_str):
    try:
        int(a_str)
        return True
    except ValueError:
        return False

an_int = '123'
no_int = '123abc'

%timeit string_is_int(an_int)
%timeit string_is_int(no_int)
%timeit an_int.isdigit()
%timeit no_int.isdigit()
1000000 loops, best of 3: 420 ns per loop
100000 loops, best of 3: 2.83 µs per loop
10000000 loops, best of 3: 116 ns per loop
10000000 loops, best of 3: 119 ns per loop
In [52]:
funcs = ['string_is_int', 'isdigit']
t1 = '123'
t2 = '123abc'
isdigit_method = []
string_is_int_method = []

for t in [t1,t2]:
    string_is_int_method.append(min(timeit.Timer('string_is_int(t)', 
                      'from __main__ import string_is_int, t')
                              .repeat(repeat=3, number=1000000)))
    isdigit_method.append(min(timeit.Timer('t.isdigit()', 
                      'from __main__ import t')
                              .repeat(repeat=3, number=1000000)))
In [ ]:
%pylab inline
In [90]:
N = len(isdigit_method)
ind = np.arange(N)  # the x locations for the groups
width = 0.25       # the width of the bars

 
fig, ax = plt.subplots()
plt.bar(ind, 
        [i for i in string_is_int_method], 
        width,
        alpha=0.5,
        color='g',
        label='string_is_int(a_str)')

plt.bar(ind + width, 
        [i for i in isdigit_method], 
        width,
        alpha=0.5,
        color='b',
        label='a_str.isdigit()')
    
ax.set_ylabel('time in microseconds')
ax.set_title('Time to check if a string is an integer')
ax.set_xticks(ind + width)
ax.set_xticklabels(['"%s"' %t for t in [t1, t2]])
plt.xlabel('test strings')
plt.xlim(-0.1,1.6)
#plt.ylim(0,15)
plt.legend(loc='upper left')
plt.show()



Testing if a string is a number

In [91]:
import timeit

def string_is_number(a_str):
    try:
        float(a_str)
        return True
    except ValueError:
        return False
    
a_float = '1.234'
no_float = '123abc'

a_float.replace('.','',1).isdigit()
no_float.replace('.','',1).isdigit()

%timeit string_is_number(an_int)
%timeit string_is_number(no_int)
%timeit a_float.replace('.','',1).isdigit()
%timeit no_float.replace('.','',1).isdigit()
1000000 loops, best of 3: 418 ns per loop
1000000 loops, best of 3: 1.28 µs per loop
1000000 loops, best of 3: 500 ns per loop
1000000 loops, best of 3: 427 ns per loop
In [96]:
funcs = ["string_is_number", "replace('.','',1).isdigit()"]
t1 = '1.234'
t2 = '123abc'
isdigit_method = []
string_is_number_method = []

for t in [t1,t2]:
    string_is_number_method.append(min(timeit.Timer('string_is_number(t)', 
                      'from __main__ import string_is_number, t')
                              .repeat(repeat=3, number=1000000)))
    isdigit_method.append(min(timeit.Timer("t.replace('.','',1).isdigit()", 
                      'from __main__ import t')
                              .repeat(repeat=3, number=1000000)))
In [ ]:
%pylab inline
In [98]:
N = len(isdigit_method)
ind = np.arange(N)  # the x locations for the groups
width = 0.25       # the width of the bars

 
fig, ax = plt.subplots()

plt.bar(ind , 
        [i for i in isdigit_method], 
        width,
        alpha=0.5,
        color='b',
        label="a_str.replace('.','',1).isdigit()")

plt.bar(ind + width, 
        [i for i in string_is_number_method], 
        width,
        alpha=0.5,
        color='g',
        label='string_is_number(a_str)')

    
ax.set_ylabel('time in microseconds')
ax.set_title('Time to check if a string is a number')
ax.set_xticks(ind + width)
ax.set_xticklabels(['"%s"' %t for t in [t2, t1]])
plt.xlabel('test strings')
plt.xlim(-0.1,1.6)
#plt.ylim(0,15)
plt.legend(loc='upper left')
plt.show()



List operations



List reversing - [::-1] vs. reverse() vs. reversed()

In [104]:
import timeit
import copy

def reverse_func(my_list):
    return copy.deepcopy(my_list).reverse()
    
def reversed_func(my_list):
    return list(reversed(my_list))

def reverse_slizing(my_list):
    return my_list[::-1]

n = 10
test_list = list([i for i in range(n)])

%timeit reverse_func(test_list)
%timeit reversed_func(test_list)
%timeit reverse_slizing(test_list)
100000 loops, best of 3: 13.8 µs per loop
1000000 loops, best of 3: 1.44 µs per loop
1000000 loops, best of 3: 330 ns per loop
In [117]:
funcs = ['reverse_func', 'reversed_func',
         'reverse_slizing']

orders_n = [10**n for n in range(1, 6)]
times_n = {f:[] for f in funcs}

for n in orders_n:
    test_list = list([i for i in range(n)])
    for f in funcs:
        times_n[f].append(min(timeit.Timer('%s(test_list)' %f, 
                      'from __main__ import %s, test_list' %f)
                              .repeat(repeat=3, number=1000)))
In [ ]:
%pylab inline
In [118]:
import matplotlib.pyplot as plt

labels = [('reverse_func', 'copy.deepcopy(my_list).reverse()'), 
          ('reversed_func', 'list(reversed(my_list))'),
          ('reverse_slizing', 'my_list[::-1]'),
          ] 

matplotlib.rcParams.update({'font.size': 12})

fig = plt.figure(figsize=(10,8))
for lb in labels:
    plt.plot(orders_n, times_n[lb[0]], 
             alpha=0.5, label=lb[1], marker='o', lw=3)
plt.xlabel('sample size n')
plt.ylabel('time per computation in milliseconds [ms]')
#plt.xlim([1,max(orders_n) + max(orders_n) * 10])
plt.legend(loc=2)
plt.grid()
plt.xscale('log')
plt.yscale('log')
plt.title('Performance of different list reversing approaches')

max_perf = max( f/s for f,s in zip(times_n['reverse_func'],
                                   times_n['reverse_slizing']) )
min_perf = min( f/s for f,s in zip(times_n['reverse_func'],
                                   times_n['reverse_slizing']) )

ftext = 'my_list[::-1] is {:.2f}x to '\
        '{:.2f}x faster than copy.deepcopy(my_list).reverse()'\
        .format(min_perf, max_perf)
plt.figtext(.14,.75, ftext, fontsize=11, ha='left')

plt.show()



Creating lists using conditional statements

In this test, I attempted to figure out the fastest way to create a new list of elements that meet a certain criterion. For the sake of simplicity, the criterion was to check if an element is even or odd, and only if the element was even, it should be included in the list. For example, the resulting list for numbers in the range from 1 to 10 would be [2, 4, 6, 8, 10].

Here, I tested three different approaches:
1) a simple for loop with an if-statement check (cond_loop())
2) a list comprehension (list_compr())
3) the built-in filter() function (filter_func())

Note that the filter() function now returns a generator in Python 3, so I had to wrap it in an additional list() function call.

In [119]:
import timeit

def cond_loop(n):
    even_nums = []
    for i in range(n):
        if i % 2 == 0:
            even_nums.append(i)
    return even_nums

def list_compr(n):
    even_nums = [i for i in range(n) if i % 2 == 0]
    return even_nums
    
def filter_func(n):
    even_nums = list(filter((lambda x: x % 2 != 0), range(n)))
    return even_nums

%timeit cond_loop(n)
%timeit list_compr(n)
%timeit filter_func(n)
10 loops, best of 3: 18.1 ms per loop
100 loops, best of 3: 15.3 ms per loop
10 loops, best of 3: 22.8 ms per loop
In [120]:
funcs = ['cond_loop', 'list_compr',
         'filter_func']

orders_n = [10**n for n in range(1, 6)]
times_n = {f:[] for f in funcs}

for n in orders_n:
    test_list = list([i for i in range(n)])
    for f in funcs:
        times_n[f].append(min(timeit.Timer('%s(n)' %f, 
                      'from __main__ import %s, n' %f)
                              .repeat(repeat=3, number=1000)))
In [ ]:
%pylab inline
In [122]:
import matplotlib.pyplot as plt

labels = [('cond_loop', 'explicit for-loop'), 
          ('list_compr', 'list comprehension'),
          ('filter_func', 'lambda function'),
          ] 

matplotlib.rcParams.update({'font.size': 12})

fig = plt.figure(figsize=(10,8))
for lb in labels:
    plt.plot(orders_n, times_n[lb[0]], 
             alpha=0.5, label=lb[1], marker='o', lw=3)
plt.xlabel('sample size n')
plt.ylabel('time per computation in milliseconds [ms]')
#plt.xlim([1,max(orders_n) + max(orders_n) * 10])
plt.legend(loc=2)
plt.grid()
plt.xscale('log')
plt.yscale('log')
plt.title('Performance of different conditional list creation methods')

max_perf = max( f/c for f,c in zip(times_n['filter_func'],
                                   times_n['cond_loop']) )
min_perf = min( f/c for f,c in zip(times_n['filter_func'],
                                   times_n['cond_loop']) )

ftext = 'the list comprehension is {:.2f}x to '\
        '{:.2f}x faster than the lambda function'\
        .format(min_perf, max_perf)
plt.figtext(.14,.75, ftext, fontsize=11, ha='left')

plt.show()



Dictionary operations



Adding elements to a Dictionary

All three functions below count how often different elements (values) occur in a list.
E.g., for the list ['a', 'b', 'a', 'c'], the dictionary would look like this:
my_dict = {'a': 2, 'b': 1, 'c': 1}

In [123]:
import random
import timeit
from collections import defaultdict


def add_element_check1(elements):
    """if ele not in dict (v1)"""
    d = dict()
    for e in elements:
        if e not in d:
            d[e] = 1
        else:
            d[e] += 1
    return d
            
def add_element_check2(elements):
    """if ele not in dict (v2)"""
    d = dict()
    for e in elements:
        if e not in d:
            d[e] = 0
        d[e] += 1            
    return d
        
def add_element_except(elements):
    """try-except"""
    d = dict()
    for e in elements:
        try:
            d[e] += 1
        except KeyError:
            d[e] = 1
    return d
            
def add_element_defaultdict(elements):
    """defaultdict"""
    d = defaultdict(int)
    for e in elements:
        d[e] += 1
    return d

def add_element_get(elements):
    """.get() method"""
    d = dict()
    for e in elements:
        d[e] = d.get(e, 1) + 1
    return d


random.seed(123)

print('Results for 100 integers in range 1-10') 
rand_ints = [random.randrange(1, 10) for i in range(100)]
%timeit add_element_check1(rand_ints)
%timeit add_element_check2(rand_ints)
%timeit add_element_except(rand_ints)
%timeit add_element_defaultdict(rand_ints)
%timeit add_element_get(rand_ints)

print('\nResults for 1000 integers in range 1-5')            
rand_ints = [random.randrange(1, 5) for i in range(1000)]
%timeit add_element_check1(rand_ints)
%timeit add_element_check2(rand_ints)
%timeit add_element_except(rand_ints)
%timeit add_element_defaultdict(rand_ints)
%timeit add_element_get(rand_ints)

print('\nResults for 1000 integers in range 1-1000')            
rand_ints = [random.randrange(1, 1000) for i in range(1000)]
%timeit add_element_check1(rand_ints)
%timeit add_element_check2(rand_ints)
%timeit add_element_except(rand_ints)
%timeit add_element_defaultdict(rand_ints)
%timeit add_element_get(rand_ints)
Results for 100 integers in range 1-10
100000 loops, best of 3: 16.8 µs per loop
100000 loops, best of 3: 18.2 µs per loop
100000 loops, best of 3: 17.1 µs per loop
100000 loops, best of 3: 14.7 µs per loop
10000 loops, best of 3: 20.8 µs per loop

Results for 1000 integers in range 1-5
10000 loops, best of 3: 161 µs per loop
10000 loops, best of 3: 166 µs per loop
10000 loops, best of 3: 128 µs per loop
10000 loops, best of 3: 114 µs per loop
1000 loops, best of 3: 197 µs per loop

Results for 1000 integers in range 1-1000
10000 loops, best of 3: 166 µs per loop
1000 loops, best of 3: 240 µs per loop
1000 loops, best of 3: 354 µs per loop
1000 loops, best of 3: 281 µs per loop
1000 loops, best of 3: 234 µs per loop
In [136]:
funcs = ['add_element_check1', 'add_element_check2',
         'add_element_except', 'add_element_defaultdict',
         'add_element_get']

orders_n = [10**n for n in range(1, 6)]
times_n = {f:[] for f in funcs}

for n in orders_n:
    elements = [random.randrange(1, 100) for i in range(n)]
    for f in funcs:
        times_n[f].append(min(timeit.Timer('%s(elements)' %f, 
                      'from __main__ import %s, elements' %f)
                              .repeat(repeat=3, number=1000)))
In [ ]:
%pylab inline
In [139]:
import matplotlib.pyplot as plt

labels = [('add_element_check1', 'if ele not in dict (v1)'), 
          ('add_element_check2', 'if ele not in dict (v2)'),
          ('add_element_except', 'try-except'),
          ('add_element_defaultdict', 'defaultdict'),
          ('add_element_get', '.get() method')
          ] 

matplotlib.rcParams.update({'font.size': 12})

fig = plt.figure(figsize=(10,10))
for lb in labels:
    plt.plot(orders_n, times_n[lb[0]], 
             alpha=0.5, label=lb[1], marker='o', lw=3)
plt.xlabel('sample size n')
plt.ylabel('time per computation in milliseconds [ms]')
#plt.xlim([1,max(orders_n) + max(orders_n) * 10])
plt.legend(loc=2)
plt.grid()
plt.xscale('log')
plt.yscale('log')
plt.title('Performance of different methods to count elements in a dictionary')

plt.show()

Conclusion

We see from the results that the try-except variant is faster than then the if element in my_dict alternative if we have a low number of unique elements (here: 1000 integers in the range 1-5), which makes sense: the except-block is skipped if an element is already added as a key to the dictionary. However, in this case the collections.defaultdict has even a better performance.
However, if we are having a relative large number of unique entries(here: 1000 integers in range 1-1000), the if element in my_dict approach outperforms the alternative approaches.



Comprehesions vs. for-loops

Comprehensions are not only shorter and prettier than ye goode olde for-loop,
but they are also up to ~1.2x faster.

In [140]:
import timeit
n = 1000

Set comprehensions

In [141]:
def set_loop(n):
    a_set = set()
    for i in range(n):
        if i % 3 == 0:
            a_set.add(i)
    return a_set

def set_compr(n):
    return {i for i in range(n) if i % 3 == 0}
In [142]:
%timeit set_loop(n)
%timeit set_compr(n)
1000 loops, best of 3: 165 µs per loop
10000 loops, best of 3: 145 µs per loop

List comprehensions

In [143]:
def list_loop(n):
    a_list = list()
    for i in range(n):
        if i % 3 == 0:
            a_list.append(i)
    return a_list
In [144]:
def list_compr(n):
    return [i for i in range(n) if i % 3 == 0]
In [145]:
%timeit list_loop(n)
%timeit list_compr(n)
10000 loops, best of 3: 164 µs per loop
10000 loops, best of 3: 143 µs per loop
In [152]:
funcs = ['list_loop', 'list_compr']
orders_n = [10**n for n in range(1, 6)]
times_n = {f:[] for f in funcs}

for n in orders_n:
    for f in funcs:
        times_n[f].append(min(timeit.Timer('%s(n)' %f, 
                      'from __main__ import %s, n' %f)
                              .repeat(repeat=3, number=1000)))
In [ ]:
%pylab inline
In [153]:
import matplotlib.pyplot as plt

labels = [('list_loop', 'explicit for-loop'), 
          ('list_compr', 'list comprehension')]

matplotlib.rcParams.update({'font.size': 12})

fig = plt.figure(figsize=(10,8))
for lb in labels:
    plt.plot(orders_n, times_n[lb[0]], 
             alpha=0.5, label=lb[1], marker='o', lw=3)
plt.xlabel('sample size n')
plt.ylabel('time per computation in milliseconds [ms]')
#plt.xlim([1,max(orders_n) + max(orders_n) * 10])
plt.legend(loc=2)
plt.grid()
plt.xscale('log')
plt.yscale('log')
plt.title('Performance of explicit for-loops vs. list comprehensions')

max_perf = max( l/c for l,c in zip(times_n['list_loop'],
                                   times_n['list_compr']) )
min_perf = min( l/c for l,c in zip(times_n['list_loop'],
                                   times_n['list_compr']) )

ftext = 'the list comprehension is {:.2f}x to '\
        '{:.2f}x faster than the explicit for-loop'\
        .format(min_perf, max_perf)
plt.figtext(.14,.75, ftext, fontsize=11, ha='left')

plt.show()

Dictionary comprehensions

In [146]:
def dict_loop(n):
    a_dict = dict()
    for i in range(n):
        if i % 3 == 0:
            a_dict[i] = i
    return a_dict
In [147]:
def dict_compr(n):
    return {i:i for i in range(n) if i % 3 == 0}
In [148]:
%timeit dict_loop(n)
%timeit dict_compr(n)
10000 loops, best of 3: 159 µs per loop
10000 loops, best of 3: 151 µs per loop



Copying files by searching directory trees

Executing Unix/Linux shell commands:

In [30]:
import subprocess

def subprocess_findcopy(path, search_str, dest):    
    query = 'find %s -name "%s" -exec cp {} %s \;' %(path, search_str, dest)
    subprocess.call(query, shell=True)
    return  

Using Python's os.walk() to search the directory tree recursively and matching patterns via fnmatch.filter()

In [33]:
import shutil
import os
import fnmatch

def walk_findcopy(path, search_str, dest):
    for path, subdirs, files in os.walk(path):
        for name in fnmatch.filter(files, search_str):
            try:
                shutil.copy(os.path.join(path,name), dest)
            except NameError:
                pass
    return
In [35]:
import timeit


def findcopy_timeit(inpath, outpath, search_str):
    
    shutil.rmtree(outpath)
    os.mkdir(outpath)
    print(50*'#')
    print('subprocsess call')
    %timeit subprocess_findcopy(inpath, search_str, outpath)
    print("copied %s files" %len(os.listdir(outpath)))
    shutil.rmtree(outpath)
    os.mkdir(outpath)
    print('\nos.walk approach')
    %timeit walk_findcopy(inpath, search_str, outpath)
    print("copied %s files" %len(os.listdir(outpath)))
    print(50*'#')

print('small tree')
inpath = '/Users/sebastian/Desktop/testdir_in'
outpath = '/Users/sebastian/Desktop/testdir_out'
search_str = '*.png'
findcopy_timeit(inpath, outpath, search_str)

print('larger tree')
inpath = '/Users/sebastian/Dropbox'
outpath = '/Users/sebastian/Desktop/testdir_out'
search_str = '*.csv'
findcopy_timeit(inpath, outpath, search_str)
small tree
##################################################
subprocsess call
1 loops, best of 3: 268 ms per loop
copied 13 files

os.walk approach
100 loops, best of 3: 12.2 ms per loop
copied 13 files
##################################################
larger tree
##################################################
subprocsess call
1 loops, best of 3: 623 ms per loop
copied 105 files

os.walk approach
1 loops, best of 3: 417 ms per loop
copied 105 files
##################################################

I have to say that I am really positively surprised. The shell's find scales even better than expected!



Returning column vectors slicing through a numpy array

Given a numpy matrix, I want to iterate through it and return each column as a 1-column vector.
E.g., if I want to return the 1st column from matrix A below

A = np.array([ [1,2,3], [4,5,6], [7,8,9] ])
>>> A
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

I want my result to be:

array([[1],
       [4],
       [7]])

with .shape = (3,1)

However, the default behavior of numpy is to return the column as a row vector:

>>> A[:,0]
array([1, 4, 7])
>>> A[:,0].shape
(3,)
In [83]:
import numpy as np

# 1st column, e.g., A[:,0,np.newaxis]

def colvec_method1(A):
    for col in A.T:
        colvec = row[:,np.newaxis]
        yield colvec
In [82]:
# 1st column, e.g., A[:,0:1]

def colvec_method2(A):
    for idx in range(A.shape[1]):
        colvec = A[:,idx:idx+1]
        yield colvec
In [81]:
# 1st column, e.g., A[:,0].reshape(-1,1)

def colvec_method3(A):
    for idx in range(A.shape[1]):
        colvec = A[:,idx].reshape(-1,1)
        yield colvec
In [79]:
# 1st column, e.g., np.vstack(A[:,0]

def colvec_method4(A):
    for idx in range(A.shape[1]):
        colvec = np.vstack(A[:,idx])
        yield colvec
In [77]:
# 1st column, e.g., np.row_stack(A[:,0])

def colvec_method5(A):
    for idx in range(A.shape[1]):
        colvec = np.row_stack(A[:,idx])
        yield colvec
In [74]:
# 1st column, e.g., np.column_stack((A[:,0],))

def colvec_method6(A):
    for idx in range(A.shape[1]):
        colvec = np.column_stack((A[:,idx],))
        yield colvec
In [89]:
# 1st column, e.g., A[:,[0]]

def colvec_method7(A):
    for idx in range(A.shape[1]):
        colvec = A[:,[idx]]
        yield colvec
In [69]:
def test_method(method, A):
    for i in method(A): 
        assert i.shape == (A.shape[0],1), "{}, {}".format(i.shape, A.shape[0],1)
In [91]:
import timeit

A = np.random.random((300, 3))

for method in [
            colvec_method1, colvec_method2, 
            colvec_method3, colvec_method4, 
            colvec_method5, colvec_method6,
            colvec_method7]:
    print('\nTest:', method.__name__)
    %timeit test_method(colvec_method2, A)
Test: colvec_method1
100000 loops, best of 3: 16.6 µs per loop

Test: colvec_method2
10000 loops, best of 3: 16.1 µs per loop

Test: colvec_method3
100000 loops, best of 3: 16.2 µs per loop

Test: colvec_method4
100000 loops, best of 3: 16.4 µs per loop

Test: colvec_method5
100000 loops, best of 3: 16.2 µs per loop

Test: colvec_method6
100000 loops, best of 3: 16.8 µs per loop

Test: colvec_method7
100000 loops, best of 3: 16.3 µs per loop



Speed of numpy functions vs Python built-ins and std. lib.



sum() vs. numpy.sum()

In [20]:
from numpy import sum as np_sum
import timeit

samples = list(range(1000000))

%timeit(sum(samples))
%timeit(np_sum(samples))
10 loops, best of 3: 18.2 ms per loop
10 loops, best of 3: 138 ms per loop
In [26]:
funcs = ['sum', 'np_sum']
orders_n = [10**n for n in range(1, 6)]
times_n = {f:[] for f in funcs}

for n in orders_n:
    samples = list(range(n))
    times_n['sum'].append(min(timeit.Timer('sum(samples)', 
                'from __main__ import samples')
                    .repeat(repeat=3, number=1000)))
    times_n['np_sum'].append(min(timeit.Timer('np_sum(samples)', 
                'from __main__ import np_sum, samples')
                    .repeat(repeat=3, number=1000)))
In [24]:
%pylab inline
In [27]:
import matplotlib.pyplot as plt

labels = [('sum', 'in-built sum() function'), 
          ('np_sum', 'numpy.sum() function')]

matplotlib.rcParams.update({'font.size': 12})

fig = plt.figure(figsize=(10,8))
for lb in labels:
    plt.plot(orders_n, times_n[lb[0]], 
             alpha=0.5, label=lb[1], marker='o', lw=3)
plt.xlabel('sample size n')
plt.ylabel('time per computation in milliseconds [ms]')
plt.legend(loc=2)
plt.grid()
plt.xscale('log')
plt.yscale('log')
plt.title('Performance of explicit for-loops vs. list comprehensions')

max_perf = max( n/i for i,n in zip(times_n['sum'],
                                   times_n['np_sum']) )
min_perf = min( n/i for i,n in zip(times_n['sum'],
                                   times_n['np_sum']) )

ftext = 'the in-built sum() is {:.2f}x to '\
        '{:.2f}x faster than the numpy.sum()'\
        .format(min_perf, max_perf)
plt.figtext(.14,.75, ftext, fontsize=11, ha='left')

plt.show()



range() vs. numpy.arange()

In [36]:
from numpy import arange as np_arange

n = 1000000

def loop_range(n):
    for i in range(n):
        pass
    return

def loop_arange(n):
    for i in np_arange(n):
        pass
    return

%timeit(loop_range(n))
%timeit(loop_arange(n))
10 loops, best of 3: 50.9 ms per loop
10 loops, best of 3: 183 ms per loop
In [38]:
funcs = ['loop_range', 'loop_arange']
orders_n = [10**n for n in range(1, 6)]
times_n = {f:[] for f in funcs}

for n in orders_n:
    for f in funcs:
        times_n[f].append(min(timeit.Timer('%s(n)' %f, 
                'from __main__ import %s, n' %f)
                    .repeat(repeat=3, number=1000)))
In [40]:
import matplotlib.pyplot as plt

labels = [('loop_range', 'in-built range()'), 
          ('loop_arange', 'numpy.arange()')]

matplotlib.rcParams.update({'font.size': 12})

fig = plt.figure(figsize=(10,8))
for lb in labels:
    plt.plot(orders_n, times_n[lb[0]], 
             alpha=0.5, label=lb[1], marker='o', lw=3)
plt.xlabel('sample size n')
plt.ylabel('time per computation in milliseconds [ms]')
plt.legend(loc=2)
plt.grid()
plt.xscale('log')
plt.yscale('log')
plt.title('Performance of explicit for-loops vs. list comprehensions')

max_perf = max( a/r for r,a in zip(times_n['loop_range'],
                                   times_n['loop_arange']) )
min_perf = min( a/r for r,a in zip(times_n['loop_range'],
                                   times_n['loop_arange']) )

ftext = 'the in-built range() is {:.2f}x to '\
        '{:.2f}x faster than numpy.arange()'\
        .format(min_perf, max_perf)
plt.figtext(.14,.75, ftext, fontsize=11, ha='left')

plt.show()



statistics.mean() vs. numpy.mean()

In [2]:
# The statistics module has been added to
# the standard library in Python 3.4

import timeit
import statistics as stats
import numpy as np

def calc_mean(samples):
    return sum(samples)/len(samples)

def np_mean(samples):
    return np.mean(samples)

def np_mean_ary(np_array):
    return np.mean(np_array)

def st_mean(samples):
    return stats.mean(samples)

n = 1000000
samples = list(range(n))
samples_array = np.arange(n)

assert(st_mean(samples) == np_mean(samples)
       == calc_mean(samples) == np_mean_ary(samples_array))

%timeit(calc_mean(samples))
%timeit(np_mean(samples))
%timeit(np_mean_ary(samples_array))
%timeit(st_mean(samples))
100 loops, best of 3: 26.2 ms per loop
1 loops, best of 3: 144 ms per loop
100 loops, best of 3: 3.21 ms per loop
1 loops, best of 3: 1.12 s per loop
In [3]:
funcs = ['st_mean', 'np_mean', 'calc_mean', 'np_mean_ary']
orders_n = [10**n for n in range(1, 6)]
times_n = {f:[] for f in funcs}

for n in orders_n:
    samples = list(range(n))
    for f in funcs:
        if f == 'np_mean_ary':
            samples = np.arange(n)
        times_n[f].append(min(timeit.Timer('%s(samples)' %f, 
                'from __main__ import %s, samples' %f)
                    .repeat(repeat=3, number=1000)))
In [6]:
%pylab inline
In [7]:
import matplotlib.pyplot as plt

labels = [('st_mean', 'statistics.mean()'), 
          ('np_mean', 'numpy.mean() on list'),
          ('np_mean_ary', 'numpy.mean() on array'),
          ('calc_mean', 'sum(samples)/len(samples)')
          ]

matplotlib.rcParams.update({'font.size': 12})

fig = plt.figure(figsize=(10,8))
for lb in labels:
    plt.plot(orders_n, times_n[lb[0]], 
             alpha=0.5, label=lb[1], marker='o', lw=3)
plt.xlabel('sample size n')
plt.ylabel('time per computation in milliseconds [ms]')
plt.legend(loc=2)
plt.grid()
plt.xscale('log')
plt.yscale('log')
plt.title('Performance of different approaches for calculating sample means')

max_perf = max( s/c for s,c in zip(times_n['st_mean'],
                                   times_n['np_mean_ary']) )
min_perf = min( s/c for s,c in zip(times_n['st_mean'],
                                   times_n['np_mean_ary']) )

ftext = 'using numpy.mean() on np.arrays is {:.2f}x to '\
        '{:.2f}x faster than statistics.mean() on lists'\
        .format(min_perf, max_perf)
plt.figtext(.14,.15, ftext, fontsize=11, ha='left')

plt.show()



Cython vs regular (C)Python

Here, we implement a linear regression via least squares fitting (with vertical offsets) by solving to fit n points $(x_i, y_i)$ with $i=1,2,...n,$ via linear equation of the form
$f(x) = a\cdot x + b$.

Therefore we calculate the following parameters as follows:

$a = \frac{S_{x,y}}{\sigma_{x}^{2}}\quad$ (slope)

$b = \bar{y} - a\bar{x}\quad$ (y-axis intercept)

where

$S_{xy} = \sum_{i=1}^{n} (x_i - \bar{x})(y_i - \bar{y})\quad$ (covariance)

$\sigma{_x}^{2} = \sum_{i=1}^{n} (x_i - \bar{x})^2\quad$ (variance)

I have described the approach in more detail in this IPython notebook.



First, the implementation in Python (CPython):

In [1]:
def py_lstsqr(x, y):
    """ Computes the least-squares solution to a linear matrix equation. """

    x_avg = sum(x)/len(x)
    y_avg = sum(y)/len(y)
    var_x = 0
    cov_xy = 0
    for x_i, y_i in zip(x,y):
        temp = (x_i - x_avg)
        var_x += temp**2
        cov_xy += temp*(y_i - y_avg)
    slope = cov_xy / var_x
    y_interc = y_avg - slope*x_avg
    return (slope, y_interc)



And now, adding type definitions and compiling the code via Cython:

In [2]:
%load_ext cythonmagic
In [3]:
%%cython

def cy_lstsqr(x, y):
    """ Computes the least-squares solution to a linear matrix equation. """
    cdef double x_avg, y_avg, temp, var_x, cov_xy, slope, y_interc, x_i, y_i
    x_avg = sum(x)/len(x)
    y_avg = sum(y)/len(y)
    var_x = 0
    cov_xy = 0
    for x_i, y_i in zip(x,y):
        temp = (x_i - x_avg)
        var_x += temp**2
        cov_xy += temp*(y_i - y_avg)
    slope = cov_xy / var_x
    y_interc = y_avg - slope*x_avg
    return (slope, y_interc)



A small visual proof of concept that our least squares fit works as intended:

In [5]:
%pylab inline
In [6]:
from matplotlib import pyplot as plt

import timeit
import random
random.seed(12345)

n = 500
x = [x_i*random.randrange(8,12)/10 for x_i in range(n)]
y = [y_i*random.randrange(10,14)/10 for y_i in range(n)]

slope, intercept = cy_lstsqr(x, y)

line_x = [round(min(x)) - 1, round(max(x)) + 1]
line_y = [slope*x_i + intercept for x_i in line_x]

plt.figure(figsize=(8,8))
plt.scatter(x,y)
plt.plot(line_x, line_y, color='red', lw='2')

plt.ylabel('y')
plt.xlabel('x')
plt.title('Linear regression via least squares fit')

ftext = 'y = ax + b = {:.3f} + {:.3f}x'\
        .format(slope, intercept)
plt.figtext(.15,.8, ftext, fontsize=11, ha='left')

plt.show()