#!/usr/bin/env python
# coding: utf-8

# > This is one of the 100 recipes of the [IPython Cookbook](http://ipython-books.github.io/), the definitive guide to high-performance scientific computing and data science in Python.
# 

# # 4.8. Making efficient selections in arrays with NumPy

# In[ ]:


import numpy as np


# In[ ]:


# NOTE: this deliberately keeps the name `id` (shadowing the builtin) because
# the rest of this script calls `id(...)` to compare array buffer addresses.
def id(x) -> int:
    """Return the address of the first byte of the data area of array *x*.

    The value is read from the NumPy array interface
    (``x.__array_interface__['data'][0]``). Two arrays return the same value
    only when their data start at the same memory location; arrays that share
    a buffer but begin at different offsets return different values.
    """
    return x.__array_interface__['data'][0]


# We create a large array.

# In[ ]:


# Array dimensions: n rows by d columns.
n = 100000
d = 100


# In[ ]:


# Fill an (n, d) array with random samples, then record the value `id`
# returns for it so that later selections can be compared against it.
a = np.random.random_sample((n, d))
aid = id(a)


# ## Array views and fancy indexing

# We take a selection using two different methods: with a view and with fancy indexing.

# In[ ]:


# Select every 10th row of `a` in two ways: with a strided slice (b1) and
# with an explicit integer index array covering the same row numbers (b2).
b1 = a[::10]
b2 = a[np.arange(0, n, 10)]


# In[ ]:


# Check that both selections hold the same values. The result is neither
# stored nor printed here; it is meant to be displayed as the cell's output.
np.array_equal(b1, b2)


# The view refers to the original data buffer, whereas fancy indexing yields a copy.

# In[ ]:


# Compare the data address of each selection (as returned by the `id` helper
# defined near the top of this file) with the address recorded in `aid`.
# The resulting tuple of two booleans is likewise only displayed, not stored.
id(b1) == aid, id(b2) == aid


# Fancy indexing is several orders of magnitude slower because it involves copying a large array. On the other hand, it is more general: it can select any portion of an array (using any list of indices), not just a strided selection.

# In[ ]:


# Time the strided-slice selection with the `timeit` line magic.
# NOTE: `get_ipython` is neither defined nor imported in this file; it is
# expected to be supplied by the IPython session running this exported notebook.
get_ipython().run_line_magic('timeit', 'a[::10]')


# In[ ]:


# Time the index-array selection of the same rows. The np.arange call is part
# of the timed statement, so building the index array is included in the timing.
get_ipython().run_line_magic('timeit', 'a[np.arange(0, n, 10)]')


# ## Alternatives to fancy indexing: list of indices

# Given a list of indices, there are two ways of selecting the corresponding sub-array: fancy indexing, or the np.take function.

# In[ ]:


# Integer indices of every 10th row: 0, 10, 20, ... (all below n).
i = np.arange(0, n, 10)


# In[ ]:


# Select the rows listed in `i` in two ways: by indexing `a` with the index
# array, and with np.take along axis 0. This rebinds b1 and b2.
b1 = a[i]
b2 = np.take(a, i, axis=0)


# In[ ]:


# Check that both methods returned the same values. The result is neither
# stored nor printed here; it is meant to be displayed as the cell's output.
np.array_equal(b1, b2)


# In[ ]:


# Time row selection by indexing with the precomputed index array `i`.
# NOTE: `get_ipython` is neither defined nor imported in this file; it is
# expected to be supplied by the IPython session running this exported notebook.
get_ipython().run_line_magic('timeit', 'a[i]')


# In[ ]:


# Time the same selection done with np.take along axis 0.
get_ipython().run_line_magic('timeit', 'np.take(a, i, axis=0)')


# Using np.take instead of fancy indexing is faster.

# **Note**: Performance of fancy indexing has been improved in recent versions of NumPy; this trick is especially useful on older versions of NumPy.

# ## Alternatives to fancy indexing: mask of booleans

# Let's create a mask of booleans, where each value indicates whether the corresponding row of `a` needs to be selected.

# In[ ]:


# Boolean mask with n entries: an entry is True when its random sample is
# below 0.5. This rebinds `i`, which previously held the integer index array.
i = np.random.random_sample(n) < .5


# The selection can be made using fancy indexing (with the boolean mask) or the np.compress function.

# In[ ]:


# Select the rows of `a` where the mask is True in two ways: by indexing with
# the mask, and with np.compress along axis 0. This rebinds b1 and b2.
b1 = a[i]
b2 = np.compress(i, a, axis=0)


# In[ ]:


# Check that both methods returned the same values. The result is neither
# stored nor printed here; it is meant to be displayed as the cell's output.
np.array_equal(b1, b2)


# In[ ]:


# Time row selection by indexing with the boolean mask `i`.
# NOTE: `get_ipython` is neither defined nor imported in this file; it is
# expected to be supplied by the IPython session running this exported notebook.
get_ipython().run_line_magic('timeit', 'a[i]')


# In[ ]:


# Time the same selection done with np.compress along axis 0.
get_ipython().run_line_magic('timeit', 'np.compress(i, a, axis=0)')


# Once again, the alternative method to fancy indexing is faster.

# > You'll find all the explanations, figures, references, and much more in the book (to be released later this summer).
# 
# > [IPython Cookbook](http://ipython-books.github.io/), by [Cyrille Rossant](http://cyrille.rossant.net), Packt Publishing, 2014 (500 pages).