NumPy

Credits: Forked from Parallel Machine Learning with scikit-learn and IPython by Olivier Grisel

  • NumPy Arrays, dtypes, and shapes
  • Common Array Operations
  • Reshape and Update In-Place
  • Combine Arrays
  • Create Sample Data
In [1]:
import numpy as np

NumPy Arrays, dtypes, and shapes

In [2]:
# Build a 1-D array from a Python list, then show its contents, shape and dtype
a = np.array([1, 2, 3])
print(a, a.shape, a.dtype, sep="\n")
[1 2 3]
(3,)
int64
In [3]:
# A nested list yields a 2-D array: two rows of three integers each
b = np.array([[0, 2, 4], [1, 3, 5]])
print(b, b.shape, b.dtype, sep="\n")
[[0 2 4]
 [1 3 5]]
(2, 3)
int64
In [4]:
# zeros takes the length and returns floats unless a dtype is given
np.zeros(5)
Out[4]:
array([ 0.,  0.,  0.,  0.,  0.])
In [5]:
# shape sets the dimensions (3 rows, 4 columns); dtype overrides the float default
np.ones(shape=(3, 4), dtype=np.int32)
Out[5]:
array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]], dtype=int32)

Common Array Operations

In [6]:
# Scaling by a float promotes the integer array b to float64
c = 0.5 * b
print(c, c.shape, c.dtype, sep="\n")
[[ 0.   1.   2. ]
 [ 0.5  1.5  2.5]]
(2, 3)
float64
In [7]:
# a has shape (3,) and c has shape (2, 3): broadcasting adds a to every row of c
d = c + a
print(d)
[[ 1.   3.   5. ]
 [ 1.5  3.5  5.5]]
In [8]:
# A single integer index selects the first row
d[0]
Out[8]:
array([ 1.,  3.,  5.])
In [9]:
# A row index and a column index together select one scalar element
d[0, 0]
Out[9]:
1.0
In [10]:
# A full slice on the row axis keeps every row, so this selects the first column
d[:, 0]
Out[10]:
array([ 1. ,  1.5])
In [11]:
# With no axis argument, the sum runs over every element of the array
d.sum()
Out[11]:
19.5
In [12]:
# With no axis argument, the mean is taken over every element of the array
d.mean()
Out[12]:
3.25
In [13]:
# axis=0 collapses the rows, leaving one sum per column
d.sum(axis=0)
Out[13]:
array([  2.5,   6.5,  10.5])
In [14]:
# axis=1 collapses the columns, leaving one mean per row
d.mean(axis=1)
Out[14]:
array([ 3. ,  3.5])

Reshape and Update In-Place

In [15]:
# Twelve consecutive integers starting at 0, used below to demonstrate views
e = np.arange(0, 12)
print(e)
[ 0  1  2  3  4  5  6  7  8  9 10 11]
In [16]:
# Reshaping yields a 3x4 view: f reuses e's buffer rather than copying it
f = np.reshape(e, (3, 4))
print(f)
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
In [17]:
# Overwrite e in place from index 5 to the end; any array that shares
# e's buffer sees the change as well
e[5:] = 0
print(e)
[0 1 2 3 4 0 0 0 0 0 0 0]
In [18]:
# f was never assigned to, yet it reflects the zeros written through e
f
Out[18]:
array([[0, 1, 2, 3],
       [4, 0, 0, 0],
       [0, 0, 0, 0]])
In [19]:
# OWNDATA is False because f is a view that borrows e's memory
# instead of owning a copy of it
f.flags
Out[19]:
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

Combine Arrays

In [20]:
# Recap of the arrays combined below: a is 1-D with shape (3,)
a
Out[20]:
array([1, 2, 3])
In [21]:
# b is a 2x3 integer array
b
Out[21]:
array([[0, 2, 4],
       [1, 3, 5]])
In [22]:
# d is a 2x3 float array
d
Out[22]:
array([[ 1. ,  3. ,  5. ],
       [ 1.5,  3.5,  5.5]])
In [23]:
# Join the 1-D arrays end to end along their only axis
np.concatenate([a, a, a])
Out[23]:
array([1, 2, 3, 1, 2, 3, 1, 2, 3])
In [24]:
# Stack row-wise: the 1-D array a is treated as a single row, and the integer
# inputs are upcast to float to match d (no broadcasting is involved)
np.vstack([a, b, d])
Out[24]:
array([[ 1. ,  2. ,  3. ],
       [ 0. ,  2. ,  4. ],
       [ 1. ,  3. ,  5. ],
       [ 1. ,  3. ,  5. ],
       [ 1.5,  3.5,  5.5]])
In [25]:
# Stack side by side: b and d both have two rows, so their columns are joined
# into one (2, 6) array. In machine learning this is a common way to append
# new feature columns to an existing feature matrix.
np.hstack([b, d])
Out[25]:
array([[ 0. ,  2. ,  4. ,  1. ,  3. ,  5. ],
       [ 1. ,  3. ,  5. ,  1.5,  3.5,  5.5]])

Create Sample Data

In [26]:
%matplotlib inline

import pylab as plt
import seaborn

seaborn.set()
In [27]:
# Ten evenly spaced values spanning [0, 2], plotted against their index
x = np.linspace(0, 2, num=10)
plt.plot(x, 'o-')
plt.show()
In [28]:
# Create sample data: y follows log(x) with Gaussian noise added.
# A dedicated, seeded generator makes the figure identical on every
# Restart & Run All without touching NumPy's global random state.
n_samples = 1000
rng = np.random.RandomState(0)
x = rng.uniform(1, 100, n_samples)
y = np.log(x) + rng.normal(0, .3, n_samples)

plt.scatter(x, y)
plt.xlabel('x')
plt.ylabel('log(x) + noise')
plt.show()