NumPy is a Python library providing support for large, multi-dimensional arrays and matrices, along with a large collection of mathematical functions to operate on these arrays.
It is the fundamental package for scientific computing with Python.
# Import the NumPy package under the alias "np"
import numpy as np
In the context of data science, a tensor is a set of primitive values (almost always numbers) shaped into an array of any number of dimensions.
Tensors are the core data structures for machine learning.
# A scalar wrapped in an array is a 0D tensor: no axes, empty shape tuple.
x = np.array(12)
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
12 Dimensions: 0 Shape: ()
# A flat list of numbers becomes a vector (1D tensor) with a single axis.
x = np.array([12, 3, 6, 14])
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[12 3 6 14] Dimensions: 1 Shape: (4,)
# A list of equally sized rows becomes a matrix (2D tensor):
# 3 entries along axis 0 and 5 entries along axis 1.
x = np.array([
    [5, 78, 2, 34, 0],
    [6, 79, 3, 35, 1],
    [7, 80, 4, 36, 2],
])
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[[ 5 78 2 34 0] [ 6 79 3 35 1] [ 7 80 4 36 2]] Dimensions: 2 Shape: (3, 5)
# Stacking three identical (2, 5) matrices yields a 3D tensor of shape (3, 2, 5).
x = np.array([
    [
        [5, 78, 2, 34, 0],
        [6, 79, 3, 35, 1],
    ],
    [
        [5, 78, 2, 34, 0],
        [6, 79, 3, 35, 1],
    ],
    [
        [5, 78, 2, 34, 0],
        [6, 79, 3, 35, 1],
    ],
])
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[[[ 5 78 2 34 0] [ 6 79 3 35 1]] [[ 5 78 2 34 0] [ 6 79 3 35 1]] [[ 5 78 2 34 0] [ 6 79 3 35 1]]] Dimensions: 3 Shape: (3, 2, 5)
The number of entries along a specific axis is also called a dimension, which can be somewhat confusing.
A 3-dimensional vector (a 1D tensor with three entries) is not the same as a 3-dimensional tensor (a tensor with three axes).
# x is a 3-dimensional vector: three entries, but still only one axis (1D tensor).
x = np.array([12, 3, 6])
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[12 3 6] Dimensions: 1 Shape: (3,)
# The extra pair of brackets adds an axis: x is a one-row matrix (2D tensor).
x = np.array([[12, 3, 6, 14]])
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[[12 3 6 14]] Dimensions: 2 Shape: (1, 4)
# Four single-entry rows: x is a one-column matrix (2D tensor).
x = np.array([
    [12],
    [3],
    [6],
    [14],
])
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[[12] [ 3] [ 6] [14]] Dimensions: 2 Shape: (4, 1)
# Start from a one-row matrix of shape (1, 4) ...
x = np.array([[12, 3, 6, 14]])
# ... and drop its length-1 axis: x is now a vector (1D tensor).
x = np.squeeze(x)
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[12 3 6 14] Dimensions: 1 Shape: (4,)
# Start from a one-column matrix of shape (4, 1) ...
x = np.array([
    [12],
    [3],
    [6],
    [14],
])
# ... and drop its length-1 axis: x is now a vector (1D tensor).
x = np.squeeze(x)
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[12 3 6 14] Dimensions: 1 Shape: (4,)
# Reshape a (3, 2) matrix into a (2, 3) matrix.
# The six entries keep their row-major order; only the grouping changes.
x = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
x = x.reshape(2, 3)
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[[1 2 3] [4 5 6]] Dimensions: 2 Shape: (2, 3)
# Flatten a (3, 2) matrix into a vector of 6 entries.
x = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
x = x.reshape(6)
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[1 2 3 4 5 6] Dimensions: 1 Shape: (6,)
# Reshape a 3D tensor into a matrix by merging its last two axes.
x = np.array([
    [
        [5, 6],
        [7, 8],
    ],
    [
        [9, 10],
        [11, 12],
    ],
    [
        [13, 14],
        [15, 16],
    ],
])
print(f'Original dimensions: {x.ndim}')
print(f'Original shape: {x.shape}')
# Each (2, 2) sub-matrix becomes one row of 2 * 2 = 4 entries.
x = x.reshape(3, 2 * 2)
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
Original dimensions: 3 Original shape: (3, 2, 2) [[ 5 6 7 8] [ 9 10 11 12] [13 14 15 16]] Dimensions: 2 Shape: (3, 4)
# Insert a new leading axis into a vector, turning it into a row matrix.
x = np.array([1, 2, 3])
print(f'Original dimensions: {x.ndim}')
print(f'Original shape: {x.shape}')
# np.newaxis in first position: shape (3,) becomes (1, 3).
x = x[np.newaxis, :]
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
Original dimensions: 1 Original shape: (3,) [[1 2 3]] Dimensions: 2 Shape: (1, 3)
# Insert a new trailing axis into a vector, turning it into a column matrix.
x = np.array([1, 2, 3])
print(f'Original dimensions: {x.ndim}')
print(f'Original shape: {x.shape}')
# np.newaxis in last position: shape (3,) becomes (3, 1).
x = x[:, np.newaxis]
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
Original dimensions: 1 Original shape: (3,) [[1] [2] [3]] Dimensions: 2 Shape: (3, 1)
# Transposing a vector has no effect: with a single axis there is nothing to swap.
x = np.array([12, 3, 6, 14])
# Alternative syntax: x = np.transpose(x)
x = x.T
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[12 3 6 14] Dimensions: 1 Shape: (4,)
# Transposing a matrix swaps its two axes: shape (3, 4) becomes (4, 3).
x = np.array([
    [5, 78, 2, 34],
    [6, 79, 3, 35],
    [7, 80, 4, 36],
])
x = x.T
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[[ 5 6 7] [78 79 80] [ 2 3 4] [34 35 36]] Dimensions: 2 Shape: (4, 3)
# Slice a vector into two parts around index 3.
x = np.array([1, 2, 3, 4, 5, 6, 7])
print(x[:3])  # first three entries (indices 0 to 2)
print(x[3:])  # everything from index 3 onwards
[1 2 3] [4 5 6 7]
# Slice a matrix along each of its axes; a bare ':' keeps an axis whole.
x = np.array([
    [5, 78, 2, 34],
    [6, 79, 3, 35],
    [7, 80, 4, 36],
])
print(x[:2, :])  # first two rows, all columns
print(x[2:, :])  # remaining rows, all columns
print(x[:, :2])  # all rows, first two columns
print(x[:, 2:])  # all rows, remaining columns
[[ 5 78 2 34] [ 6 79 3 35]] [[ 7 80 4 36]] [[ 5 78] [ 6 79] [ 7 80]] [[ 2 34] [ 3 35] [ 4 36]]
NumPy provides several useful functions for initializing tensors with particular values.
# An integer argument creates a vector of that many floating-point zeros.
x = np.zeros(3)
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[0. 0. 0.] Dimensions: 1 Shape: (3,)
# A shape tuple creates a zero-filled tensor of that shape, here a (3, 4) matrix.
x = np.zeros((3, 4))
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[[0. 0. 0. 0.] [0. 0. 0. 0.] [0. 0. 0. 0.]] Dimensions: 2 Shape: (3, 4)
Values are sampled from the standard "normal" (Gaussian) distribution, with mean 0 and variance 1.
# Draw a (5, 2) matrix of random samples; the values differ on every run.
x = np.random.randn(5, 2)
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[[-0.71322934 -0.20855806] [ 0.01650635 -1.17925907] [ 0.5661353 0.88020701] [ 0.00578376 0.91440026] [-0.36185677 0.46671169]] Dimensions: 2 Shape: (5, 2)
Element-wise operations are applied independently to each entry in the tensors being considered.
Other operations, like the dot product, combine entries in the input tensors to produce a differently shaped result.
# Element-wise addition of two vectors: z[i] = x[i] + y[i].
x = np.array([2, 5, 4])
y = np.array([1, -1, 4])
z = x + y
print(z)
print(f'Dimensions: {z.ndim}')
print(f'Shape: {z.shape}')
[3 4 8] Dimensions: 1 Shape: (3,)
# Element-wise product of two matrices with identical shapes:
# z[i, j] = x[i, j] * y[i, j]. This is not the matrix (dot) product.
x = np.array([
    [1, 2, 3],
    [3, 2, 1],
])
y = np.array([
    [3, 0, 2],
    [1, 4, -2],
])
z = x * y
print(z)
print(f'Dimensions: {z.ndim}')
print(f'Shape: {z.shape}')
[[ 3 0 6] [ 3 8 -2]] Dimensions: 2 Shape: (2, 3)
# Dot product of two matrices with compatible shapes:
# (2, 3) . (3, 2) -> (2, 2), since the inner sizes (3) match.
# x has shape (2, 3)
x = np.array([
    [1, 2, 3],
    [3, 2, 1],
])
# y has shape (3, 2)
y = np.array([
    [3, 0],
    [2, 1],
    [4, -2],
])
# Alternative syntax: z = x.dot(y)
z = np.dot(x, y)
print(z)
print(f'Dimensions: {z.ndim}')
print(f'Shape: {z.shape}')
[[19 -4] [17 0]] Dimensions: 2 Shape: (2, 2)
Broadcasting is a powerful NumPy functionality.
If there is no ambiguity, the smaller tensor can be "broadcast" implicitly to match the larger tensor's shape before an operation is applied to them.
# Broadcasting a scalar over a vector: 3 is added to every entry.
x = np.array([12, 3, 6, 14])
x = x + 3
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[15 6 9 17] Dimensions: 1 Shape: (4,)
# Broadcasting a scalar over a matrix: 1 is subtracted from every entry.
x = np.array([
    [0, 1, 2],
    [-2, 5, 3],
])
x = x - 1
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[[-1 0 1] [-3 4 2]] Dimensions: 2 Shape: (2, 3)
# Broadcasting a vector over a matrix: the (3,) vector y is added to each
# row of the (2, 3) matrix x.
x = np.array([
    [0, 1, 2],
    [-2, 5, 3],
])
y = np.array([1, 2, 3])
z = x + y
print(z)
print(f'Dimensions: {z.ndim}')
print(f'Shape: {z.shape}')
[[ 1 3 5] [-1 7 6]] Dimensions: 2 Shape: (2, 3)
# Summing without an axis argument adds up every entry of the matrix.
x = np.array([
    [0, 1, 2],
    [-2, 5, 3],
])
# x is now a scalar (0D tensor)
x = np.sum(x)
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
9 Dimensions: 0 Shape: ()
# Sum a matrix along its first axis (axis 0, i.e. across the rows):
# the rows are collapsed, leaving one total per column.
x = np.array([
    [0, 1, 2],
    [-2, 5, 3],
])
# x is now a vector (1D tensor)
x = np.sum(x, axis=0)
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[-2 6 5] Dimensions: 1 Shape: (3,)
# Sum a matrix along its second axis (axis 1, i.e. across the columns):
# the columns are collapsed, leaving one total per row.
x = np.array([
    [0, 1, 2],
    [-2, 5, 3],
])
# x is now a vector (1D tensor)
x = np.sum(x, axis=1)
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[3 6] Dimensions: 1 Shape: (2,)
# Sum a matrix along its first axis (axis 0, i.e. across the rows), keeping
# the number of dimensions: the summed axis is retained with length 1
# instead of being dropped, so the result has shape (1, 3) rather than (3,).
x = np.array([
    [0, 1, 2],
    [-2, 5, 3],
])
# x is still a matrix (2D tensor)
x = np.sum(x, axis=0, keepdims=True)
print(x)
print(f'Dimensions: {x.ndim}')
print(f'Shape: {x.shape}')
[[-2 6 5]] Dimensions: 2 Shape: (1, 3)
# Standardize each column of a random (3, 4) matrix: subtract the column
# mean, then divide by the column standard deviation. Both statistics are
# computed along axis 0 and broadcast back over the rows.
x = np.random.randn(3, 4)
print(x)
print(f"Mean: {x.mean(axis=0)}")
print(f"Standard deviation: {x.std(axis=0)}")
# Centre first, then scale; both operations modify x in place.
x -= x.mean(axis=0)
x /= x.std(axis=0)
print(x)
# After standardization every column has mean ~0 (up to rounding) and std 1.
print(f"Final mean: {x.mean(axis=0)}")
print(f"Final standard deviation: {x.std(axis=0)}")
[[-0.68306552 -0.51606135 -0.03604431 -0.18932272] [ 1.10231885 -0.23115393 -0.03278724 0.43574405] [ 1.08069115 -0.5709149 1.10707078 2.13235445]] Mean: [ 0.49998149 -0.43937672 0.34607975 0.79292526] Standard deviation: [0.83658716 0.14892902 0.53810356 0.98089416] [[-1.4141348 -0.5149072 -0.71013106 -1.00138019] [ 0.71999354 1.39813451 -0.70407819 -0.36413838] [ 0.69414125 -0.88322731 1.41420924 1.36551857]] Final mean: [ 7.40148683e-17 3.33066907e-16 -7.40148683e-17 1.48029737e-16] Final standard deviation: [1. 1. 1. 1.]