In addition to numpy
, we need a module to handle sparse matrics. scipy.sparse
has all of what we need.
import numpy as np
import scipy.sparse
from random import randrange
n = 9
A = np.zeros((n, n))
for i in range((int(n**2*0.3))):
A[randrange(n), randrange(n)] = np.random.randn()
A.round(2)
array([[ 0. , 0. , 0.23, 0. , 0. , 0.13, 0.91, -1.29, 0. ], [ 0. , 0. , 0. , 0. , 0. , 0. , 0. , -1.24, 1.46], [ 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [ 0.32, 0. , 0. , 0. , -0.67, 0. , 0. , 0. , -0.26], [ 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [ 0. , 0. , 0. , 0. , 0. , 0. , 0.61, 0. , 1.2 ], [-0.01, 0.84, 0. , 0. , 0.23, 0. , 0. , 0. , 1.02], [ 0. , -1.09, 0. , 0. , 0. , 0. , 0. , 0. , -1.43], [ 0. , -0.27, 0. , 0. , 0. , 0. , -1.65, 1.39, 0. ]])
Now convert this to a CSR matrix:
Acsr = scipy.sparse.csr_matrix(A)
Acsr
<9x9 sparse matrix of type '<class 'numpy.float64'>' with 20 stored elements in Compressed Sparse Row format>
Acsr.indptr
array([ 0, 4, 6, 6, 9, 9, 11, 15, 17, 20], dtype=int32)
Acsr.indices
array([2, 5, 6, 7, 7, 8, 0, 4, 8, 6, 8, 0, 1, 4, 8, 1, 8, 1, 6, 7], dtype=int32)
Acsr.data.round(2)
array([ 0.23, 0.13, 0.91, -1.29, -1.24, 1.46, 0.32, -0.67, -0.26, 0.61, 1.2 , -0.01, 0.84, 0.23, 1.02, -1.09, -1.43, -0.27, -1.65, 1.39])
# in gigabytes:
10**6 * 10**6 * 8 / 1e9
# i.e. 8 Terabytes
8000.0
occupied = (
10**6 * 4 # (32-bit integers to store row indices)
+ 10**6 * 4 # (one 32-bit integer per row to store column index)
+ 10**6 * 8 # (one 64-bit double precision number per row to store value)
)
# in gigabytes
occupied / 1e9
# I.e. 16 Megabytes
0.016