作者:Robert Cimrman
%matplotlib inline
import numpy as np
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 1e6, 10)
plt.plot(x, 8.0 * (x**2) / 1e6, lw=5)
plt.xlabel('size n')
plt.ylabel('memory [MB]')
<matplotlib.text.Text at 0x1122505c0>
import numpy as np
import scipy.sparse as sparse
import matplotlib.pyplot as plt
所有scipy.sparse类都是spmatrix的子类
属性:
数据通常储存在Numpy数组中
data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
data
array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]])
offsets = np.array([0, -1, 2])
mtx = sparse.dia_matrix((data, offsets), shape=(4, 4))
mtx
<4x4 sparse matrix of type '<class 'numpy.int64'>' with 9 stored elements (3 diagonals) in DIAgonal format>
mtx.todense()
matrix([[1, 0, 3, 0], [1, 2, 0, 4], [0, 2, 3, 0], [0, 0, 3, 4]])
data = np.arange(12).reshape((3, 4)) + 1
data
array([[ 1, 2, 3, 4], [ 5, 6, 7, 8], [ 9, 10, 11, 12]])
mtx = sparse.dia_matrix((data, offsets), shape=(4, 4))
mtx.data
array([[ 1, 2, 3, 4], [ 5, 6, 7, 8], [ 9, 10, 11, 12]])
mtx.offsets
array([ 0, -1, 2], dtype=int32)
print(mtx)
(0, 0) 1 (1, 1) 2 (2, 2) 3 (3, 3) 4 (1, 0) 5 (2, 1) 6 (3, 2) 7 (0, 2) 11 (1, 3) 12
mtx.todense()
matrix([[ 1, 0, 11, 0], [ 5, 2, 0, 12], [ 0, 6, 3, 0], [ 0, 0, 7, 4]])
偏移: 行

     2:  9
     1:  --10------
     0:  1  . 11  .
    -1:  5  2  . 12
    -2:  .  6  3  .
    -3:  .  .  7  4
         ---------8
vec = np.ones((4, ))
vec
array([ 1., 1., 1., 1.])
mtx * vec
array([ 12., 19., 9., 11.])
mtx.toarray() * vec
array([[ 1., 0., 11., 0.], [ 5., 2., 0., 12.], [ 0., 6., 3., 0.], [ 0., 0., 7., 4.]])
mtx = sparse.lil_matrix((4, 5))
from numpy.random import rand
data = np.round(rand(2, 3))
data
array([[ 1., 1., 1.], [ 0., 1., 1.]])
mtx[:2, [1, 2, 3]] = data
mtx
<4x5 sparse matrix of type '<class 'numpy.float64'>' with 5 stored elements in LInked List format>
print(mtx)
(0, 1) 1.0 (0, 2) 1.0 (0, 3) 1.0 (1, 2) 1.0 (1, 3) 1.0
mtx.todense()
matrix([[ 0., 1., 1., 1., 0.], [ 0., 0., 1., 1., 0.], [ 0., 0., 0., 0., 0.], [ 0., 0., 0., 0., 0.]])
mtx.toarray()
array([[ 0., 1., 1., 1., 0.], [ 0., 0., 1., 1., 0.], [ 0., 0., 0., 0., 0.], [ 0., 0., 0., 0., 0.]])
更多的切片和索引:
mtx = sparse.lil_matrix([[0, 1, 2, 0], [3, 0, 1, 0], [1, 0, 0, 1]])
mtx.todense()
matrix([[0, 1, 2, 0], [3, 0, 1, 0], [1, 0, 0, 1]], dtype=int64)
print(mtx)
(0, 1) 1 (0, 2) 2 (1, 0) 3 (1, 2) 1 (2, 0) 1 (2, 3) 1
mtx[:2, :]
<2x4 sparse matrix of type '<class 'numpy.int64'>' with 4 stored elements in LInked List format>
mtx[:2, :].todense()
matrix([[0, 1, 2, 0], [3, 0, 1, 0]], dtype=int64)
mtx[1:2, [0,2]].todense()
matrix([[3, 1]], dtype=int64)
mtx.todense()
matrix([[0, 1, 2, 0], [3, 0, 1, 0], [1, 0, 0, 1]], dtype=int64)
mtx = sparse.dok_matrix((5, 5), dtype=np.float64)
mtx
<5x5 sparse matrix of type '<class 'numpy.float64'>' with 0 stored elements in Dictionary Of Keys format>
for ir in range(5):
for ic in range(5):
mtx[ir, ic] = 1.0 * (ir != ic)
mtx
<5x5 sparse matrix of type '<class 'numpy.float64'>' with 20 stored elements in Dictionary Of Keys format>
mtx.todense()
matrix([[ 0., 1., 1., 1., 1.], [ 1., 0., 1., 1., 1.], [ 1., 1., 0., 1., 1.], [ 1., 1., 1., 0., 1.], [ 1., 1., 1., 1., 0.]])
mtx[1, 1]
0.0
mtx[1, 1:3]
<1x2 sparse matrix of type '<class 'numpy.float64'>' with 1 stored elements in Dictionary Of Keys format>
mtx[1, 1:3].todense()
matrix([[ 0., 1.]])
mtx[[2,1], 1:3].todense()
matrix([[ 1., 0.], [ 0., 1.]])
data[i] 是在 (row[i], col[i]) 位置的值
_data_matrix 的子类 (带有 data 属性的稀疏矩阵类)
构造器可接受: (data, ij) 元组
mtx = sparse.coo_matrix((3, 4), dtype=np.int8)
mtx.todense()
matrix([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=int8)
row = np.array([0, 3, 1, 0])
col = np.array([0, 3, 1, 2])
data = np.array([4, 5, 7, 9])
mtx = sparse.coo_matrix((data, (row, col)), shape=(4, 4))
mtx
<4x4 sparse matrix of type '<class 'numpy.int64'>' with 4 stored elements in COOrdinate format>
mtx.todense()
matrix([[4, 0, 9, 0], [0, 7, 0, 0], [0, 0, 0, 0], [0, 0, 0, 5]])
三个Numpy数组: indices, indptr, data
indices 是列索引的数组
data 是对应的非零值数组
indptr 指向各行在 indices 和 data 中开始的索引
长度是 n_row + 1, 最后一个项目 = 值数量 = indices 和 data 的长度
第 i 行的非零值是列索引为 indices[indptr[i]:indptr[i+1]] 的 data[indptr[i]:indptr[i+1]]
项目 (i, j) 可以通过 data[indptr[i]+k] 来访问, k 是 j 在 indices[indptr[i]:indptr[i+1]] 的位置
_cs_matrix (常规 CSR/CSC 功能) 的子类
_data_matrix (带有 data 属性的稀疏矩阵类) 的子类
构造器可接受: (data, ij) 元组、(data, indices, indptr) 元组
mtx = sparse.csr_matrix((3, 4), dtype=np.int8)
mtx.todense()
matrix([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=int8)
用 (data, ij) 元组创建:
row = np.array([0, 0, 1, 2, 2, 2])
col = np.array([0, 2, 2, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])
mtx = sparse.csr_matrix((data, (row, col)), shape=(3, 3))
mtx
<3x3 sparse matrix of type '<class 'numpy.int64'>' with 6 stored elements in Compressed Sparse Row format>
mtx.todense()
matrix([[1, 0, 2], [0, 0, 3], [4, 5, 6]], dtype=int64)
mtx.data
array([1, 2, 3, 4, 5, 6], dtype=int64)
mtx.indices
array([0, 2, 2, 0, 1, 2], dtype=int32)
mtx.indptr
array([0, 2, 3, 6], dtype=int32)
用(data, indices, indptr)
元组创建:
data = np.array([1, 2, 3, 4, 5, 6])
indices = np.array([0, 2, 2, 0, 1, 2])
indptr = np.array([0, 2, 3, 6])
mtx = sparse.csr_matrix((data, indices, indptr), shape=(3, 3))
mtx.todense()
matrix([[1, 0, 2], [0, 0, 3], [4, 5, 6]])
三个Numpy数组: indices、indptr、data
indices 是行索引的数组
data 是对应的非零值
indptr 指向 indices 和 data 中各列开始的位置
长度是 n_col + 1, 最后一个条目 = 值数量 = indices 和 data 的长度
第 i 列的非零值是行索引为 indices[indptr[i]:indptr[i+1]] 的 data[indptr[i]:indptr[i+1]]
项目 (i, j) 可以通过 data[indptr[j]+k] 访问, k 是 i 在 indices[indptr[j]:indptr[j+1]] 的位置
_cs_matrix 的子类 (通用的 CSR/CSC 功能性)
_data_matrix 的子类 (带有 data 属性的稀疏矩阵类)
构造器可接受: (data, ij) 元组、(data, indices, indptr) 元组
mtx = sparse.csc_matrix((3, 4), dtype=np.int8)
mtx.todense()
matrix([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=int8)
用 (data, ij) 元组创建:
row = np.array([0, 0, 1, 2, 2, 2])
col = np.array([0, 2, 2, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])
mtx = sparse.csc_matrix((data, (row, col)), shape=(3, 3))
mtx
<3x3 sparse matrix of type '<class 'numpy.int64'>' with 6 stored elements in Compressed Sparse Column format>
mtx.todense()
matrix([[1, 0, 2], [0, 0, 3], [4, 5, 6]], dtype=int64)
mtx.data
array([1, 4, 5, 2, 3, 6], dtype=int64)
mtx.indices
array([0, 2, 2, 0, 1, 2], dtype=int32)
mtx.indptr
array([0, 2, 3, 6], dtype=int32)
用 (data, indices, indptr) 元组创建:
data = np.array([1, 4, 5, 2, 3, 6])
indices = np.array([0, 2, 2, 0, 1, 2])
indptr = np.array([0, 2, 3, 6])
mtx = sparse.csc_matrix((data, indices, indptr), shape=(3, 3))
mtx.todense()
matrix([[1, 0, 2], [0, 0, 3], [4, 5, 6]])
块大小 (R, C) 必须可以整除矩阵的形状 (M, N)
三个Numpy数组: indices、indptr、data
indices 是每个块列索引的数组
data 是形状为 (nnz, R, C) 对应的非零值
_cs_matrix 的子类 (通用的CSR/CSC功能性)
_data_matrix 的子类 (带有 data 属性的稀疏矩阵类)
构造器可接受: (data, ij) 元组、(data, indices, indptr) 元组
创建 (1, 1) 块大小的 (类似CSR...) 空BSR矩阵:
mtx = sparse.bsr_matrix((3, 4), dtype=np.int8)
mtx
<3x4 sparse matrix of type '<class 'numpy.int8'>' with 0 stored elements (blocksize = 1x1) in Block Sparse Row format>
mtx.todense()
matrix([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=int8)
创建块大小为 (3, 2) 的空BSR矩阵:
mtx = sparse.bsr_matrix((3, 4), blocksize=(3, 2), dtype=np.int8)
mtx
<3x4 sparse matrix of type '<class 'numpy.int8'>' with 0 stored elements (blocksize = 3x2) in Block Sparse Row format>
- 一个bug?
用 (1, 1) 块大小 (类似 CSR...) 的 (data, ij) 元组创建:
row = np.array([0, 0, 1, 2, 2, 2])
col = np.array([0, 2, 2, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])
mtx = sparse.bsr_matrix((data, (row, col)), shape=(3, 3))
mtx
<3x3 sparse matrix of type '<class 'numpy.int64'>' with 6 stored elements (blocksize = 1x1) in Block Sparse Row format>
mtx.todense()
matrix([[1, 0, 2], [0, 0, 3], [4, 5, 6]], dtype=int64)
mtx.indices
array([0, 2, 2, 0, 1, 2], dtype=int32)
mtx.indptr
array([0, 2, 3, 6], dtype=int32)
用 (2, 2) 块大小的 (data, indices, indptr) 元组创建:
indptr = np.array([0, 2, 3, 6])
indices = np.array([0, 2, 2, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6]).repeat(4).reshape(6, 2, 2)
mtx = sparse.bsr_matrix((data, indices, indptr), shape=(6, 6))
mtx.todense()
matrix([[1, 1, 0, 0, 2, 2], [1, 1, 0, 0, 2, 2], [0, 0, 0, 0, 3, 3], [0, 0, 0, 0, 3, 3], [4, 4, 5, 5, 6, 6], [4, 4, 5, 5, 6, 6]])
data
array([[[1, 1], [1, 1]], [[2, 2], [2, 2]], [[3, 3], [3, 3]], [[4, 4], [4, 4]], [[5, 5], [5, 5]], [[6, 6], [6, 6]]])
存储机制的总结
格式 | 矩阵 * 向量 | 提取项目 | 灵活提取 | 设置项目 | 灵活设置 | 求解器 | 备注 |
---|---|---|---|---|---|---|---|
DIA | sparsetools | . | . | . | . | 迭代 | 有数据数组,专门化 |
LIL | 通过 CSR | 是 | 是 | 是 | 是 | 迭代 | 通过CSR的算术, 增量构建 |
DOK | python | 是 | 只有一个轴 | 是 | 是 | 迭代 | O(1) 条目访问, 增量构建 |
COO | sparsetools | . | . | . | . | 迭代 | 有数据数组, 便利的快速转换 |
CSR | sparsetools | 是 | 是 | 慢 | . | 任何 | 有数据数组, 快速以行为主的操作 |
CSC | sparsetools | 是 | 是 | 慢 | . | 任何 | 有数据数组, 快速以列为主的操作 |
BSR | sparsetools | . | . | . | . | 专门化 | 有数据数组,专门化 |