#!/usr/bin/env python
# coding: utf-8

# # 张量的存储

# > 本文内容整理自书籍《Deep Learning with PyTorch》 https://pytorch.org/deep-learning-with-pytorch-thank-you

# 本章涵盖
# + 张量，PyTorch中的基本数据结构
# + 索引并在PyTorch张量上进行操作以探索和处理数据
# + 与NumPy多维数组互操作
# + 将计算移至GPU以提高速度

# In[1]:


import numpy as np
import torch


# ## 张量的存储的形状、步长、偏移量

# In[11]:


# Three 2-D points, one point per row.
points = torch.tensor(
    [
        [1.0, 4.0],
        [2.0, 1.0],
        [3.0, 5.0],
    ]
)
points


# In[12]:


# Three rows, two columns.
points.shape


# In[14]:


# A single index selects a whole row.
points[0]


# In[13]:


# Two equivalent ways to reach the same element: one multi-index, or two chained indexings.
points[0][1], points[0, 1]


# Although the tensor reports three rows and two columns, underneath it is a
# contiguous array of size 6. In that sense the tensor knows how to translate
# a pair of indices into a position in the storage.

# In[15]:


points.storage()


# You can also index into a storage by hand:

# In[17]:


# The storage is one-dimensional, so it takes a single index.
points.storage()[0]


# 你不能用两个指标来索引一个二维张量的存储。存储的布局总是一维的，而与可能涉及到它的任何张量的维数无关。
# 
# 在这一点上，改变存储的值就会改变其引用张量的内容，这并不奇怪:

# In[16]:


points = torch.tensor([[1.0, 4.0], [2.0, 1.0], [3.0, 5.0]])
points_storage = points.storage()
# Write through the storage; the tensor is a view onto this same storage.
points_storage[0] = 2.0
# Show the tensor so the effect of the write is visible: points[0, 0] is now 2.0.
points


# ### 形状

# In[24]:


# Shape of the whole tensor, of its first row, and of a single element.
tuple(t.shape for t in (points, points[0], points[0][0]))


# In[27]:


# size() reports the same information as the shape attribute.
tuple(t.size() for t in (points, points[0], points[0][0]))


# ### Stride

# In[26]:


# Per dimension: how many storage elements to skip when that index grows by one.
tuple(t.stride() for t in (points, points[0], points[0][0]))


# ### Storage offset

# In[21]:


points, points[0], points[0, 0]


# This indirection between a tensor and its storage makes some operations,
# such as transposing a tensor or extracting a sub-tensor, cheap: they do not
# reallocate memory. Instead they allocate a new tensor object whose shape,
# storage offset or stride has different values.

# In[20]:


# Index in the storage at which each view's first element lives.
tuple(t.storage_offset() for t in (points, points[0], points[0][0]))


# In[22]:


points[1], points[1, 0], points[1, 1]


# In[23]:


tuple(t.storage_offset() for t in (points[1], points[1][0], points[1][1]))


# 在二维张量中访问元素i, j的结果是访问存储中的storage_offset + stride[0] * i + stride[1] * j元素。

# ### 子张量的副作用

# In[28]:


# Indexing returns a view on the same storage, so writing to the
# sub-tensor also changes `points`.
second_point = points[1]
second_point[0] = 10.0
points


# This effect is not always desirable, so you can clone the sub-tensor
# into a new tensor instead:

# In[29]:


# The clone is an independent copy, so this write leaves `points` untouched.
second_point = torch.clone(points[1])
second_point[0] = 20.0
points


# ## 张量的视图与存储

# ### 转置不改变张量的存储

# In[36]:


points = torch.tensor([[1.0, 4.0], [2.0, 1.0], [3.0, 5.0]])
points


# In[37]:


# t() gives the transposed tensor.
points_t = points.t()
points_t


# In[38]:


# Compare the address of the underlying buffer, not id() of the storage
# objects: each storage() call builds a fresh Python wrapper, so comparing the
# ids of two temporaries says nothing about whether the memory is shared.
shares_storage = points.storage().data_ptr() == points_t.storage().data_ptr()
shares_storage


# In[39]:


points.storage()


# The two tensors differ only in shape and stride:

# In[40]:


points.stride()


# In[41]:


points_t.stride()


# In[44]:


# Element (0, 1) of the original ...
points[0, 1].storage_offset()


# In[45]:


# ... and element (0, 1) of the transpose sit at different storage offsets.
points_t[0, 1].storage_offset()


# 在pytorch中，只有很少几个操作是不改变tensor的内容本身，而只是重新定义下标与元素的对应关系的。换句话说，这种操作不进行数据拷贝和数据的改变，变的是元数据。
# 
# 这些操作是：narrow()，view()，expand()和transpose()

# 举个例子，在使用transpose()进行转置操作时，pytorch并不会创建新的、转置后的tensor，而是修改了tensor中的一些属性（也就是元数据），使得此时的offset和stride是与转置tensor相对应的。转置的tensor和原tensor的内存是共享的！

# In[2]:


# A 2 x 3 x 4 tensor written out as two 3 x 4 planes.
tensor_A = torch.tensor(
    [
        [[0, 6, 12, 18], [2, 8, 14, 20], [4, 10, 16, 22]],
        [[1, 7, 13, 19], [3, 9, 15, 21], [5, 11, 17, 23]],
    ]
)

tensor_A


# Memory has no notion of a dimension hierarchy: data can only be written flat
# and in order, so the hierarchy has to be managed explicitly, i.e. the
# storage order of every tensor has to be tracked. For convenience, call the
# dimensions towards the left of the shape the "large" dimensions and those
# towards the right the "small" dimensions: in a tensor of shape [2, 3, 4] the
# first dimension (size 2) is the largest and the last one (size 4) is the
# smallest. With the small dimensions written first, the memory layout of the
# shape (2, 3, 4) tensor is:

# In[3]:


tensor_A.storage()


# In[4]:


tensor_A.stride()


# In[5]:


# The integers 0..23 laid out as a 4 x 3 x 2 array, then copied into a tensor.
tensor_B = torch.tensor(np.arange(2 * 3 * 4).reshape(4, 3, 2))
tensor_B


# In[6]:


tensor_B.storage()


# In[9]:


tensor_B.stride()


# This indirection between a tensor and its storage makes some operations,
# such as transposing a tensor or extracting a sub-tensor, cheap: they do not
# reallocate memory. Instead they allocate a new tensor object whose shape,
# storage offset or stride has different values.

# In[7]:


# Swap the first and last dimensions: shape (4, 3, 2) becomes (2, 3, 4).
tensor_B_transpose = torch.transpose(tensor_B, 0, 2)
tensor_B_transpose


# In[8]:


tensor_B_transpose.storage()  # same as tensor_B.storage()


# In[9]:


tensor_B_transpose.stride()  # differs from tensor_B


# ### contiguous

# The tensor_B_transpose produced by the transpose above lays its data out
# differently from a regular tensor of that shape created from scratch. This
# is where contiguous() comes in. In the example above tensor_B is contiguous
# but tensor_B_transpose is not (its data is not in the usual layout). Do not
# be misled by the literal meaning of "contiguous": the tensor's data still
# sits in a single region of memory, only the layout differs. Calling
# contiguous() here forces a copy of the tensor whose layout matches that of
# one created from scratch.

# In[46]:


tensor_B_transpose_contiguous = tensor_B_transpose.contiguous()
tensor_B_transpose_contiguous


# In[47]:


tensor_B_transpose_contiguous.storage()  # same as tensor_A.storage()


# In[48]:


tensor_B_transpose_contiguous.stride()


# ### 张量的视图与存储的关系总结
# 
# **联系**
# 
# 对于形状 shape 为 (d1, d2, ..., dn) 的张量，设其视图中元素 E 的下标为 (e1, e2, ..., en)；如果该张量的步长 stride 为 (s1, s2, ..., sn)、存储偏移量 storage offset 为 s_o，那么元素 E 在存储中的位置 index 是：
# $$\mathrm{index}(e_1, e_2, \ldots, e_n) = s_o + s_1 e_1 + s_2 e_2 + \cdots + s_n e_n$$
# 
# **区别**
# 
# + 相同存储可以有不同的视图：tensor_B.storage() 与 tensor_B_transpose.storage() 相同，但是 tensor_B 与 tensor_B_transpose 不同。
# + 相同的视图可以有不同的存储：tensor_A 与 tensor_B_transpose 相同，但是 tensor_A.storage() 与 tensor_B_transpose.storage() 不同。
# 
# 总结：张量的视图与存储通过索引来建立关系，它们之间没有必然性，即相同存储可以有不同的视图，相同的视图可以有不同的存储。

# ## 张量的类型

# + torch.FloatTensor | torch.float32 or torch.float—32-bit floating-point 
# + torch.DoubleTensor | torch.float64 or torch.double—64-bit, double-precision floating-point 
# + torch.HalfTensor | torch.float16 or torch.half—16-bit, half-precision floating-point 
# + torch.CharTensor | torch.int8—Signed 8-bit integers 
# + torch.ByteTensor | torch.uint8—Unsigned 8-bit integers 
# + torch.ShortTensor | torch.int16 or torch.short—Signed 16-bit integers 
# + torch.IntTensor | torch.int32 or torch.int—Signed 32-bit integers 
# + torch.LongTensor | torch.int64 or torch.long—Signed 64-bit integers

# 要分配一个正确的数字类型的张量，你可以指定正确的dtype作为构造函数的参数，如下所示:

# In[52]:


# Pass the desired dtype straight to the creation function.
double_points = torch.ones((10, 2), dtype=torch.float64)
short_points = torch.tensor([[1, 2], [3, 4]], dtype=torch.int16)


# You can find out the dtype of a tensor by reading the corresponding attribute:

# In[53]:


short_points.dtype


# You can also cast the output of a tensor-creation function to the right
# type with the corresponding casting method (for example double() or short()):

# In[54]:


double_points = torch.zeros((10, 2)).double()
short_points = torch.ones((10, 2)).short()


# Or, more conveniently, with to():

# In[55]:


double_points = torch.zeros((10, 2)).to(dtype=torch.double)
short_points = torch.ones((10, 2)).to(torch.short)


# Under the hood, type and to perform the same type check and conversion
# (if needed), but the to method can take additional arguments.
# You can always cast a tensor of one type to a tensor of another type with
# the type method:

# In[57]:


points = torch.randn((10, 2))
short_points = points.type(torch.int16)


# ## 索引张量

# In[58]:


# A 3 x 3 tensor to slice.
points = torch.tensor(
    [
        [1.0, 4.0, 5.1],
        [2.0, 1.0, 3.2],
        [3.0, 5.0, 1.7],
    ]
)
points


# In[60]:


# start:stop:step on the first dimension -> rows 0 and 2.
points[0:3:2]


# In[61]:


# Every row after the first, all columns.
points[1:, :]


# ## 与Numpy互操作

# PyTorch 张量利用了 Python 的 Buffer Protocol（https://docs.python.org/3/c-api/buffer.html），所以 tensor 与 NumPy 数组之间具有零拷贝的互操作性。

# In[63]:


# Tensor -> NumPy array; per the zero-copy interop described above, no data is copied.
points = torch.ones((2, 3))
points_np = points.numpy()
points_np


# In[64]:


# NumPy array -> tensor, again without copying.
points = torch.from_numpy(points_np)
points


# ## 序列化张量

# ### pickle

# PyTorch 在底层使用 pickle 来序列化张量对象，并配有专门用于存储（storage）的序列化代码。如果你只打算用 PyTorch 重新加载这些张量，这种方式可以让你快速地保存它们；但文件格式本身不具备互操作性：除了 PyTorch，你无法用其他软件读取这些张量。

# In[67]:


import os


# In[72]:


# Make sure the output directory exists. exist_ok=True makes this a no-op when
# the directory is already there and avoids the check-then-create race.
os.makedirs("./PyTorch_learn/data/", exist_ok=True)


# In[73]:


# Write the tensor to disk in PyTorch's own format.
torch.save(obj=points, f='./PyTorch_learn/data/ourpoints.t')


# In[74]:


# Read it back from the same path.
points = torch.load(f='./PyTorch_learn/data/ourpoints.t')
points


# ### HDF5

# HDF5 是一种可移植的、被广泛支持的格式，用于表示序列化的多维数组，这些数组组织在嵌套的键值字典中。Python 通过 h5py 库支持 HDF5，它以 NumPy 数组的形式接受和返回数据。

# In[75]:


import h5py


# In[78]:


# Use the file as a context manager so it is closed even if creating the
# dataset raises.
with h5py.File("./PyTorch_learn/data/ourpoints.hdf5", 'w') as f:
    # Store the tensor's data, converted to a NumPy array, under the key 'coords'.
    dset = f.create_dataset('coords', data=points.numpy())


# In[80]:


# Open read-only inside a context manager so the file is closed even if the
# read fails, rather than staying open until a later cell closes it.
with h5py.File('./PyTorch_learn/data/ourpoints.hdf5', 'r') as f:
    dset = f['coords']
    # Slice the dataset once: everything from the second row onwards.
    last_points = dset[1:]
last_points


# In[81]:


# Wrap the array that was already read as a tensor; no second read from disk.
last_points = torch.from_numpy(last_points)
last_points


# ## 张量API

# 在线文档 https://pytorch.org/docs/stable/index.html 它是详尽无遗的，并且组织合理，将张量操作分为几组。

# ## 练习

# + Create a tensor a from list(range(9)). Predict then check what the size, offset, and strides are. 
# + Create a tensor b = a.view(3, 3). What is the value of b[1,1]? 
# + Create a tensor c = b[1:,1:]. Predict then check what the size, offset, and strides are. 
# + Pick a mathematical operation like cosine or square root. Can you find a corresponding function in the torch library? 
# + Is there a version of your function that operates in-place?

# In[87]:


# Exercise 1: a 1-D tensor holding 0..8.
a = torch.tensor([*range(9)])
a


# In[88]:


a.size(), a.storage_offset(), a.stride()


# In[90]:


# Exercise 2: view the same nine elements as a 3 x 3 tensor.
b = a.view(3, 3)
b, b[1, 1]


# In[93]:


b.size(), b.storage_offset(), b.stride()


# In[94]:


# Exercise 3: drop the first row and the first column.
c = b[1:, 1:]
c


# In[95]:


c.size(), c.storage_offset(), c.stride()


# In[99]:


# Convert to float32 before applying the math functions below.
c = c.type(torch.float32)
c


# In[101]:


# Exercise 4: cosine and square root, called as torch-level functions.
torch.cos(c)


# In[102]:


torch.sqrt(c)


# In[103]:


# Exercise 5: the trailing underscore marks the in-place variant.
c.cos_()


# In[104]:


# c now holds the cosine values.
c


# ## 总结

# + 神经网络将浮点表示形式转换为其他浮点表示形式，起始和结束表示形式通常是人类可以解释的。 中间表示法则不是这样。
# + 这些浮点表示形式存储在张量中。
# + 张量是多维数组，是PyTorch中的基本数据结构。 PyTorch有一个全面的标准库，用于张量创建和处理以及数学运算。
# + 张量可以序列化到磁盘上并重新加载。
# + PyTorch中的所有张量操作都可以在CPU和GPU上执行，而无需更改代码。
# + PyTorch使用结尾的下划线来表示函数在张量上就地运行（例如Tensor.sqrt_）。