#!/usr/bin/env python
# coding: utf-8

# In[1]:


import warnings
warnings.simplefilter(action='ignore')


# In[2]:


import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


# ### 1. 准备 CIFAR-10 数据

# In[3]:


(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()


# In[4]:


print('x_train:', x_train.shape)
print('y_train:', y_train.shape)
print('x_test:', x_test.shape)
print('y_test:', y_test.shape)


# In[5]:


x_train_normalize = x_train.astype('float32') / 255.0
x_test_normalize = x_test.astype('float32') / 255.0


# In[6]:


y_train_one_hot = tf.keras.utils.to_categorical(y_train)
y_test_one_hot = tf.keras.utils.to_categorical(y_test)


# In[7]:


y_test_one_hot.shape


# ### 2. 建立模型

# #### 2.1 建立 Sequential 模型

# In[8]:


model = tf.keras.models.Sequential([
    
    # 卷积层1与池化层1
    tf.keras.layers.Conv2D(input_shape=(32, 32, 3), filters=32, kernel_size=(3, 3), padding='same', activation='relu'),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    
    # 卷积层2与池化层2
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    
    # 平坦层
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.25),
    
    # 隐藏层(128个神经元)
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.Dropout(0.25),
    
    # 输出层
    tf.keras.layers.Dense(10, activation='softmax')
])


# #### 2.2 查看模型的摘要

# In[9]:


print(model.summary())


# ### 3. 加载之前训练的模型

# In[10]:


try:
    model.load_weights('save_model/model_cifar10_cnn.h5')
    print('加载模型成功! 继续训练模型.')
except:
    print('加载模型失败, 开始训练一个新模型')


# ### 4. 训练模型

# In[11]:


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


# In[12]:


train_history = model.fit(x=x_train_normalize, y=y_train_one_hot, validation_split=0.2,
                         epochs=10, batch_size=128, verbose=2)


# ### 5. 以图形显示训练过程

# In[13]:


def show_train_history(train_history, train, validation):
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.xlabel('Epoch')
    plt.ylabel(train)
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()


# In[14]:


show_train_history(train_history, 'acc', 'val_acc')


# In[15]:


show_train_history(train_history, 'loss', 'val_loss')


# ### 6. 评估模型的准确率

# In[16]:


scores = model.evaluate(x_test_normalize, y_test_one_hot)
print()
print('accuracy:', scores[1])


# ### 7. 进行预测

# #### 7.1 执行预测

# In[17]:


predictions = model.predict_classes(x_test_normalize)


# #### 7.2 预测结果

# In[18]:


predictions[:10]


# #### 7.3 定义函数以显示10项预测结果

# In[19]:


label_dict = {
    0: 'airplane',
    1: 'automobile',
    2: 'bird',
    3: 'cat',
    4: 'deer',
    5: 'dog',
    6: 'frog',
    7: 'horse',
    8: 'ship',
    9: 'truck'
}


# In[20]:


def plot_images_labels_prediction(images, labels, predictions, idx, num=10):
    """
    images: 图像数组
    labels: 真实值数组, 其实每个元素的数字代表一种图像类别的名称
    predictions: 预测结果数据
    idx: 开始显示的数据index
    num: 要显示的数据项数, 默认为10, 不超过25
    """
    fig = plt.gcf()
    fig.set_size_inches(12, 14)
    if num > 25:
        num = 25
    for i in range(0, num):
        ax = plt.subplot(5, 5, i+1)
        ax.imshow(images[idx], cmap='binary')
        title = str(labels[idx][0]) + ',' + label_dict[labels[idx][0]]
        if len(predictions) > 0:
            title += '=>' + label_dict[predictions[idx]]
        ax.set_title(title, fontsize=10)
        ax.set_xticks([])
        ax.set_yticks([])
        idx += 1
    plt.show()


# In[21]:


plot_images_labels_prediction(x_test, y_test, predictions, idx=0, num=10)


# ### 8. 查看预测概率

# #### 8.1 使用测试数据进行预测

# In[22]:


predicted_probability = model.predict(x_test_normalize)


# #### 8.2 建立 show_predicted_probability 函数以相看预测概率

# In[23]:


def show_predicted_probability(x, y, predictions, predicted_probability, idx):
    print('label:', label_dict[y[idx][0]])
    print('predict:', label_dict[predictions[idx]])
    plt.figure(figsize=(2, 2))
    plt.imshow(x[idx].reshape((32, 32, 3)))
    plt.show()
    for j in range(10):
        print(label_dict[j], ':', predicted_probability[idx][j])


# #### 8.3 查看第0项数据预测的概率

# In[24]:


show_predicted_probability(x_test, y_test, predictions, predicted_probability, 0)


# #### 8.4 查看第3项数据预测的概率

# In[25]:


show_predicted_probability(x_test, y_test, predictions, predicted_probability, 3)


# ### 9. 显示混淆矩阵

# #### 9.1 查看预测结果的形状

# In[26]:


print('predictons shape:', predictions.shape)


# #### 9.2 查看测试 label 真实值的形状

# In[27]:


print('y_test shape:', y_test.shape)
print(y_test)


# #### 9.3 将测试 label 真实值转换为一维数据

# In[28]:


y_test.reshape(-1)


# #### 9.4 建立混淆矩阵

# In[29]:


pd.crosstab(y_test.reshape(-1), predictions, rownames=['label'], colnames=['predict'])


# In[30]:


print(label_dict)


# ### 10. 保存模型

# #### 10.1 保存为 json

# In[31]:


model_json = model.to_json()
with open('save_model/model_cifar10_cnn.json', 'w') as json_file:
    json_file.write(model_json)


# #### 10.2 保存为 yaml

# In[32]:


model_yaml = model.to_yaml()
with open('save_model/model_cifar10_cnn.yaml', 'w') as yaml_file:
    yaml_file.write(model_yaml)


# #### 10.3 保存为 h5

# In[33]:


model.save_weights('save_model/model_cifar10_cnn.h5', save_format='h5')
print('模型已保存到:', 'save_model/model_cifar10_cnn.h5')