#!/usr/bin/env python
# coding: utf-8
"""Train and inspect a one-hidden-layer MLP (with Dropout) on MNIST.

Exported from a Jupyter notebook; the ``# In[n]:`` markers mirror the
original cells.  The script loads MNIST through ``tf.keras.datasets``, trains
a 784-1000-10 dense network, plots the training curves, evaluates on the test
set, and then inspects the predictions (sample images, confusion matrix and
the "true 5 / predicted 2" mistakes).
"""

# In[1]:

import warnings

# Installed before the heavy imports below so their warnings are silenced too.
warnings.simplefilter(action='ignore')


# In[2]:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


# ### 1. Data preprocessing

# In[3]:

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()


# In[4]:

# Flatten every image into a 784-element float32 vector for the Dense layer.
x_train_flatten = x_train.reshape(x_train.shape[0], 784).astype('float32')
x_test_flatten = x_test.reshape(x_test.shape[0], 784).astype('float32')


# In[5]:

# Divide by 255 so the inputs are small floats instead of raw pixel values.
x_train_normalize = x_train_flatten / 255.0
x_test_normalize = x_test_flatten / 255.0


# In[6]:

# An explicit class count keeps the train and test encodings the same width
# instead of relying on the largest label found in each array.
y_train_one_hot = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test_one_hot = tf.keras.utils.to_categorical(y_test, num_classes=10)


# ### 2. Build the model

# #### 2.1 Build the Sequential model

# In[7]:

model = tf.keras.models.Sequential([
    # Input layer -> hidden layer (1000 neurons in the hidden layer).
    tf.keras.layers.Dense(units=1000,
                          input_dim=784,
                          kernel_initializer='normal',
                          activation='relu'),
    # Dropout layer.
    tf.keras.layers.Dropout(0.5),
    # Output layer.
    tf.keras.layers.Dense(units=10,
                          kernel_initializer='normal',
                          activation='softmax'),
])


# #### 2.2 Show the model summary

# In[8]:

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None").
model.summary()


# ### 3. Train the model

# In[9]:

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


# In[10]:

train_history = model.fit(x=x_train_normalize,
                          y=y_train_one_hot,
                          validation_split=0.2,
                          epochs=10,
                          batch_size=200,
                          verbose=2)


# ### 4. Plot the training process

# In[11]:

def show_train_history(train_history, train, validation):
    """Plot one training metric against its validation counterpart.

    Args:
        train_history: object returned by ``model.fit``; its ``history``
            attribute is indexed with the two keys below.
        train: key of the training-set series (also used as the y-axis label).
        validation: key of the validation-set series.

    Raises:
        KeyError: if either key is missing from ``train_history.history``.
    """
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.xlabel('Epoch')
    plt.ylabel(train)
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()


# In[12]:

# Older Keras records the metric as 'acc'/'val_acc', newer releases as
# 'accuracy'/'val_accuracy'; use whichever this run actually produced.
accuracy_key = 'accuracy' if 'accuracy' in train_history.history else 'acc'
show_train_history(train_history, accuracy_key, 'val_' + accuracy_key)


# In[13]:

show_train_history(train_history, 'loss', 'val_loss')


# ### 5. Evaluate the model accuracy

# In[14]:

scores = model.evaluate(x_test_normalize, y_test_one_hot)
print()
print('accuracy:', scores[1])


# ### 6. Make predictions

# #### 6.1 Run the prediction

# In[15]:

# Sequential.predict_classes() was removed in TensorFlow 2.6; taking the
# argmax over the softmax outputs gives the same class indices.
predictions = np.argmax(model.predict(x_test_normalize), axis=-1)


# #### 6.2 Prediction results

# In[16]:

# A bare expression only displays in a notebook; print it when run as a script.
print(predictions)


# #### 6.3 Define a function that displays 10 prediction results

# In[17]:

def plot_images_labels_prediction(images, labels, predictions, idx, num=10):
    """Show consecutive images with their labels and predictions.

    Args:
        images: array of digit images.
        labels: array of ground-truth values.
        predictions: predicted values; pass ``None`` or an empty sequence to
            show only the labels.
        idx: index of the first item to display.
        num: number of items to display, 10 by default, at most 25 (the size
            of the 5x5 subplot grid).  It is also limited to the number of
            images available from ``idx`` onwards.
    """
    fig = plt.gcf()
    fig.set_size_inches(12, 14)

    # Never ask for more cells than the grid holds or than images remain.
    num = min(num, 25, len(images) - idx)
    has_predictions = predictions is not None and len(predictions) > 0

    for position, sample in enumerate(range(idx, idx + num), start=1):
        ax = plt.subplot(5, 5, position)
        ax.imshow(images[sample], cmap='binary')

        # NOTE: the 'lable' spelling is the text the figure has always shown;
        # it is left untouched here.
        title = 'lable=' + str(labels[sample])
        if has_predictions:
            title += ',predict=' + str(predictions[sample])

        ax.set_title(title, fontsize=10)
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()


# In[18]:

plot_images_labels_prediction(x_test, y_test, predictions, idx=0, num=10)


# ### 7. Show the confusion matrix

# #### 7.1 Build the confusion matrix

# In[19]:

print(pd.crosstab(y_test, predictions, rownames=['label'], colnames=['predict']))


# #### 7.2 Build a DataFrame of true values and predictions

# In[20]:

df = pd.DataFrame({'label': y_test, 'predict': predictions})
print(df[:2])


# #### 7.3 Query the rows whose true value is "5" but predicted value is "2"

# In[21]:

mismatches = df[(df.label == 5) & (df.predict == 2)]
print(mismatches)


# In[22]:

# In[23]:

# Which test images get misclassified depends on the trained weights, so the
# examples are taken from the query above rather than from fixed indices.
# df uses the default integer index, so its labels are positions in x_test.
for mismatch_idx in mismatches.index[:2]:
    plot_images_labels_prediction(x_test, y_test, predictions,
                                  idx=int(mismatch_idx), num=1)