#!/usr/bin/env python
# coding: utf-8

# In[1]:

from IPython.core.display import HTML
HTML(""" """)


# # Handwritten digit classification with TensorFlow
# ![](images/mnist.jpeg)

# The classic task of recognizing handwritten digits, based on the
# [MNIST](http://yann.lecun.com/exdb/mnist/) dataset.

# ## Problem statement
#
# - 60,000 images of digits
# - Centered
# - Black-and-white
# - 28x28 pixels each

# # The simplest single-layer neural network
# ![](images/one_layer.png)

# A 28x28 digit image is flattened into a vector of 784 pixels, which is fed to
# the input of a single-layer network. Each neuron independently computes a
# weighted sum of all its inputs, adds a bias term, and passes the result
# through an activation function.

# ![](images/perceptron_schematic.png)

# ### The perceptron function
#
# $$Y = g(X.W + b)$$
#
# $$sigmoid(z_n) = \frac{1}{1 + e^{-z_n}}$$
#
# $$softmax(z_n) = \frac{e^{z_n}}{||e^z||}$$

# ![](images/activation_functions.png)

# Since sigmoid and softmax return numbers between 0 and 1, the perceptron
# effectively outputs the probability of belonging to each class.

# ![](images/eval_single_layer.png)

# ![](images/softmax-regression-vectorequation.png)

# ### Training the network
#
# $$H_{y'}(y) = -\sum_i y'_i \log(y_i)$$
#
# y — the predicted probability vector
#
# y' — the true (one-hot) label vector

# ![](images/cross_entropy.png)

# ## Practice 0
#
# - import TF
# - variables, constants, and placeholders in TF
# - running computations

# In[2]:

import tensorflow as tf

tf.set_random_seed(0)


# Creating constants

# In[3]:

node1 = tf.constant(3.0)
node2 = tf.constant(4.0)
print(node1, node2)


# Running computations

# In[4]:

sess = tf.InteractiveSession()
print(sess.run([node1, node2]))


# In[5]:

node3 = tf.add(node1, node2)
print("node3:", node3)
print("sess.run(node3):", sess.run(node3))


# Creating placeholders

# In[6]:

a = tf.placeholder(tf.float32)
b = tf.placeholder(tf.float32)
adder_node = a + b  # the '+' operator is overloaded to tf.add


# In[7]:

print(sess.run(adder_node, {a: 3, b: 4.5}))
print(sess.run(adder_node, {a: [1, 3], b: [2, 4]}))


# Creating variables

# In[8]:

W = tf.Variable([.3], dtype=tf.float32)
b = tf.Variable([-.3], dtype=tf.float32)
x = tf.placeholder(tf.float32)
linear_model = W * x + b
print(linear_model)


# In[9]:

init = tf.global_variables_initializer()
sess.run(init)
print(sess.run(linear_model, {x: [1, 2, 3, 4]}))


# In[10]:

y = tf.placeholder(tf.float32)
squared_deltas = tf.square(linear_model - y)
loss = tf.reduce_sum(squared_deltas)  # the loss (error) function
print(sess.run(loss, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]}))
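# A quick sanity check (an aside, not part of the original notebook): the same
# sum-of-squared-errors loss can be recomputed outside TensorFlow with NumPy
# for W = 0.3, b = -0.3 on the data used above.

import numpy as np

x_np = np.array([1, 2, 3, 4], dtype=np.float32)
y_np = np.array([0, -1, -2, -3], dtype=np.float32)
pred = 0.3 * x_np - 0.3                # the same linear model, evaluated by hand
print(((pred - y_np) ** 2).sum())      # ~23.66, matching sess.run(loss, ...) above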
# In[11]:

# Manually set W and b to the values that minimize the loss.
fixW = tf.assign(W, [-1.])
fixb = tf.assign(b, [1.])
sess.run([fixW, fixb])
print(sess.run(loss, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]}))


# # Practice 1
#
# - import TF
# - load the dataset
# - define the network structure
# - run training

# # Formulas to implement
#
# $$Y = softmax(X.W + b)$$
#
# $$softmax(z_n) = \frac{e^{z_n}}{||e^z||}$$
#
# $$H_{y'}(y) = -\sum_i y'_i \log(y_i)$$

# In[12]:

# reset the Jupyter kernel state to avoid duplicating variables
get_ipython().run_line_magic('reset', '-sf')

import tensorflow as tf

tf.set_random_seed(0)
tf.reset_default_graph()


# In[13]:

with tf.name_scope('X'):
    X = tf.placeholder(tf.float32, [None, 784], name='X')
    x_image = tf.reshape(X, [-1, 28, 28, 1])

with tf.name_scope('weights'):
    W = tf.Variable(tf.zeros([784, 10]), name='weights')

with tf.name_scope('biases'):
    b = tf.Variable(tf.zeros([10]), name='biases')

tf.summary.image('input', x_image, 3)
tf.summary.histogram("weights", W)
tf.summary.histogram("biases", b)


# In[14]:

with tf.name_scope('Wx_plus_b'):
    # The model: Y = softmax(X.W + b)
    Y = tf.nn.softmax(tf.matmul(tf.reshape(X, [-1, 784]), W) + b, name='labels')

# Placeholder for the correct labels
Y_ = tf.placeholder(tf.float32, [None, 10], name='Y_')

with tf.name_scope('total'):
    # The loss function: H = -Sum(Y_ * log(Y))
    cross_entropy = -tf.reduce_sum(Y_ * tf.log(Y), name='xentropy')

with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        # Fraction of correct predictions on the evaluated set
        is_correct = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32), name='xentropy_mean')

tf.summary.scalar('accuracy', accuracy)
tf.summary.scalar('cross_entropy', cross_entropy)


# In[15]:

with tf.name_scope('train'):
    # Optimize the loss with gradient descent;
    # 0.003 is the learning rate, a hyperparameter
    optimizer = tf.train.GradientDescentOptimizer(0.003)
    # Minimize the loss
    train_step = optimizer.minimize(cross_entropy)


# In[16]:

# Load the MNIST training dataset
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True, reshape=True)


# In[17]:

def save_summary(sess, mnist, writer, merged, accuracy, i):
    if i % 10 == 0:
        test_data = {X: mnist.test.images, Y_: mnist.test.labels}
        summary, a = sess.run([merged, accuracy], feed_dict=test_data)
        writer.add_summary(summary, i)
        if i % 200 == 0:
            print("Test: {}".format(a))


# In[18]:

with tf.Session() as sess:
    # Merge all the summaries and write them out to the log directory
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter("/tmp/tensorflow/one_layer", sess.graph)
    tf.global_variables_initializer().run()

    for i in range(1000):
        # load a batch of images and class labels
        batch_X, batch_Y = mnist.train.next_batch(100)
        train_data = {X: batch_X, Y_: batch_Y}

        # train
        sess.run(train_step, feed_dict=train_data)

        save_summary(sess, mnist, writer, merged, accuracy, i)

    writer.close()


# In[19]:

get_ipython().system('ls /tmp/tensorflow/')
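# An aside (not in the original notebook): the hand-written loss
# -tf.reduce_sum(Y_ * tf.log(Y)) becomes NaN as soon as Y contains exact zeros.
# tf.nn.softmax_cross_entropy_with_logits computes softmax and cross-entropy
# together in a numerically stable way; the deeper models below rely on it.
# A minimal sketch on the graph defined above:

logits = tf.matmul(tf.reshape(X, [-1, 784]), W) + b   # pre-softmax scores
stable_xentropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_))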
# In[20]:

def single_layer(mnist):
    X = tf.placeholder(tf.float32, [None, 784], name='X')
    W = tf.Variable(tf.zeros([784, 10]), name='weights')
    b = tf.Variable(tf.zeros([10]), name='biases')

    Y = tf.nn.softmax(tf.matmul(tf.reshape(X, [-1, 784]), W) + b, name='labels')
    Y_ = tf.placeholder(tf.float32, [None, 10], name='Y_')

    cross_entropy = -tf.reduce_sum(Y_ * tf.log(Y), name='xentropy')
    is_correct = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32), name='xentropy_mean')

    optimizer = tf.train.GradientDescentOptimizer(0.003)
    train_step = optimizer.minimize(cross_entropy)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(1000):
            batch_X, batch_Y = mnist.train.next_batch(100)
            train_data = {X: batch_X, Y_: batch_Y}

            # train
            sess.run(train_step, feed_dict=train_data)

            if i % 100 == 0:
                test_data = {X: mnist.test.images, Y_: mnist.test.labels}
                a, c = sess.run([accuracy, cross_entropy], feed_dict=test_data)
                print("Test: {}, {}".format(a, c))


# In[21]:

single_layer(mnist)


# # 0.91
#
# ![](images/tiny_success.jpg)

# ![](images/go_deeper.jpg)

# # Practice 2
#
# - a multi-layer neural network
# - the overfitting problem
# - regularization

# # A multi-layer network
#
# ![](images/deep_neural_network.png)

# ![](images/deep_mnist.png)

# $$sigmoid(z_n) = \frac{1}{1 + e^{-z_n}}$$
#
# ![](images/sigmoid.png)

# In[51]:

# reset the Jupyter kernel state to avoid duplicating variables
get_ipython().run_line_magic('reset', '-sf')

import math

import tensorflow as tf

tf.set_random_seed(0)
tf.reset_default_graph()

# Load the MNIST training dataset
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True, reshape=True)


# In[63]:

get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt


def draw(a, c):
    plt.figure(1, (15, 5))
    plt.subplot(121)
    plt.title('accuracy')
    plt.plot(a)
    plt.subplot(122)
    plt.title('cross_entropy')
    plt.plot(c)
    plt.show()


# In[24]:

# Declare the layer sizes
layer_sizes = [28*28, 200, 100, 60, 30, 10]

W1 = tf.Variable(tf.truncated_normal([layer_sizes[0], layer_sizes[1]], stddev=0.1))
B1 = tf.Variable(tf.zeros(layer_sizes[1]))  # 200
W2 = tf.Variable(tf.truncated_normal([layer_sizes[1], layer_sizes[2]], stddev=0.1))
B2 = tf.Variable(tf.zeros(layer_sizes[2]))  # 100
W3 = tf.Variable(tf.truncated_normal([layer_sizes[2], layer_sizes[3]], stddev=0.1))
B3 = tf.Variable(tf.zeros(layer_sizes[3]))  # 60
W4 = tf.Variable(tf.truncated_normal([layer_sizes[3], layer_sizes[4]], stddev=0.1))
B4 = tf.Variable(tf.zeros(layer_sizes[4]))  # 30
W5 = tf.Variable(tf.truncated_normal([layer_sizes[4], layer_sizes[5]], stddev=0.1))
B5 = tf.Variable(tf.zeros(layer_sizes[5]))  # 10


# In[25]:

X = tf.placeholder(tf.float32, [None, 784])

Y1 = tf.nn.sigmoid(tf.matmul(X, W1) + B1)
Y2 = tf.nn.sigmoid(tf.matmul(Y1, W2) + B2)
Y3 = tf.nn.sigmoid(tf.matmul(Y2, W3) + B3)
Y4 = tf.nn.sigmoid(tf.matmul(Y3, W4) + B4)
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)

Y_ = tf.placeholder(tf.float32, [None, 10])


# In[26]:

def train_deep(X, Y, Ylogits, Y_, epoches=1000):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
    cross_entropy = tf.reduce_mean(cross_entropy) * 100

    is_correct = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

    learning_rate = 0.003
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_step = optimizer.minimize(cross_entropy)

    accuracy_vis, cross_entropy_vis = [], []
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(epoches):
            batch_X, batch_Y = mnist.train.next_batch(100)
            train_data = {X: batch_X, Y_: batch_Y}

            # train
            sess.run(train_step, feed_dict=train_data)

            if i % 10 == 0:
                test_data = {X: mnist.test.images, Y_: mnist.test.labels}
                a, c = sess.run([accuracy, cross_entropy], feed_dict=test_data)
                accuracy_vis.append(a)
                cross_entropy_vis.append(c)
                if i % 100 == 0:
                    print("Test: {}, {}".format(a, c))

    return (accuracy_vis, cross_entropy_vis)


# In[27]:

a, c = train_deep(X, Y, Ylogits, Y_)
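# An aside before plotting the curves (not in the original notebook): the
# W1..W5 / B1..B5 boilerplate above can be produced by a small helper built
# around the same layer_sizes list. This is only an illustrative sketch; the
# hand-written variables remain the ones used in the rest of the notebook.

def dense_layer(inputs, in_size, out_size, activation=tf.nn.sigmoid):
    """Fully connected layer: activation(inputs.W + B)."""
    W = tf.Variable(tf.truncated_normal([in_size, out_size], stddev=0.1))
    B = tf.Variable(tf.zeros([out_size]))
    logits = tf.matmul(inputs, W) + B
    return logits if activation is None else activation(logits)

# Usage sketch (kept commented out to avoid adding duplicate nodes to the graph):
# h = X
# for in_size, out_size in zip(layer_sizes[:-2], layer_sizes[1:-1]):
#     h = dense_layer(h, in_size, out_size)
# logits = dense_layer(h, layer_sizes[-2], layer_sizes[-1], activation=None)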
# In[28]:

draw(a, c)


# # 0.94???
#
# ![](images/fry.png)

# ### sigmoid + softmax + 5 layers
#
# - classification accuracy: 0.94
# - converges slowly
# - sharp swings during training
# - a different activation function is needed

# # ReLU = Rectified Linear Unit
#
# ![](images/relu.png)
#
# ```python
# tf.nn.relu(tf.matmul(X, W1) + B1)
# ```

# In[29]:

X = tf.placeholder(tf.float32, [None, 784])

Y1 = tf.nn.relu(tf.matmul(X, W1) + B1)
Y2 = tf.nn.relu(tf.matmul(Y1, W2) + B2)
Y3 = tf.nn.relu(tf.matmul(Y2, W3) + B3)
Y4 = tf.nn.relu(tf.matmul(Y3, W4) + B4)
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)

Y_ = tf.placeholder(tf.float32, [None, 10])


# In[30]:

a, c = train_deep(X, Y, Ylogits, Y_)


# In[31]:

draw(a, c)


# ### relu + softmax + 5 layers
#
# - classification accuracy: 0.97
# - converges quickly
# - relatively stable??
# ![](images/fry.png)

# In[32]:

a, c = train_deep(X, Y, Ylogits, Y_, 5000)


# # Noise in the accuracy curve

# In[33]:

draw(a[50:], c[50:])


# # Learning rate decay
#
# - slow down the learning rate as training progresses
#
# ```
# max_learning_rate = 0.003
# min_learning_rate = 0.0001
# decay_speed = 2000.0
# learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i/decay_speed)
# ```

# In[60]:

def train_lr_decay(X, Y, Ylogits, Y_, mnist, epoches=1000, verbose=100):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
    cross_entropy = tf.reduce_mean(cross_entropy) * 100

    is_correct = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

    max_learning_rate = 0.003
    min_learning_rate = 0.0001
    decay_speed = 2000.0
    learning_rate = tf.placeholder(tf.float32)

    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_step = optimizer.minimize(cross_entropy)

    accuracy_vis, cross_entropy_vis = [], []
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(epoches):
            batch_X, batch_Y = mnist.train.next_batch(100)
            lr = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i/decay_speed)
            train_data = {X: batch_X, Y_: batch_Y, learning_rate: lr}

            # train
            sess.run(train_step, feed_dict=train_data)

            if i % 10 == 0:
                test_data = {X: mnist.test.images, Y_: mnist.test.labels}
                a, c = sess.run([accuracy, cross_entropy], feed_dict=test_data)
                accuracy_vis.append(a)
                cross_entropy_vis.append(c)
                if i % verbose == 0:
                    print("Test: {}, {}".format(a, c))

    return (accuracy_vis, cross_entropy_vis)


# In[35]:

a, c = train_lr_decay(X, Y, Ylogits, Y_, mnist, 10000)


# # Overfitting

# In[36]:

draw(a[50:], c[50:])


# # Dropout
#
# ![](images/dropout.png)
# ```python
# pkeep = tf.placeholder(tf.float32)
# Y1d = tf.nn.dropout(Y1, pkeep)
#
# train_data={X: batch_X, Y_: batch_Y, learning_rate: lr, pkeep: 0.75}
# test_data={X: mnist.test.images, Y_: mnist.test.labels, pkeep: 1.0}
# ```
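# A small aside (not in the original notebook): tf.nn.dropout zeroes each
# element with probability 1 - pkeep and scales the survivors by 1/pkeep, so
# the expected activation stays the same between training (pkeep < 1) and
# testing (pkeep = 1). A quick standalone check:

demo_keep = tf.placeholder(tf.float32)
demo_out = tf.nn.dropout(tf.ones([10]), demo_keep)
with tf.Session() as demo_sess:
    print(demo_sess.run(demo_out, {demo_keep: 0.75}))  # zeros and values of ~1.33
    print(demo_sess.run(demo_out, {demo_keep: 1.0}))   # all ones, unchanged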
# In[37]:

def train_lr_decay_dropout(X, Y, Ylogits, Y_, pkeep, mnist, epoches=1000):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
    cross_entropy = tf.reduce_mean(cross_entropy) * 100

    is_correct = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

    max_learning_rate = 0.003
    min_learning_rate = 0.0001
    decay_speed = 2000.0
    learning_rate = tf.placeholder(tf.float32)

    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_step = optimizer.minimize(cross_entropy)

    accuracy_vis, cross_entropy_vis = [], []
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(epoches):
            batch_X, batch_Y = mnist.train.next_batch(100)
            lr = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i/decay_speed)
            train_data = {X: batch_X, Y_: batch_Y, learning_rate: lr, pkeep: 0.75}

            # train
            sess.run(train_step, feed_dict=train_data)

            if i % 10 == 0:
                test_data = {X: mnist.test.images, Y_: mnist.test.labels, pkeep: 1.0}
                a, c = sess.run([accuracy, cross_entropy], feed_dict=test_data)
                accuracy_vis.append(a)
                cross_entropy_vis.append(c)
                if i % 500 == 0:
                    print("Test: {}, {}".format(a, c))

    return (accuracy_vis, cross_entropy_vis)


# In[38]:

pkeep = tf.placeholder(tf.float32)
X = tf.placeholder(tf.float32, [None, 784])

Y1 = tf.nn.dropout(tf.nn.relu(tf.matmul(X, W1) + B1), pkeep)
Y2 = tf.nn.dropout(tf.nn.relu(tf.matmul(Y1, W2) + B2), pkeep)
Y3 = tf.nn.dropout(tf.nn.relu(tf.matmul(Y2, W3) + B3), pkeep)
Y4 = tf.nn.dropout(tf.nn.relu(tf.matmul(Y3, W4) + B4), pkeep)
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)

Y_ = tf.placeholder(tf.float32, [None, 10])


# In[39]:

a, c = train_lr_decay_dropout(X, Y, Ylogits, Y_, pkeep, mnist, 10000)


# In[40]:

draw(a[50:], c[50:])


# # Summary
#
# - 5 layers
# - relu & softmax
# - learning rate decay
# - dropout

# # 0.98
#
# ![](images/good_success.jpg)

# # Convolutional neural network
# ![](images/common_cnn_design.jpg)

# ![](images/1.png)

# ![](images/2.png)

# ![](images/3.png)

# ![](images/conv_layers.png)

# In[54]:

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True, reshape=False)


# In[66]:

X = tf.placeholder(tf.float32, [None, 28, 28, 1])
Y_ = tf.placeholder(tf.float32, [None, 10])
lr = tf.placeholder(tf.float32)

K = 4    # first convolutional layer output depth
L = 8    # second convolutional layer output depth
M = 12   # third convolutional layer output depth
N = 200  # fully connected layer size


# In[70]:

# convolutional layers
W1 = tf.Variable(tf.truncated_normal([5, 5, 1, K], stddev=0.1))
B1 = tf.Variable(tf.ones([K])/10)
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1))
B2 = tf.Variable(tf.ones([L])/10)
W3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1))
B3 = tf.Variable(tf.ones([M])/10)


# In[71]:

# fully connected layers
W4 = tf.Variable(tf.truncated_normal([7 * 7 * M, N], stddev=0.1))
B4 = tf.Variable(tf.ones([N])/10)
W5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1))
B5 = tf.Variable(tf.ones([10])/10)


# In[72]:

stride = 1  # output is 28x28
Y1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1, stride, stride, 1], padding='SAME') + B1)
stride = 2  # output is 14x14
Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[1, stride, stride, 1], padding='SAME') + B2)
stride = 2  # output is 7x7
Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[1, stride, stride, 1], padding='SAME') + B3)

# reshape the output of the last convolutional layer to feed the fully connected layer
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])
Y4 = tf.nn.relu(tf.matmul(YY, W4) + B4)

Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)


# In[61]:

a, c = train_lr_decay(X, Y, Ylogits, Y_, mnist, epoches=1000, verbose=100)


# In[65]:

draw(a, c)


# # 0.992
#
# ![](images/like_a_boss.jpg)
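# A possible next step (a sketch only, not part of the original notebook):
# regularize the fully connected layer of the convolutional network with
# dropout, reusing the train_lr_decay_dropout helper defined above. Only the
# layers after Y4 are redefined; the convolutional part stays the same.

pkeep = tf.placeholder(tf.float32)
Y4_drop = tf.nn.dropout(Y4, pkeep)          # dropout on the fully connected layer only
Ylogits_drop = tf.matmul(Y4_drop, W5) + B5
Y_drop = tf.nn.softmax(Ylogits_drop)

# Uncomment to train the regularized CNN (slow on CPU):
# a, c = train_lr_decay_dropout(X, Y_drop, Ylogits_drop, Y_, pkeep, mnist, 10000)
# draw(a, c)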