#!/usr/bin/env python
# coding: utf-8

# # Backpropagation

# In[43]:


class MulLayer:
    def __init__(self):
        self.x = None
        self.y = None

    def forward(self, x, y):
        self.x = x
        self.y = y
        out = x * y
        return out

    def backward(self, din):
        # Multiply node swaps the inputs: d(xy)/dx = y, d(xy)/dy = x
        dx = din * self.y
        dy = din * self.x
        return dx, dy


class AddLayer:
    def __init__(self):
        pass

    def forward(self, x, y):
        out = x + y
        return out

    def backward(self, din):
        # Addition passes the upstream gradient through unchanged
        dx = din * 1
        dy = din * 1
        return dx, dy


# In[44]:


apple = 100
apple_num = 2
tax = 1.1

mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

# forward
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)
print("price:", int(price), end="\n\n")

# backward
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
print("dapple_price:", dapple_price)
print("dtax:", dtax, end="\n\n")

dapple, dapple_num = mul_apple_layer.backward(dapple_price)
print("dapple:", dapple)
print("dapple_num:", int(dapple_num))


# ## Affine Layer

# In[110]:


import numpy as np

a = np.array([[1, 2, 3], [4, 5, 6]])
print(a.ndim, a.shape)
print(a)
print()
print(np.sum(a))
print(np.sum(a, axis=0))  # Axis along which a sum is performed.
print(np.sum(a, axis=1))


# In[119]:


class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        self.x = x
        out = np.dot(self.x, self.W) + self.b
        return out

    def backward(self, din):
        dx = np.dot(din, self.W.T)
        self.dW = np.dot(self.x.T, din)
        self.db = np.sum(din, axis=0)  # sum over the batch axis
        return dx


# In[139]:


x = np.array([[1, 2]])
W = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([7, 8, 9])
print("x.shape: {0}, W.shape: {1}, b.shape: {2}".format(x.shape, W.shape, b.shape))
print()

affine = Affine(W, b)
out = affine.forward(x)
print("out: {0}, out.shape: {1}".format(out, out.shape))
print()

din = np.ones_like(out)  # [[1, 1, 1]]
dx = affine.backward(din)
print("dx.shape: {0}, dW.shape: {1}, db.shape: {2}".format(dx.shape, affine.dW.shape, affine.db.shape))
print("dx: {0}".format(dx))
print("affine.dW: \n{0}".format(affine.dW))
print("affine.db: {0}".format(affine.db))


# In[141]:


x = np.array([[1, 2], [2, 4]])
W = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([7, 8, 9])
print("x.shape: {0}, W.shape: {1}, b.shape: {2}".format(x.shape, W.shape, b.shape))
print()

affine = Affine(W, b)
out = affine.forward(x)
print("out: \n{0}\nout.shape: {1}".format(out, out.shape))
print()

din = np.ones_like(out)
dx = affine.backward(din)
print("dx.shape: {0}, dW.shape: {1}, db.shape: {2}".format(dx.shape, affine.dW.shape, affine.db.shape))
print("dx: {0}".format(dx))
print("affine.dW: \n{0}".format(affine.dW))
print("affine.db: {0}".format(affine.db))


# In[100]:


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)  # subtract the max for numerical stability
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T

    x = x - np.max(x)
    return np.exp(x) / np.sum(np.exp(x))


def cross_entropy_error(y, t):
    if y.ndim == 1:
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    # If t is one-hot encoded, convert it to class indices
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    # Small epsilon keeps log() from receiving zero
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size


class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, din):
        # Gradient is zero wherever the input was <= 0
        din[self.mask] = 0
        dx = din
        return dx


class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        out = sigmoid(x)
        self.out = out
        return out

    def backward(self, din):
        dx = din * self.out * (1.0 - self.out)
        return dx


class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, din=1):
        # Softmax + cross-entropy gradient simplifies to (y - t) / batch_size
        batch_size = self.t.shape[0]
        dx = (self.y - self.t) / float(batch_size)
        return dx
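

# The cell below is a small sketch added for illustration (not part of the
# original notebook): it exercises SoftmaxWithLoss on a hand-made batch of
# two samples. The score matrix `x` and the one-hot labels `t` are assumed
# values chosen only to show the forward/backward flow.

# In[ ]:


x = np.array([[0.3, 2.9, 4.0],
              [0.1, 0.1, 0.8]])   # assumed class scores for a batch of 2
t = np.array([[0, 0, 1],
              [0, 0, 1]])         # assumed one-hot labels

loss_layer = SoftmaxWithLoss()
loss = loss_layer.forward(x, t)
print("loss:", loss)

# backward returns (y - t) / batch_size, the gradient w.r.t. the scores x
dx = loss_layer.backward()
print("dx: \n{0}".format(dx))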