t=g(y)

### 24.2.1 Backpropagation Through the Addition Node

```python
class AddLayer:
    def __init__(self):
        pass

    def forward(self, x, y):
        return x + y

    def backward(self, dout):
        # Addition passes the upstream gradient through to both inputs unchanged.
        dx = dout * 1
        dy = dout * 1
        return dx, dy
```
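
A quick usage sketch (made-up numbers, not from the original text): the upstream gradient reaches both inputs unchanged.

```python
add = AddLayer()
out = add.forward(3.0, 4.0)   # 7.0
dx, dy = add.backward(2.0)    # pretend the upstream gradient is 2.0
print(out, dx, dy)            # 7.0 2.0 2.0
```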

### 24.2.2 Backpropagation Through the Multiplication Node

```python
class MultiLayer:
    def __init__(self):
        self.x = None
        self.y = None

    def forward(self, x, y):
        # Cache the inputs; they are needed to compute the gradients.
        self.x = x
        self.y = y
        return x * y

    def backward(self, dout):
        # Multiplication "swaps" the inputs: each input's gradient is the
        # upstream gradient times the other input's forward value.
        dx = dout * self.y
        dy = dout * self.x
        return dx, dy
```
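
The two node types compose into a small computation graph. This illustrative sketch (made-up values) computes z = a*b + c and backpropagates dz = 1 through it:

```python
mul = MultiLayer()
add = AddLayer()

a, b, c = 2.0, 3.0, 5.0
z = add.forward(mul.forward(a, b), c)   # z = a*b + c = 11.0

dab, dc = add.backward(1.0)             # both branches receive 1.0
da, db = mul.backward(dab)              # da = b = 3.0, db = a = 2.0
print(z, da, db, dc)                    # 11.0 3.0 2.0 1.0
```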

### 24.3.1 The ReLU Activation Function

```python
class ReLu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        # Remember which elements are <= 0 so backward can zero their gradients.
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0  # set elements that are <= 0 to 0; leave the rest unchanged
        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout
        return dx
```
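
A short usage sketch with made-up input, showing how the cached mask zeroes both the output and the gradient wherever the input was not positive:

```python
import numpy as np

relu = ReLu()
x = np.array([[1.0, -0.5], [-2.0, 3.0]])
print(relu.forward(x))                 # [[1. 0.] [0. 3.]]
print(relu.backward(np.ones_like(x)))  # [[1. 0.] [0. 1.]]
```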

### 24.3.2 The Sigmoid Activation Function

```python
import numpy as np

class sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        out = 1 / (1 + np.exp(-x))
        self.out = out  # cache the output; it appears in the gradient formula
        return out

    def backward(self, dout):
        # d(sigmoid)/dx = y * (1 - y), where y is the forward output
        dx = dout * (1.0 - self.out) * self.out
        return dx
```
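
The analytic gradient can be verified against a finite difference; a minimal check with a made-up input:

```python
import numpy as np

s = sigmoid()
x = np.array([0.5])
s.forward(x)
analytic = s.backward(np.array([1.0]))

eps = 1e-6
numeric = (1/(1 + np.exp(-(x + eps))) - 1/(1 + np.exp(-(x - eps)))) / (2 * eps)
print(analytic, numeric)  # the two should agree to about six decimal places
```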

### 24.4 Backpropagation Through the Affine/Softmax Layers

In the forward pass the bias is broadcast over every row of the batch, so in the backward pass its gradient is the sum of the upstream gradient over the batch axis:

```python
import numpy as np

DY = np.array([[1, 2, 3], [4, 5, 6]])
dB = np.sum(DY, axis=0)  # array([5, 7, 9])
```

The code implementation of the Affine layer:

```python
class Affine:
    def __init__(self, W, B):
        self.W = W
        self.B = B
        self.X = None
        self.dW = None
        self.dB = None

    def forward(self, X):
        self.X = X
        out = np.dot(X, self.W) + self.B
        return out

    def backward(self, dout):
        dX = np.dot(dout, self.W.T)
        self.dW = np.dot(self.X.T, dout)
        self.dB = np.sum(dout, axis=0)  # sum over the batch axis, as shown above
        return dX
```
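
A quick shape check with made-up sizes (not from the original text): each gradient has the same shape as the quantity it differentiates:

```python
import numpy as np

W = np.random.randn(3, 4)
B = np.zeros(4)
affine = Affine(W, B)

X = np.random.randn(2, 3)              # batch of 2
out = affine.forward(X)                # shape (2, 4)
dX = affine.backward(np.ones((2, 4)))
print(out.shape, dX.shape, affine.dW.shape, affine.dB.shape)
# (2, 4) (2, 3) (3, 4) (4,)
```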

### 24.4.1 The Softmax-with-Loss Layer

After one-hot encoding, the label becomes [0,0,0,0,1,0,0,0,0]: the position of the 1 marks the class whose score (probability) should be the largest.

```python
class softmaxwithloss:
    def __init__(self):
        self.loss = None
        self.y = None  # output of softmax
        self.t = None  # label data

    def forward(self, x, t):
        self.y = softmax(x)
        self.t = t
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        # For batch input, divide by the batch size so the gradient is averaged
        batch_size = self.t.shape[0]
        dx = (self.y - self.t) / batch_size  # assumes t is one-hot
        return dx
```
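
A minimal forward/backward sketch with made-up scores, assuming the `softmax` and `cross_entropy_error` helpers defined later in this section:

```python
import numpy as np

layer = softmaxwithloss()
x = np.array([[0.3, 2.9, 4.0]])   # one sample, three classes
t = np.array([[0.0, 0.0, 1.0]])   # one-hot label for class 2
loss = layer.forward(x, t)
dx = layer.backward()             # (y - t) / batch_size
print(loss, dx.shape)             # small loss (~0.31), shape (1, 3)
```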

### 24.5.2 Implementing Neural-Network Learning with Backpropagation

1) Overview

2) Defining the layer classes

① The softmax function and the Sigmoid and Relu classes

```python
def softmax(x):
    if x.ndim == 2:
        # Batched input: normalize each row (working on the transpose)
        x = x.T
        x = x - np.max(x, axis=0)  # subtract the max to prevent overflow
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T

    x = x - np.max(x)  # subtract the max to prevent overflow
    return np.exp(x) / np.sum(np.exp(x))


class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out
        return dx


class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        # Same as the ReLu class above: zero out non-positive elements
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout
        return dx
```
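
A quick check (made-up logits) that softmax yields valid probability distributions even for large inputs, thanks to the max-subtraction trick:

```python
import numpy as np

x = np.array([[0.3, 2.9, 4.0],
              [1010, 1000, 990]])  # large values would overflow a naive softmax
y = softmax(x)
print(y.sum(axis=1))               # [1. 1.]  each row sums to 1
```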

② The Affine class (also called the weighted-sum layer)

```python
class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b

        self.x = None
        self.original_x_shape = None
        # Gradients of the weight and bias parameters
        self.dW = None
        self.db = None

    def forward(self, x):
        # Support tensor input by flattening everything after the batch axis
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x

        out = np.dot(self.x, self.W) + self.b

        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        dx = dx.reshape(*self.original_x_shape)  # restore the input's original (tensor) shape
        return dx
```
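
Because forward flattens everything after the batch axis and backward restores the original shape, the layer also accepts tensor input; a minimal sketch with made-up shapes:

```python
import numpy as np

W = np.random.randn(12, 5)       # 12 = 2*2*3 flattened features
b = np.zeros(5)
affine = Affine(W, b)

x = np.random.randn(4, 2, 2, 3)  # batch of 4 "images"
out = affine.forward(x)          # flattened to (4, 12) internally
dx = affine.backward(np.ones((4, 5)))
print(out.shape, dx.shape)       # (4, 5) (4, 2, 2, 3)
```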

③ The final layer

```python
class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None
        self.y = None  # output of softmax
        self.t = None  # label data

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:  # labels are one-hot vectors
            dx = (self.y - self.t) / batch_size
        else:  # labels are class indices
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        return dx
```
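
The two branches of backward can be checked against each other: one-hot labels and integer labels should produce the same gradient. An illustrative sketch:

```python
import numpy as np

layer = SoftmaxWithLoss()
x = np.array([[0.3, 2.9, 4.0]])

layer.forward(x, np.array([[0, 0, 1]]))  # one-hot label
dx_onehot = layer.backward()

layer.forward(x, np.array([2]))          # same label as a class index
dx_int = layer.backward()

print(np.allclose(dx_onehot, dx_int))    # True
```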

3) Defining the loss function

```python
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # If t is one-hot, convert it to class indices
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    # The 1e-8 keeps log() away from log(0)
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-8)) / batch_size
```
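
A small numeric example (made-up probabilities): a confident correct prediction yields a small loss, a confident wrong one a large loss:

```python
import numpy as np

t = np.array([0, 0, 1, 0, 0])                     # one-hot label for class 2
y_good = np.array([0.05, 0.05, 0.8, 0.05, 0.05])
y_bad = np.array([0.8, 0.05, 0.05, 0.05, 0.05])
print(cross_entropy_error(y_good, t))  # ~0.22
print(cross_entropy_error(y_bad, t))   # ~3.0
```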

4) Defining the neural-network class

```python
import numpy as np
from collections import OrderedDict

class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Build the layers (an OrderedDict preserves their order)
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        #self.layers['Sigmoid1'] = Sigmoid()
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    # x: input data, t: label data
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        #print("prediction", y[0], y.shape)

        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: label data
    def numerical_gradient(self, x, t):
        # Numerical gradient, kept for gradient checking; assumes the
        # finite-difference helper numerical_gradient(f, x) from earlier chapters
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_W, self.params[key])
        return grads

    # x: input data, t: label data
    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Record the gradient of each parameter (weights and biases) in a dict
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads
```
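
With both gradient methods in place, backpropagation can be verified against the numerical gradient (the standard gradient check). A minimal sketch, assuming `load_mnist` from the book's accompanying dataset module:

```python
import numpy as np
from dataset.mnist import load_mnist  # assumed dataset loader

(x_train, t_train), _ = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Compare the two gradients on a tiny batch
x_batch, t_batch = x_train[:3], t_train[:3]
grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))  # differences should be tiny (e.g. ~1e-10)
```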

5) Training the model with backpropagation

```python
import numpy as np
import matplotlib.pyplot as plt
# load_mnist is assumed to come from the book's accompanying dataset module
from dataset.mnist import load_mnist

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 20000  # choose an appropriate number of iterations
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    # Sample a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradients by backpropagation
    grad = network.gradient(x_batch, t_batch)

    # Update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        # Decay the learning rate every 5000 iterations
        if i % 5000 == 0:
            learning_rate *= 0.9
        print(learning_rate)

        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

# Plot the accuracy curves
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
```

Output:

```
0.09000000000000001
train acc, test acc | 0.9837833333333333, 0.9724
0.09000000000000001
train acc, test acc | 0.9842666666666666, 0.9722
0.08100000000000002
train acc, test acc | 0.98475, 0.9716
0.08100000000000002
train acc, test acc | 0.9853166666666666, 0.9733
0.08100000000000002
train acc, test acc | 0.9859666666666667, 0.9726
0.08100000000000002
train acc, test acc | 0.9861166666666666, 0.9707
0.08100000000000002
train acc, test acc | 0.9873, 0.9737
0.08100000000000002
train acc, test acc | 0.9873833333333333, 0.9744
0.08100000000000002
train acc, test acc | 0.9881, 0.973
0.08100000000000002
train acc, test acc | 0.9886666666666667, 0.9747
0.08100000000000002
train acc, test acc | 0.9888833333333333, 0.9743
```

6) Comparing optimization algorithms on the MNIST dataset

```python
import numpy as np
import matplotlib.pyplot as plt
# load_mnist is assumed to come from the book's accompanying dataset module
from dataset.mnist import load_mnist


def smooth_curve(x):
    """Smooth a loss curve for plotting.
    Reference: http://glowingpython.blogspot.jp/2012/02/convolution-with-numpy.html
    """
    window_len = 11
    s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]
    w = np.kaiser(window_len, 2)
    y = np.convolve(w/w.sum(), s, mode='valid')
    return y[5:len(y)-5]


# 0: Load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: Set up the experiment ==========
# SGD and Momentum are assumed defined; see the sketches after this listing
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
#optimizers['RMSprop'] = RMSprop()

networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
    train_loss[key] = []

# 2: Start training ==========
for i in range(max_iterations):
    # Sample a mini-batch shared by all optimizers
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in optimizers.keys():
        # Backpropagate, then let each optimizer apply its own update rule
        grads = networks[key].gradient(x_batch, t_batch)
        optimizers[key].update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)

    if i % 100 == 0:
        print("===========" + "iteration:" + str(i) + "===========")
        for key in optimizers.keys():
            loss = networks[key].loss(x_batch, t_batch)
            print(key + ":" + str(loss))

# 3: Plot the smoothed loss curves ==========
x = np.arange(max_iterations)
for key in optimizers.keys():
    plt.plot(x, smooth_curve(train_loss[key]), label=key)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.ylim(0, 1)
plt.legend()
plt.show()
```
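
The SGD and Momentum classes used above are not defined in this excerpt. The following are minimal sketches consistent with the `update(params, grads)` interface the loop assumes; the hyperparameters of the original run are unknown, so the defaults here are illustrative:

```python
import numpy as np

class SGD:
    """Plain stochastic gradient descent: W <- W - lr * dW."""
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]


class Momentum:
    """SGD with momentum: v <- m*v - lr*dW; W <- W + v."""
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            # Initialize one velocity buffer per parameter
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]
```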