BP（Back Propagation）是深度学习神经网络的理论核心，本文通过两个例子展示手动推导BP的过程。

1

1. 一元方程

设 $y = g(x)$，$z = h(y)$，则根据链式法则，z对x的微分可以表示如下：

$$\frac{dz}{dx} = \frac{dz}{dy} \cdot \frac{dy}{dx}$$

2.

2

```python
import oneflow as of
import oneflow.nn as nn
import oneflow.optim as optim

class Sample(nn.Module):
    def __init__(self):
        super(Sample, self).__init__()
        self.w1 = of.tensor(10.0, dtype=of.float, requires_grad=True)
        self.b1 = of.tensor(1.0, dtype=of.float, requires_grad=True)
        self.w2 = of.tensor(20.0, dtype=of.float, requires_grad=True)
        self.loss = nn.MSELoss()

    def parameters(self):
        return [self.w1, self.b1, self.w2]

    def forward(self, x, label):
        y1 = self.w1 * x + self.b1
        y2 = y1 * self.w2
        y3 = 2 * y2
        return self.loss(y3, label)

model = Sample()

optimizer = optim.SGD(model.parameters(), lr=0.005)
data = of.tensor(1.0, dtype=of.float)
label = of.tensor(500.0, dtype=of.float)

loss = model(data, label)
print("------------before backward()---------------")
print("w1 =", model.w1)
print("b1 =", model.b1)
print("w2 =", model.w2)
print("w1.grad =", model.w1.grad)
print("b1.grad =", model.b1.grad)
print("w2.grad =", model.w2.grad)
loss.backward()
print("------------after backward()---------------")
print("w1 =", model.w1)
print("b1 =", model.b1)
print("w2 =", model.w2)
print("w1.grad =", model.w1.grad)
print("b1.grad =", model.b1.grad)
print("w2.grad =", model.w2.grad)
optimizer.step()
print("------------after step()---------------")
print("w1 =", model.w1)
print("b1 =", model.b1)
print("w2 =", model.w2)
print("w1.grad =", model.w1.grad)
print("b1.grad =", model.b1.grad)
print("w2.grad =", model.w2.grad)
optimizer.zero_grad()
print("------------after zero_grad()---------------")
print("w1 =", model.w1)
print("b1 =", model.b1)
print("w2 =", model.w2)
print("w1.grad =", model.w1.grad)
print("b1.grad =", model.b1.grad)
print("w2.grad =", model.w2.grad)
```

```text
------------before backward()---------------
w1 = tensor(10., requires_grad=True)
b1 = tensor(1., requires_grad=True)
w2 = tensor(20., requires_grad=True)
w1.grad = None
b1.grad = None
w2.grad = None
------------after backward()---------------
w1 = tensor(10., requires_grad=True)
b1 = tensor(1., requires_grad=True)
w2 = tensor(20., requires_grad=True)
w1.grad = tensor(-4800.)
b1.grad = tensor(-4800.)
w2.grad = tensor(-2640.)
------------after step()---------------
w1 = tensor(34., requires_grad=True)
b1 = tensor(25., requires_grad=True)
w2 = tensor(33.2000, requires_grad=True)
w1.grad = tensor(-4800.)
b1.grad = tensor(-4800.)
w2.grad = tensor(-2640.)
------------after zero_grad()---------------
w1 = tensor(34., requires_grad=True)
b1 = tensor(25., requires_grad=True)
w2 = tensor(33.2000, requires_grad=True)
w1.grad = tensor(0.)
b1.grad = tensor(0.)
w2.grad = tensor(0.)
```

3

```python
import oneflow as of
import oneflow.nn as nn
import oneflow.optim as optim

class Sample(nn.Module):
    def __init__(self):
        super(Sample, self).__init__()
        self.op1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(2,2), bias=False)
        self.op2 = nn.AvgPool2d(kernel_size=(2,2))
        self.loss = nn.MSELoss()

    def forward(self, x, label):
        y1 = self.op1(x)
        y2 = self.op2(y1)
        return self.loss(y2, label)

model = Sample()

optimizer = optim.SGD(model.parameters(), lr=0.005)
data = of.randn(1, 1, 3, 3)
label = of.randn(1, 1, 1, 1)

loss = model(data, label)
print("------------before backward()---------------")
param = model.parameters()
print("w =", next(param))
loss.backward()
print("------------after backward()---------------")
param = model.parameters()
print("w =", next(param))
optimizer.step()
print("------------after step()---------------")
param = model.parameters()
print("w =", next(param))
optimizer.zero_grad()
print("------------after zero_grad()---------------")
param = model.parameters()
print("w =", next(param))
```

```text
------------before backward()---------------
w = tensor([[[[ 0.2621, -0.2583],
          [-0.1751, -0.0839]]]], dtype=oneflow.float32, grad_fn=<accumulate_grad>)
------------after backward()---------------
w = tensor([[[[ 0.2621, -0.2583],
          [-0.1751, -0.0839]]]], dtype=oneflow.float32, grad_fn=<accumulate_grad>)
------------after step()---------------
w = tensor([[[[ 0.2587, -0.2642],
          [-0.1831, -0.0884]]]], dtype=oneflow.float32, grad_fn=<accumulate_grad>)
------------after zero_grad()---------------
w = tensor([[[[ 0.2587, -0.2642],
          [-0.1831, -0.0884]]]], dtype=oneflow.float32, grad_fn=<accumulate_grad>)
```

1.http://speech.ee.ntu.edu.tw/~tlkagk/courses.html

2.https://speech.ee.ntu.edu.tw/~hylee/index.php