Press "Enter" to skip to content

```import d2lzh as d2l
from mxnet import gluon,init,nd
from mxnet.gluon import nn
# Residual block (a version of this is also defined in the d2lzh package).
class Residual(nn.Block):
    """A ResNet residual block.

    Main path: 3x3 conv -> BN -> ReLU -> 3x3 conv -> BN.
    Shortcut path: identity, or an optional 1x1 convolution that adjusts
    the channel count and stride so the two paths can be added.

    Parameters
    ----------
    num_channels : int
        Number of output channels for every convolution in the block.
    use_1x1conv : bool
        If True, pass the input through a 1x1 convolution on the shortcut
        so its channels/stride match the main-path output.
    strides : int
        Stride of the first 3x3 convolution (and of the 1x1 shortcut conv).
    """
    def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1,
                               strides=strides)
        self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
        # Optional 1x1 convolution: changes the channel count and applies
        # the same stride so the shortcut stays addable to the main path.
        if use_1x1conv:
            self.conv3 = nn.Conv2D(num_channels, kernel_size=1,
                                   strides=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()

    def forward(self, X):
        Y = nd.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        # Element-wise addition of shortcut and main path, then ReLU.
        return nd.relu(Y + X)

#观察使用1x1卷积前后的输入输出形状
blk=Residual(3)
blk.initialize()
X=nd.random.uniform(shape=(4,3,6,6))
print(blk(X).shape)#输入输出形状一致(4, 3, 6, 6)
blk2=Residual(6,use_1x1conv=True,strides=2)
blk2.initialize()
print(blk2(X).shape)#通道数改变，高宽减半(4, 6, 3, 3)```

```#构造ResNet模型
net=nn.Sequential()
net.add(nn.Conv2D(64,kernel_size=7,strides=2,padding=3),
nn.BatchNorm(),
nn.Activation('relu'),nn.MaxPool2D(pool_size=3,strides=2,padding=1))
# The first module is special-cased: the stem's max-pool has already halved
# the resolution and set the channel count, so its first Residual keeps
# stride 1 and needs no 1x1 shortcut convolution.
def resnet_block(num_channels, num_residuals, first_block=False):
    """Return a Sequential of `num_residuals` Residual blocks.

    Except when `first_block` is True, the first Residual of the module
    halves height/width (stride 2) and changes the channel count via a
    1x1 shortcut convolution; the remaining Residuals preserve shape.
    """
    blk = nn.Sequential()
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.add(Residual(num_channels, use_1x1conv=True, strides=2))
        else:
            blk.add(Residual(num_channels))
    return blk
#加入残差块，每个模块2个残差块
net.add(resnet_block(64,2,first_block=True),
resnet_block(128,2),
resnet_block(256,2),
resnet_block(512,2))
#最后加入全局平均池化层接一个全连接层输出
net.add(nn.GlobalAvgPool2D(),nn.Dense(10))
#观察形状的变化
X=-nd.random.uniform(shape=(1,1,224,224))
net.initialize()
for layer in net:
X=layer(X)
print(layer.name,'shape:',X.shape)

'''
conv33 shape: (1, 64, 112, 112)
batchnorm27 shape: (1, 64, 112, 112)
relu0 shape: (1, 64, 112, 112)
pool0 shape: (1, 64, 56, 56)
sequential1 shape: (1, 64, 56, 56)
sequential2 shape: (1, 128, 28, 28)
sequential3 shape: (1, 256, 14, 14)
sequential4 shape: (1, 512, 7, 7)
pool1 shape: (1, 512, 1, 1)
dense0 shape: (1, 10)
'''```

```lr,num_epochs,batch_size,ctx=0.05,5,50,d2l.try_gpu()
net.initialize(force_reinit=True,ctx=ctx,init=init.Xavier())
trainer=gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':lr})
train_iter,test_iter=d2l.load_data_fashion_mnist(batch_size,resize=96)
d2l.train_ch5(net,train_iter,test_iter,batch_size,trainer,ctx,num_epochs)
'''
epoch 1, loss 0.3976, train acc 0.859, test acc 0.895, time 124.1 sec
epoch 2, loss 0.2347, train acc 0.913, test acc 0.903, time 122.9 sec
epoch 3, loss 0.1829, train acc 0.932, test acc 0.887, time 123.1 sec
epoch 4, loss 0.1433, train acc 0.948, test acc 0.916, time 123.0 sec
epoch 5, loss 0.1113, train acc 0.959, test acc 0.921, time 123.0 sec
'''```

```mxnet.base.MXNetError: [12:10:21] c:\jenkins\workspace\mxnet-tag\mxnet\src\storage\./pooled_storage_manager.h:157: cudaMalloc failed: out of memory
mxnet.base.MXNetError: [12:17:38] c:\jenkins\workspace\mxnet-tag\mxnet\3rdparty\mshadow\mshadow\./stream_gpu-inl.h:62: Check failed: e == cudaSuccess: CUDA: an illegal memory access was encountered```