Press "Enter" to skip to content

Image Generation with Generative Adversarial Networks (GAN): A DCGAN Example

This post implements DCGAN [1], a deep convolutional generative adversarial network. The DCGAN architecture replaces fully connected layers with global pooling over convolutional feature maps. As is well known, the goal of a GAN is to reach a Nash equilibrium between the discriminator and the generator; that is, neither model should end up performing particularly well. The experiments use CUB200-2011, a bird dataset whose training set contains 5,994 images. The setup is as follows: 200 training epochs with a learning rate of 0.0002. In addition, I crop the center of each original image as the model input, at a resolution of 128×128 pixels.
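As background (not specific to this post), the Nash equilibrium mentioned above is the saddle point of the standard GAN minimax objective, which the training loop in main.py below optimizes with BCELoss by alternating updates of D and G (using the usual non-saturating trick of training G to maximize log D(G(z))):

$$\min_G \max_D V(D, G) = \mathbb{E}_{x \sim p_{\text{data}}(x)}\bigl[\log D(x)\bigr] + \mathbb{E}_{z \sim p_z(z)}\bigl[\log\bigl(1 - D(G(z))\bigr)\bigr]$$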

 

Code:

 

main.py

 

# Name: DCGAN_main
# Author: Reacubeth
# Time: 2021/5/28 19:47
# Mail: [email protected]
# Site: www.omegaxyz.com
# *_*coding:utf-8 *_*
 
from __future__ import print_function
import os
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
 
from DCGAN import Discriminator, Generator
 
 
manualSeed = 999
random.seed(manualSeed)
torch.manual_seed(manualSeed)
 
path = 'bird/CUB_200_2011/'
ROOT_TRAIN = path + 'dataset/train/'
 
 
workers = 2        # number of DataLoader worker processes
batch_size = 128   # batch size during training
image_size = 128   # spatial size of training images (after center crop)
nc = 3             # number of image channels
nz = 100           # size of the latent vector z
ngf = 64           # generator feature-map size (unused here; DCGAN.py defines its own value)
ndf = 64           # discriminator feature-map size (unused here; DCGAN.py defines its own value)
num_epochs = 200   # number of training epochs
lr = 0.0002        # learning rate for both optimizers
beta1 = 0.5        # beta1 hyperparameter for Adam
ngpu = 4           # number of GPUs available
 
dataset = dset.ImageFolder(root=ROOT_TRAIN,
                           transform=transforms.Compose([
                               transforms.Resize(image_size),
                               transforms.CenterCrop(image_size),
                               transforms.ToTensor(),
                               transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
                           ]))
 
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=True, num_workers=workers)
 
 
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")
 
# Plot a sample of the training images
real_batch = next(iter(dataloader))
plt.figure(figsize=(8, 8))
plt.axis("off")
plt.title("Training Images")
plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu(), (1, 2, 0)))
 
 
# Custom weight initialization called on netG and netD: weights drawn from N(0, 0.02), as in the DCGAN paper
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)
 
 
netG = Generator(ngpu).to(device)
if (device.type == 'cuda') and (ngpu > 1):
    netG = nn.DataParallel(netG, list(range(ngpu)))
netG.apply(weights_init)
print(netG)
 
netD = Discriminator(ngpu).to(device)
if (device.type == 'cuda') and (ngpu > 1):
    netD = nn.DataParallel(netD, list(range(ngpu)))
netD.apply(weights_init)
print(netD)
 
 
criterion = nn.BCELoss()
 
# Create batch of latent vectors that we will use to visualize
#  the progression of the generator
fixed_noise = torch.randn(64, nz, 1, 1, device=device)
 
real_label = 1.
fake_label = 0.
 
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))
 
# Training Loop
 
# Lists to keep track of progress
img_list = []
G_losses = []
D_losses = []
iters = 0
 
print("Starting Training Loop...")
for epoch in range(num_epochs):
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ############################
        # Train with an all-real batch
        netD.zero_grad()
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        output = netD(real_cpu).view(-1)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.mean().item()
 
        # Train with an all-fake batch
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake = netG(noise)
        label.fill_(fake_label)
        output = netD(fake.detach()).view(-1)
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        errD = errD_real + errD_fake
        optimizerD.step()
 
 
        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ############################
        netG.zero_grad()
        label.fill_(real_label)
        output = netD(fake).view(-1)
        errG = criterion(output, label)
        errG.backward()
        D_G_z2 = output.mean().item()
        optimizerG.step()
 
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
 
        G_losses.append(errG.item())
        D_losses.append(errD.item())
 
        # Periodically save G's output on fixed_noise to visualize training progress
        if (iters % 500 == 0) or ((epoch == num_epochs - 1) and (i == len(dataloader) - 1)):
            with torch.no_grad():
                fake = netG(fixed_noise).detach().cpu()
            img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
 
        iters += 1
 
os.makedirs('checkpoint', exist_ok=True)  # make sure the save directory exists
torch.save(netD, 'checkpoint/netD' + str(num_epochs) + '.pth')
 
plt.figure(figsize=(10, 5))
plt.title("Generator and Discriminator Loss During Training")
plt.plot(G_losses, label="G")
plt.plot(D_losses, label="D")
plt.xlabel("iterations")
plt.ylabel("Loss")
plt.legend()
plt.savefig('gan_losses.pdf', bbox_inches='tight')
plt.show()
 
fig = plt.figure(figsize=(8, 8))
plt.axis("off")
ims = [[plt.imshow(np.transpose(i, (1, 2, 0)), animated=True)] for i in img_list]
ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True)
plt.savefig('fake_img.pdf', bbox_inches='tight')
HTML(ani.to_jshtml())
 
 
real_batch = next(iter(dataloader))
 
plt.figure(figsize=(15, 15))
plt.subplot(1, 2, 1)
plt.axis("off")
plt.title("Real Images")
plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=5, normalize=True).cpu(), (1, 2, 0)))
plt.savefig('real_img.pdf', bbox_inches='tight')
 
 
plt.subplot(1, 2, 2)
plt.axis("off")
plt.title("Fake Images")
plt.imshow(np.transpose(img_list[-1], (1, 2, 0)))
plt.savefig('cmp_img.pdf', bbox_inches='tight')
plt.show()

 

DCGAN.py

 

# Name: DCGAN.py
# Author: Reacubeth
# Time: 2021/5/28 18:42
# Mail: [email protected]
# Site: www.omegaxyz.com
# *_*coding:utf-8 *_*
 
import torch.nn as nn
import torch.nn.functional as F
 
# Number of channels in the training images. For color images this is 3
nc = 3
 
# Size of z latent vector (i.e. size of generator input)
nz = 100
 
# Size of feature maps in generator
ngf = 128
 
# Size of feature maps in discriminator
ndf = 128
 
# https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html
 
 
class Generator(nn.Module):
    def __init__(self, ngpu):
        super(Generator, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(nz, ngf * 16, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 16),
            nn.ReLU(True),
            # state size. (ngf*16) x 4 x 4
            nn.ConvTranspose2d(ngf * 16, ngf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 8 x 8
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 16 x 16
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 32 x 32
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 64 x 64
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. (nc) x 128 x 128
        )
 
    def forward(self, x):
        x = self.main(x)
        return x
 
 
class Discriminator(nn.Module):
    def __init__(self, ngpu):
        super(Discriminator, self).__init__()
        self.use_fully = True
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is (nc) x 128 x 128
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 64 x 64
 
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 32 x 32
 
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 16 x 16
 
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 8 x 8
 
            nn.Conv2d(ndf * 8, ndf * 16, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 16),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*16) x 4 x 4
 
            # use_fully
            # nn.Conv2d(ndf * 16, 1, 4, 1, 0, bias=False),
            # nn.Sigmoid()
        )
 
        if self.use_fully:
            print('use_fully')
            self.out = nn.Sequential(
                nn.Linear(ndf * 16 * 4 * 4, 1),
                nn.Sigmoid()
            )
 
    def forward(self, x):
        x = self.main(x)
        if self.use_fully:
            x = x.view(x.size(0), -1)
            x = self.out(x)
        return x
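As a quick sanity check (this snippet is my own addition, not part of the original scripts), the two networks above can be instantiated directly to verify the 128×128 tensor shapes noted in the comments:

# Shape check for the networks defined in DCGAN.py (illustrative only).
import torch

from DCGAN import Generator, Discriminator

netG = Generator(ngpu=1)
netD = Discriminator(ngpu=1)

z = torch.randn(2, 100, 1, 1)   # a batch of 2 latent vectors, nz = 100
fake = netG(z)
print(fake.shape)               # torch.Size([2, 3, 128, 128])

score = netD(fake)
print(score.shape)              # torch.Size([2, 1]), sigmoid probabilities in (0, 1)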

 

The generated images are compared with the original images in the figure below. The grid on the left shows real images; the grid on the right shows generated images.

 

 

It can be observed that the fake images are close to the real ones; some generated images are even hard to distinguish from real images. Of course, some images are a little blurry against the green background. To observe how the generated images evolve, I also visualized the fake images at different stages of training. Interestingly, some latent features are already captured at an early stage: by the 50th iteration, the outline of a whole bird has started to emerge.

 

 

The figure below shows how the losses change during training. We can see that the generator's loss is slightly higher than the discriminator's. One possible reason is the fully connected layer I added to the discriminator. We can also see that both losses are unstable, which may lead to mode collapse.

 

In addition, to examine the behavior of GAN training, I also tried a learning rate higher than the original one (0.001). I found that the generator's loss immediately converged to 0 while the discriminator's loss stayed at a high level, which illustrates why GAN models are hard to train.

 

Finally, with the help of Grad-CAM [2], I visualize the attention of different intermediate layers of the DCGAN discriminator. Grad-CAM uses the gradient information flowing into the last convolutional layer of a CNN to assign an importance value to each neuron. Specifically, the gradient of the class score $y^c$ (the logit before the softmax) is computed with respect to the feature-map activations $A^k$ of a convolutional layer. These back-flowing gradients are globally average-pooled over the width and height dimensions (indexed by $i$ and $j$, respectively) to obtain the neuron importance weights $\alpha_k^c$. The following two equations describe this process:

$$\alpha_k^c = \frac{1}{Z} \sum_i \sum_j \frac{\partial y^c}{\partial A_{ij}^k}$$

$$L_{\text{Grad-CAM}}^c = \mathrm{ReLU}\Bigl(\sum_k \alpha_k^c A^k\Bigr)$$

where $Z$ is the number of spatial locations in the feature map.
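The figure-generating code is not included in the post, so here is a minimal Grad-CAM sketch for the discriminator, assuming the Discriminator class from DCGAN.py above. The helper name grad_cam, the choice of the deepest Conv2d layer (netD.main[-3]), and the use of the sigmoid output D(x) as the score are my own choices; the sketch follows the equations above but is not necessarily the exact code used for the figures (the post visualizes the "fake" decision, whose gradients are simply the negation of those of D(x)).

# A minimal Grad-CAM sketch for the DCGAN discriminator (illustrative, not the original figure code).
import torch
import torch.nn.functional as F

from DCGAN import Discriminator


def grad_cam(netD, image, target_layer):
    """Heatmap of the discriminator's score w.r.t. the feature maps of target_layer."""
    activations, gradients = {}, {}

    def fwd_hook(module, inp, out):
        activations['value'] = out.detach()

    def bwd_hook(module, grad_in, grad_out):
        gradients['value'] = grad_out[0].detach()

    h1 = target_layer.register_forward_hook(fwd_hook)
    h2 = target_layer.register_full_backward_hook(bwd_hook)

    netD.zero_grad()
    score = netD(image).view(-1)             # D(x): probability that the image is real
    score.backward(torch.ones_like(score))   # back-propagate the score to the feature maps

    h1.remove()
    h2.remove()

    A = activations['value']                                   # feature maps A^k, shape (1, K, H, W)
    alpha = gradients['value'].mean(dim=(2, 3), keepdim=True)  # alpha_k: gradients pooled over i, j
    cam = F.relu((alpha * A).sum(dim=1, keepdim=True))         # ReLU(sum_k alpha_k * A^k)
    cam = F.interpolate(cam, size=image.shape[-2:], mode='bilinear', align_corners=False)
    cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)   # normalize to [0, 1] for overlay
    return cam.squeeze()


if __name__ == '__main__':
    netD = Discriminator(ngpu=1).eval()
    x = torch.randn(1, 3, 128, 128)             # stand-in for a normalized CUB200-2011 image
    heatmap = grad_cam(netD, x, netD.main[-3])  # hook the deepest Conv2d layer
    print(heatmap.shape)                        # torch.Size([128, 128])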

 

 

 

The heatmaps of the last convolutional layer of the DCGAN discriminator on CUB200-2011 are shown in the cover image. The second and third columns are the heatmaps from Grad-CAM and Grad-CAM++ [3], respectively; the last two columns show the results overlaid on the original images. I chose to visualize the attention that leads the discriminator to judge an image as fake. The results show that, in most cases, the regions with high heat values are not the birds but the surroundings, which echoes the point made at the beginning: in a GAN, the discriminator is not supposed to perform particularly well.

 

[1] Radford, Alec, Luke Metz, and Soumith Chintala. “Unsupervised representation learning with deep convolutional generative adversarial networks.” arXiv preprint arXiv:1511.06434 (2015).

 

[2] Selvaraju, Ramprasaath R., et al. “Grad-cam: Visual explanations from deep networks via gradient-based localization.” Proceedings of the IEEE international conference on computer vision. 2017.

 

[3] Chattopadhay, Aditya, et al. “Grad-cam++: Generalized gradient-based visual explanations for deep convolutional networks.” 2018 IEEE Winter Conference on Applications of Computer Vision (WACV). IEEE, 2018.
