## 深度学习与艺术 – 神经风格迁移

#### 在此作业中，你将：

import os
import sys
import scipy.io
import scipy.misc
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
from nst_utils import *
import numpy as np
import tensorflow as tf
%matplotlib inline

## 2 迁移学习

# Load the pretrained VGG-19 weights; load_vgg_model comes from nst_utils (not shown here).
model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")
# Show the returned mapping of layer names to tensors.
print(model)

{'input': <tf.Variable 'Variable:0' shape=(1, 300, 400, 3) dtype=float32_ref>, 'conv1_1': <tf.Tensor 'Relu:0' shape=(1, 300, 400, 64) dtype=float32>, 'conv1_2': <tf.Tensor 'Relu_1:0' shape=(1, 300, 400, 64) dtype=float32>, 'avgpool1': <tf.Tensor 'AvgPool:0' shape=(1, 150, 200, 64) dtype=float32>, 'conv2_1': <tf.Tensor 'Relu_2:0' shape=(1, 150, 200, 128) dtype=float32>, 'conv2_2': <tf.Tensor 'Relu_3:0' shape=(1, 150, 200, 128) dtype=float32>, 'avgpool2': <tf.Tensor 'AvgPool_1:0' shape=(1, 75, 100, 128) dtype=float32>, 'conv3_1': <tf.Tensor 'Relu_4:0' shape=(1, 75, 100, 256) dtype=float32>, 'conv3_2': <tf.Tensor 'Relu_5:0' shape=(1, 75, 100, 256) dtype=float32>, 'conv3_3': <tf.Tensor 'Relu_6:0' shape=(1, 75, 100, 256) dtype=float32>, 'conv3_4': <tf.Tensor 'Relu_7:0' shape=(1, 75, 100, 256) dtype=float32>, 'avgpool3': <tf.Tensor 'AvgPool_2:0' shape=(1, 38, 50, 256) dtype=float32>, 'conv4_1': <tf.Tensor 'Relu_8:0' shape=(1, 38, 50, 512) dtype=float32>, 'conv4_2': <tf.Tensor 'Relu_9:0' shape=(1, 38, 50, 512) dtype=float32>, 'conv4_3': <tf.Tensor 'Relu_10:0' shape=(1, 38, 50, 512) dtype=float32>, 'conv4_4': <tf.Tensor 'Relu_11:0' shape=(1, 38, 50, 512) dtype=float32>, 'avgpool4': <tf.Tensor 'AvgPool_3:0' shape=(1, 19, 25, 512) dtype=float32>, 'conv5_1': <tf.Tensor 'Relu_12:0' shape=(1, 19, 25, 512) dtype=float32>, 'conv5_2': <tf.Tensor 'Relu_13:0' shape=(1, 19, 25, 512) dtype=float32>, 'conv5_3': <tf.Tensor 'Relu_14:0' shape=(1, 19, 25, 512) dtype=float32>, 'conv5_4': <tf.Tensor 'Relu_15:0' shape=(1, 19, 25, 512) dtype=float32>, 'avgpool5': <tf.Tensor 'AvgPool_4:0' shape=(1, 10, 13, 512) dtype=float32>}

model["input"].assign(image)

sess.run(model["conv4_2"])

## 3 神经风格迁移

### 3.1 计算内容损失

# Read the content image through Pillow: scipy.misc.imread is deprecated and
# is removed in SciPy 1.2.0, so it cannot be relied on any more.
content_image = np.array(Image.open("images/louvre.jpg"))
# Preview the image that was just loaded.
imshow(content_image)

d:\vr\virtual_environment\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning:     imread is deprecated!
imread is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use imageio.imread instead.
"""Entry point for launching an IPython kernel.

<matplotlib.image.AxesImage at 0x26905cddb38>

#### 3.1.1 如何确保生成的图像G与图像C的内容匹配？

$$J_{content}(C,G) = \frac{1}{4 \times n_H \times n_W \times n_C}\sum_{\text{all entries}} \left(a^{(C)} - a^{(G)}\right)^2 \tag{1}$$

1. 从a_G检索尺寸：

1. 要从张量X检索尺寸，请使用： X.get_shape().as_list()

1. 如上图所示展开a_C和a_G

1. 如果遇到问题，请查看 Hint1 和 Hint2。

1. 计算内容损失：

1. 如果遇到问题，请查看 Hint3、Hint4 和 Hint5。

def compute_content_cost(a_C, a_G):
    """Compute the content cost between two activation volumes.

    Args:
        a_C: tensor of shape (1, n_H, n_W, n_C) holding the hidden-layer
            activations for the content image C.
        a_G: tensor of shape (1, n_H, n_W, n_C) holding the hidden-layer
            activations for the generated image G.

    Returns:
        Scalar tensor: the sum of squared differences between the two
        activations scaled by 1 / (4 * n_H * n_W * n_C) (formula 1).
    """
    # Static dimensions are read from a_G; the leading batch size is not used.
    _, height, width, channels = a_G.get_shape().as_list()

    # Unroll each 4-D volume into a 2-D (n_C, n_H * n_W) matrix.
    unrolled_shape = [height * width, channels]
    content_unrolled = tf.transpose(tf.reshape(a_C, unrolled_shape))
    generated_unrolled = tf.transpose(tf.reshape(a_G, unrolled_shape))

    difference = tf.subtract(content_unrolled, generated_unrolled)
    squared_error = tf.reduce_sum(tf.square(difference))

    scale = 1 / (4 * height * width * channels)
    return scale * squared_error

# Sanity check for compute_content_cost on random activations.
tf.reset_default_graph()
with tf.Session() as test:
    # Fix the graph-level seed before creating the random ops so the printed
    # value is reproducible.
    tf.set_random_seed(1)
    a_C = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    a_G = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    J_content = compute_content_cost(a_C, a_G)
    print("J_content = " + str(J_content.eval()))
# No explicit test.close() is needed: leaving the with-block closes the session.

J_content = 6.7655935

### 3.2 计算风格损失

# Read the style image through Pillow: scipy.misc.imread is deprecated and
# is removed in SciPy 1.2.0, so it cannot be relied on any more.
style_image = np.array(Image.open("images/monet_800600.jpg"))
# Preview the image that was just loaded.
imshow(style_image)

d:\vr\virtual_environment\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning:     imread is deprecated!
imread is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use imageio.imread instead.
"""Entry point for launching an IPython kernel.

<matplotlib.image.AxesImage at 0x269080c23c8>

#### 3.2.1 风格矩阵

def gram_matrix(A):
    """Return the Gram matrix of A.

    Args:
        A: matrix of shape (n_C, n_H * n_W).

    Returns:
        The product A · Aᵀ, of shape (n_C, n_C).
    """
    A_transposed = tf.transpose(A)
    return tf.matmul(A, A_transposed)

# Sanity check for gram_matrix on a small random matrix.
tf.reset_default_graph()
with tf.Session() as test:
    # Seed first so the random matrix, and the printed result, is reproducible.
    tf.set_random_seed(1)
    A = tf.random_normal([3, 2*1], mean=1, stddev=4)
    GA = gram_matrix(A)
    # Evaluate through the session opened above.
    print("GA = " + str(test.run(GA)))

GA = [[ 6.422305 -4.429122 -2.096682]
[-4.429122 19.465837 19.563871]
[-2.096682 19.563871 20.686462]]

#### 3.2.2 风格损失

$$J_{style}^{[l]}(S,G) = \frac{1}{4 \times {n_C}^2 \times (n_H \times n_W)^2} \sum_{i=1}^{n_C}\sum_{j=1}^{n_C}\left(G^{(S)}_{ij} - G^{(G)}_{ij}\right)^2 \tag{2}$$

1. 从隐藏层激活a_G中检索尺寸：

1. 要从张量X检索尺寸，请使用：

X.get_shape().as_list()

1. 如上图所示，将隐藏层激活a_S和a_G展开为2D矩阵。

1. 你可能会发现 Hint1 和 Hint2 有用。

1. 计算图像S和G的风格矩阵。（使用以前编写的函数）

1. 计算风格损失：

1. 你可能会发现 Hint3、Hint4 和 Hint5 有用。

def compute_layer_style_cost(a_S, a_G):
    """Compute the style cost of one layer from its activations.

    Args:
        a_S: activations of the layer for the style image; reshaped here to
            (n_H * n_W, n_C) using the dimensions read from a_G.
        a_G: tensor of activations of the same layer for the generated image;
            its static shape is unpacked as (m, n_H, n_W, n_C).

    Returns:
        Scalar tensor: the sum of squared differences between the two Gram
        matrices divided by 4 * n_C^2 * (n_H * n_W)^2 (formula 2).
    """
    # The leading batch size is not used.
    _, height, width, channels = a_G.get_shape().as_list()
    positions_by_channels = (height * width, channels)

    # Flatten the spatial positions first; the transpose below then yields the
    # (n_C, n_H * n_W) layout that gram_matrix expects.
    style_rows = tf.reshape(a_S, shape=positions_by_channels)
    generated_rows = tf.reshape(a_G, shape=positions_by_channels)

    gram_style = gram_matrix(tf.transpose(style_rows))
    gram_generated = gram_matrix(tf.transpose(generated_rows))

    gram_gap = tf.subtract(gram_style, gram_generated)
    squared_error = tf.reduce_sum(tf.square(gram_gap))
    normalizer = 4 * (channels * channels) * (width * height) * (width * height)
    return squared_error / normalizer

# Sanity check for compute_layer_style_cost on random activations.
tf.reset_default_graph()
with tf.Session() as test:
    # Seed first so the random activations, and the printed result, are reproducible.
    tf.set_random_seed(1)
    a_S = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    a_G = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    J_style_layer = compute_layer_style_cost(a_S, a_G)
    # Evaluate through the session opened above.
    print("J_style_layer = " + str(test.run(J_style_layer)))

J_style_layer = 9.190278

#### 3.2.3 风格权重

# (layer name, weight) pairs: the first convolution of each of the five VGG
# blocks, all weighted equally at 0.2.
STYLE_LAYERS = [("conv%d_1" % block_index, 0.2) for block_index in range(1, 6)]

$$J_{style}(S,G) = \sum_{l} \lambda^{[l]} J^{[l]}_{style}(S,G)$$

其中 $\lambda^{[l]}$ 的值由 `STYLE_LAYERS` 中每一层对应的系数给出。

2.从STYLE_LAYERS循环（layer_name，coeff）：
a. 选择当前层的输出张量 例如，要从层"conv1_1"中调用张量，你可以这样做：out = model["conv1_1"]
b. 通过在张量"out"上运行会话，从当前层获取style图像的风格
c. 获取一个表示当前层生成的图像风格的张量。 这只是"out"。
d. 现在，你拥有两种风格。使用上面实现的函数计算当前层的style_cost
e. 将当前层的（style_cost x coeff）添加到整体风格损失（J_style）中
3.返回J_style，它现在应该是每层的（style_cost x coeff）之和。

def compute_style_cost(model, STYLE_LAYERS, session=None):
    """Compute the overall style cost as a weighted sum over several layers.

    The style activations are evaluated immediately, so the model input must
    already hold the style image when this function is called.

    Args:
        model: mapping from layer name to the layer's output tensor.
        STYLE_LAYERS: list of (layer_name, coeff) pairs naming the layers to
            extract style from and the weight given to each.
        session: TensorFlow session used to evaluate the style activations.
            When omitted, the notebook-level ``sess`` is used, which is what
            existing callers rely on.

    Returns:
        Tensor holding the sum over the listed layers of
        coeff * compute_layer_style_cost(a_S, a_G).
    """
    if session is None:
        # Backward-compatible default: fall back to the module-level session.
        session = sess

    # Start from a plain 0; it becomes a tensor once the first term is added.
    J_style = 0

    for layer_name, coeff in STYLE_LAYERS:
        # Output tensor of the currently selected layer.
        out = model[layer_name]

        # Evaluate it now to capture the style image's activations as values.
        a_S = session.run(out)

        # a_G stays symbolic: it is the same tensor, evaluated later once the
        # generated image has been assigned as the model input.
        a_G = out

        # Add this layer's weighted contribution.
        J_style += coeff * compute_layer_style_cost(a_S, a_G)

    return J_style

### 3.3 定义优化的总损失

$$J(G) = \alpha J_{content}(C,G) + \beta J_{style}(S,G)$$

def total_cost(J_content, J_style, alpha = 10, beta = 40):
    """Combine the content and style costs into the total cost.

    Args:
        J_content: content cost.
        J_style: style cost.
        alpha: weight applied to the content cost.
        beta: weight applied to the style cost.

    Returns:
        alpha * J_content + beta * J_style.
    """
    weighted_content = alpha * J_content
    weighted_style = beta * J_style
    return weighted_content + weighted_style

# Sanity check for total_cost with two seeded random scalars.
tf.reset_default_graph()
with tf.Session() as test:
    np.random.seed(3)
    # Draw order matters for reproducibility: content first, then style.
    J_content, J_style = np.random.randn(), np.random.randn()
    J = total_cost(J_content, J_style)
    print("J = " + str(J))

J = 35.34667875478276

#### 你应该记住：

$\alpha$ 和 $\beta$ 是控制内容和风格之间相对权重的超参数

## 4 解决优化问题

1. 创建一个交互式会话

1. 加载内容图像

1. 加载风格图像

1. 随机初始化要生成的图像

1. 加载VGG19模型

1. 构建TensorFlow计算图：

1. 通过VGG19模型运行内容图像并计算内容损失

1. 通过VGG19模型运行风格图像并计算风格损失

1. 计算总损失

1. 定义优化器和学习率

1. 初始化TensorFlow图并运行大量迭代，在每个步骤中更新生成的图像。

# 重置图
# Reset the default graph.
tf.reset_default_graph()
# Start an interactive session; it is used by the cells that follow.
sess = tf.InteractiveSession()

# Read the content image through Pillow: scipy.misc.imread is deprecated and
# is removed in SciPy 1.2.0, so it cannot be relied on any more.
content_image = np.array(Image.open("images/louvre_small.jpg"))
# reshape_and_normalize_image comes from nst_utils (not shown here).
content_image = reshape_and_normalize_image(content_image)

d:\vr\virtual_environment\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning:     imread is deprecated!
imread is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use imageio.imread instead.
"""Entry point for launching an IPython kernel.

# Read the style image through Pillow: scipy.misc.imread is deprecated and
# is removed in SciPy 1.2.0, so it cannot be relied on any more.
style_image = np.array(Image.open("images/monet.jpg"))
# reshape_and_normalize_image comes from nst_utils (not shown here).
style_image = reshape_and_normalize_image(style_image)

d:\vr\virtual_environment\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning:     imread is deprecated!
imread is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use imageio.imread instead.
"""Entry point for launching an IPython kernel.

# Build the initial generated image from the content image.
# generate_noise_image comes from nst_utils; presumably it mixes noise into the
# content image -- confirm there.
generated_image = generate_noise_image(content_image)
# Preview the first entry along the leading axis.
imshow(generated_image[0])

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

<matplotlib.image.AxesImage at 0x26905ec3d30>

# Load the VGG-19 model again now that the default graph has been reset.
model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")

1. 将内容图像分配为VGG模型的输入。

1. 将a_C设置为张量，为层”conv4_2″提供隐藏层激活。

1. 设置a_G为张量，为同一层提供隐藏层激活。

1. 使用a_C和a_G计算内容损失。

# 将内容图像指定为VGG模型的输入。
sess.run(model['input'].assign(content_image))
# 选择conv4_2层的输出张量
out = model['conv4_2']
# 设置a_C为我们选择的图层的隐藏层激活
a_C = sess.run(out)
# 将a_G设置为来自同一层的隐藏层激活。这里，a_G引用了model['conv4_2']，还没有计算。在后面的代码中，我们将指定图像G作为模型输入，这样当我们运行会话时，这将是从适当的层绘制的激活，G作为输入。
a_G = out
# 计算内容成本
J_content = compute_content_cost(a_C, a_G)

# 将模型的输入指定为“style”图像
sess.run(model['input'].assign(style_image))
# 计算样式成本
J_style = compute_style_cost(model, STYLE_LAYERS)

J = total_cost(J_content, J_style, alpha = 10, beta = 40)

optimizer = tf.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(J)

def model_nn(sess, input_image, num_iterations = 200):
    """Run the style-transfer optimisation and return the generated image.

    Relies on notebook-level names built before the call: ``model``,
    ``train_step``, ``J``, ``J_content`` and ``J_style``.

    Args:
        sess: TensorFlow session used to run the graph.
        input_image: initial image assigned to ``model['input']``.
        num_iterations: number of optimisation steps to run.

    Returns:
        The value of ``model['input']`` after the last step.
    """
    # Create the output directory up front in case save_image does not do it
    # (save_image lives in nst_utils, not shown here), so a long run does not
    # fail on the first save.
    os.makedirs("output", exist_ok=True)

    # Initialise all global variables (the initializer has to be run in the session).
    sess.run(tf.global_variables_initializer())

    # Feed the initial (noisy) image into the model input via assign().
    generated_image = sess.run(model['input'].assign(input_image))

    for i in range(num_iterations):
        # One optimisation step on the total cost.
        sess.run(train_step)

        # Report progress and save a snapshot every 20 iterations. The image is
        # fetched only here because intermediate values are not used otherwise.
        if i % 20 == 0:
            generated_image = sess.run(model['input'])
            Jt, Jc, Js = sess.run([J, J_content, J_style])
            print("Iteration " + str(i) + " :")
            print("total cost = " + str(Jt))
            print("content cost = " + str(Jc))
            print("style cost = " + str(Js))

            # Save the current generated image in the "output" directory.
            save_image("output/" + str(i) + ".png", generated_image)

    # Fetch the final state of the input variable and save it.
    generated_image = sess.run(model['input'])
    save_image('output/generated_image.jpg', generated_image)

    return generated_image

# Run the optimisation from the noisy image with the default number of iterations.
model_nn(sess, generated_image)

Iteration 0 :
total cost = 4936893000.0
content cost = 7881.85
style cost = 123420350.0
Iteration 20 :
total cost = 931792700.0
content cost = 15150.729
style cost = 23291030.0
Iteration 40 :
total cost = 476977900.0
content cost = 16802.03
style cost = 11920246.0
Iteration 60 :
total cost = 306887600.0
content cost = 17398.729
style cost = 7667841.0
Iteration 80 :
total cost = 224318640.0
content cost = 17652.709
style cost = 5603553.0
Iteration 100 :
total cost = 177715900.0
content cost = 17879.422
style cost = 4438427.5
Iteration 120 :
total cost = 147169620.0
content cost = 18050.78
style cost = 3674727.5
Iteration 140 :
total cost = 125411320.0
content cost = 18213.465
style cost = 3130729.5
Iteration 160 :
total cost = 108912420.0
content cost = 18361.072
style cost = 2718220.2
Iteration 180 :
total cost = 96001230.0
content cost = 18497.363
style cost = 2395406.5

array([[[[ -45.12358  ,  -72.19441  ,   51.746346 ],
[ -24.75327  ,  -44.2964   ,   29.886335 ],
[ -39.64303  ,  -31.56099  ,   13.718143 ],
...,
[ -24.76011  ,  -10.788876 ,   14.371523 ],
[ -28.816984 ,   -5.280051 ,   23.481634 ],
[ -40.314568 ,   -6.0927963,   49.9986   ]],
[[ -58.39488  ,  -53.06708  ,   26.432343 ],
[ -32.944817 ,  -32.69456  ,   -1.5097439],
[ -26.427433 ,  -31.894102 ,   15.655808 ],
...,
[ -25.357164 ,   -9.746335 ,   24.563683 ],
[ -20.002506 ,  -20.456278 ,   12.591088 ],
[ -38.10473  ,  -10.029796 ,   10.0670185]],
[[ -50.200523 ,  -50.721996 ,   15.647173 ],
[ -37.31482  ,  -42.209923 ,   -6.276647 ],
[ -33.782967 ,  -25.933123 ,    5.7868314],
...,
[ -11.978632 ,  -41.413166 ,   10.257919 ],
[ -13.608108 ,  -24.304035 ,   14.8848295],
[ -23.316372 ,  -21.182524 ,   12.9698   ]],
...,
[[ -45.040543 ,  -44.788315 ,  -27.18235  ],
[ -90.703514 ,  -68.318245 , -255.0205   ],
[ -65.162224 ,  -61.23871  , -127.02678  ],
...,
[ -62.159573 ,  -74.07849  ,  -31.950476 ],
[ -75.70332  ,  -98.683426 ,  -27.93272  ],
[   3.5887198,  -34.177395 ,   23.60415  ]],
[[ -19.461033 ,  -72.949875 ,   11.1744175],
[-165.36955  ,  -96.42717  ,  -28.43835  ],
[  18.556105 ,  -60.75096  ,  -17.065166 ],
...,
[ -91.97378  ,  -86.639656 ,  -49.875256 ],
[-101.63406  , -109.606384 ,  -63.365128 ],
[ -69.75303  , -100.961685 ,   -3.9610627]],
[[  40.353714 ,  -34.501305 ,   46.080757 ],
[  28.722298 ,  -80.35448  ,   24.79451  ],
[  33.369385 ,  -26.925333 ,   19.59569  ],
...,
[-117.021164 , -103.20966  ,  -19.439291 ],
[-147.96053  , -143.07509  ,  -31.319807 ],
[ -25.229664 , -101.636154 ,   23.11785  ]]]], dtype=float32)

## 5 使用你自己的图像进行测试

1. 单击笔记本上部选项卡中的”File -> Open”

1. 转到”/images”并上传图像(要求：WIDTH = 400, HEIGHT = 300，与模型输入形状 (1, 300, 400, 3) 一致)，例如将其重命名为”my_content.jpg”和”my_style.jpg”

1. 在第 4 节（解决优化问题）中，将加载图像的代码由下面的第一组语句更改为第二组语句：

# Before: the image-loading statements to be replaced.
content_image = scipy.misc.imread("images/louvre.jpg")
style_image = scipy.misc.imread("images/claude-monet.jpg")

# After: load your own uploaded images instead.
content_image = scipy.misc.imread("images/my_content.jpg")
style_image = scipy.misc.imread("images/my_style.jpg")