CTF 中的欺骗 AI 问题一般分成两类：基于 神经网络 的和基于 统计模型 的。如果题目要求选手欺骗神经网络，一般会给出白盒的模型（往往是图像分类任务）；如果是要求选手欺骗统计学习模型，有些题目会给出白盒的模型参数，也有的提供训练数据集。

## 欺骗 kNN：[西湖论剑2020] 指鹿为马

### 任务目标

```python
import numpy as np
from PIL import Image
import math
import operator
import os
import time
import base64
import random
# NOTE(review): the `def` header for this loader is missing from the listing
# (the trailing `return` cannot execute at module level) — restore it before use.
# Builds a one-row array: horse.png as flattened grayscale pixels plus a label.
data = []
p = Image.open('./horse.png').convert('L')  # 'L' = single-channel grayscale
p = np.array(p).reshape(-1)  # flatten the image into a 1-D pixel vector
p = np.append(p,0)  # trailing element is the class label; 0 marks the horse
data.append(p)
return np.array(data)
# NOTE(review): the `def` header for this loader is missing from the listing
# (the trailing `return` cannot execute at module level) — restore it before use.
# Builds a one-row array: deer.png as flattened grayscale pixels plus a label.
data = []
p = Image.open('./deer.png').convert('L')  # 'L' = single-channel grayscale
p = np.array(p).reshape(-1)  # flatten the image into a 1-D pixel vector
p = np.append(p,1)  # trailing element is the class label; 1 marks the deer
data.append(p)
return np.array(data)
# NOTE(review): the `def` header is missing from the listing; `pic` is never
# assigned in the visible code and is presumably the parameter — confirm.
# Builds a one-row array from the image at `pic`: flattened grayscale pixels
# followed by a trailing 1 in the label slot.
data = []
p = Image.open(pic).convert('L')  # 'L' = single-channel grayscale
p = np.array(p).reshape(-1)  # flatten the image into a 1-D pixel vector
p = np.append(p,1)  # fills the label slot so the row has the same length as training rows
data.append(p)
return np.array(data)
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` components.

    Args:
        instance1: Indexable sequence of numbers.
        instance2: Indexable sequence of numbers.
        length: Number of leading components to compare. Components at
            index ``length`` and beyond (e.g. a trailing label) are ignored.

    Returns:
        The distance as a float; ``0.0`` when ``length`` is 0 or negative.

    Raises:
        IndexError: If either sequence has fewer than ``length`` items.
    """
    squared = sum((instance1[i] - instance2[i]) ** 2 for i in range(length))
    return math.sqrt(squared)
def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    The last element of every row is treated as its label and is excluded
    from the distance computation.

    Args:
        trainingSet: Iterable of rows (features followed by a label).
        testInstance: Row to classify, laid out like the training rows.
        k: Number of neighbours to return.

    Returns:
        A list with the nearest rows, closest first. Rows at equal distance
        keep their order from ``trainingSet``. If ``k`` exceeds the number
        of training rows, all rows are returned.
    """
    length = len(testInstance) - 1  # skip the trailing label
    ranked = sorted(
        ((row, euclideanDistance(testInstance, row, length)) for row in trainingSet),
        key=operator.itemgetter(1),
    )
    return [row for row, _ in ranked[:k]]
def getResponse(neighbors):
    """Return the majority label among ``neighbors``.

    NOTE(review): the original listing was truncated after reading each
    neighbour's label (it ended on a dangling ``else:``); the vote below is
    the standard kNN reconstruction — confirm against the challenge source.

    Args:
        neighbors: Non-empty sequence of rows whose last element is the label.

    Returns:
        The label that occurs most often; on a tie, the label seen first.

    Raises:
        ValueError: If ``neighbors`` is empty.
    """
    from collections import Counter

    if len(neighbors) == 0:
        raise ValueError('neighbors must not be empty')
    votes = Counter(row[-1] for row in neighbors)
    return votes.most_common(1)[0][0]
def getAccuracy(testSet, predictions):
    """Return the percentage of rows whose label matches the prediction.

    Args:
        testSet: Sequence of rows whose last element is the true label.
        predictions: Sequence of predicted labels, aligned with ``testSet``.

    Returns:
        Accuracy in the range 0.0–100.0. An empty ``testSet`` yields 0.0
        instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    total = len(testSet)
    if total == 0:
        return 0.0
    correct = sum(1 for i, row in enumerate(testSet) if row[-1] == predictions[i])
    return (correct / float(total)) * 100.0
def check(pic):
    """Return the summed absolute pixel difference between ``pic`` and deer.png.

    The process exits (via ``exit()``) when ``pic`` cannot be opened as an
    image or when its dimensions differ from those of ``deer.png``.

    Args:
        pic: Path of the uploaded image.

    Returns:
        The sum over all pixels of ``abs(deer_pixel - pic_pixel)``.
    """
    with Image.open('deer.png') as source_p:
        try:
            c_p = Image.open(pic)
        except Exception:
            # Unreadable or non-image upload: terminate the session.
            exit()
        with c_p:
            a, b = source_p.size
            # Reject when EITHER dimension differs. The previous `and` let an
            # image with a single mismatched dimension through, which then
            # failed inside getpixel() with an out-of-range coordinate.
            if c_p.size[0] != a or c_p.size[1] != b:
                exit()
            diff_pixel = 0
            for y in range(b):
                for x in range(a):
                    # NOTE(review): assumes both images are single-channel so
                    # getpixel() returns an int — confirm for uploaded files.
                    diff_pixel += abs(source_p.getpixel((x, y)) - c_p.getpixel((x, y)))
            return diff_pixel
def main():
    """Run the interactive challenge menu until the player leaves.

    Choices read from stdin:
        '1' prints the contents of run.py,
        '2' prints the two reference pictures,
        '3' reads a base64-encoded picture, checks it against deer.png and
            classifies it with 1-NN; label 0 (horse) reveals the flag,
        '4' exits.
    Any other input prints an error and ends the session.

    NOTE(review): `h`, `d`, `flag`, `ma`, `lu` and `testSet` are read below
    but are not assigned anywhere in the visible listing; they are presumably
    set up by lines lost from it — confirm against the challenge source.
    """
    while 1:
        print('-' * 134)
        print('''      ____       __            _          _   _                _                              _   _            _
|  __ \     / _|          | |        | | | |              | |                            | | | |          | |
| |__) |___| |_ ___ _ __  | |_ ___   | |_| |__   ___    __| | ___  ___ _ __    __ _ ___  | |_| |__   ___  | |__   ___  _ __ ___  ___
|  _  // _ \  _/ _ \ '__| | __/ _ \  | __| '_ \ / _ \  / _` |/ _ \/ _ \ '__|  / _` / __| | __| '_ \ / _ \ | '_ \ / _ \| '__/ __|/ _ \\
| | \ \  __/ ||  __/ |    | || (_) | | |_| | | |  __/ | (_| |  __/  __/ |    | (_| \__ \ | |_| | | |  __/ | | | | (_) | |  \__ \  __/
|_|  \_\___|_| \___|_|     \__\___/   \__|_| |_|\___|  \__,_|\___|\___|_|     \__,_|___/  \__|_| |_|\___| |_| |_|\___/|_|  |___/\___|
''')
        print('-'*134)
        print('\t1.show source code')
        print('\t2.give me the source pictures')
        # NOTE(review): choice '3' (upload) is handled below but no menu line
        # for it survives in the listing.
        print('\t4.exit')
        choose = input('>')
        if choose == '1':
            # The listing opened the file but never printed or closed it.
            with open('run.py','r') as w:
                print(w.read())
            continue
        elif choose == '2':
            print('this is horse`s picture:')
            print(h.decode())
            print('-'*134)
            print('this is deer`s picture:')
            print(d.decode())
            continue
        elif choose == '4':
            break
        elif choose == '3':
            pic = input('>')
            try:
                pic = base64.b64decode(pic)
            except ValueError:
                # binascii.Error (bad padding) is a ValueError subclass.
                exit()
            if b"<?php" in pic or b'eval' in pic:
                print("Hacker!!This is not WEB,It`s Just a misc!!!")
                exit()
            salt = str(random.getrandbits(15))
            pic_name = 'tmp_'+salt+'.png'
            with open(pic_name,'wb') as tmp_pic:
                tmp_pic.write(pic)
            try:
                if check(pic_name)>=100000:
                    print('Don`t give me the horse source picture!!!')
                    break
                k = 1
                # Two rows (horse, deer); each row is one image's flattened
                # pixels followed by its label, 5185 values in total.
                trainingSet = np.append(ma, lu).reshape(2, 5185)
                neighbors = getNeighbors(trainingSet, testSet[0], k)
                result = getResponse(neighbors)
                # Compare numerically: repr() of a NumPy scalar is
                # 'np.int64(0)' on NumPy >= 2, so the old string test on
                # repr(result) could never match there.
                if int(result) == 0:
                    os.system('clear')
                    print('Yes,I want this horse like deer,here is your flag encoded by base64')
                    print(flag.decode())
                else:
                    print('I want horse but not deer!!!')
                break
            finally:
                # Always delete the upload, including when check() exits.
                os.remove(pic_name)
        else:
            print('wrong choose!!!')
            break
    exit()
if __name__=='__main__':
    main()
```

1. 检查选手上传的图片与 `deer` 的像素差是否小于 100000。如果超过限制，则报告错误。

2. 求选手图片与 `deer`、`horse` 的欧几里得距离。离谁更近，就判定为哪个分类。

3. 如果选手图片被判定为马，则选手获胜。

`deer` 和 `horse` 都是灰度图，如下：

### 尝试：随机噪声

```python
for y in range(b):
    for x in range(a):
        diff_pixel += abs(source_p.getpixel((x, y)) - c_p.getpixel((x, y)))
return diff_pixel
```

## 欺骗白盒神经网络

CTF 中的欺骗神经网络题一般如下：给定一个预训练好的分类模型（PyTorch 或者 TensorFlow），再给定一张原图。要求小幅度修改原图，使得神经网络将其误分类为另一个类别。

### 实践：训练神经网络

```python
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
# Transform applied to every sample: convert the PIL image to a tensor.
trans_to_tensor = transforms.Compose([
    transforms.ToTensor(),
])

# MNIST training and test splits, both read from ./data.
# NOTE(review): the listing had lost the closing parenthesis of both calls.
# No `download` argument is visible, so the files are assumed to be present
# in ./data already — pass download=True otherwise.
data_train = torchvision.datasets.MNIST(
    './data',
    train=True,
    transform=trans_to_tensor,
)
data_test = torchvision.datasets.MNIST(
    './data',
    train=False,
    transform=trans_to_tensor,
)
data_train, data_test
```

```train_loader = torch.utils.data.DataLoader(
data_train,
batch_size=100,
shuffle=True)```

```class MyNet(nn.Module):
def __init__(self):
super().__init__()
self.fc1 = nn.Linear(28*28, 100)
self.fc2 = nn.Linear(100, 10)
def forward(self, x):
x = x.view(-1, 28*28)
x = self.fc1(x)
x = F.relu(x)
x = self.fc2(x)
x = torch.sigmoid(x)
return x
net = MyNet()
```

```python
criterion = nn.CrossEntropyLoss()
```

```def fit(net, epoch=1):
net.train()
run_loss = 0
for num_epoch in range(epoch):
print(f'epoch {num_epoch}')
x, y = data[0], data[1]
outputs = net(x)
loss = criterion(outputs, y)
loss.backward()
optimizer.step()
run_loss += loss.item()
if i % 100 == 99:
print(f'[{i+1} / 600] loss={run_loss / 100}')
run_loss = 0
test(net)
def test(net):
net.eval()
x, y = test_data[0], test_data[1]
outputs = net(x)
pred = torch.max(outputs, 1)[1]
print(f'test acc: {sum(pred == y)} / {y.shape[0]}')
net.train()```

### 实践：欺骗白盒多层感知机

1. 将图片输入网络，得到网络输出。

1. 将网络输出与期望输出求 loss 值（这里采用交叉熵）。

1. 将图片像素减去自己的梯度 * alpha，不改变网络参数。

```def play(epoch):
loss_fn = nn.CrossEntropyLoss()   # 交叉熵损失函数
for num_epoch in range(epoch):
output = net(img)
target = torch.tensor([3])    # 误导网络，使之分类为 3
loss = loss_fn(output, target)
loss.backward()               # 计算梯度
if num_epoch % 10 == 9:
print(f'[{num_epoch + 1} / {epoch}] loss: {loss} pred: {torch.max(output, 1)[1].item()}')
if torch.max(output, 1)[1].item() == 3:
print(f'done in round {num_epoch + 1}')
return
img = origin.view(1, 28, 28)
play(100)
```