## CV计算机视觉核心07-目标检测

1、检测问题的输出是什么？怎么用数字来表示？

2、我们已经掌握了分类层的设计方法，是否有用？

3、其实我们有个隐藏技能：拟合层的设计方法。

Yolo:

fc=>fc => ( C, x, y, w, h, one-hot )

fc作用是提取全局特征。

Batch Normalization : BN层（可以解决离散的问题）

relu解决sigmoid的梯度离散问题，relu梯度为1，解决了梯度离散的问题。

Recall: TP(P表示positive，识别为正的；T表示true)

yolo的损失函数：

输出特征图大小为 13×13（416/32=13）。边长取奇数，图像中心会落在某个确定的网格中心点上。

13×13 的特征图做一次上采样得到 26×26，再跟骨干网络中间层原有的 26×26 特征图做一次融合拼接：

## 初始版本v0的yolo:

#### 用于创建自定义数据集的加载PennFudanDataset_main.py

```import os
import numpy as np
import torch
from PIL import Image
class PennFudanDataset(object):
    """Penn-Fudan pedestrian dataset.

    Each sample is (image, target) where target holds per-instance boxes,
    labels, masks-derived areas, etc. The original paste dropped the mask
    loading lines, so ``obj_ids`` and ``pos`` were NameErrors; they are
    restored here.
    """

    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image and mask files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    # dataset[idx] -> (image, target)
    def __getitem__(self, idx):
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        # convert the PIL Image into a numpy array
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        # split the color-encoded mask into a set of binary masks
        masks = mask == obj_ids[:, None, None]
        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class (pedestrian)
        labels = torch.ones((num_objs,), dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    # len(dataset)
    def __len__(self):
        return len(self.imgs)
import transforms as T
def get_transform(train):
transforms = []
transforms.append(T.ToTensor())
if train:
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)```

#### v0yolo_model.py

```#coding:utf-8
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import math
class VGG(nn.Module):
    """VGG16 backbone (convolutional features + 1000-way classifier head).

    forward() returns (logits, conv_features, pooled_features) so callers can
    reuse intermediate activations; extractor() returns only conv features.
    """

    def __init__(self):
        super(VGG, self).__init__()
        # VGG16 layout: ints are 3x3-conv output channels, 'M' is a 2x2 max-pool.
        cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
               512, 512, 512, 'M', 512, 512, 512, 'M']
        layers = []
        batch_norm = False
        in_channels = 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                # fixed: the original used conv2d without ever creating it
                # (NameError); 3x3 conv with padding 1 keeps the spatial size.
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
                if batch_norm:
                    # fixed: nn.BatchNorm2d was misspelled nn.Batchnorm2d
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        # use the vgg layers to get the feature map
        self.features = nn.Sequential(*layers)
        # fixed: forward() uses self.avgpool but it was never defined; adaptive
        # pooling maps any input size onto the fixed 7x7 grid the FC head expects.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # decision head: classification layers
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 1000),
        )
        # weight initialisation
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                # NOTE(review): bias init of 1 is unusual (0 is conventional);
                # kept as in the original — dead branch while batch_norm=False.
                nn.init.constant_(m.bias, 1)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return (logits, conv feature map, 7x7 pooled feature map)."""
        x = self.features(x)
        x_fea = x
        x = self.avgpool(x)
        x_avg = x
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x, x_fea, x_avg

    def extractor(self, x):
        """Return only the convolutional feature map."""
        x = self.features(x)
        return x
class YOLOV0(nn.Module):
    """Minimal 'v0' YOLO: VGG features + an FC head predicting ONE box.

    Output shape is (batch, 1, 1, 5) with layout [c, x, y, w, h].
    """

    def __init__(self):
        super(YOLOV0, self).__init__()
        vgg = VGG()
        self.extractor = vgg.extractor
        # fixed: forward() calls self.avgpool but it was never defined here;
        # this pooling plays the role of a crude ROI-pooling onto a 7x7 grid.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # decision head: detection layer
        self.detector = nn.Sequential(
            # input must match the flattened pooled features: 512*7*7 = 25088
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # 5 outputs = [c, x, y, w, h] for the single predicted box
            nn.Linear(4096, 5),
        )
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 1)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # VGG feature extraction
        x = self.extractor(x)
        # pool to 7x7 (acts like ROI pooling over the whole image)
        x = self.avgpool(x)
        # flatten to (batch, 25088)
        x = x.view(x.size(0), -1)
        x = self.detector(x)
        b, _ = x.shape
        # v0 detects a single object: one [c,x,y,w,h] vector per image
        x = x.view(b, 1, 1, 5)
        return x

if __name__ == '__main__':
vgg = VGG()
# 这里的x是随机生成的
x  = torch.randn(1,3,512,512)
# 将x输入到vgg模型中
feature,x_fea,x_avg = vgg(x)
# 打印输出结果
# torch.Size([1, 1000])
# torch.Size([1, 512, 16, 16])
# torch.Size([1, 512, 7, 7])
print(feature.shape)
print(x_fea.shape)
print(x_avg.shape)

yolov0 = YOLOV0()
# 注意这里是yolo的初始版本，1*1*1*5 其中5表示[c,x,y,w,h]
feature = yolov0(x)
# feature_size b*7*7*30
# torch.Size([1, 1, 1, 5])
print(feature.shape)
print(feature)```

#### v0yolotrain.py

```#coding:utf-8
from PennFudanDataset_main import *
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from v0yolo_model import *
import cv2
import numpy as np
import time
import sys
import os
## Data handling
# NOTE(review): 'zhaomingming' is a placeholder — point this at a real
# PennFudanPed checkout (containing PNGImages/ and PedMasks/) before running.
datapath = 'zhaomingming'
dataset = PennFudanDataset(datapath, get_transform(train=False))
dataset_test = PennFudanDataset(datapath, get_transform(train=False))
# random permutation of indices, used below to pick a small test subset
indices = torch.randperm(len(dataset)).tolist()
# train on a single fixed image (index 0) so the model can overfit one box
dataset = torch.utils.data.Subset(dataset, [0])
dataset_test = torch.utils.data.Subset(dataset_test, indices[0:2])
def collate_fn(batch):
    """Transpose a list of (image, target) pairs into (images, targets) tuples."""
    images_and_targets = zip(*batch)
    return tuple(images_and_targets)
# define training and validation data loaders
# fixed: the original dropped the `DataLoader(` call lines, leaving bare
# argument lists (a SyntaxError); the calls are restored here.
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=False, num_workers=1,
    collate_fn=collate_fn)
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=2, shuffle=False, num_workers=4,
    collate_fn=collate_fn)
def input_process(batch):
    """Resize every image in the batch to 3x448x448 and stack into one tensor.

    batch[0] is the tuple of CHW image tensors produced by collate_fn.
    Returns a float tensor of shape (batch, 3, 448, 448).
    """
    batch_size = len(batch[0])
    input_batch = torch.zeros(batch_size, 3, 448, 448)
    for i in range(batch_size):
        # fixed: the original wrapped the image in Variable, which is never
        # imported in this file (NameError) and is deprecated anyway;
        # use the tensor directly.
        img = batch[0][i]
        # CHW -> HWC numpy for cv2.resize, then back to CHW
        resized = cv2.resize(img.permute([1, 2, 0]).numpy(), (448, 448))
        input_batch[i] = torch.tensor(resized).permute([2, 0, 1])
    return input_batch
#batch[1][0]['boxes'][0]
def target_process(batch):
    """Build the v0 training target: one [c, x1, y1, x2, y2] vector per image.

    Only the first annotated box of each image is used; coordinates are
    normalised by the image size and the confidence is fixed to 1.
    Returns a tensor of shape (batch, 1, 1, 5).
    """
    n = len(batch[0])
    target_batch = torch.zeros(n, 1, 1, 5)
    for i in range(n):
        # batch[0] holds images, batch[1] holds their annotation dicts
        first_box = batch[1][i]['boxes'][0]
        _, img_h, img_w = batch[0][i].numpy().shape
        # normalise box corners into [0, 1]
        scale = torch.tensor([img_w, img_h, img_w, img_h])
        first_box = first_box / scale
        # prepend a confidence of 1: [c, x1, y1, x2, y2]
        target_vec = torch.cat([torch.ones(1), first_box])
        target_batch[i] = target_vec.view(1, 1, 5)
    return target_batch

# Hyper-parameters
num_classes = 2
n_class    = 2
batch_size = 6
epochs     = 500
lr         = 1e-3
momentum   = 0
w_decay    = 1e-5
step_size  = 50
gamma      = 0.5
# Build the model
yolov0_model = YOLOV0()
# Optimiser: plain SGD, and only over the detection head — the VGG backbone
# parameters are simply not handed to the optimiser, so they stay frozen.
optimizer = optim.SGD(yolov0_model.detector.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay)
# Learning-rate schedule: multiply the LR by `gamma` every `step_size` epochs.
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)  # decay LR by a factor of 0.5 every 50 epochs
# Matrix-form loss: compact, but less readable than lossfunc_details below.
def lossfunc(outputs, labels):
    """Sum of squared errors between prediction and target tensors.

    Fixed: the original computed the squared error into a local but never
    returned anything (the function silently returned None).
    """
    return ((outputs - labels) ** 2).sum()
# Element-by-element loss: verbose, but easy to read and debug.
def lossfunc_details(outputs, labels):
    """Per-cell squared-error loss over [confidence, x, y, w, h] vectors.

    Geometry error is weighted by 0.3 relative to the confidence error.
    """
    assert ( outputs.shape == labels.shape),"outputs shape[%s] not equal labels shape[%s]"%(outputs.shape,labels.shape)
    b, w, h, c = outputs.shape  # [b,1,1,5]
    loss = 0
    # accumulate the loss of every cell of every image in the batch
    for bi in range(b):
        for wi in range(w):
            for hi in range(h):
                pred = outputs[bi, wi, hi]  # [confidence, x, y, w, h]
                gt = labels[bi, wi, hi]
                conf_loss = (pred[0] - gt[0]) ** 2
                geo_loss = ((pred[1:] - gt[1:]) ** 2).sum()
                loss = loss + conf_loss + 0.3 * geo_loss
    return loss
# train
def train():
    """Standard SGD loop: overfit the single training image in data_loader.

    Fixed: the original referenced `batch` and `iter` without ever iterating
    the data loader, and never called optimizer.zero_grad() despite the
    comment announcing it.
    """
    for epoch in range(epochs):
        ts = time.time()
        for it, batch in enumerate(data_loader):
            # clear gradients before each backward pass
            optimizer.zero_grad()
            # images resized/stacked to (b,3,448,448)
            inputs = input_process(batch)
            # targets laid out exactly like the model output: (b,1,1,5)
            labels = target_process(batch)
            outputs = yolov0_model(inputs)
            # loss between prediction and ground truth
            loss = lossfunc_details(outputs, labels)
            loss.backward()
            optimizer.step()
            if it % 10 == 0:
                print("epoch{}, iter{}, loss: {}, lr: {}".format(epoch, it, loss.data.item(), optimizer.state_dict()['param_groups'][0]['lr']))
        scheduler.step()
# inference
def val(epoch):
    """Evaluation pass over the test loader.

    Fixes: this file defines yolov0_model (not yolov1_model), and
    target_process returns a single tensor (the original unpacked two values).
    """
    yolov0_model.eval()
    total_ious = []
    pixel_accs = []
    for batch in data_loader_test:
        inputs = input_process(batch)
        target = target_process(batch)
        output = yolov0_model(inputs)
        output = output.data.cpu().numpy()
        N, _, h, w = output.shape
        # NOTE(review): this reshape is FCN-style leftover code — output is
        # (N,1,1,5) and 5 is not divisible by n_class=2, so this line would
        # fail if reached; kept for parity with the original, verify intent.
        pred = output.transpose(0, 2, 3, 1).reshape(-1, n_class).argmax(axis=1).reshape(N, h, w)
if __name__ == "__main__":
train()```

## v1版本的yolo:

#### 用于创建自定义数据集的加载PennFudanDataset_main.py

```import os
import numpy as np
import torch
from PIL import Image
class PennFudanDataset(object):
    """Penn-Fudan pedestrian dataset.

    Each sample is (image, target) where target holds per-instance boxes,
    labels, masks-derived areas, etc. The original paste dropped the mask
    loading lines, so ``obj_ids`` and ``pos`` were NameErrors; they are
    restored here.
    """

    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image and mask files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    # dataset[idx] -> (image, target)
    def __getitem__(self, idx):
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        # convert the PIL Image into a numpy array
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        # split the color-encoded mask into a set of binary masks
        masks = mask == obj_ids[:, None, None]
        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class (pedestrian)
        labels = torch.ones((num_objs,), dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    # len(dataset)
    def __len__(self):
        return len(self.imgs)
import transforms as T
def get_transform(train):
transforms = []
transforms.append(T.ToTensor())
if train:
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)```

#### v1yolomodel.py

```#coding:utf-8
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import math
class VGG(nn.Module):
    """VGG16 backbone (convolutional features + 1000-way classifier head).

    forward() returns (logits, conv_features, pooled_features) so callers can
    reuse intermediate activations; extractor() returns only conv features.
    """

    def __init__(self):
        super(VGG, self).__init__()
        # VGG16 layout: ints are 3x3-conv output channels, 'M' is a 2x2 max-pool.
        cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
               512, 512, 512, 'M', 512, 512, 512, 'M']
        layers = []
        batch_norm = False
        in_channels = 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                # fixed: the original used conv2d without ever creating it
                # (NameError); 3x3 conv with padding 1 keeps the spatial size.
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
                if batch_norm:
                    # fixed: nn.BatchNorm2d was misspelled nn.Batchnorm2d
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        # use the vgg layers to get the feature map
        self.features = nn.Sequential(*layers)
        # fixed: forward() uses self.avgpool but it was never defined; adaptive
        # pooling maps any input size onto the fixed 7x7 grid the FC head expects.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # decision head: classification layers
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 1000),
        )
        # weight initialisation
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                # NOTE(review): bias init of 1 is unusual (0 is conventional);
                # kept as in the original — dead branch while batch_norm=False.
                nn.init.constant_(m.bias, 1)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return (logits, conv feature map, 7x7 pooled feature map)."""
        x = self.features(x)
        x_fea = x
        x = self.avgpool(x)
        x_avg = x
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x, x_fea, x_avg

    def extractor(self, x):
        """Return only the convolutional feature map."""
        x = self.features(x)
        return x
class YOLOV1(nn.Module):
    """v1 YOLO head on a VGG16 backbone: predicts a 7x7 grid of boxes.

    Output shape is (batch, 7, 7, 5): one [c, x, y, w, h] vector per cell.
    """

    def __init__(self):
        super(YOLOV1, self).__init__()
        vgg = VGG()
        self.extractor = vgg.extractor
        # fixed: forward() calls self.avgpool but it was never defined;
        # adaptive pooling gives the fixed 7x7 grid the linear head expects.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # decision head: detection layer
        self.detector = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # 245 = 7*7*5: one [c,x,y,w,h] vector per grid cell
            nn.Linear(4096, 245),
        )
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 1)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # VGG feature extraction
        x = self.extractor(x)
        # pool to the fixed 7x7 grid, then flatten for the FC head
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        # removed a leftover debug print of the detector output shape here
        x = self.detector(x)
        b, _ = x.shape
        # grid layout: only boxes, no class scores in this version
        x = x.view(b, 7, 7, 5)
        return x

if __name__ == '__main__':
vgg = VGG()
x  = torch.randn(1,3,512,512)
feature,x_fea,x_avg = vgg(x)
print(feature.shape)
print(x_fea.shape)
print(x_avg.shape)

yolov1 = YOLOV1()
feature = yolov1(x)
# feature_size b*7*7*30
# feature.shape: torch.Size([1, 7, 7, 5])
print('feature.shape:',feature.shape)
print(feature)```

#### v1yolotrain.py

```#coding:utf-8
from PennFudanDataset_main import *
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from v1yolomodel import *
import cv2
import numpy as np
import time
import sys
import os
## Data handling
# Path to the PennFudanPed dataset root (contains PNGImages/ and PedMasks/).
datapath='/Users/zhaomingming/data_sets/PennFudanPed'
dataset = PennFudanDataset(datapath, get_transform(train=False))
dataset_test = PennFudanDataset(datapath, get_transform(train=False))
# random permutation of indices, used below to pick a small test subset
indices = torch.randperm(len(dataset)).tolist()
# train on a single fixed image (index 0) so the model can overfit one box
dataset = torch.utils.data.Subset(dataset, [0])
dataset_test = torch.utils.data.Subset(dataset_test, indices[0:2])
def collate_fn(batch):
    """Transpose a list of (image, target) pairs into (images, targets) tuples."""
    images_and_targets = zip(*batch)
    return tuple(images_and_targets)
# define training and validation data loaders
# fixed: the original dropped the `DataLoader(` call lines, leaving bare
# argument lists (a SyntaxError); the calls are restored here.
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=False, num_workers=1,
    collate_fn=collate_fn)
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=2, shuffle=False, num_workers=4,
    collate_fn=collate_fn)
def input_process(batch):
    """Resize every image in the batch to 3x448x448 and stack into one tensor.

    batch[0] is the tuple of CHW image tensors produced by collate_fn.
    Returns a float tensor of shape (batch, 3, 448, 448).
    """
    batch_size = len(batch[0])
    input_batch = torch.zeros(batch_size, 3, 448, 448)
    for i in range(batch_size):
        # fixed: the original wrapped the image in Variable, which is never
        # imported in this file (NameError) and is deprecated anyway;
        # use the tensor directly.
        img = batch[0][i]
        # CHW -> HWC numpy for cv2.resize, then back to CHW
        resized = cv2.resize(img.permute([1, 2, 0]).numpy(), (448, 448))
        input_batch[i] = torch.tensor(resized).permute([2, 0, 1])
    return input_batch
#batch[1][0]['boxes'][0]
def target_process(batch, grid_number=7):
    """Build the v1 grid target: a (batch, grid, grid, 5) tensor.

    For every annotated box, the cell containing the box centre gets
    confidence 1 and the normalised box corners, i.e. [1, x1, y1, x2, y2];
    every other cell stays zero.

    Fixes/cleanup vs the original: the grid_number x grid_number scan that
    re-normalised every box for every cell is replaced by computing the
    responsible cell index directly (same result, O(1) per box), and a box
    centre landing exactly on a grid line is now assigned to exactly one cell
    instead of two.
    """
    batch_size = len(batch[0])
    target_batch = torch.zeros(batch_size, grid_number, grid_number, 5)
    for i in range(batch_size):
        # batch[1] holds the annotation dicts, batch[0] the image tensors
        batch_labels = batch[1][i]
        number_box = len(batch_labels['boxes'])
        _, himg, wimg = batch[0][i].numpy().shape
        for bi in range(number_box):
            # normalise the box corners into [0, 1]
            bbox = batch_labels['boxes'][bi] / torch.tensor([wimg, himg, wimg, himg])
            # the box centre decides which cell is responsible for the object
            center_x = (bbox[0] + bbox[2]) * 0.5
            center_y = (bbox[1] + bbox[3]) * 0.5
            # clamp so that a centre at exactly 1.0 falls in the last cell
            wi = min(int(center_x * grid_number), grid_number - 1)
            hi = min(int(center_y * grid_number), grid_number - 1)
            # confidence 1 + normalised box: [c, x1, y1, x2, y2]
            cbbox = torch.cat([torch.ones(1), bbox])
            target_batch[i, wi, hi, :] = cbbox
    return target_batch

# Hyper-parameters
num_classes = 2
n_class    = 2
batch_size = 6
epochs     = 500
lr         = 1e-3
momentum   = 0
w_decay    = 1e-5
step_size  = 50
gamma      = 0.5
# Build the model
yolov1_model = YOLOV1()
# fixed: removed a stray module-level `import pdb; pdb.set_trace()` — it
# dropped every run into the debugger before training could even start.
# Optimiser: plain SGD over the detection head only (backbone stays frozen
# simply by not handing its parameters to the optimiser).
optimizer = optim.SGD(yolov1_model.detector.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay)
# Learning-rate schedule: multiply the LR by `gamma` every `step_size` epochs.
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)  # decay LR by a factor of 0.5 every 50 epochs
# Matrix-form loss: compact, but less readable than lossfunc_details below.
def lossfunc(outputs, labels):
    """Sum of squared errors between prediction and target tensors.

    Fixed: the original computed the squared error into a local but never
    returned anything (the function silently returned None).
    """
    return ((outputs - labels) ** 2).sum()
# Element-by-element loss: verbose, but easy to read and debug.
def lossfunc_details(outputs, labels):
    """Per-cell squared-error loss; geometry term gated by the GT confidence.

    Returns (total_loss, loss_matrix, geo_loss_matrix, conf_loss_matrix),
    where each matrix has shape (batch, grid_w, grid_h).
    """
    assert ( outputs.shape == labels.shape),"outputs shape[%s] not equal labels shape[%s]"%(outputs.shape,labels.shape)
    b, w, h, c = outputs.shape
    loss = 0
    conf_loss_matrix = torch.zeros(b, w, h)
    geo_loss_matrix = torch.zeros(b, w, h)
    loss_matrix = torch.zeros(b, w, h)

    for bi in range(b):
        for wi in range(w):
            for hi in range(h):
                pred = outputs[bi, wi, hi]  # [confidence, x, y, w, h]
                gt = labels[bi, wi, hi]
                conf_loss = (pred[0] - gt[0]) ** 2
                geo = ((pred[1:] - gt[1:]) ** 2).sum()
                # only the cell responsible for an object (gt confidence 1)
                # pays the geometry loss
                geo = gt[0] * geo
                cell_loss = conf_loss + 0.3 * geo
                loss += cell_loss
                conf_loss_matrix[bi, wi, hi] = conf_loss
                geo_loss_matrix[bi, wi, hi] = geo
                loss_matrix[bi, wi, hi] = cell_loss
    # debug trace: per-cell geometry loss, and which cells fire (conf > 0.5)
    print(geo_loss_matrix)
    print(outputs[0, :, :, 0] > 0.5)
    return loss, loss_matrix, geo_loss_matrix, conf_loss_matrix
# train
def train():
    """Standard SGD loop over data_loader.

    Fixed: the original referenced `batch` and `iter` without ever iterating
    the data loader, and never called optimizer.zero_grad().
    """
    for epoch in range(epochs):
        ts = time.time()
        for it, batch in enumerate(data_loader):
            # clear gradients before each backward pass
            optimizer.zero_grad()
            # images resized/stacked to (b,3,448,448)
            inputs = input_process(batch)
            # grid targets laid out exactly like the model output: (b,7,7,5)
            labels = target_process(batch)
            outputs = yolov1_model(inputs)
            # lossfunc_details returns (loss, loss/geo/conf matrices)
            loss, lm, glm, clm = lossfunc_details(outputs, labels)
            loss.backward()
            optimizer.step()
            if it % 10 == 0:
                print("epoch{}, iter{}, loss: {}, lr: {}".format(epoch, it, loss.data.item(), optimizer.state_dict()['param_groups'][0]['lr']))
        scheduler.step()
# inference
def val(epoch):
yolov1_model.eval()
total_ious = []
pixel_accs = []