## DOTA数据集简介

DOTA数据集全称：Dataset for Object deTection in Aerial images

DOTA数据集v1.0共收录2806张4000 × 4000的图片，总共包含188282个目标。

DOTA数据集论文介绍：https://arxiv.org/pdf/1711.10398.pdf

DOTA数据集总共有3个版本

## DOTAV2.0

https://github.com/zstar1003/Dataset

## 图片分割

gap：两个子图的重合宽度
subsize：子图大小
num_process：线程数

```import os
import codecs
import numpy as np
import math
from dota_utils import GetFileFromThisRootDir
import cv2
import shapely.geometry as shgeo
import dota_utils as util
import copy
from multiprocessing import Pool
from functools import partial
import time
def choose_best_pointorder_fit_another(poly1, poly2):
"""
To make the two polygons best fit with each point
"""
x1 = poly1[0]
y1 = poly1[1]
x2 = poly1[2]
y2 = poly1[3]
x3 = poly1[4]
y3 = poly1[5]
x4 = poly1[6]
y4 = poly1[7]
combinate = [np.array([x1, y1, x2, y2, x3, y3, x4, y4]), np.array([x2, y2, x3, y3, x4, y4, x1, y1]),
np.array([x3, y3, x4, y4, x1, y1, x2, y2]), np.array([x4, y4, x1, y1, x2, y2, x3, y3])]
dst_coordinate = np.array(poly2)
distances = np.array([np.sum((coord - dst_coordinate) ** 2) for coord in combinate])
sorted = distances.argsort()
return combinate[sorted[0]]
def cal_line_length(point1, point2):
return math.sqrt(math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1], 2))
def split_single_warp(name, split_base, rate, extent):
split_base.SplitSingle(name, rate, extent)
class splitbase():
def __init__(self,
basepath,
outpath,
code='utf-8',
gap=512,
subsize=1024,
thresh=0.7,
choosebestpoint=True,
ext='.png',
num_process=8
):
"""
:param basepath: base path for dota data
:param outpath: output base path for dota data,
the basepath and outputpath have the similar subdirectory, 'images' and 'labelTxt'
:param code: encodeing format of txt file
:param gap: overlap between two patches
:param subsize: subsize of patch
:param thresh: the thresh determine whether to keep the instance if the instance is cut down in the process of split
:param choosebestpoint: used to choose the first point for the
:param ext: ext for the image format
:param padding: if to padding the images so that all the images have the same size
"""
self.basepath = basepath
self.outpath = outpath
self.code = code
self.gap = gap
self.subsize = subsize
self.slide = self.subsize - self.gap
self.thresh = thresh
self.imagepath = os.path.join(self.basepath, 'images')
self.labelpath = os.path.join(self.basepath, 'labelTxt')
self.outimagepath = os.path.join(self.outpath, 'images')
self.outlabelpath = os.path.join(self.outpath, 'labelTxt')
self.choosebestpoint = choosebestpoint
self.ext = ext
self.num_process = num_process
self.pool = Pool(num_process)
# pdb.set_trace()
if not os.path.isdir(self.outpath):
os.mkdir(self.outpath)
if not os.path.isdir(self.outimagepath):
# pdb.set_trace()
os.mkdir(self.outimagepath)
if not os.path.isdir(self.outlabelpath):
os.mkdir(self.outlabelpath)
# pdb.set_trace()
## point: (x, y), rec: (xmin, ymin, xmax, ymax)
# def __del__(self):
#     self.f_sub.close()
## grid --> (x, y) position of grids
def polyorig2sub(self, left, up, poly):
polyInsub = np.zeros(len(poly))
for i in range(int(len(poly) / 2)):
polyInsub[i * 2] = int(poly[i * 2] - left)
polyInsub[i * 2 + 1] = int(poly[i * 2 + 1] - up)
return polyInsub
def calchalf_iou(self, poly1, poly2):
"""
It is not the iou on usual, the iou is the value of intersection over poly1
"""
inter_poly = poly1.intersection(poly2)
inter_area = inter_poly.area
poly1_area = poly1.area
half_iou = inter_area / poly1_area
return inter_poly, half_iou
def saveimagepatches(self, img, subimgname, left, up):
subimg = copy.deepcopy(img[up: (up + self.subsize), left: (left + self.subsize)])
outdir = os.path.join(self.outimagepath, subimgname + self.ext)
h, w, c = np.shape(subimg)
outimg = np.zeros((self.subsize, self.subsize, 3))
outimg[0:h, 0:w, :] = subimg
cv2.imwrite(outdir, outimg)
else:
cv2.imwrite(outdir, subimg)
def GetPoly4FromPoly5(self, poly):
distances = [cal_line_length((poly[i * 2], poly[i * 2 + 1]), (poly[(i + 1) * 2], poly[(i + 1) * 2 + 1])) for i
in range(int(len(poly) / 2 - 1))]
distances.append(cal_line_length((poly[0], poly[1]), (poly[8], poly[9])))
pos = np.array(distances).argsort()[0]
count = 0
outpoly = []
while count < 5:
# print('count:', count)
if (count == pos):
outpoly.append((poly[count * 2] + poly[(count * 2 + 2) % 10]) / 2)
outpoly.append((poly[(count * 2 + 1) % 10] + poly[(count * 2 + 3) % 10]) / 2)
count = count + 1
elif (count == (pos + 1) % 5):
count = count + 1
continue
else:
outpoly.append(poly[count * 2])
outpoly.append(poly[count * 2 + 1])
count = count + 1
return outpoly
def savepatches(self, resizeimg, objects, subimgname, left, up, right, down):
outdir = os.path.join(self.outlabelpath, subimgname + '.txt')
imgpoly = shgeo.Polygon([(left, up), (right, up), (right, down),
(left, down)])
with codecs.open(outdir, 'w', self.code) as f_out:
for obj in objects:
gtpoly = shgeo.Polygon([(obj['poly'][0], obj['poly'][1]),
(obj['poly'][2], obj['poly'][3]),
(obj['poly'][4], obj['poly'][5]),
(obj['poly'][6], obj['poly'][7])])
if (gtpoly.area <= 0):
continue
inter_poly, half_iou = self.calchalf_iou(gtpoly, imgpoly)
# print('writing...')
if (half_iou == 1):
polyInsub = self.polyorig2sub(left, up, obj['poly'])
outline = ' '.join(list(map(str, polyInsub)))
outline = outline + ' ' + obj['name'] + ' ' + str(obj['difficult'])
f_out.write(outline + '
')
elif (half_iou > 0):
# elif (half_iou > self.thresh):
##  print('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
inter_poly = shgeo.polygon.orient(inter_poly, sign=1)
out_poly = list(inter_poly.exterior.coords)[0: -1]
if len(out_poly) < 4:
continue
out_poly2 = []
for i in range(len(out_poly)):
out_poly2.append(out_poly[i][0])
out_poly2.append(out_poly[i][1])
if (len(out_poly) == 5):
# print('==========================')
out_poly2 = self.GetPoly4FromPoly5(out_poly2)
elif (len(out_poly) > 5):
"""
if the cut instance is a polygon with points more than 5, we do not handle it currently
"""
continue
if (self.choosebestpoint):
out_poly2 = choose_best_pointorder_fit_another(out_poly2, obj['poly'])
polyInsub = self.polyorig2sub(left, up, out_poly2)
for index, item in enumerate(polyInsub):
if (item <= 1):
polyInsub[index] = 1
elif (item >= self.subsize):
polyInsub[index] = self.subsize
outline = ' '.join(list(map(str, polyInsub)))
if (half_iou > self.thresh):
outline = outline + ' ' + obj['name'] + ' ' + str(obj['difficult'])
else:
## if the left part is too small, label as '2'
outline = outline + ' ' + obj['name'] + ' ' + '2'
f_out.write(outline + '
')
# else:
self.saveimagepatches(resizeimg, subimgname, left, up)
def SplitSingle(self, name, rate, extent):
"""
split a single image and ground truth
:param name: image name
:param rate: the resize scale for the image
:param extent: the image format
:return:
"""
img = cv2.imread(os.path.join(self.imagepath, name + extent))
if np.shape(img) == ():
return
fullname = os.path.join(self.labelpath, name + '.txt')
objects = util.parse_dota_poly2(fullname)
for obj in objects:
obj['poly'] = list(map(lambda x: rate * x, obj['poly']))
# obj['poly'] = list(map(lambda x: ([2 * y for y in x]), obj['poly']))
if (rate != 1):
resizeimg = cv2.resize(img, None, fx=rate, fy=rate, interpolation=cv2.INTER_CUBIC)
else:
resizeimg = img
outbasename = name + '__' + str(rate) + '__'
weight = np.shape(resizeimg)[1]
height = np.shape(resizeimg)[0]
left, up = 0, 0
while (left < weight):
if (left + self.subsize >= weight):
left = max(weight - self.subsize, 0)
up = 0
while (up < height):
if (up + self.subsize >= height):
up = max(height - self.subsize, 0)
right = min(left + self.subsize, weight - 1)
down = min(up + self.subsize, height - 1)
subimgname = outbasename + str(left) + '___' + str(up)
# self.f_sub.write(name + ' ' + subimgname + ' ' + str(left) + ' ' + str(up) + '
')
self.savepatches(resizeimg, objects, subimgname, left, up, right, down)
if (up + self.subsize >= height):
break
else:
up = up + self.slide
if (left + self.subsize >= weight):
break
else:
left = left + self.slide
def splitdata(self, rate):
"""
:param rate: resize rate before cut
"""
imagelist = GetFileFromThisRootDir(self.imagepath)
imagenames = [util.custombasename(x) for x in imagelist if (util.custombasename(x) != 'Thumbs')]
if self.num_process == 1:
for name in imagenames:
self.SplitSingle(name, rate, self.ext)
else:
# worker = partial(self.SplitSingle, rate=rate, extent=self.ext)
worker = partial(split_single_warp, split_base=self, rate=rate, extent=self.ext)
self.pool.map(worker, imagenames)
def __getstate__(self):
self_dict = self.__dict__.copy()
del self_dict['pool']
return self_dict
def __setstate__(self, state):
self.__dict__.update(state)
if __name__ == '__main__':
split = splitbase('D:/Dataset/DOTA-v2.0/train',
'D:/Dataset/DOTA-v2.0/trainsplit',
gap=200,
subsize=1024,
num_process=8
)
split.splitdata(1)
split = splitbase('D:/Dataset/DOTA-v2.0/val',
'D:/Dataset/DOTA-v2.0/valsplit',
gap=200,
subsize=1024,
num_process=8
)
split.splitdata(1)```

## 标签转换

DOTA数据集的标签并不符合YOLO的要求，需要进行转换，如下图所示，需要将左侧的原始标签转换成右侧的YOLO格式。

YOLO_Transform.py

```import dota_utils as util
import os
import numpy as np
from PIL import Image
from PIL import ImageFile
Image.MAX_IMAGE_PIXELS = None
## trans dota format to format YOLO(darknet) required
def dota2darknet(imgpath, txtpath, dstpath, extractclassname):
"""
:param imgpath: the path of images
:param txtpath: the path of txt in dota format
:param dstpath: the path of txt in YOLO format
:param extractclassname: the category you selected
:return:
"""
filelist = util.GetFileFromThisRootDir(txtpath)
for fullname in filelist:
objects = util.parse_dota_poly(fullname)
name = os.path.splitext(os.path.basename(fullname))[0]
img_fullname = os.path.join(imgpath, name + '.png')
img = Image.open(img_fullname)
img_w, img_h = img.size
# print img_w,img_h
with open(os.path.join(dstpath, name + '.txt'), 'w') as f_out:
for obj in objects:
poly = obj['poly']
bbox = np.array(util.dots4ToRecC(poly, img_w, img_h))
if (sum(bbox <= 0) + sum(bbox >= 1)) >= 1:
continue
if (obj['name'] in extractclassname):
id = extractclassname.index(obj['name'])
else:
continue
outline = str(id) + ' ' + ' '.join(list(map(str, bbox)))
f_out.write(outline + '
')
if __name__ == '__main__':
util.wordname_18)
util.wordname_18)```

dota_utils.py

```import sys
import codecs
import numpy as np
import shapely.geometry as shgeo
import os
import re
import math
"""
some basic functions which are useful for process DOTA data
"""
wordname_18 = [
'airport',
'small-vehicle',
'large-vehicle',
'plane',
'storage-tank',
'ship',
'harbor',
'ground-track-field',
'soccer-ball-field',
'tennis-court',
'swimming-pool',
'baseball-diamond',
'bridge',
'helicopter',
'container-crane',
def custombasename(fullname):
return os.path.basename(os.path.splitext(fullname)[0])
def GetFileFromThisRootDir(dir,ext = None):
allfiles = []
needExtFilter = (ext != None)
for root,dirs,files in os.walk(dir):
for filespath in files:
filepath = os.path.join(root, filespath)
extension = os.path.splitext(filepath)[1][1:]
if needExtFilter and extension in ext:
allfiles.append(filepath)
elif not needExtFilter:
allfiles.append(filepath)
return allfiles
def TuplePoly2Poly(poly):
outpoly = [poly[0][0], poly[0][1],
poly[1][0], poly[1][1],
poly[2][0], poly[2][1],
poly[3][0], poly[3][1]
]
return outpoly
def parse_dota_poly(filename):
"""
parse the dota ground truth in the format:
[(x1, y1), (x2, y2), (x3, y3), (x4, y4)]
"""
objects = []
#print('filename:', filename)
f = []
if (sys.version_info >= (3, 5)):
fd = open(filename, 'r')
f = fd
elif (sys.version_info >= 2.7):
fd = codecs.open(filename, 'r')
f = fd
# count = 0
while True:
# count = count + 1
# if count < 2:
#     continue
if line:
splitlines = line.strip().split(' ')
object_struct = {
}
### clear the wrong name after check all the data
#if (len(splitlines) >= 9) and (splitlines[8] in classname):
if (len(splitlines) < 9):
continue
if (len(splitlines) >= 9):
object_struct['name'] = splitlines[8]
if (len(splitlines) == 9):
object_struct['difficult'] = '0'
elif (len(splitlines) >= 10):
# if splitlines[9] == '1':
# if (splitlines[9] == 'tr'):
#     object_struct['difficult'] = '1'
# else:
object_struct['difficult'] = splitlines[9]
# else:
#     object_struct['difficult'] = 0
object_struct['poly'] = [(float(splitlines[0]), float(splitlines[1])),
(float(splitlines[2]), float(splitlines[3])),
(float(splitlines[4]), float(splitlines[5])),
(float(splitlines[6]), float(splitlines[7]))
]
gtpoly = shgeo.Polygon(object_struct['poly'])
object_struct['area'] = gtpoly.area
# poly = list(map(lambda x:np.array(x), object_struct['poly']))
# object_struct['long-axis'] = max(distance(poly[0], poly[1]), distance(poly[1], poly[2]))
# object_struct['short-axis'] = min(distance(poly[0], poly[1]), distance(poly[1], poly[2]))
# if (object_struct['long-axis'] < 15):
#     object_struct['difficult'] = '1'
#     global small_count
#     small_count = small_count + 1
objects.append(object_struct)
else:
break
return objects
def dots4ToRecC(poly, img_w, img_h):
xmin, ymin, xmax, ymax = dots4ToRec4(poly)
x = (xmin + xmax)/2
y = (ymin + ymax)/2
w = xmax - xmin
h = ymax - ymin
return x/img_w, y/img_h, w/img_w, h/img_h
def parse_dota_poly2(filename):
"""
parse the dota ground truth in the format:
[x1, y1, x2, y2, x3, y3, x4, y4]
"""
objects = parse_dota_poly(filename)
for obj in objects:
obj['poly'] = TuplePoly2Poly(obj['poly'])
obj['poly'] = list(map(int, obj['poly']))
return objects
def parse_dota_rec(filename):
"""
parse the dota ground truth in the bounding box format:
"xmin, ymin, xmax, ymax"
"""
objects = parse_dota_poly(filename)
for obj in objects:
poly = obj['poly']
bbox = dots4ToRec4(poly)
obj['bndbox'] = bbox
return objects
## bounding box transfer for varies format
def dots4ToRec4(poly):
xmin, xmax, ymin, ymax = min(poly[0][0], min(poly[1][0], min(poly[2][0], poly[3][0]))), \
max(poly[0][0], max(poly[1][0], max(poly[2][0], poly[3][0]))), \
min(poly[0][1], min(poly[1][1], min(poly[2][1], poly[3][1]))), \
max(poly[0][1], max(poly[1][1], max(poly[2][1], poly[3][1])))
return xmin, ymin, xmax, ymax
def dots4ToRec8(poly):
xmin, ymin, xmax, ymax = dots4ToRec4(poly)
return xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax
#return dots2ToRec8(dots4ToRec4(poly))
def dots2ToRec8(rec):
xmin, ymin, xmax, ymax = rec[0], rec[1], rec[2], rec[3]
return xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax
filelist = GetFileFromThisRootDir(srcpath)
# names = [custombasename(x.strip())for x in filelist]
filedict = {
}
for cls in wordname_15:
fd = open(os.path.join(dstpath, 'Task1_') + cls + r'.txt', 'w')
filedict[cls] = fd
for filepath in filelist:
objects = parse_dota_poly2(filepath)
subname = custombasename(filepath)
pattern2 = re.compile(r'__([\d+\.]+)__\d+___')
rate = re.findall(pattern2, subname)[0]
for obj in objects:
category = obj['name']
difficult = obj['difficult']
poly = obj['poly']
if difficult == '2':
continue
if rate == '0.5':
outline = custombasename(filepath) + ' ' + '1' + ' ' + ' '.join(map(str, poly))
elif rate == '1':
outline = custombasename(filepath) + ' ' + '0.8' + ' ' + ' '.join(map(str, poly))
elif rate == '2':
outline = custombasename(filepath) + ' ' + '0.6' + ' ' + ' '.join(map(str, poly))
filedict[category].write(outline + '
')
thresh = 0.1
filedict = {
}
# idname = datamap_inverse[idname]
for line in lines:
if len(line) == 0:
continue
# print('line:', line)
splitline = line.strip().split(' ')
filename = splitline[0]
confidence = splitline[1]
bbox = splitline[2:]
if float(confidence) > thresh:
if filename not in filedict:
# filedict[filename] = codecs.open(os.path.join(dstpath, filename + '.txt'), 'w', 'utf_16')
filedict[filename] = codecs.open(os.path.join(dstpath, filename + '.txt'), 'w')
# poly = util.dots2ToRec8(bbox)
poly = bbox
#               filedict[filename].write(' '.join(poly) + ' ' + idname + '_' + str(round(float(confidence), 2)) + '
')
# print('idname:', idname)
# filedict[filename].write(' '.join(poly) + ' ' + idname + '_' + str(round(float(confidence), 2)) + '
')
filedict[filename].write(' '.join(poly) + ' ' + idname + '
')
def polygonToRotRectangle(bbox):
"""
:param bbox: The polygon stored in format [x1, y1, x2, y2, x3, y3, x4, y4]
:return: Rotated Rectangle in format [cx, cy, w, h, theta]
"""
bbox = np.array(bbox,dtype=np.float32)
bbox = np.reshape(bbox,newshape=(2,4),order='F')
angle = math.atan2(-(bbox[0,1]-bbox[0,0]),bbox[1,1]-bbox[1,0])
center = [[0],[0]]
for i in range(4):
center[0] += bbox[0,i]
center[1] += bbox[1,i]
center = np.array(center,dtype=np.float32)/4.0
R = np.array([[math.cos(angle), -math.sin(angle)], [math.sin(angle), math.cos(angle)]], dtype=np.float32)
normalized = np.matmul(R.transpose(),bbox-center)
xmin = np.min(normalized[0,:])
xmax = np.max(normalized[0,:])
ymin = np.min(normalized[1,:])
ymax = np.max(normalized[1,:])
w = xmax - xmin + 1
h = ymax - ymin + 1
return [float(center[0]),float(center[1]),w,h,angle]```

## 图片格式转换

imagetrans.py

```import dota_utils as util
import cv2
import os
# this code is used to convert image formats
# from PNG to JPG
def imageformatTrans(srcpath, dstpath, format):
filelist = util.GetFileFromThisRootDir(srcpath)
for fullname in filelist:
basename = util.custombasename(fullname)
dstname = os.path.join(dstpath, basename + format)
cv2.imwrite(dstname, img)
if __name__ == '__main__':
# an example
imageformatTrans('path1', 'path2',
'.jpg')```

## 训练结果

YOLOv5l(未分割)13.5%5.52%
YOLOv5l(分割之后)33.5%18.6%