YOLOv2模型结构
YOLO的输入输出很复杂,Loss函数更加复杂,但是它的网络结构比较简单,只是 Convolution + Batch Normalization + Leaky ReLU 的重复叠加。
下面是 experiencor/keras-yolo2 定义的网络模型。
# NOTE(review): these module paths (keras.layers.merge,
# keras.layers.advanced_activations) look like the older standalone Keras 2.x
# layout -- confirm against the installed Keras version.
from keras.models import Sequential, Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.optimizers import SGD, Adam, RMSprop
from keras.layers.merge import concatenate
import keras.backend as K
import tensorflow as tf


# The function that implements the reorganization layer
# (thanks to github.com/allanzelener/YAD2K).
def space_to_depth_x2(x):
    """Move every 2x2 spatial block of ``x`` into the channel axis.

    Used on the skip connection so that its 26x26x64 feature map becomes
    13x13x256 and can be concatenated with the 13x13 main branch.

    ``tf.nn.space_to_depth`` is used instead of the top-level
    ``tf.space_to_depth`` alias because the alias does not exist in
    TensorFlow 2.x, while the ``tf.nn`` name is available in both 1.x and 2.x.
    """
    return tf.nn.space_to_depth(x, block_size=2)


def ConvBatchLReLu(x, filters, kernel_size, index, trainable):
    """Apply one Conv2D -> BatchNormalization -> LeakyReLU(0.1) unit to ``x``.

    Args:
        x: input tensor.
        filters: number of convolution output channels.
        kernel_size: convolution kernel size, e.g. ``(3, 3)``.
        index: layer number; the layers are named ``conv_<index>`` and
            ``norm_<index>``.
        trainable: whether the convolution and batch-norm weights are trainable.

    Returns:
        The activated output tensor.
    """
    # No bias on the convolution: the following BatchNormalization layer
    # supplies the shift term.
    x = Conv2D(filters, kernel_size,
               strides=(1, 1),
               padding='same',
               name='conv_{}'.format(index),
               use_bias=False,
               trainable=trainable)(x)
    x = BatchNormalization(name='norm_{}'.format(index), trainable=trainable)(x)
    x = LeakyReLU(alpha=0.1)(x)
    return x


def ConvBatchLReLu_loop(x, index, convstack, trainable):
    """Chain several ``ConvBatchLReLu`` units, numbering them from ``index``.

    Args:
        x: input tensor.
        index: layer number given to the first unit; each following unit gets
            the next consecutive number.
        convstack: iterable of dicts with ``"filters"`` and ``"kernel_size"``
            keys, one per unit, applied in order.
        trainable: forwarded to every unit.

    Returns:
        The output tensor of the last unit (``x`` itself if ``convstack`` is
        empty).
    """
    for layer_index, para in enumerate(convstack, start=index):
        x = ConvBatchLReLu(x, para["filters"], para["kernel_size"],
                           layer_index, trainable)
    return x


def define_YOLOv2(IMAGE_H, IMAGE_W, GRID_H, GRID_W, TRUE_BOX_BUFFER, BOX, CLASS,
                  trainable=False):
    """Build the YOLOv2 network as a Keras ``Model``.

    The network is 22 Conv-BatchNorm-LeakyReLU units with five 2x2 max-pools,
    a skip connection from layer 13 that is reorganized and concatenated with
    layer 20, and a final 1x1 convolution producing the detection tensor.

    Args:
        IMAGE_H, IMAGE_W: input image height and width in pixels.
        GRID_H, GRID_W: height and width of the output grid. The five 2x2
            poolings divide each spatial dimension by 32, so these are
            expected to equal IMAGE_H / 32 and IMAGE_W / 32.
        TRUE_BOX_BUFFER: size of the ground-truth box buffer axis of the
            auxiliary ``true_boxes`` input.
        BOX: number of anchor boxes per grid cell.
        CLASS: number of object classes.
        trainable: whether layers 1-22 are trainable. The final ``conv_23``
            layer is always created with Keras' default (trainable).

    Returns:
        ``(model, true_boxes)`` where ``model`` maps
        ``[input_image, true_boxes]`` to a tensor of shape
        ``(GRID_H, GRID_W, BOX, 4 + 1 + CLASS)`` per sample, and
        ``true_boxes`` is the auxiliary ``Input`` tensor.
    """
    convstack3to5 = [{"filters": 128, "kernel_size": (3, 3)},    # 3
                     {"filters": 64, "kernel_size": (1, 1)},     # 4
                     {"filters": 128, "kernel_size": (3, 3)}]    # 5

    convstack6to8 = [{"filters": 256, "kernel_size": (3, 3)},    # 6
                     {"filters": 128, "kernel_size": (1, 1)},    # 7
                     {"filters": 256, "kernel_size": (3, 3)}]    # 8

    convstack9to13 = [{"filters": 512, "kernel_size": (3, 3)},   # 9
                      {"filters": 256, "kernel_size": (1, 1)},   # 10
                      {"filters": 512, "kernel_size": (3, 3)},   # 11
                      {"filters": 256, "kernel_size": (1, 1)},   # 12
                      {"filters": 512, "kernel_size": (3, 3)}]   # 13

    convstack14to20 = [{"filters": 1024, "kernel_size": (3, 3)},  # 14
                       {"filters": 512, "kernel_size": (1, 1)},   # 15
                       {"filters": 1024, "kernel_size": (3, 3)},  # 16
                       {"filters": 512, "kernel_size": (1, 1)},   # 17
                       {"filters": 1024, "kernel_size": (3, 3)},  # 18
                       {"filters": 1024, "kernel_size": (3, 3)},  # 19
                       {"filters": 1024, "kernel_size": (3, 3)}]  # 20

    input_image = Input(shape=(IMAGE_H, IMAGE_W, 3), name="input_image")
    true_boxes = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER, 4), name="input_hack")

    # For every MaxPooling2D below: when strides = None, strides = pool_size,
    # so each pooling halves both spatial dimensions.

    # Layer 1
    x = ConvBatchLReLu(input_image, filters=32, kernel_size=(3, 3), index=1, trainable=trainable)
    x = MaxPooling2D(pool_size=(2, 2), name="maxpool1_416to208")(x)

    # Layer 2
    x = ConvBatchLReLu(x, filters=64, kernel_size=(3, 3), index=2, trainable=trainable)
    x = MaxPooling2D(pool_size=(2, 2), name="maxpool1_208to104")(x)

    # Layer 3 - 5
    x = ConvBatchLReLu_loop(x, 3, convstack3to5, trainable)
    x = MaxPooling2D(pool_size=(2, 2), name="maxpool1_104to52")(x)

    # Layer 6 - 8
    x = ConvBatchLReLu_loop(x, 6, convstack6to8, trainable)
    x = MaxPooling2D(pool_size=(2, 2), name="maxpool1_52to26")(x)

    # Layer 9 - 13
    x = ConvBatchLReLu_loop(x, 9, convstack9to13, trainable)

    # Keep the higher-resolution feature map for the pass-through branch.
    skip_connection = x
    x = MaxPooling2D(pool_size=(2, 2), name="maxpool1_26to13")(x)

    # Layer 14 - 20
    x = ConvBatchLReLu_loop(x, 14, convstack14to20, trainable)

    # Layer 21: shrink the skip branch to 64 channels, fold 2x2 blocks into
    # channels so its grid matches the main branch, then merge the two.
    skip_connection = ConvBatchLReLu(skip_connection, filters=64,
                                     kernel_size=(1, 1), index=21, trainable=trainable)
    skip_connection = Lambda(space_to_depth_x2)(skip_connection)

    x = concatenate([skip_connection, x])

    # Layer 22
    x = ConvBatchLReLu(x, filters=1024, kernel_size=(3, 3), index=22, trainable=trainable)

    # Layer 23: per anchor box, 4 box coordinates + 1 confidence + CLASS scores.
    x = Conv2D(BOX * (4 + 1 + CLASS), (1, 1), strides=(1, 1), padding='same', name='conv_23')(x)
    output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS), name="final_output")(x)

    # Small hack to allow true_boxes to be registered when Keras builds the model.
    # For more information: https://github.com/fchollet/keras/issues/2790
    output = Lambda(lambda args: args[0], name="hack_layer")([output, true_boxes])

    model = Model([input_image, true_boxes], output)
    return (model, true_boxes)


IMAGE_H, IMAGE_W = 416, 416
GRID_H, GRID_W = 13, 13
TRUE_BOX_BUFFER = 50
# NOTE(review): ANCHORS and LABELS are not defined in this snippet; they must
# already exist in the session. ANCHORS is presumably a flat sequence of
# (width, height) pairs, hence the division by two -- confirm.
BOX = len(ANCHORS) // 2
CLASS = len(LABELS)

## true_boxes is the tensor that takes "b_batch"
model, true_boxes = define_YOLOv2(IMAGE_H, IMAGE_W, GRID_H, GRID_W, TRUE_BOX_BUFFER, BOX, CLASS,
                                  trainable=False)
model.summary()
__________________________________________________________________________________________________ Layer (type) Output Shape Param # Connected to ================================================================================================== input_image (InputLayer) (None, 416, 416, 3) 0 __________________________________________________________________________________________________ conv_1 (Conv2D) (None, 416, 416, 32) 864 input_image[0][0] __________________________________________________________________________________________________ norm_1 (BatchNormalization) (None, 416, 416, 32) 128 conv_1[0][0] __________________________________________________________________________________________________ leaky_re_lu_1 (LeakyReLU) (None, 416, 416, 32) 0 norm_1[0][0] __________________________________________________________________________________________________ maxpool1_416to208 (MaxPooling2D (None, 208, 208, 32) 0 leaky_re_lu_1[0][0] __________________________________________________________________________________________________ conv_2 (Conv2D) (None, 208, 208, 64) 18432 maxpool1_416to208[0][0] __________________________________________________________________________________________________ norm_2 (BatchNormalization) (None, 208, 208, 64) 256 conv_2[0][0] __________________________________________________________________________________________________ leaky_re_lu_2 (LeakyReLU) (None, 208, 208, 64) 0 norm_2[0][0] __________________________________________________________________________________________________ maxpool1_208to104 (MaxPooling2D (None, 104, 104, 64) 0 leaky_re_lu_2[0][0] __________________________________________________________________________________________________ conv_3 (Conv2D) (None, 104, 104, 128 73728 maxpool1_208to104[0][0] __________________________________________________________________________________________________ norm_3 (BatchNormalization) (None, 104, 104, 128 512 conv_3[0][0] 
__________________________________________________________________________________________________ leaky_re_lu_3 (LeakyReLU) (None, 104, 104, 128 0 norm_3[0][0] __________________________________________________________________________________________________ conv_4 (Conv2D) (None, 104, 104, 64) 8192 leaky_re_lu_3[0][0] __________________________________________________________________________________________________ norm_4 (BatchNormalization) (None, 104, 104, 64) 256 conv_4[0][0] __________________________________________________________________________________________________ leaky_re_lu_4 (LeakyReLU) (None, 104, 104, 64) 0 norm_4[0][0] __________________________________________________________________________________________________ conv_5 (Conv2D) (None, 104, 104, 128 73728 leaky_re_lu_4[0][0] __________________________________________________________________________________________________ norm_5 (BatchNormalization) (None, 104, 104, 128 512 conv_5[0][0] __________________________________________________________________________________________________ leaky_re_lu_5 (LeakyReLU) (None, 104, 104, 128 0 norm_5[0][0] __________________________________________________________________________________________________ maxpool1_104to52 (MaxPooling2D) (None, 52, 52, 128) 0 leaky_re_lu_5[0][0] __________________________________________________________________________________________________ conv_6 (Conv2D) (None, 52, 52, 256) 294912 maxpool1_104to52[0][0] __________________________________________________________________________________________________ norm_6 (BatchNormalization) (None, 52, 52, 256) 1024 conv_6[0][0] __________________________________________________________________________________________________ leaky_re_lu_6 (LeakyReLU) (None, 52, 52, 256) 0 norm_6[0][0] __________________________________________________________________________________________________ conv_7 (Conv2D) (None, 52, 52, 128) 32768 leaky_re_lu_6[0][0] 
__________________________________________________________________________________________________ norm_7 (BatchNormalization) (None, 52, 52, 128) 512 conv_7[0][0] __________________________________________________________________________________________________ leaky_re_lu_7 (LeakyReLU) (None, 52, 52, 128) 0 norm_7[0][0] __________________________________________________________________________________________________ conv_8 (Conv2D) (None, 52, 52, 256) 294912 leaky_re_lu_7[0][0] __________________________________________________________________________________________________ norm_8 (BatchNormalization) (None, 52, 52, 256) 1024 conv_8[0][0] __________________________________________________________________________________________________ leaky_re_lu_8 (LeakyReLU) (None, 52, 52, 256) 0 norm_8[0][0] __________________________________________________________________________________________________ maxpool1_52to26 (MaxPooling2D) (None, 26, 26, 256) 0 leaky_re_lu_8[0][0] __________________________________________________________________________________________________ conv_9 (Conv2D) (None, 26, 26, 512) 1179648 maxpool1_52to26[0][0] __________________________________________________________________________________________________ norm_9 (BatchNormalization) (None, 26, 26, 512) 2048 conv_9[0][0] __________________________________________________________________________________________________ leaky_re_lu_9 (LeakyReLU) (None, 26, 26, 512) 0 norm_9[0][0] __________________________________________________________________________________________________ conv_10 (Conv2D) (None, 26, 26, 256) 131072 leaky_re_lu_9[0][0] __________________________________________________________________________________________________ norm_10 (BatchNormalization) (None, 26, 26, 256) 1024 conv_10[0][0] __________________________________________________________________________________________________ leaky_re_lu_10 (LeakyReLU) (None, 26, 26, 256) 0 norm_10[0][0] 
__________________________________________________________________________________________________ conv_11 (Conv2D) (None, 26, 26, 512) 1179648 leaky_re_lu_10[0][0] __________________________________________________________________________________________________ norm_11 (BatchNormalization) (None, 26, 26, 512) 2048 conv_11[0][0] __________________________________________________________________________________________________ leaky_re_lu_11 (LeakyReLU) (None, 26, 26, 512) 0 norm_11[0][0] __________________________________________________________________________________________________ conv_12 (Conv2D) (None, 26, 26, 256) 131072 leaky_re_lu_11[0][0] __________________________________________________________________________________________________ norm_12 (BatchNormalization) (None, 26, 26, 256) 1024 conv_12[0][0] __________________________________________________________________________________________________ leaky_re_lu_12 (LeakyReLU) (None, 26, 26, 256) 0 norm_12[0][0] __________________________________________________________________________________________________ conv_13 (Conv2D) (None, 26, 26, 512) 1179648 leaky_re_lu_12[0][0] __________________________________________________________________________________________________ norm_13 (BatchNormalization) (None, 26, 26, 512) 2048 conv_13[0][0] __________________________________________________________________________________________________ leaky_re_lu_13 (LeakyReLU) (None, 26, 26, 512) 0 norm_13[0][0] __________________________________________________________________________________________________ maxpool1_26to13 (MaxPooling2D) (None, 13, 13, 512) 0 leaky_re_lu_13[0][0] __________________________________________________________________________________________________ conv_14 (Conv2D) (None, 13, 13, 1024) 4718592 maxpool1_26to13[0][0] __________________________________________________________________________________________________ norm_14 (BatchNormalization) (None, 13, 13, 1024) 4096 conv_14[0][0] 
__________________________________________________________________________________________________ leaky_re_lu_14 (LeakyReLU) (None, 13, 13, 1024) 0 norm_14[0][0] __________________________________________________________________________________________________ conv_15 (Conv2D) (None, 13, 13, 512) 524288 leaky_re_lu_14[0][0] __________________________________________________________________________________________________ norm_15 (BatchNormalization) (None, 13, 13, 512) 2048 conv_15[0][0] __________________________________________________________________________________________________ leaky_re_lu_15 (LeakyReLU) (None, 13, 13, 512) 0 norm_15[0][0] __________________________________________________________________________________________________ conv_16 (Conv2D) (None, 13, 13, 1024) 4718592 leaky_re_lu_15[0][0] __________________________________________________________________________________________________ norm_16 (BatchNormalization) (None, 13, 13, 1024) 4096 conv_16[0][0] __________________________________________________________________________________________________ leaky_re_lu_16 (LeakyReLU) (None, 13, 13, 1024) 0 norm_16[0][0] __________________________________________________________________________________________________ conv_17 (Conv2D) (None, 13, 13, 512) 524288 leaky_re_lu_16[0][0] __________________________________________________________________________________________________ norm_17 (BatchNormalization) (None, 13, 13, 512) 2048 conv_17[0][0] __________________________________________________________________________________________________ leaky_re_lu_17 (LeakyReLU) (None, 13, 13, 512) 0 norm_17[0][0] __________________________________________________________________________________________________ conv_18 (Conv2D) (None, 13, 13, 1024) 4718592 leaky_re_lu_17[0][0] __________________________________________________________________________________________________ norm_18 (BatchNormalization) (None, 13, 13, 1024) 4096 conv_18[0][0] 
__________________________________________________________________________________________________ leaky_re_lu_18 (LeakyReLU) (None, 13, 13, 1024) 0 norm_18[0][0] __________________________________________________________________________________________________ conv_19 (Conv2D) (None, 13, 13, 1024) 9437184 leaky_re_lu_18[0][0] __________________________________________________________________________________________________ norm_19 (BatchNormalization) (None, 13, 13, 1024) 4096 conv_19[0][0] __________________________________________________________________________________________________ conv_21 (Conv2D) (None, 26, 26, 64) 32768 leaky_re_lu_13[0][0] __________________________________________________________________________________________________ leaky_re_lu_19 (LeakyReLU) (None, 13, 13, 1024) 0 norm_19[0][0] __________________________________________________________________________________________________ norm_21 (BatchNormalization) (None, 26, 26, 64) 256 conv_21[0][0] __________________________________________________________________________________________________ conv_20 (Conv2D) (None, 13, 13, 1024) 9437184 leaky_re_lu_19[0][0] __________________________________________________________________________________________________ leaky_re_lu_21 (LeakyReLU) (None, 26, 26, 64) 0 norm_21[0][0] __________________________________________________________________________________________________ norm_20 (BatchNormalization) (None, 13, 13, 1024) 4096 conv_20[0][0] __________________________________________________________________________________________________ lambda_1 (Lambda) (None, 13, 13, 256) 0 leaky_re_lu_21[0][0] __________________________________________________________________________________________________ leaky_re_lu_20 (LeakyReLU) (None, 13, 13, 1024) 0 norm_20[0][0] __________________________________________________________________________________________________ concatenate_1 (Concatenate) (None, 13, 13, 1280) 0 lambda_1[0][0] leaky_re_lu_20[0][0] 
__________________________________________________________________________________________________ conv_22 (Conv2D) (None, 13, 13, 1024) 11796480 concatenate_1[0][0] __________________________________________________________________________________________________ norm_22 (BatchNormalization) (None, 13, 13, 1024) 4096 conv_22[0][0] __________________________________________________________________________________________________ leaky_re_lu_22 (LeakyReLU) (None, 13, 13, 1024) 0 norm_22[0][0] __________________________________________________________________________________________________ conv_23 (Conv2D) (None, 13, 13, 100) 102500 leaky_re_lu_22[0][0] __________________________________________________________________________________________________ final_output (Reshape) (None, 13, 13, 4, 25 0 conv_23[0][0] __________________________________________________________________________________________________ input_hack (InputLayer) (None, 1, 1, 1, 50, 0 __________________________________________________________________________________________________ hack_layer (Lambda) (None, 13, 13, 4, 25 0 final_output[0][0] input_hack[0][0] ================================================================================================== Total params: 50,650,436 Trainable params: 102,500 Non-trainable params: 50,547,936 _________________________________________________________________________________
参考链接
https://fairyonice.github.io/Part_3_Object_Detection_with_Yolo_using_VOC_2012_data_model.html
除非注明,否则均为[半杯茶的小酒杯]原创文章,转载必须以链接形式标明本文链接
Be First to Comment