Press "Enter" to skip to content

YOLOv2 on Pascal VOC2012 – 网络模型(Model)

YOLOv2模型结构

 

YOLO的输入输出很复杂,Loss函数更加复杂,但是它的网络结构比较简单,只是Convolusion + Batch Normalization + Leaky Relu的重复叠加。

 

下面是 experiencor/keras-yolo2
定义的网络模型。

 

from keras.models import Sequential, Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.optimizers import SGD, Adam, RMSprop
from keras.layers.merge import concatenate
import keras.backend as K
import tensorflow as tf
# the function to implement the orgnization layer (thanks to github.com/allanzelener/YAD2K)
def space_to_depth_x2(x):
    return tf.space_to_depth(x, block_size=2)
def ConvBatchLReLu(x,filters,kernel_size,index,trainable):
    # when strides = None, strides = pool_size.
    x = Conv2D(filters, kernel_size, strides=(1,1), 
               padding='same', name='conv_{}'.format(index), 
               use_bias=False, trainable=trainable)(x)
    x = BatchNormalization(name='norm_{}'.format(index), trainable=trainable)(x)
    x = LeakyReLU(alpha=0.1)(x)
    return(x)
def ConvBatchLReLu_loop(x,index,convstack,trainable):
    for para in convstack:
        x = ConvBatchLReLu(x,para["filters"],para["kernel_size"],index,trainable)
        index += 1
    return(x)
def define_YOLOv2(IMAGE_H,IMAGE_W,GRID_H,GRID_W,TRUE_BOX_BUFFER,BOX,CLASS, trainable=False):
    convstack3to5  = [{"filters":128, "kernel_size":(3,3)},  # 3
                      {"filters":64,  "kernel_size":(1,1)},  # 4
                      {"filters":128, "kernel_size":(3,3)}]  # 5
                    
    convstack6to8  = [{"filters":256, "kernel_size":(3,3)},  # 6
                      {"filters":128, "kernel_size":(1,1)},  # 7
                      {"filters":256, "kernel_size":(3,3)}]  # 8
    
    convstack9to13 = [{"filters":512, "kernel_size":(3,3)},  # 9
                      {"filters":256, "kernel_size":(1,1)},  # 10
                      {"filters":512, "kernel_size":(3,3)},  # 11
                      {"filters":256, "kernel_size":(1,1)},  # 12
                      {"filters":512, "kernel_size":(3,3)}]  # 13
        
    convstack14to20 = [{"filters":1024, "kernel_size":(3,3)}, # 14 
                       {"filters":512,  "kernel_size":(1,1)}, # 15
                       {"filters":1024, "kernel_size":(3,3)}, # 16
                       {"filters":512,  "kernel_size":(1,1)}, # 17
                       {"filters":1024, "kernel_size":(3,3)}, # 18
                       {"filters":1024, "kernel_size":(3,3)}, # 19
                       {"filters":1024, "kernel_size":(3,3)}] # 20
    
    input_image = Input(shape=(IMAGE_H, IMAGE_W, 3),name="input_image")
    true_boxes  = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER , 4),name="input_hack")    
    # Layer 1
    x = ConvBatchLReLu(input_image,filters=32,kernel_size=(3,3),index=1,trainable=trainable)
    
    x = MaxPooling2D(pool_size=(2, 2),name="maxpool1_416to208")(x)
    # Layer 2
    x = ConvBatchLReLu(x,filters=64,kernel_size=(3,3),index=2,trainable=trainable)
    x = MaxPooling2D(pool_size=(2, 2),name="maxpool1_208to104")(x)
    
    # Layer 3 - 5
    x = ConvBatchLReLu_loop(x,3,convstack3to5,trainable)
    x = MaxPooling2D(pool_size=(2, 2),name="maxpool1_104to52")(x)
    
    # Layer 6 - 8 
    x = ConvBatchLReLu_loop(x,6,convstack6to8,trainable)
    x = MaxPooling2D(pool_size=(2, 2),name="maxpool1_52to26")(x) 
    # Layer 9 - 13
    x = ConvBatchLReLu_loop(x,9,convstack9to13,trainable)
        
    skip_connection = x
    x = MaxPooling2D(pool_size=(2, 2),name="maxpool1_26to13")(x)
    
    # Layer 14 - 20
    x = ConvBatchLReLu_loop(x,14,convstack14to20,trainable)
    # Layer 21
    skip_connection = ConvBatchLReLu(skip_connection,filters=64,
                                     kernel_size=(1,1),index=21,trainable=trainable)
    skip_connection = Lambda(space_to_depth_x2)(skip_connection)
    x = concatenate([skip_connection, x])
    # Layer 22
    x = ConvBatchLReLu(x,filters=1024,kernel_size=(3,3),index=22,trainable=trainable)
    # Layer 23
    x = Conv2D(BOX * (4 + 1 + CLASS), (1,1), strides=(1,1), padding='same', name='conv_23')(x)
    output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS),name="final_output")(x)
    # small hack to allow true_boxes to be registered when Keras build the model 
    # for more information: https://github.com/fchollet/keras/issues/2790
    output = Lambda(lambda args: args[0],name="hack_layer")([output, true_boxes])
    model = Model([input_image, true_boxes], output)
    return(model, true_boxes)
IMAGE_H, IMAGE_W  = 416, 416
GRID_H,  GRID_W   = 13 , 13
TRUE_BOX_BUFFER   = 50
BOX               = int(len(ANCHORS)/2)
CLASS             = len(LABELS)
## true_boxes is the tensor that takes "b_batch"
model, true_boxes = define_YOLOv2(IMAGE_H,IMAGE_W,GRID_H,GRID_W,TRUE_BOX_BUFFER,BOX,CLASS, 
                                  trainable=False)
model.summary()

 

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_image (InputLayer)        (None, 416, 416, 3)  0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 416, 416, 32) 864         input_image[0][0]                
__________________________________________________________________________________________________
norm_1 (BatchNormalization)     (None, 416, 416, 32) 128         conv_1[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)       (None, 416, 416, 32) 0           norm_1[0][0]                     
__________________________________________________________________________________________________
maxpool1_416to208 (MaxPooling2D (None, 208, 208, 32) 0           leaky_re_lu_1[0][0]              
__________________________________________________________________________________________________
conv_2 (Conv2D)                 (None, 208, 208, 64) 18432       maxpool1_416to208[0][0]          
__________________________________________________________________________________________________
norm_2 (BatchNormalization)     (None, 208, 208, 64) 256         conv_2[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_2 (LeakyReLU)       (None, 208, 208, 64) 0           norm_2[0][0]                     
__________________________________________________________________________________________________
maxpool1_208to104 (MaxPooling2D (None, 104, 104, 64) 0           leaky_re_lu_2[0][0]              
__________________________________________________________________________________________________
conv_3 (Conv2D)                 (None, 104, 104, 128 73728       maxpool1_208to104[0][0]          
__________________________________________________________________________________________________
norm_3 (BatchNormalization)     (None, 104, 104, 128 512         conv_3[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_3 (LeakyReLU)       (None, 104, 104, 128 0           norm_3[0][0]                     
__________________________________________________________________________________________________
conv_4 (Conv2D)                 (None, 104, 104, 64) 8192        leaky_re_lu_3[0][0]              
__________________________________________________________________________________________________
norm_4 (BatchNormalization)     (None, 104, 104, 64) 256         conv_4[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_4 (LeakyReLU)       (None, 104, 104, 64) 0           norm_4[0][0]                     
__________________________________________________________________________________________________
conv_5 (Conv2D)                 (None, 104, 104, 128 73728       leaky_re_lu_4[0][0]              
__________________________________________________________________________________________________
norm_5 (BatchNormalization)     (None, 104, 104, 128 512         conv_5[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_5 (LeakyReLU)       (None, 104, 104, 128 0           norm_5[0][0]                     
__________________________________________________________________________________________________
maxpool1_104to52 (MaxPooling2D) (None, 52, 52, 128)  0           leaky_re_lu_5[0][0]              
__________________________________________________________________________________________________
conv_6 (Conv2D)                 (None, 52, 52, 256)  294912      maxpool1_104to52[0][0]           
__________________________________________________________________________________________________
norm_6 (BatchNormalization)     (None, 52, 52, 256)  1024        conv_6[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_6 (LeakyReLU)       (None, 52, 52, 256)  0           norm_6[0][0]                     
__________________________________________________________________________________________________
conv_7 (Conv2D)                 (None, 52, 52, 128)  32768       leaky_re_lu_6[0][0]              
__________________________________________________________________________________________________
norm_7 (BatchNormalization)     (None, 52, 52, 128)  512         conv_7[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_7 (LeakyReLU)       (None, 52, 52, 128)  0           norm_7[0][0]                     
__________________________________________________________________________________________________
conv_8 (Conv2D)                 (None, 52, 52, 256)  294912      leaky_re_lu_7[0][0]              
__________________________________________________________________________________________________
norm_8 (BatchNormalization)     (None, 52, 52, 256)  1024        conv_8[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_8 (LeakyReLU)       (None, 52, 52, 256)  0           norm_8[0][0]                     
__________________________________________________________________________________________________
maxpool1_52to26 (MaxPooling2D)  (None, 26, 26, 256)  0           leaky_re_lu_8[0][0]              
__________________________________________________________________________________________________
conv_9 (Conv2D)                 (None, 26, 26, 512)  1179648     maxpool1_52to26[0][0]            
__________________________________________________________________________________________________
norm_9 (BatchNormalization)     (None, 26, 26, 512)  2048        conv_9[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_9 (LeakyReLU)       (None, 26, 26, 512)  0           norm_9[0][0]                     
__________________________________________________________________________________________________
conv_10 (Conv2D)                (None, 26, 26, 256)  131072      leaky_re_lu_9[0][0]              
__________________________________________________________________________________________________
norm_10 (BatchNormalization)    (None, 26, 26, 256)  1024        conv_10[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_10 (LeakyReLU)      (None, 26, 26, 256)  0           norm_10[0][0]                    
__________________________________________________________________________________________________
conv_11 (Conv2D)                (None, 26, 26, 512)  1179648     leaky_re_lu_10[0][0]             
__________________________________________________________________________________________________
norm_11 (BatchNormalization)    (None, 26, 26, 512)  2048        conv_11[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_11 (LeakyReLU)      (None, 26, 26, 512)  0           norm_11[0][0]                    
__________________________________________________________________________________________________
conv_12 (Conv2D)                (None, 26, 26, 256)  131072      leaky_re_lu_11[0][0]             
__________________________________________________________________________________________________
norm_12 (BatchNormalization)    (None, 26, 26, 256)  1024        conv_12[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_12 (LeakyReLU)      (None, 26, 26, 256)  0           norm_12[0][0]                    
__________________________________________________________________________________________________
conv_13 (Conv2D)                (None, 26, 26, 512)  1179648     leaky_re_lu_12[0][0]             
__________________________________________________________________________________________________
norm_13 (BatchNormalization)    (None, 26, 26, 512)  2048        conv_13[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_13 (LeakyReLU)      (None, 26, 26, 512)  0           norm_13[0][0]                    
__________________________________________________________________________________________________
maxpool1_26to13 (MaxPooling2D)  (None, 13, 13, 512)  0           leaky_re_lu_13[0][0]             
__________________________________________________________________________________________________
conv_14 (Conv2D)                (None, 13, 13, 1024) 4718592     maxpool1_26to13[0][0]            
__________________________________________________________________________________________________
norm_14 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_14[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_14 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_14[0][0]                    
__________________________________________________________________________________________________
conv_15 (Conv2D)                (None, 13, 13, 512)  524288      leaky_re_lu_14[0][0]             
__________________________________________________________________________________________________
norm_15 (BatchNormalization)    (None, 13, 13, 512)  2048        conv_15[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_15 (LeakyReLU)      (None, 13, 13, 512)  0           norm_15[0][0]                    
__________________________________________________________________________________________________
conv_16 (Conv2D)                (None, 13, 13, 1024) 4718592     leaky_re_lu_15[0][0]             
__________________________________________________________________________________________________
norm_16 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_16[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_16 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_16[0][0]                    
__________________________________________________________________________________________________
conv_17 (Conv2D)                (None, 13, 13, 512)  524288      leaky_re_lu_16[0][0]             
__________________________________________________________________________________________________
norm_17 (BatchNormalization)    (None, 13, 13, 512)  2048        conv_17[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_17 (LeakyReLU)      (None, 13, 13, 512)  0           norm_17[0][0]                    
__________________________________________________________________________________________________
conv_18 (Conv2D)                (None, 13, 13, 1024) 4718592     leaky_re_lu_17[0][0]             
__________________________________________________________________________________________________
norm_18 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_18[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_18 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_18[0][0]                    
__________________________________________________________________________________________________
conv_19 (Conv2D)                (None, 13, 13, 1024) 9437184     leaky_re_lu_18[0][0]             
__________________________________________________________________________________________________
norm_19 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_19[0][0]                    
__________________________________________________________________________________________________
conv_21 (Conv2D)                (None, 26, 26, 64)   32768       leaky_re_lu_13[0][0]             
__________________________________________________________________________________________________
leaky_re_lu_19 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_19[0][0]                    
__________________________________________________________________________________________________
norm_21 (BatchNormalization)    (None, 26, 26, 64)   256         conv_21[0][0]                    
__________________________________________________________________________________________________
conv_20 (Conv2D)                (None, 13, 13, 1024) 9437184     leaky_re_lu_19[0][0]             
__________________________________________________________________________________________________
leaky_re_lu_21 (LeakyReLU)      (None, 26, 26, 64)   0           norm_21[0][0]                    
__________________________________________________________________________________________________
norm_20 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_20[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 13, 13, 256)  0           leaky_re_lu_21[0][0]             
__________________________________________________________________________________________________
leaky_re_lu_20 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_20[0][0]                    
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 13, 13, 1280) 0           lambda_1[0][0]                   
                                                                 leaky_re_lu_20[0][0]             
__________________________________________________________________________________________________
conv_22 (Conv2D)                (None, 13, 13, 1024) 11796480    concatenate_1[0][0]              
__________________________________________________________________________________________________
norm_22 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_22[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_22 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_22[0][0]                    
__________________________________________________________________________________________________
conv_23 (Conv2D)                (None, 13, 13, 100)  102500      leaky_re_lu_22[0][0]             
__________________________________________________________________________________________________
final_output (Reshape)          (None, 13, 13, 4, 25 0           conv_23[0][0]                    
__________________________________________________________________________________________________
input_hack (InputLayer)         (None, 1, 1, 1, 50,  0                                            
__________________________________________________________________________________________________
hack_layer (Lambda)             (None, 13, 13, 4, 25 0           final_output[0][0]               
                                                                 input_hack[0][0]                 
==================================================================================================
Total params: 50,650,436
Trainable params: 102,500
Non-trainable params: 50,547,936
_________________________________________________________________________________

 

参考链接

 

https://fairyonice.github.io/Part_3_Object_Detection_with_Yolo_using_VOC_2012_data_model.html

 

除非注明,否则均为[半杯茶的小酒杯]原创文章,转载必须以链接形式标明本文链接

 

本文链接: http://www.banbeichadexiaojiubei.com/index.php/2020/12/13/yolov2-on-pascal-voc2012-%e7%bd%91%e7%bb%9c%e6%a8%a1%e5%9e%8bmodel/

Be First to Comment

发表回复

您的电子邮箱地址不会被公开。 必填项已用*标注