The BatchNorm Layer in Caffe

While training a small classification network, I found that adding BatchNorm layers improved retrieval performance over the original network, so I am recording the network structure here for future reference:
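A quick note on Caffe's BatchNorm layer, since its configuration below looks unusual: the layer carries three parameter blobs (the running mean, the running variance, and a moving-average factor). All three are accumulated during forward passes rather than learned by backpropagation, which is why every BatchNorm layer below pins all three param entries to lr_mult: 0. At TEST time the layer normalizes with the accumulated global statistics instead of per-batch statistics (use_global_stats defaults to true in the TEST phase), so the same definition serves both training and deployment.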

Before adding the layer:

    layer {
      name: "conv1"
      type: "Convolution"
      bottom: "data"
      top: "conv1"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      convolution_param {
        num_output: 64
        kernel_size: 7
        stride: 2
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
          value: 0
        }
      }
    }
    layer {
      name: "relu1"
      type: "ReLU"
      bottom: "conv1"
      top: "conv1"
    }
    layer {
      name: "pool1"
      type: "Pooling"
      bottom: "conv1"
      top: "pool1"
      pooling_param {
        pool: MAX
        kernel_size: 3
        stride: 2
      }
    }
    layer {
      name: "norm1"
      type: "LRN"
      bottom: "pool1"
      top: "norm1"
      lrn_param {
        local_size: 5
        alpha: 0.0001
        beta: 0.75
      }
    }
    layer {
      name: "conv2"
      type: "Convolution"
      bottom: "norm1"
      top: "conv2"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      convolution_param {
        num_output: 128
        pad: 2
        kernel_size: 5
        stride: 2
        group: 2
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
          value: 1
        }
      }
    }
    layer {
      name: "relu2"
      type: "ReLU"
      bottom: "conv2"
      top: "conv2"
    }
    layer {
      name: "pool2"
      type: "Pooling"
      bottom: "conv2"
      top: "pool2"
      pooling_param {
        pool: MAX
        kernel_size: 3
        stride: 2
      }
    }
    layer {
      name: "norm2"
      type: "LRN"
      bottom: "pool2"
      top: "norm2"
      lrn_param {
        local_size: 5
        alpha: 0.0001
        beta: 0.75
      }
    }
    layer {
      name: "conv3"
      type: "Convolution"
      bottom: "norm2"
      top: "conv3"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      convolution_param {
        num_output: 192
        pad: 1
        stride: 2
        kernel_size: 3
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
          value: 0
        }
      }
    }
    layer {
      name: "relu3"
      type: "ReLU"
      bottom: "conv3"
      top: "conv3"
    }
    layer {
      name: "conv4"
      type: "Convolution"
      bottom: "conv3"
      top: "conv4"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      convolution_param {
        num_output: 192
        pad: 1
        kernel_size: 3
        group: 2
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
          value: 1
        }
      }
    }
    layer {
      name: "relu4"
      type: "ReLU"
      bottom: "conv4"
      top: "conv4"
    }
    layer {
      name: "conv5"
      type: "Convolution"
      bottom: "conv4"
      top: "conv5"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      convolution_param {
        num_output: 128
        pad: 1
        stride: 2
        kernel_size: 3
        group: 2
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
          value: 1
        }
      }
    }
    layer {
      name: "relu5"
      type: "ReLU"
      bottom: "conv5"
      top: "conv5"
    }
    layer {
      name: "pool5"
      type: "Pooling"
      bottom: "conv5"
      top: "pool5"
      pooling_param {
        pool: MAX
        kernel_size: 2
        stride: 1
      }
    }
    layer {
      name: "fc6_srx"
      type: "InnerProduct"
      bottom: "pool5"
      top: "fc6"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      inner_product_param {
        num_output: 768
        weight_filler {
          type: "gaussian"
          std: 0.005
        }
        bias_filler {
          type: "constant"
          value: 1
        }
      }
    }
    layer {
      name: "relu7"
      type: "ReLU"
      bottom: "fc6"
      top: "fc6"
    }
    layer {
      name: "drop7"
      type: "Dropout"
      bottom: "fc6"
      top: "fc6"
      dropout_param {
        dropout_ratio: 0.5
      }
    }
    layer {
      name: "fc7_srx"
      type: "InnerProduct"
      bottom: "fc6"
      top: "fc7"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      inner_product_param {
        num_output: 5275
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
          value: 0
        }
      }
    }
    layer {
      name: "loss"
      type: "SoftmaxWithLoss"
      bottom: "fc7"
      bottom: "label"
      top: "SoftmaxWithLoss"
      include {
        phase: TRAIN
      }
    }
    layer {
      name: "accuracy"
      type: "Accuracy"
      bottom: "fc7"
      bottom: "label"
      top: "accuracy"
      include {
        phase: TEST
      }
    }

After adding the layer. Note that this version differs from the one above in more than just the BatchNorm layers: the two LRN layers are dropped (conv2 and conv3 now read from pool1 and pool2 directly), and the multipliers on the two fully connected layers are raised from lr_mult 1/2 with decay_mult 1/0 to lr_mult 5/8 with decay_mult 2/0:

    layer {
      name: "conv1"
      type: "Convolution"
      bottom: "data"
      top: "conv1"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      convolution_param {
        num_output: 64
        kernel_size: 7
        stride: 2
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
          value: 0
        }
      }
    }
    ##############
    layer {
      name: "bn1"
      type: "BatchNorm"
      bottom: "conv1"
      top: "conv1"
      param {
        lr_mult: 0
      }
      param {
        lr_mult: 0
      }
      param {
        lr_mult: 0
      }
    }
    ##############
    layer {
      name: "relu1"
      type: "ReLU"
      bottom: "conv1"
      top: "conv1"
    }
    layer {
      name: "pool1"
      type: "Pooling"
      bottom: "conv1"
      top: "pool1"
      pooling_param {
        pool: MAX
        kernel_size: 3
        stride: 2
      }
    }

    layer {
      name: "conv2"
      type: "Convolution"
      bottom: "pool1"
      top: "conv2"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      convolution_param {
        num_output: 128
        pad: 2
        kernel_size: 5
        stride: 2
        group: 2
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
          value: 1
        }
      }
    }
    ##############
    layer {
      name: "bn2"
      type: "BatchNorm"
      bottom: "conv2"
      top: "conv2"
      param {
        lr_mult: 0
      }
      param {
        lr_mult: 0
      }
      param {
        lr_mult: 0
      }
    }
    ##############
    layer {
      name: "relu2"
      type: "ReLU"
      bottom: "conv2"
      top: "conv2"
    }
    layer {
      name: "pool2"
      type: "Pooling"
      bottom: "conv2"
      top: "pool2"
      pooling_param {
        pool: MAX
        kernel_size: 3
        stride: 2
      }
    }

    layer {
      name: "conv3"
      type: "Convolution"
      bottom: "pool2"
      top: "conv3"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      convolution_param {
        num_output: 192
        pad: 1
        stride: 2
        kernel_size: 3
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
          value: 0
        }
      }
    }
    ##############
    layer {
      name: "bn3"
      type: "BatchNorm"
      bottom: "conv3"
      top: "conv3"
      param {
        lr_mult: 0
      }
      param {
        lr_mult: 0
      }
      param {
        lr_mult: 0
      }
    }
    ##############
    layer {
      name: "relu3"
      type: "ReLU"
      bottom: "conv3"
      top: "conv3"
    }
    layer {
      name: "conv4"
      type: "Convolution"
      bottom: "conv3"
      top: "conv4"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      convolution_param {
        num_output: 192
        pad: 1
        kernel_size: 3
        group: 2
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
          value: 1
        }
      }
    }
    ##############
    layer {
      name: "bn4"
      type: "BatchNorm"
      bottom: "conv4"
      top: "conv4"
      param {
        lr_mult: 0
      }
      param {
        lr_mult: 0
      }
      param {
        lr_mult: 0
      }
    }
    ##############
    layer {
      name: "relu4"
      type: "ReLU"
      bottom: "conv4"
      top: "conv4"
    }
    layer {
      name: "conv5"
      type: "Convolution"
      bottom: "conv4"
      top: "conv5"
      param {
        lr_mult: 1
        decay_mult: 1
      }
      param {
        lr_mult: 2
        decay_mult: 0
      }
      convolution_param {
        num_output: 128
        pad: 1
        stride: 2
        kernel_size: 3
        group: 2
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
          value: 1
        }
      }
    }
    ##############
    layer {
      name: "bn5"
      type: "BatchNorm"
      bottom: "conv5"
      top: "conv5"
      param {
        lr_mult: 0
      }
      param {
        lr_mult: 0
      }
      param {
        lr_mult: 0
      }
    }
    ##############
    layer {
      name: "relu5"
      type: "ReLU"
      bottom: "conv5"
      top: "conv5"
    }
    layer {
      name: "pool5"
      type: "Pooling"
      bottom: "conv5"
      top: "pool5"
      pooling_param {
        pool: MAX
        kernel_size: 2
        stride: 1
      }
    }
    layer {
      name: "fc6_srx"
      type: "InnerProduct"
      bottom: "pool5"
      top: "fc6"
      param {
        lr_mult: 5
        decay_mult: 2
      }
      param {
        lr_mult: 8
        decay_mult: 0
      }
      inner_product_param {
        num_output: 768
        weight_filler {
          type: "gaussian"
          std: 0.005
        }
        bias_filler {
          type: "constant"
          value: 1
        }
      }
    }
    layer {
      name: "relu7"
      type: "ReLU"
      bottom: "fc6"
      top: "fc6"
    }
    layer {
      name: "drop7"
      type: "Dropout"
      bottom: "fc6"
      top: "fc6"
      dropout_param {
        dropout_ratio: 0.5
      }
    }
    layer {
      name: "fc7_srx"
      type: "InnerProduct"
      bottom: "fc6"
      top: "fc7"
      param {
        lr_mult: 5
        decay_mult: 2
      }
      param {
        lr_mult: 8
        decay_mult: 0
      }
      inner_product_param {
        num_output: 5275
        weight_filler {
          type: "gaussian"
          std: 0.01
        }
        bias_filler {
          type: "constant"
          value: 0
        }
      }
    }
    layer {
      name: "loss"
      type: "SoftmaxWithLoss"
      bottom: "fc7"
      bottom: "label"
      top: "SoftmaxWithLoss"
      include {
        phase: TRAIN
      }
    }
    layer {
      name: "accuracy"
      type: "Accuracy"
      bottom: "fc7"
      bottom: "label"
      top: "accuracy"
      include {
        phase: TEST
      }
    }
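One thing worth knowing when reusing this structure: Caffe's BatchNorm layer only normalizes each channel to zero mean and unit variance; it has no learnable scale and shift (the gamma and beta of the original Batch Normalization paper). A common Caffe idiom, seen for example in the official ResNet prototxts, is to pair each BatchNorm with a Scale layer that has bias_term: true. Below is a minimal sketch of that pairing for the first block; the layer name sc1 is my own illustration and is not part of the network above.

    ##############
    layer {
      name: "bn1"
      type: "BatchNorm"
      bottom: "conv1"
      top: "conv1"
      # mean, variance, and the moving-average factor are computed,
      # not learned, so their learning rates are pinned to zero
      param { lr_mult: 0 }
      param { lr_mult: 0 }
      param { lr_mult: 0 }
    }
    layer {
      name: "sc1"   # hypothetical layer name, not in the network above
      type: "Scale"
      bottom: "conv1"
      top: "conv1"
      scale_param {
        bias_term: true   # learn a per-channel shift (beta) alongside the scale (gamma)
      }
    }
    ##############

Since the Scale layer's parameters are learned by backpropagation, its param entries can keep the usual non-zero lr_mult values, unlike those of the BatchNorm layer it follows.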