@@ -121,7 +121,9 @@ def __init__(self, FLAGS, cfg):
121
121
self .num_class = cfg .num_classes
122
122
# set hook for extraction of featuremaps and grads
123
123
self .set_hook (cfg )
124
- self .nms_idx_need_divid_numclass_arch = ['FasterRCNN' , 'MaskRCNN' , 'CascadeRCNN' ]
124
+ self .nms_idx_need_divid_numclass_arch = [
125
+ 'FasterRCNN' , 'MaskRCNN' , 'CascadeRCNN'
126
+ ]
125
127
"""
126
128
In these networks, the bbox array shape before nms contain num_class,
127
129
the nms_keep_idx of the bbox need to divide the num_class;
@@ -141,7 +143,7 @@ def build_trainer(self, cfg):
141
143
trainer .load_weights (cfg .weights )
142
144
143
145
# set for get extra_data before nms
144
- trainer .model .use_extra_data = True
146
+ trainer .model .use_extra_data = True
145
147
# set for record the bbox index before nms
146
148
if cfg .architecture in ['FasterRCNN' , 'MaskRCNN' ]:
147
149
trainer .model .bbox_post_process .nms .return_index = True
@@ -152,14 +154,12 @@ def build_trainer(self, cfg):
152
154
else :
153
155
# anchor free YOLOs: PP-YOLOE, PP-YOLOE+
154
156
trainer .model .yolo_head .nms .return_index = True
155
- elif cfg .architecture == 'BlazeFace' or cfg .architecture == 'SSD' :
157
+ elif cfg .architecture == 'BlazeFace' or cfg .architecture == 'SSD' :
156
158
trainer .model .post_process .nms .return_index = True
157
- elif cfg .architecture == 'RetinaNet' :
159
+ elif cfg .architecture == 'RetinaNet' :
158
160
trainer .model .head .nms .return_index = True
159
161
else :
160
- print (
161
- cfg .architecture + ' is not supported for cam temporarily!'
162
- )
162
+ print (cfg .architecture + ' is not supported for cam temporarily!' )
163
163
sys .exit ()
164
164
# Todo: Unify the head/post_process name in each model
165
165
@@ -169,19 +169,23 @@ def set_hook(self, cfg):
169
169
# set hook for extraction of featuremaps and grads
170
170
self .target_feats = {}
171
171
self .target_layer_name = cfg .target_feature_layer_name
172
+
172
173
# such as trainer.model.backbone, trainer.model.bbox_head.roi_extractor
173
174
174
175
def hook (layer , input , output ):
175
176
self .target_feats [layer ._layer_name_for_hook ] = output
176
177
177
178
try :
178
- exec ('self.trainer.' + self .target_layer_name + '._layer_name_for_hook = self.target_layer_name' )
179
+ exec ('self.trainer.' + self .target_layer_name +
180
+ '._layer_name_for_hook = self.target_layer_name' )
179
181
# self.trainer.target_layer_name._layer_name_for_hook = self.target_layer_name
180
- exec ('self.trainer.' + self .target_layer_name + '.register_forward_post_hook(hook)' )
182
+ exec ('self.trainer.' + self .target_layer_name +
183
+ '.register_forward_post_hook(hook)' )
181
184
# self.trainer.target_layer_name.register_forward_post_hook(hook)
182
185
except :
183
186
print ("Error! "
184
- "The target_layer_name--" + self .target_layer_name + " is not in model! "
187
+ "The target_layer_name--" + self .target_layer_name +
188
+ " is not in model! "
185
189
"Please check the spelling and "
186
190
"the network's architecture!" )
187
191
sys .exit ()
@@ -228,7 +232,7 @@ def get_bboxes_cams(self):
228
232
# currently, only include the rcnn architectures (fasterrcnn, maskrcnn, cascadercnn);
229
233
before_nms_indexes = extra_data ['nms_keep_idx' ].cpu ().numpy (
230
234
) // self .num_class # num_class
231
- else :
235
+ else :
232
236
before_nms_indexes = extra_data ['nms_keep_idx' ].cpu ().numpy ()
233
237
234
238
# Calculate and visualize the heatmap of per predict bbox
@@ -240,7 +244,7 @@ def get_bboxes_cams(self):
240
244
241
245
target_bbox_before_nms = int (before_nms_indexes [index ])
242
246
243
- if len (extra_data ['scores' ].shape )== 2 :
247
+ if len (extra_data ['scores' ].shape ) == 2 :
244
248
score_out = extra_data ['scores' ][target_bbox_before_nms ]
245
249
else :
246
250
score_out = extra_data ['scores' ][0 , :, target_bbox_before_nms ]
@@ -250,7 +254,6 @@ def get_bboxes_cams(self):
250
254
2) [num_of_image, num_classes, num_of_yolo_bboxes_before_nms], for example: [1, 80, 1000]
251
255
"""
252
256
253
-
254
257
# construct one_hot label and do backward to get the gradients
255
258
predicted_label = paddle .argmax (score_out )
256
259
label_onehot = paddle .nn .functional .one_hot (
@@ -266,8 +269,8 @@ def get_bboxes_cams(self):
266
269
# when the featuremap consists of multiple scales,
267
270
# take the featuremap of the last scale
268
271
# Todo: fuse the cam result from multiscale featuremaps
269
- if self .target_feats [self .target_layer_name ][
270
- - 1 ]. shape [ - 1 ] == 1 :
272
+ if self .target_feats [self .target_layer_name ][- 1 ]. shape [
273
+ - 1 ] == 1 :
271
274
"""
272
275
if the last level featuremap is 1x1 size,
273
276
we take the second last one
@@ -286,11 +289,12 @@ def get_bboxes_cams(self):
286
289
self .target_layer_name ].grad .squeeze ().cpu ().numpy ()
287
290
cam_feat = self .target_feats [
288
291
self .target_layer_name ].squeeze ().cpu ().numpy ()
289
- else : # roi level feature
292
+ else : # roi level feature
290
293
cam_grad = self .target_feats [
291
- self .target_layer_name ].grad .squeeze ().cpu ().numpy ()[target_bbox_before_nms ]
292
- cam_feat = self .target_feats [
293
- self .target_layer_name ].squeeze ().cpu ().numpy ()[target_bbox_before_nms ]
294
+ self .target_layer_name ].grad .squeeze ().cpu ().numpy ()[
295
+ target_bbox_before_nms ]
296
+ cam_feat = self .target_feats [self .target_layer_name ].squeeze (
297
+ ).cpu ().numpy ()[target_bbox_before_nms ]
294
298
295
299
# grad_cam:
296
300
exp = grad_cam (cam_feat , cam_grad )
@@ -305,23 +309,25 @@ def get_bboxes_cams(self):
305
309
# reshape the cam image to the input image size
306
310
resized_exp = resize_cam (exp , (img .shape [1 ], img .shape [0 ]))
307
311
mask = np .zeros ((img .shape [0 ], img .shape [1 ], 3 ))
308
- mask [int (target_bbox [3 ]):int (target_bbox [5 ]), int (target_bbox [2 ]):
309
- int (target_bbox [4 ]), :] = 1
312
+ mask [int (target_bbox [3 ]):int (target_bbox [5 ]), int (target_bbox [
313
+ 2 ]): int (target_bbox [4 ]), :] = 1
310
314
resized_exp = resized_exp * mask
311
315
# add the bbox cam back to the input image
312
316
overlay_vis = np .uint8 (resized_exp * 0.4 + img * 0.6 )
313
317
elif 'roi' in self .target_layer_name :
314
318
# get the bbox part of the image
315
- bbox_img = copy .deepcopy (img [int (target_bbox [3 ]):int (target_bbox [5 ]),
316
- int (target_bbox [2 ]):int (target_bbox [4 ]), :])
319
+ bbox_img = copy .deepcopy (img [int (target_bbox [3 ]):int (
320
+ target_bbox [5 ]), int (target_bbox [2 ]):int (target_bbox [
321
+ 4 ]), :])
317
322
# reshape the cam image to the bbox size
318
- resized_exp = resize_cam (exp , (bbox_img .shape [1 ], bbox_img .shape [0 ]))
323
+ resized_exp = resize_cam (exp ,
324
+ (bbox_img .shape [1 ], bbox_img .shape [0 ]))
319
325
# add the bbox cam back to the bbox image
320
326
bbox_overlay_vis = np .uint8 (resized_exp * 0.4 + bbox_img * 0.6 )
321
327
# put the bbox_cam image to the original image
322
328
overlay_vis = copy .deepcopy (img )
323
- overlay_vis [int (target_bbox [3 ]):int (target_bbox [5 ]),
324
- int ( target_bbox [2 ]):int (target_bbox [4 ]), :] = bbox_overlay_vis
329
+ overlay_vis [int (target_bbox [3 ]):int (target_bbox [5 ]), int (
330
+ target_bbox [2 ]):int (target_bbox [4 ]), :] = bbox_overlay_vis
325
331
else :
326
332
print (
327
333
'Only supported cam for backbone/neck feature and roi feature, the others are not supported temporarily!'
0 commit comments