Skip to content

Commit c84a595

Browse files
merge uapi paddledetection (#8957)
1 parent 1fd52ed commit c84a595

File tree

9 files changed

+97
-73
lines changed

9 files changed

+97
-73
lines changed

deploy/auto_compression/configs/rtdetr_r50vd_qat_dis.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11

22
Global:
3-
reader_config: conigs/rtdetr_reader.yml
3+
reader_config: configs/rtdetr_reader.yml
44
include_nms: True
55
Evaluation: True
66
model_dir: ./rtdetr_r50vd_6x_coco/

deploy/python/keypoint_infer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ def __init__(self,
9090
cpu_threads=cpu_threads,
9191
enable_mkldnn=enable_mkldnn,
9292
output_dir=output_dir,
93-
threshold=threshold,
93+
threshold=threshold,
9494
use_fd_format=use_fd_format)
9595
self.use_dark = use_dark
9696

ppdet/data/source/coco.py

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
import os
1616
import copy
17+
1718
try:
1819
from collections.abc import Sequence
1920
except Exception:
@@ -23,10 +24,11 @@
2324
from .dataset import DetDataset
2425

2526
from ppdet.utils.logger import setup_logger
27+
2628
logger = setup_logger(__name__)
2729

2830
__all__ = [
29-
'COCODataSet', 'SlicedCOCODataSet', 'SemiCOCODataSet', 'COCODetDataset'
31+
'COCODataSet', 'SlicedCOCODataSet', 'SemiCOCODataSet', 'COCODetDataset', 'COCOInstSegDataset'
3032
]
3133

3234

@@ -127,7 +129,7 @@ def parse_dataset(self):
127129
if im_w < 0 or im_h < 0:
128130
logger.warning('Illegal width: {} or height: {} in annotation, '
129131
'and im_id: {} will be ignored'.format(
130-
im_w, im_h, img_id))
132+
im_w, im_h, img_id))
131133
continue
132134

133135
coco_rec = {
@@ -334,7 +336,7 @@ def parse_dataset(self):
334336
if im_w < 0 or im_h < 0:
335337
logger.warning('Illegal width: {} or height: {} in annotation, '
336338
'and im_id: {} will be ignored'.format(
337-
im_w, im_h, img_id))
339+
im_w, im_h, img_id))
338340
continue
339341

340342
slice_image_result = sahi.slicing.slice_image(
@@ -437,7 +439,7 @@ def parse_dataset(self):
437439
if im_w < 0 or im_h < 0:
438440
logger.warning('Illegal width: {} or height: {} in annotation, '
439441
'and im_id: {} will be ignored'.format(
440-
im_w, im_h, img_id))
442+
im_w, im_h, img_id))
441443
continue
442444

443445
coco_rec = {
@@ -594,3 +596,10 @@ def __getitem__(self, idx):
594596
@serializable
595597
class COCODetDataset(COCODataSet):
596598
pass
599+
600+
601+
# for PaddleX
602+
@register
603+
@serializable
604+
class COCOInstSegDataset(COCODataSet):
605+
pass

ppdet/engine/callbacks.py

Lines changed: 32 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -182,6 +182,8 @@ def on_epoch_end(self, status):
182182
weight = None
183183
save_name = None
184184
if dist.get_world_size() < 2 or dist.get_rank() == 0:
185+
end_epoch = self.model.cfg.epoch
186+
save_name = str(epoch_id) if epoch_id != end_epoch - 1 else "model_final"
185187
if mode == 'train':
186188
end_epoch = self.model.cfg.epoch
187189
if (
@@ -191,29 +193,36 @@ def on_epoch_end(self, status):
191193
epoch_id) if epoch_id != end_epoch - 1 else "model_final"
192194
weight = self.weight.state_dict()
193195
elif mode == 'eval':
194-
if 'save_best_model' in status and status['save_best_model']:
195-
for metric in self.model._metrics:
196-
map_res = metric.get_results()
197-
eval_func = "ap"
198-
if 'pose3d' in map_res:
199-
key = 'pose3d'
200-
eval_func = "mpjpe"
201-
elif 'bbox' in map_res:
202-
key = 'bbox'
203-
elif 'keypoint' in map_res:
204-
key = 'keypoint'
205-
else:
206-
key = 'mask'
207-
208-
key = self.model.cfg.get('target_metrics', key)
209-
210-
if key not in map_res:
211-
logger.warning("Evaluation results empty, this may be due to " \
212-
"training iterations being too few or not " \
213-
"loading the correct weights.")
214-
return
215-
if map_res[key][0] >= self.best_ap:
216-
self.best_ap = map_res[key][0]
196+
for metric in self.model._metrics:
197+
map_res = metric.get_results()
198+
eval_func = "ap"
199+
if 'pose3d' in map_res:
200+
key = 'pose3d'
201+
eval_func = "mpjpe"
202+
elif 'bbox' in map_res:
203+
key = 'bbox'
204+
elif 'keypoint' in map_res:
205+
key = 'keypoint'
206+
else:
207+
key = 'mask'
208+
209+
key = self.model.cfg.get('target_metrics', key)
210+
211+
if key not in map_res:
212+
logger.warning("Evaluation results empty, this may be due to " \
213+
"training iterations being too few or not " \
214+
"loading the correct weights.")
215+
return
216+
epoch_ap = map_res[key][0]
217+
epoch_metric = {
218+
'metric': abs(epoch_ap),
219+
'epoch': epoch_id + 1
220+
}
221+
save_path = os.path.join(self.save_dir, f"{save_name}.pdstates")
222+
paddle.save(epoch_metric, save_path)
223+
if 'save_best_model' in status and status['save_best_model']:
224+
if epoch_ap >= self.best_ap:
225+
self.best_ap = epoch_ap
217226
save_name = 'best_model'
218227
weight = self.weight.state_dict()
219228
best_metric = {

ppdet/modeling/architectures/detr.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ def _forward(self):
104104
else:
105105
bbox, bbox_num, mask = self.post_process(
106106
preds, self.inputs['im_shape'], self.inputs['scale_factor'],
107-
self.inputs['image'])[2:].shape
107+
self.inputs['image'][2:].shape)
108108

109109
output = {'bbox': bbox, 'bbox_num': bbox_num}
110110
if self.with_mask:

ppdet/modeling/transformers/mask_rtdetr_transformer.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,13 +53,15 @@ def __init__(self,
5353
hidden_dim,
5454
decoder_layer,
5555
num_layers,
56-
eval_idx=-1):
56+
eval_idx=-1,
57+
eval_topk=100):
5758
super(MaskTransformerDecoder, self).__init__()
5859
self.layers = _get_clones(decoder_layer, num_layers)
5960
self.hidden_dim = hidden_dim
6061
self.num_layers = num_layers
6162
self.eval_idx = eval_idx if eval_idx >= 0 \
6263
else num_layers + eval_idx
64+
self.eval_topk = eval_topk
6365

6466
def forward(self,
6567
tgt,

ppdet/utils/cam_utils.py

Lines changed: 32 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,9 @@ def __init__(self, FLAGS, cfg):
121121
self.num_class = cfg.num_classes
122122
# set hook for extraction of featuremaps and grads
123123
self.set_hook(cfg)
124-
self.nms_idx_need_divid_numclass_arch = ['FasterRCNN', 'MaskRCNN', 'CascadeRCNN']
124+
self.nms_idx_need_divid_numclass_arch = [
125+
'FasterRCNN', 'MaskRCNN', 'CascadeRCNN'
126+
]
125127
"""
126128
In these networks, the bbox array shape before nms contain num_class,
127129
the nms_keep_idx of the bbox need to divide the num_class;
@@ -141,7 +143,7 @@ def build_trainer(self, cfg):
141143
trainer.load_weights(cfg.weights)
142144

143145
# set for get extra_data before nms
144-
trainer.model.use_extra_data=True
146+
trainer.model.use_extra_data = True
145147
# set for record the bbox index before nms
146148
if cfg.architecture in ['FasterRCNN', 'MaskRCNN']:
147149
trainer.model.bbox_post_process.nms.return_index = True
@@ -152,14 +154,12 @@ def build_trainer(self, cfg):
152154
else:
153155
# anchor free YOLOs: PP-YOLOE, PP-YOLOE+
154156
trainer.model.yolo_head.nms.return_index = True
155-
elif cfg.architecture=='BlazeFace' or cfg.architecture=='SSD':
157+
elif cfg.architecture == 'BlazeFace' or cfg.architecture == 'SSD':
156158
trainer.model.post_process.nms.return_index = True
157-
elif cfg.architecture=='RetinaNet':
159+
elif cfg.architecture == 'RetinaNet':
158160
trainer.model.head.nms.return_index = True
159161
else:
160-
print(
161-
cfg.architecture+' is not supported for cam temporarily!'
162-
)
162+
print(cfg.architecture + ' is not supported for cam temporarily!')
163163
sys.exit()
164164
# Todo: Unify the head/post_process name in each model
165165

@@ -169,19 +169,23 @@ def set_hook(self, cfg):
169169
# set hook for extraction of featuremaps and grads
170170
self.target_feats = {}
171171
self.target_layer_name = cfg.target_feature_layer_name
172+
172173
# such as trainer.model.backbone, trainer.model.bbox_head.roi_extractor
173174

174175
def hook(layer, input, output):
175176
self.target_feats[layer._layer_name_for_hook] = output
176177

177178
try:
178-
exec('self.trainer.'+self.target_layer_name+'._layer_name_for_hook = self.target_layer_name')
179+
exec('self.trainer.' + self.target_layer_name +
180+
'._layer_name_for_hook = self.target_layer_name')
179181
# self.trainer.target_layer_name._layer_name_for_hook = self.target_layer_name
180-
exec('self.trainer.'+self.target_layer_name+'.register_forward_post_hook(hook)')
182+
exec('self.trainer.' + self.target_layer_name +
183+
'.register_forward_post_hook(hook)')
181184
# self.trainer.target_layer_name.register_forward_post_hook(hook)
182185
except:
183186
print("Error! "
184-
"The target_layer_name--"+self.target_layer_name+" is not in model! "
187+
"The target_layer_name--" + self.target_layer_name +
188+
" is not in model! "
185189
"Please check the spelling and "
186190
"the network's architecture!")
187191
sys.exit()
@@ -228,7 +232,7 @@ def get_bboxes_cams(self):
228232
# currently, only include the rcnn architectures (fasterrcnn, maskrcnn, cascadercnn);
229233
before_nms_indexes = extra_data['nms_keep_idx'].cpu().numpy(
230234
) // self.num_class # num_class
231-
else :
235+
else:
232236
before_nms_indexes = extra_data['nms_keep_idx'].cpu().numpy()
233237

234238
# Calculate and visualize the heatmap of per predict bbox
@@ -240,7 +244,7 @@ def get_bboxes_cams(self):
240244

241245
target_bbox_before_nms = int(before_nms_indexes[index])
242246

243-
if len(extra_data['scores'].shape)==2:
247+
if len(extra_data['scores'].shape) == 2:
244248
score_out = extra_data['scores'][target_bbox_before_nms]
245249
else:
246250
score_out = extra_data['scores'][0, :, target_bbox_before_nms]
@@ -250,7 +254,6 @@ def get_bboxes_cams(self):
250254
2) [num_of_image, num_classes, num_of_yolo_bboxes_before_nms], for example: [1, 80, 1000]
251255
"""
252256

253-
254257
# construct one_hot label and do backward to get the gradients
255258
predicted_label = paddle.argmax(score_out)
256259
label_onehot = paddle.nn.functional.one_hot(
@@ -266,8 +269,8 @@ def get_bboxes_cams(self):
266269
# when the featuremap contains of multiple scales,
267270
# take the featuremap of the last scale
268271
# Todo: fuse the cam result from multisclae featuremaps
269-
if self.target_feats[self.target_layer_name][
270-
-1].shape[-1]==1:
272+
if self.target_feats[self.target_layer_name][-1].shape[
273+
-1] == 1:
271274
"""
272275
if the last level featuremap is 1x1 size,
273276
we take the second last one
@@ -286,11 +289,12 @@ def get_bboxes_cams(self):
286289
self.target_layer_name].grad.squeeze().cpu().numpy()
287290
cam_feat = self.target_feats[
288291
self.target_layer_name].squeeze().cpu().numpy()
289-
else: # roi level feature
292+
else: # roi level feature
290293
cam_grad = self.target_feats[
291-
self.target_layer_name].grad.squeeze().cpu().numpy()[target_bbox_before_nms]
292-
cam_feat = self.target_feats[
293-
self.target_layer_name].squeeze().cpu().numpy()[target_bbox_before_nms]
294+
self.target_layer_name].grad.squeeze().cpu().numpy()[
295+
target_bbox_before_nms]
296+
cam_feat = self.target_feats[self.target_layer_name].squeeze(
297+
).cpu().numpy()[target_bbox_before_nms]
294298

295299
# grad_cam:
296300
exp = grad_cam(cam_feat, cam_grad)
@@ -305,23 +309,25 @@ def get_bboxes_cams(self):
305309
# reshape the cam image to the input image size
306310
resized_exp = resize_cam(exp, (img.shape[1], img.shape[0]))
307311
mask = np.zeros((img.shape[0], img.shape[1], 3))
308-
mask[int(target_bbox[3]):int(target_bbox[5]), int(target_bbox[2]):
309-
int(target_bbox[4]), :] = 1
312+
mask[int(target_bbox[3]):int(target_bbox[5]), int(target_bbox[
313+
2]):int(target_bbox[4]), :] = 1
310314
resized_exp = resized_exp * mask
311315
# add the bbox cam back to the input image
312316
overlay_vis = np.uint8(resized_exp * 0.4 + img * 0.6)
313317
elif 'roi' in self.target_layer_name:
314318
# get the bbox part of the image
315-
bbox_img = copy.deepcopy(img[int(target_bbox[3]):int(target_bbox[5]),
316-
int(target_bbox[2]):int(target_bbox[4]), :])
319+
bbox_img = copy.deepcopy(img[int(target_bbox[3]):int(
320+
target_bbox[5]), int(target_bbox[2]):int(target_bbox[
321+
4]), :])
317322
# reshape the cam image to the bbox size
318-
resized_exp = resize_cam(exp, (bbox_img.shape[1], bbox_img.shape[0]))
323+
resized_exp = resize_cam(exp,
324+
(bbox_img.shape[1], bbox_img.shape[0]))
319325
# add the bbox cam back to the bbox image
320326
bbox_overlay_vis = np.uint8(resized_exp * 0.4 + bbox_img * 0.6)
321327
# put the bbox_cam image to the original image
322328
overlay_vis = copy.deepcopy(img)
323-
overlay_vis[int(target_bbox[3]):int(target_bbox[5]),
324-
int(target_bbox[2]):int(target_bbox[4]), :] = bbox_overlay_vis
329+
overlay_vis[int(target_bbox[3]):int(target_bbox[5]), int(
330+
target_bbox[2]):int(target_bbox[4]), :] = bbox_overlay_vis
325331
else:
326332
print(
327333
'Only supported cam for backbone/neck feature and roi feature, the others are not supported temporarily!'

ppdet/utils/visualizer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ def draw_bbox(image, im_id, catid2name, bboxes, threshold):
8989
Draw bbox on image
9090
"""
9191
font_url = "https://paddledet.bj.bcebos.com/simfang.ttf"
92-
font_path , _ = get_path(font_url, "~/.cache/paddle/")
92+
font_path, _ = get_path(font_url, "~/.cache/paddle/")
9393
font_size = 18
9494
font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
9595

0 commit comments

Comments
 (0)