# model_utilities.py
# Imports
from tqdm import tqdm

# PyTorch Imports
import torch
import torchvision
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.anchor_utils import AnchorGenerator
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchinfo import summary

# COCO Imports
from coco_eval import CocoEvaluator, convert_to_coco_api

# Model Class: LoggiBarcodeDetectionModel
class LoggiBarcodeDetectionModel(torch.nn.Module):
    def __init__(self, min_img_size=800, max_img_size=1333, nr_classes=2, backbone="resnet101", backbone_pretrained=True):
        super(LoggiBarcodeDetectionModel, self).__init__()

        # Init variables
        self.nr_classes = nr_classes
        self.backbone = backbone
        self.backbone_pretrained = backbone_pretrained

        # Select the backbone
        if self.backbone == "mobilenet_v2":
            # Source: https://pytorch.org/vision/stable/_modules/torchvision/models/detection/mask_rcnn.html#maskrcnn_resnet50_fpn
            # Load a pre-trained classification model and keep only its feature extractor
            backbone_ = torchvision.models.mobilenet_v2(
                pretrained=self.backbone_pretrained).features

            # MaskRCNN needs to know the number of output channels of the backbone
            # For mobilenet_v2 it is 1280, so we set it here
            backbone_.out_channels = 1280

            # Make the RPN generate 5 x 3 anchors per spatial location: 5 sizes and 3 aspect ratios
            # We use a Tuple[Tuple[int]] because each feature map could have different sizes and aspect ratios
            anchor_generator = AnchorGenerator(
                sizes=((32, 64, 128, 256, 512),), aspect_ratios=((0.5, 1.0, 2.0),))

            # Define the feature maps used for region-of-interest cropping, and the size of the crop after rescaling
            # If the backbone returns a Tensor, featmap_names is expected to be ['0']
            # More generally, the backbone returns an OrderedDict[Tensor], and featmap_names selects which feature maps to use
            roi_pooler = torchvision.ops.MultiScaleRoIAlign(
                featmap_names=['0'], output_size=7, sampling_ratio=2)
            mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
                featmap_names=['0'], output_size=14, sampling_ratio=2)

            # Put the pieces together inside a MaskRCNN model
            self.model = MaskRCNN(backbone_, min_size=min_img_size, max_size=max_img_size, num_classes=self.nr_classes,
                                  rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler, mask_roi_pool=mask_roi_pooler)

        # You can add your backbones here...
        # elif self.backbone == "your_backbone_name"

        elif self.backbone == "resnet101":
            # Note: this branch loads a ResNeXt-101 32x8d backbone with a Feature Pyramid Network (FPN)
            print("Using ResNeXt-101 32x8d FPN backbone...")
            backbone_ = resnet_fpn_backbone('resnext101_32x8d', pretrained=self.backbone_pretrained, trainable_layers=0)
            # modules = list(backbone_.children())[:-1]
            # backbone_ = torch.nn.Sequential(*modules)
            # for param in backbone_.parameters():
            #     param.requires_grad = False

            # The FPN exposes 256 output channels per feature map
            backbone_.out_channels = 256

            # One (size, aspect_ratios) entry per FPN level: 1 anchor size and 3 aspect ratios each
            # (equivalent to the un-nested tuples after AnchorGenerator's internal normalisation)
            anchor_generator = AnchorGenerator(
                sizes=((32,), (64,), (128,), (256,), (512,)),
                aspect_ratios=((0.5, 1.0, 2.0),) * 5)

            # Same region-of-interest cropping setup as above
            roi_pooler = torchvision.ops.MultiScaleRoIAlign(
                featmap_names=['0'], output_size=7, sampling_ratio=2)
            mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
                featmap_names=['0'], output_size=14, sampling_ratio=2)

            # Put the pieces together inside a MaskRCNN model
            self.model = MaskRCNN(backbone_, min_size=min_img_size, max_size=max_img_size, num_classes=self.nr_classes,
                                  rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
                                  mask_roi_pool=mask_roi_pooler)

    def forward(self, inputs, targets=None):
        # In training mode (with targets provided) MaskRCNN returns a dict of losses;
        # otherwise it returns a list of per-image prediction dicts
        if self.training and targets is not None:
            outputs = self.model(inputs, targets)
        else:
            outputs = self.model(inputs)

        return outputs

    def summary(self):
        return summary(self.model, (1, 3, 1024, 1024))
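
# Usage sketch (illustrative names): in train mode the wrapped MaskRCNN expects a list of
# image tensors plus a list of target dicts ("boxes", "labels", "masks", "image_id", ...)
# and returns a dict of losses; in eval mode it only needs the images and returns one
# prediction dict per image ("boxes", "labels", "scores", "masks"). For example:
#     model = LoggiBarcodeDetectionModel(backbone="mobilenet_v2")
#     model.train()
#     loss_dict = model(images, targets)    # dict of RPN/ROI losses
#     model.eval()
#     predictions = model(images)           # list of per-image prediction dicts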

@torch.inference_mode()
def evaluate(model, data_loader, device):
    print("\nEvaluating...")

    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()

    # Build the COCO ground-truth API from the dataset and set up an evaluator
    # for both bounding boxes and segmentation masks
    # coco = get_coco_api_from_dataset(data_loader.dataset)
    coco = convert_to_coco_api(data_loader.dataset)
    iou_types = ['bbox', 'segm']
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for images, targets in tqdm(data_loader):
        images = list(img.to(device) for img in images)

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        outputs = model(images)
        outputs = [{k: v.to(cpu_device) for k, v in t.items()}
                   for t in outputs]
        res = {target["image_id"].item(): output for target,
               output in zip(targets, outputs)}
        coco_evaluator.update(res)

    coco_evaluator.synchronize_between_processes()

    # Accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)

    return coco_evaluator
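
# Convenience sketch: pull the headline mAPs out of the CocoEvaluator returned by
# evaluate(), so they can be fed into visum2022score() below. This assumes the local
# coco_eval module follows the torchvision detection reference, where
# coco_evaluator.coco_eval maps each IoU type to a pycocotools COCOeval whose
# .stats[0] is AP@[IoU=0.50:0.95]; the helper name extract_mAPs is illustrative.
def extract_mAPs(coco_evaluator):
    bboxes_mAP = float(coco_evaluator.coco_eval["bbox"].stats[0])
    masks_mAP = float(coco_evaluator.coco_eval["segm"].stats[0])
    return bboxes_mAP, masks_mAP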

# Function: Compute VISUM 2022 Competition Metric
def visum2022score(bboxes_mAP, masks_mAP, bboxes_mAP_weight=0.5):
    # The mask weight is the complement of the bbox weight
    masks_mAP_weight = 1 - bboxes_mAP_weight

    # Weighted sum of the two mAPs; with the default weights,
    # score = 0.5 * bboxes_mAP + 0.5 * masks_mAP
    score = (bboxes_mAP_weight * bboxes_mAP) + (masks_mAP_weight * masks_mAP)

    return score
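
if __name__ == "__main__":
    # Minimal smoke test (illustrative): build the model with the lighter MobileNetV2
    # backbone, skip the pretrained download, and run a forward pass on random images
    # in eval mode; then show the competition score arithmetic on made-up mAP values.
    model = LoggiBarcodeDetectionModel(nr_classes=2, backbone="mobilenet_v2", backbone_pretrained=False)
    model.eval()

    with torch.no_grad():
        dummy_images = [torch.rand(3, 800, 800), torch.rand(3, 600, 900)]
        predictions = model(dummy_images)
    print([p["boxes"].shape for p in predictions])

    # With bbox mAP = 0.60, mask mAP = 0.40 and equal weights: 0.5*0.60 + 0.5*0.40 = 0.50
    print(visum2022score(0.60, 0.40))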