
Commit 3c4f8c9

Add a model for facial expression recognition (#100)

1 parent a85f1ea

File tree

13 files changed, +488 -14 lines changed

README.md

Lines changed: 5 additions & 0 deletions

```diff
@@ -19,6 +19,7 @@ Guidelines:
 | ------------------------------------------------------- | ----------------------------- | ---------- | -------------- | ------------ | --------------- | ------------ | ----------- |
 | [YuNet](./models/face_detection_yunet)                  | Face Detection                | 160x120    | 1.45           | 6.22         | 12.18           | 4.04         | 86.69       |
 | [SFace](./models/face_recognition_sface)                | Face Recognition              | 112x112    | 8.65           | 99.20        | 24.88           | 46.25        | ---         |
+| [FER](./models/facial_expression_recognition/)          | Facial Expression Recognition | 112x112    | 4.43           | 49.86        | 31.07           | 108.53\*     | ---         |
 | [LPD-YuNet](./models/license_plate_detection_yunet/)    | License Plate Detection       | 320x240    | ---            | 168.03       | 56.12           | 29.53        | ---         |
 | [YOLOX](./models/object_detection_yolox/)               | Object Detection              | 640x640    | 176.68         | 1496.70      | 388.95          | 420.98       | ---         |
 | [NanoDet](./models/object_detection_nanodet/)           | Object Detection              | 416x416    | 157.91         | 220.36       | 64.94           | 116.64       | ---         |
@@ -62,6 +63,10 @@ Some examples are listed below. You can find more in the directory of each model

 ![largest selfie](./models/face_detection_yunet/examples/largest_selfie.jpg)

+### Facial Expression Recognition with [Progressive Teacher](./models/facial_expression_recognition/)
+
+![fer demo](./models/facial_expression_recognition/examples/selfie.jpg)
+
 ### Human Segmentation with [PP-HumanSeg](./models/human_segmentation_pphumanseg/)

 ![messi](./models/human_segmentation_pphumanseg/examples/messi.jpg)
```
Lines changed: 16 additions & 0 deletions (new benchmark config)

```yaml
Benchmark:
  name: "Facial Expression Recognition Benchmark"
  type: "Recognition"
  data:
    path: "benchmark/data/facial_expression_recognition/fer_evaluation"
    files: ["RAF_test_0_61.jpg", "RAF_test_0_30.jpg", "RAF_test_6_25.jpg"]
  metric:  # 'sizes' is omitted since this model requires input of fixed size
    warmup: 30
    repeat: 10
    reduction: "median"
  backend: "default"
  target: "cpu"

Model:
  name: "FacialExpressionRecog"
  modelPath: "models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx"
```

benchmark/download_data.py

Lines changed: 4 additions & 0 deletions

```diff
@@ -173,6 +173,10 @@ def get_confirm_token(response): # in case of large files
         url='https://drive.google.com/u/0/uc?id=1BRIozREIzqkm_aMQ581j93oWoS-6TLST&export=download',
         sha='03892b9036c58d9400255ff73858caeec1f46609',
         filename='face_recognition.zip'),
+    facial_expression_recognition=Downloader(name='facial_expression_recognition',
+        url='https://drive.google.com/u/0/uc?id=13ZE0Pz302z1AQmBmYGuowkTiEXVLyFFZ&export=download',
+        sha='8f757559820c8eaa1b1e0065f9c3bbbd4f49efe2',
+        filename='facial_expression_recognition.zip'),
     text=Downloader(name='text',
         url='https://drive.google.com/u/0/uc?id=1lTQdZUau7ujHBqp0P6M1kccnnJgO-dRj&export=download',
         sha='a40cf095ceb77159ddd2a5902f3b4329696dd866',
```
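The `sha` field pins each archive to a SHA-1 checksum. A self-contained sketch of the verification this implies (the real script additionally handles Google Drive confirm tokens for large files; the URL below is a placeholder, not the real Drive link):

```python
import hashlib
import urllib.request

def sha1_of(path, chunk_size=1 << 20):
    """Stream the file and return its SHA-1 hex digest."""
    h = hashlib.sha1()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()

url = 'https://example.com/facial_expression_recognition.zip'  # placeholder URL
expected = '8f757559820c8eaa1b1e0065f9c3bbbd4f49efe2'

urllib.request.urlretrieve(url, 'facial_expression_recognition.zip')
assert sha1_of('facial_expression_recognition.zip') == expected, 'checksum mismatch'
```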

benchmark/utils/dataloaders/recognition.py

Lines changed: 4 additions & 1 deletion

```diff
@@ -16,7 +16,10 @@ def __init__(self, **kwargs):
     def _load_label(self):
         labels = dict.fromkeys(self._files, None)
         for filename in self._files:
-            labels[filename] = np.loadtxt(os.path.join(self._path, '{}.txt'.format(filename[:-4])), ndmin=2)
+            if os.path.exists(os.path.join(self._path, '{}.txt'.format(filename[:-4]))):
+                labels[filename] = np.loadtxt(os.path.join(self._path, '{}.txt'.format(filename[:-4])), ndmin=2)
+            else:
+                labels[filename] = None
         return labels

     def __iter__(self):
```
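This makes the per-image label files optional: the FER evaluation images listed in the config above ship without sidecar `.txt` files, so a missing label now falls through to `None` instead of crashing `np.loadtxt`. The same pattern in isolation (the helper name is ours, not the repo's):

```python
import os
import numpy as np

def load_optional_label(root, image_name):
    """Return the label matrix for image_name, or None when the
    sidecar .txt file does not exist (e.g. the FER benchmark images)."""
    txt_path = os.path.join(root, os.path.splitext(image_name)[0] + '.txt')
    return np.loadtxt(txt_path, ndmin=2) if os.path.exists(txt_path) else None
```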

benchmark/utils/metrics/recognition.py

Lines changed: 11 additions & 3 deletions

```diff
@@ -12,12 +12,20 @@ def forward(self, model, *args, **kwargs):
         img, bboxes = args

         self._timer.reset()
-        for idx, bbox in enumerate(bboxes):
+        if bboxes is not None:
+            for idx, bbox in enumerate(bboxes):
+                for _ in range(self._warmup):
+                    model.infer(img, bbox)
+                for _ in range(self._repeat):
+                    self._timer.start()
+                    model.infer(img, bbox)
+                    self._timer.stop()
+        else:
             for _ in range(self._warmup):
-                model.infer(img, bbox)
+                model.infer(img, None)
             for _ in range(self._repeat):
                 self._timer.start()
-                model.infer(img, bbox)
+                model.infer(img, None)
                 self._timer.stop()

         return self._getResult()
```
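The warmup/repeat/median pattern implemented here is worth seeing in isolation. A minimal, self-contained sketch of the same timing logic (the function and names are ours, not the repo's `Timer` class):

```python
import time
from statistics import median

def benchmark(fn, warmup=30, repeat=10):
    """Run fn() the way the metric above does: discard warmup runs,
    then report the median of the timed repeats, in milliseconds."""
    for _ in range(warmup):        # untimed warmup runs
        fn()
    samples = []
    for _ in range(repeat):        # timed runs
        t0 = time.perf_counter()
        fn()
        samples.append((time.perf_counter() - t0) * 1000.0)
    return median(samples)

# Per-face timing when boxes exist, whole-image timing otherwise,
# mirroring the branch added above:
#   benchmark(lambda: model.infer(img, bbox))   # bboxes is not None
#   benchmark(lambda: model.infer(img, None))   # bboxes is None
```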

models/__init__.py

Lines changed: 2 additions & 1 deletion

```diff
@@ -14,6 +14,7 @@
 from .license_plate_detection_yunet.lpd_yunet import LPD_YuNet
 from .object_detection_nanodet.nanodet import NanoDet
 from .object_detection_yolox.yolox import YoloX
+from .facial_expression_recognition.facial_fer_model import FacialExpressionRecog

 class Registery:
     def __init__(self, name):
@@ -43,4 +44,4 @@ def register(self, item):
 MODELS.register(LPD_YuNet)
 MODELS.register(NanoDet)
 MODELS.register(YoloX)
-
+MODELS.register(FacialExpressionRecog)
```
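For context, the `Registery` class this file defines is a simple name-to-class map, so a benchmark config naming `"FacialExpressionRecog"` can be resolved to the class registered above. A minimal sketch of the pattern (illustrative, not the repo's exact implementation):

```python
class Registry:
    """Map class names to classes so a config string like
    'FacialExpressionRecog' resolves to a constructor."""
    def __init__(self, name):
        self._name = name
        self._dict = {}

    def register(self, item):
        self._dict[item.__name__] = item

    def get(self, key):
        return self._dict[key]

MODELS = Registry('Models')

class FacialExpressionRecog:  # stand-in for the real model class
    pass

MODELS.register(FacialExpressionRecog)
model_cls = MODELS.get('FacialExpressionRecog')  # resolved from a config name
```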
Lines changed: 40 additions & 0 deletions (new model README)

# Progressive Teacher

Progressive Teacher: [Boosting Facial Expression Recognition by A Semi-Supervised Progressive Teacher](https://scholar.google.com/citations?view_op=view_citation&hl=zh-CN&user=OCwcfAwAAAAJ&citation_for_view=OCwcfAwAAAAJ:u5HHmVD_uO8C)

Note:

- Progressive Teacher is contributed by [Jing Jiang](https://scholar.google.com/citations?user=OCwcfAwAAAAJ&hl=zh-CN).
- [MobileFaceNet](https://link.springer.com/chapter/10.1007/978-3-319-97909-0_46) is used as the backbone, and the model is able to classify seven basic facial expressions (angry, disgust, fearful, happy, neutral, sad, surprised).
- [facial_expression_recognition_mobilefacenet_2022july.onnx](https://github.com/opencv/opencv_zoo/raw/master/models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx) is implemented thanks to [Chengrui Wang](https://github.com/opencv).

Results of accuracy evaluation on [RAF-DB](http://whdeng.cn/RAF/model1.html):

| Models              | Accuracy |
| ------------------- | -------- |
| Progressive Teacher | 88.27%   |

## Demo

***NOTE***: This demo uses [../face_detection_yunet](../face_detection_yunet) as the face detector, which supports 5-landmark detection for now (2021sep).

Run the following command to try the demo:

```shell
# recognize the facial expression on images
python demo.py --input /path/to/image
```

### Example outputs

Note: Zoom in to see the recognized facial expression in the top-left corner of each face box.

![fer demo](./examples/selfie.jpg)

## License

All files in this directory are licensed under the [Apache 2.0 License](./LICENSE).

## Reference

- https://ieeexplore.ieee.org/abstract/document/9629313
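Beyond the command-line demo, the model class can be driven directly with the same calls the demo script below makes. A minimal sketch (paths are placeholders; we assume, from the demo's `np.concatenate` usage, that `infer` returns a length-1 array per face):

```python
import sys
import cv2 as cv

from facial_fer_model import FacialExpressionRecog

sys.path.append('../face_detection_yunet')
from yunet import YuNet

img = cv.imread('/path/to/image')  # replace with a real path

detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx')
detector.setInputSize([img.shape[1], img.shape[0]])
fer = FacialExpressionRecog(modelPath='./facial_expression_recognition_mobilefacenet_2022july.onnx')

dets = detector.infer(img)  # one row per face: bbox, 5 landmarks, score
if dets is not None:
    for face in dets:
        expr = fer.infer(img, face[:-1])  # drop the score column
        print(FacialExpressionRecog.getDesc(expr[0]))
```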
Lines changed: 131 additions & 0 deletions (the demo script referenced above)

```python
import sys
import argparse
import copy
import datetime

import numpy as np
import cv2 as cv

from facial_fer_model import FacialExpressionRecog

sys.path.append('../face_detection_yunet')
from yunet import YuNet


def str2bool(v):
    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
        return True
    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
        return False
    else:
        raise NotImplementedError


backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
help_msg_targets = "Choose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
try:
    backends += [cv.dnn.DNN_BACKEND_TIMVX]
    targets += [cv.dnn.DNN_TARGET_NPU]
    help_msg_backends += "; {:d}: TIMVX"
    help_msg_targets += "; {:d}: NPU"
except AttributeError:
    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')

parser = argparse.ArgumentParser(description='Facial Expression Recognition')
parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
parser.add_argument('--model', '-fm', type=str, default='./facial_expression_recognition_mobilefacenet_2022july.onnx', help='Path to the facial expression recognition model.')
parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
args = parser.parse_args()


def visualize(image, det_res, fer_res, box_color=(0, 255, 0), text_color=(0, 0, 255)):
    print('%s %3d faces detected.' % (datetime.datetime.now(), len(det_res)))

    output = image.copy()
    landmark_color = [
        (255, 0, 0),    # right eye
        (0, 0, 255),    # left eye
        (0, 255, 0),    # nose tip
        (255, 0, 255),  # right mouth corner
        (0, 255, 255)   # left mouth corner
    ]

    for ind, (det, fer_type) in enumerate(zip(det_res, fer_res)):
        bbox = det[0:4].astype(np.int32)
        fer_type = FacialExpressionRecog.getDesc(fer_type)
        print("Face %2d: %d %d %d %d %s." % (ind, bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3], fer_type))
        cv.rectangle(output, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), box_color, 2)
        cv.putText(output, fer_type, (bbox[0], bbox[1]+12), cv.FONT_HERSHEY_DUPLEX, 0.5, text_color)
        landmarks = det[4:14].astype(np.int32).reshape((5, 2))
        for idx, landmark in enumerate(landmarks):
            cv.circle(output, landmark, 2, landmark_color[idx], 2)
    return output


def process(detect_model, fer_model, frame):
    h, w, _ = frame.shape
    detect_model.setInputSize([w, h])
    dets = detect_model.infer(frame)

    if dets is None:
        return False, None, None

    fer_res = np.zeros(0, dtype=np.int8)
    for face_points in dets:
        fer_res = np.concatenate((fer_res, fer_model.infer(frame, face_points[:-1])), axis=0)
    return True, dets, fer_res


if __name__ == '__main__':
    detect_model = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx')

    fer_model = FacialExpressionRecog(modelPath=args.model,
                                      backendId=args.backend,
                                      targetId=args.target)

    # If input is an image, run once and optionally save/show the result
    if args.input is not None:
        image = cv.imread(args.input)

        # Get detection and FER results
        status, dets, fer_res = process(detect_model, fer_model, image)

        if status:
            # Draw results on the input image
            image = visualize(image, dets, fer_res)

            # Save results
            if args.save:
                cv.imwrite('result.jpg', image)
                print('Results saved to result.jpg\n')

            # Visualize results in a new window
            if args.vis:
                cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
                cv.imshow(args.input, image)
                cv.waitKey(0)
    else:  # Omit input to use the default camera
        deviceId = 0
        cap = cv.VideoCapture(deviceId)

        while cv.waitKey(1) < 0:
            hasFrame, frame = cap.read()
            if not hasFrame:
                print('No frames grabbed!')
                break

            # Get detection and FER results
            status, dets, fer_res = process(detect_model, fer_model, frame)

            if status:
                # Draw results on the frame
                frame = visualize(frame, dets, fer_res)

            # Visualize results in a new window
            cv.imshow('FER Demo', frame)
```
Lines changed: 3 additions & 0 deletions (Git LFS pointer)

```
version https://git-lfs.github.com/spec/v1
oid sha256:541597ca330e0e3babe883d0fa6ab121b0e3da65c9cc099c05ff274b3106a658
size 1340132
```
Lines changed: 3 additions & 0 deletions (Git LFS pointer)

```
version https://git-lfs.github.com/spec/v1
oid sha256:4f61307602fc089ce20488a31d4e4614e3c9753a7d6c41578c854858b183e1a9
size 4791892
```
