AOSSIE-Org · Rishab87 · Jan 19, 2025 · Jan 19, 2025
diff --git a/backend/app/facenet/facenet.py b/backend/app/facenet/facenet.py
@@ -7,8 +7,10 @@
 from app.yolov8.YOLOv8 import YOLOv8
 from app.database.faces import insert_face_embeddings
 
+# Request CUDA only if this onnxruntime build lists it; the CPU provider stays as the fallback.
+providers = [p for p in ('CUDAExecutionProvider', 'CPUExecutionProvider') if p in onnxruntime.get_available_providers()]
 session = onnxruntime.InferenceSession(
-    DEFAULT_FACENET_MODEL, providers=["CPUExecutionProvider"]
+    DEFAULT_FACENET_MODEL, providers=providers
 )
 
 input_tensor_name = session.get_inputs()[0].name
@@ -23,7 +25,7 @@ def get_face_embedding(image):
 
 def detect_faces(img_path):
     yolov8_detector = YOLOv8(
-        DEFAULT_FACE_DETECTION_MODEL, conf_thres=0.2, iou_thres=0.3
+        DEFAULT_FACE_DETECTION_MODEL, conf_thres=0.35, iou_thres=0.45
     )
     img = cv2.imread(img_path)
     if img is None:
@@ -34,9 +36,13 @@ def detect_faces(img_path):
 
     processed_faces, embeddings = [], []
     for box, score in zip(boxes, scores):
-        if score > 0.5:
+        if score > 0.3:
             x1, y1, x2, y2 = map(int, box)
-            face_img = img[y1:y2, x1:x2]
+            # Crop the box plus a fixed margin, clamped so the slice never leaves the image.
+            padding = 20
+            top, bottom = max(0, y1 - padding), min(img.shape[0], y2 + padding)
+            left, right = max(0, x1 - padding), min(img.shape[1], x2 + padding)
+            face_img = img[top:bottom, left:right]
             processed_face = preprocess_image(face_img)
             processed_faces.append(processed_face)
             embedding = get_face_embedding(processed_face)

diff --git a/backend/app/facenet/preprocess.py b/backend/app/facenet/preprocess.py
@@ -8,12 +8,16 @@ def preprocess_image(image):
     image = image.transpose((2, 0, 1))
     image = np.expand_dims(image, axis=0)
     image = image.astype(np.float32)
-    image /= 255.0
+    # Centre and scale: an input in 0..255 lands in roughly [-1, 1].
+    image = (image - 127.5) / 128.0
     return image
 
 def normalize_embedding(embedding):
     return embedding / np.linalg.norm(embedding)
 
 def cosine_similarity(embedding1, embedding2):
-    return np.dot(embedding1, embedding2)
-
+    """Return the cosine similarity of two vectors, or 0.0 if either has zero length."""
+    denom = np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
+    if denom == 0:
+        return 0.0
+    return np.dot(embedding1, embedding2) / denom
diff --git a/backend/app/routes/facetagging.py b/backend/app/routes/facetagging.py
@@ -23,7 +23,7 @@ def face_matching():
                 for embedding1 in img1_data["embeddings"]:
                     for embedding2 in img2_data["embeddings"]:
                         similarity = cosine_similarity(embedding1, embedding2)
-                        if similarity >= 0.5:
+                        if similarity >= 0.7:
                             img1 = img1_data["image_path"].split("/")[-1]
                             img2 = img2_data["image_path"].split("/")[-1]
                             similar_pairs.append(

diff --git a/backend/app/utils/classification.py b/backend/app/utils/classification.py
@@ -4,7 +4,7 @@
 
 
 def get_classes(img_path):
-    yolov8_detector = YOLOv8(DEFAULT_OBJ_DETECTION_MODEL, conf_thres=0.2, iou_thres=0.3)
+    yolov8_detector = YOLOv8(DEFAULT_OBJ_DETECTION_MODEL, conf_thres=0.4, iou_thres=0.5)
     img = cv2.imread(img_path)
     if img is None:
         print(f"Failed to load image: {img_path}")

diff --git a/backend/app/yolov8/YOLOv8.py b/backend/app/yolov8/YOLOv8.py
@@ -19,8 +19,11 @@ def __call__(self, image):
         return self.detect_objects(image)
 
     def initialize_model(self, path):
+        # Request CUDA only if this onnxruntime build lists it; the CPU provider stays as the fallback.
+        wanted = ('CUDAExecutionProvider', 'CPUExecutionProvider')
+        providers = [p for p in wanted if p in onnxruntime.get_available_providers()]
         self.session = onnxruntime.InferenceSession(path,
-                                                    providers=onnxruntime.get_available_providers())
+                                                    providers=providers)
         # Get model info
         self.get_input_details()
         self.get_output_details()

diff --git a/backend/app/yolov8/utils.py b/backend/app/yolov8/utils.py
@@ -82,7 +82,7 @@ def xywh2xyxy(x):
     return y
 
 
-def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
+def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3, confidence_threshold=0.3):
     det_img = image.copy()
 
     img_height, img_width = image.shape[:2]
@@ -93,11 +93,14 @@ def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
 
     # Draw bounding boxes and labels of detections
     for class_id, box, score in zip(class_ids, boxes, scores):
-        color = colors[class_id]
-
+        # Detections below the threshold, or with an id past the end of class_names, are drawn as "unknown".
+        if score < confidence_threshold or class_id >= len(class_names):
+            color = colors[-1]
+            label = "unknown"
+        else:
+            color = colors[class_id]
+            label = class_names[class_id]
         draw_box(det_img, box, color)
-
-        label = class_names[class_id]
         caption = f'{label} {int(score * 100)}%'
         draw_text(det_img, caption, box, color, font_size, text_thickness)