diff --git a/backend/app/facenet/facenet.py b/backend/app/facenet/facenet.py
index 189c3448..df2f7e56 100644
--- a/backend/app/facenet/facenet.py
+++ b/backend/app/facenet/facenet.py
@@ -7,8 +7,10 @@
 from app.yolov8.YOLOv8 import YOLOv8
 from app.database.faces import insert_face_embeddings
 
+providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if onnxruntime.get_device() == 'GPU' else ['CPUExecutionProvider']
+
 session = onnxruntime.InferenceSession(
-    DEFAULT_FACENET_MODEL, providers=["CPUExecutionProvider"]
+    DEFAULT_FACENET_MODEL, providers=providers
 )
 
 input_tensor_name = session.get_inputs()[0].name
@@ -23,7 +25,7 @@ def get_face_embedding(image):
 
 def detect_faces(img_path):
     yolov8_detector = YOLOv8(
-        DEFAULT_FACE_DETECTION_MODEL, conf_thres=0.2, iou_thres=0.3
+        DEFAULT_FACE_DETECTION_MODEL, conf_thres=0.35, iou_thres=0.45
     )
     img = cv2.imread(img_path)
     if img is None:
@@ -34,9 +36,12 @@ def detect_faces(img_path):
     processed_faces, embeddings = [], []
 
     for box, score in zip(boxes, scores):
-        if score > 0.5:
+        if score > 0.3:
             x1, y1, x2, y2 = map(int, box)
-            face_img = img[y1:y2, x1:x2]
+            # Crop the face with a small margin, clamped to the image bounds
+            padding = 20
+            face_img = img[max(0, y1 - padding):min(img.shape[0], y2 + padding),
+                           max(0, x1 - padding):min(img.shape[1], x2 + padding)]
             processed_face = preprocess_image(face_img)
             processed_faces.append(processed_face)
             embedding = get_face_embedding(processed_face)
diff --git a/backend/app/facenet/preprocess.py b/backend/app/facenet/preprocess.py
index 34382b97..7a64ce2c 100644
--- a/backend/app/facenet/preprocess.py
+++ b/backend/app/facenet/preprocess.py
@@ -8,12 +8,12 @@ def preprocess_image(image):
     image = image.transpose((2, 0, 1))
     image = np.expand_dims(image, axis=0)
     image = image.astype(np.float32)
-    image /= 255.0
+    # FaceNet-style fixed standardization: map [0, 255] to roughly [-1, 1]
+    image = (image - 127.5) / 128.0
     return image
 
 def normalize_embedding(embedding):
     return embedding / np.linalg.norm(embedding)
 
 def cosine_similarity(embedding1, embedding2):
-    return np.dot(embedding1, embedding2)
-
+    return np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
diff --git a/backend/app/routes/facetagging.py b/backend/app/routes/facetagging.py
index 035c2130..bc1da76f 100644
--- a/backend/app/routes/facetagging.py
+++ b/backend/app/routes/facetagging.py
@@ -23,7 +23,7 @@ def face_matching():
     for embedding1 in img1_data["embeddings"]:
         for embedding2 in img2_data["embeddings"]:
             similarity = cosine_similarity(embedding1, embedding2)
-            if similarity >= 0.5:
+            if similarity >= 0.7:
                 img1 = img1_data["image_path"].split("/")[-1]
                 img2 = img2_data["image_path"].split("/")[-1]
                 similar_pairs.append(
diff --git a/backend/app/utils/classification.py b/backend/app/utils/classification.py
index b73c96b1..5a3ac2ed 100644
--- a/backend/app/utils/classification.py
+++ b/backend/app/utils/classification.py
@@ -4,7 +4,7 @@
 
 
 def get_classes(img_path):
-    yolov8_detector = YOLOv8(DEFAULT_OBJ_DETECTION_MODEL, conf_thres=0.2, iou_thres=0.3)
+    yolov8_detector = YOLOv8(DEFAULT_OBJ_DETECTION_MODEL, conf_thres=0.4, iou_thres=0.5)
     img = cv2.imread(img_path)
     if img is None:
         print(f"Failed to load image: {img_path}")
diff --git a/backend/app/yolov8/YOLOv8.py b/backend/app/yolov8/YOLOv8.py
index 4a5c40e0..07f34252 100644
--- a/backend/app/yolov8/YOLOv8.py
+++ b/backend/app/yolov8/YOLOv8.py
@@ -19,8 +19,9 @@ def __call__(self, image):
         return self.detect_objects(image)
 
     def initialize_model(self, path):
+        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if onnxruntime.get_device() == 'GPU' else ['CPUExecutionProvider']
         self.session = onnxruntime.InferenceSession(path,
-                                                    providers=onnxruntime.get_available_providers())
+                                                    providers=providers)
         # Get model info
         self.get_input_details()
         self.get_output_details()
diff --git a/backend/app/yolov8/utils.py b/backend/app/yolov8/utils.py
index 3e4d0f71..5c5d53f9 100644
--- a/backend/app/yolov8/utils.py
+++ b/backend/app/yolov8/utils.py
@@ -82,7 +82,7 @@ def xywh2xyxy(x):
     return y
 
 
-def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
+def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3, confidence_threshold=0.3):
     det_img = image.copy()
 
     img_height, img_width = image.shape[:2]
@@ -93,11 +93,13 @@ def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
 
     # Draw bounding boxes and labels of detections
     for class_id, box, score in zip(class_ids, boxes, scores):
-        color = colors[class_id]
-
+        if score < confidence_threshold or class_id >= len(class_names):
+            color = colors[-1]
+            label = "unknown"
+        else:
+            color = colors[class_id]
+            label = class_names[class_id]
         draw_box(det_img, box, color)
-
-        label = class_names[class_id]
         caption = f'{label} {int(score * 100)}%'
         draw_text(det_img, caption, box, color, font_size, text_thickness)
 
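Note (not part of the patch): a minimal standalone sketch that sanity-checks the two embedding-math changes above on synthetic data. The 160x160 input size and the random 512-dim "embeddings" are illustrative assumptions, not values taken from the repo.

import numpy as np

def preprocess_image(image):
    # Mirrors the patched preprocess.py: HWC uint8 -> NCHW float32,
    # standardized to roughly [-1, 1] instead of scaled to [0, 1]
    image = image.transpose((2, 0, 1))
    image = np.expand_dims(image, axis=0)
    image = image.astype(np.float32)
    return (image - 127.5) / 128.0

def cosine_similarity(e1, e2):
    # Mirrors the patched cosine_similarity: dividing by both norms keeps
    # the score in [-1, 1], so the raised 0.7 match threshold is meaningful
    # even for embeddings that never went through normalize_embedding()
    return np.dot(e1, e2) / (np.linalg.norm(e1) * np.linalg.norm(e2))

rng = np.random.default_rng(0)
face = rng.integers(0, 256, size=(160, 160, 3), dtype=np.uint8)
x = preprocess_image(face)
assert x.shape == (1, 3, 160, 160)
assert -1.0 <= x.min() and x.max() <= 1.0     # standardized range

e = rng.normal(size=512)
assert np.isclose(cosine_similarity(e, e), 1.0)               # same face
assert abs(cosine_similarity(e, rng.normal(size=512))) < 0.7  # unrelated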