diff --git a/docker-compose.yml b/docker-compose.yml index 1e6fc6d4..6071fe82 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -181,7 +181,7 @@ services: image: ghcr.io/shared-reality-lab/image-preprocessor-object-detection-llm:${REGISTRY_TAG} restart: "no" environment: - - CONF_THRESHOLD=0.9 + - CONF_THRESHOLD=0.8 - PII_LOGGING_ENABLED=${PII_LOGGING_ENABLED} - WARMUP_ENABLED=true labels: diff --git a/preprocessors/object-detection-llm/Dockerfile b/preprocessors/object-detection-llm/Dockerfile index c39cbd74..834f5f03 100644 --- a/preprocessors/object-detection-llm/Dockerfile +++ b/preprocessors/object-detection-llm/Dockerfile @@ -25,4 +25,4 @@ ENV FLASK_APP=object-detection-llm.py HEALTHCHECK --interval=60s --timeout=10s --start-period=120s --retries=5 CMD curl -f http://localhost:5000/health || exit 1 -CMD [ "gunicorn", "object-detection-llm:app", "-b", "0.0.0.0:5000", "--capture-output", "--log-level=debug" ] \ No newline at end of file +CMD [ "gunicorn", "object-detection-llm:app", "-b", "0.0.0.0:5000", "--capture-output", "--log-level=debug", "--timeout", "75"] \ No newline at end of file diff --git a/preprocessors/object-detection-llm/object-detection-llm.py b/preprocessors/object-detection-llm/object-detection-llm.py index bd475166..76e0e366 100644 --- a/preprocessors/object-detection-llm/object-detection-llm.py +++ b/preprocessors/object-detection-llm/object-detection-llm.py @@ -68,33 +68,50 @@ def normalize_bbox(bbox, width, height): ] -def filter_objects_by_confidence(objects, threshold): +def process_objects(objects, threshold): """ - Filter objects based on confidence score threshold - and replace underscores in labels with spaces. + Process detected objects by filtering, transforming, and enriching them. + + - Filters objects by confidence threshold + - Normalizes labels (replaces underscores with spaces) + - Renumbers IDs sequentially + - Calculates geometric properties (area, centroid) Args: objects (list): List of detected objects with confidence scores threshold (float): Minimum confidence score (0-1) Returns: - list: Filtered list of objects meeting the confidence threshold + list: Processed objects with computed properties """ - filtered = [] + processed = [] for obj in objects: if obj.get("confidence", 0) >= threshold: obj['type'] = obj['type'].replace('_', ' ') - filtered.append(obj) + processed.append(obj) # Renumber IDs sequentially after filtering - for idx, obj in enumerate(filtered): + for idx, obj in enumerate(processed): obj['ID'] = idx + x1, y1, x2, y2 = obj["dimensions"] + + # Calculate area (width * height) + area = (x2 - x1) * (y2 - y1) + + # Calculate centroid + centroid_x = (x1 + x2) / 2 + centroid_y = (y1 + y2) / 2 + + # Create object entry according to schema + obj["area"] = area + obj["centroid"] = [centroid_x, centroid_y] + logging.debug( - f"Filtered {len(objects)} objects to {len(filtered)} " + f"Processed {len(objects)} objects to {len(processed)} " f"objects with confidence >= {threshold}" ) - return filtered + return processed @app.route("/preprocessor", methods=['POST']) @@ -148,8 +165,6 @@ def detect_objects(): parse_json=True ) - logging.pii(f"LLM object detection output: {object_json}") - if object_json is None or len(object_json.get("objects", [])) == 0: logging.error("Failed to extract objects from the graphic.") return jsonify({"error": "No objects extracted"}), 204 @@ -162,8 +177,9 @@ def detect_objects(): obj["dimensions"], width, height ) - # Filter objects by confidence threshold - object_json["objects"] = filter_objects_by_confidence( + # Filter objects by confidence threshold, add area and centroid, + # remove underscores from labels, and renumber IDs + object_json["objects"] = process_objects( object_json["objects"], CONF_THRESHOLD ) diff --git a/utils/llm/prompts.py b/utils/llm/prompts.py index 2e2d669f..c6b95124 100644 --- a/utils/llm/prompts.py +++ b/utils/llm/prompts.py @@ -45,6 +45,7 @@ 3. Use simple and common object labels (e.g., "car", "person", "tree"). 4. Include only objects that are clearly visible and identifiable. 5. Focus on the major and important objects in the image. +6. Multiple objects can have the same confidence score. """ ###