diff --git a/preprocessors/autour/Dockerfile b/preprocessors/autour/Dockerfile index a79f1c6bb..310573b66 100644 --- a/preprocessors/autour/Dockerfile +++ b/preprocessors/autour/Dockerfile @@ -11,6 +11,7 @@ RUN pip3 install --upgrade pip && \ COPY /schemas /app/schemas COPY /config /app/config +COPY /utils /app/utils COPY /preprocessors/autour/ /app EXPOSE 5000 diff --git a/preprocessors/autour/autour.py b/preprocessors/autour/autour.py index 33b8a8656..fc1c9a708 100644 --- a/preprocessors/autour/autour.py +++ b/preprocessors/autour/autour.py @@ -15,20 +15,22 @@ # . import os -import json import time import logging -import jsonschema import requests from flask import Flask, request, jsonify from datetime import datetime from config.logging_utils import configure_logging +from utils.validation import Validator configure_logging() app = Flask(__name__) logging.basicConfig(level=logging.DEBUG) +# Initialize shared validator +VALIDATOR = Validator(data_schema='./schemas/preprocessors/autour.schema.json') + @app.route('/preprocessor', methods=['POST', 'GET']) def get_map_data(): @@ -36,35 +38,13 @@ def get_map_data(): Gets data on locations nearby a map from the Autour API """ logging.debug("Received request") - # Load schemas - with open('./schemas/preprocessors/autour.schema.json') as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definition_schema = json.load(jsonfile) - schema_store = { - data_schema['$id']: data_schema, - schema['$id']: schema, - definition_schema['$id']: definition_schema - } + content = request.get_json() - with open('./schemas/request.schema.json') as jsonfile: - request_schema = json.load(jsonfile) # Validate incoming request - resolver = jsonschema.RefResolver.from_schema( - request_schema, store=schema_store) - - validated = validate( - schema=request_schema, - data=content, - 
resolver=resolver, - json_message="Invalid Request JSON format", - error_code=400) - - if validated is not None: - return validated + ok, _ = VALIDATOR.check_request(content) + if not ok: + return jsonify("Invalid Request JSON format"), 400 # Check if request is for a map if 'coordinates' not in content and 'placeID' not in content: @@ -113,19 +93,10 @@ def get_map_data(): 'places': results, } - # Use response schema to validate response - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) - - validated = validate( - schema=data_schema, - data=data, - resolver=resolver, - json_message='Invalid Preprocessor JSON format', - error_code=500) - - if validated is not None: - return validated + # Validate preprocessor data against its schema + ok, _ = VALIDATOR.check_data(data) + if not ok: + return jsonify('Invalid Preprocessor JSON format'), 500 response = { 'request_uuid': request_uuid, @@ -134,45 +105,15 @@ def get_map_data(): 'data': data } - validated = validate( - schema=schema, - data=response, - resolver=resolver, - json_message='Invalid Preprocessor JSON format', - error_code=500) - - if validated is not None: - return validated + # Validate full response + ok, _ = VALIDATOR.check_response(response) + if not ok: + return jsonify('Invalid Preprocessor JSON format'), 500 logging.debug("Sending response") return response -def validate(schema, data, resolver, json_message, error_code): - """ - Validate a piece of data against a schema - - Args: - schema: a JSON schema to check against - data: the data to check - resolver: a JSON schema resolver - json_messaage: the error to jsonify and return - error_code: the error code to return - - Returns: - None or Tuple[flask.Response, int] - """ - try: - validator = jsonschema.Draft7Validator(schema, resolver=resolver) - validator.validate(data) - except jsonschema.exceptions.ValidationError as error: - logging.error("Validation error occurred") - logging.pii(f"Validation error: {error.message}") - 
return jsonify(json_message), error_code - - return None - - def get_coordinates(content): """ Retrieve the coordinates of a map from the diff --git a/preprocessors/celebrity-detector/Dockerfile b/preprocessors/celebrity-detector/Dockerfile index ea467b85b..cd961fd04 100644 --- a/preprocessors/celebrity-detector/Dockerfile +++ b/preprocessors/celebrity-detector/Dockerfile @@ -12,6 +12,7 @@ RUN pip3 install --upgrade pip && \ pip3 install -r /app/requirements.txt COPY /schemas /app/schemas +COPY /utils /app/utils COPY /preprocessors/celebrity-detector /app EXPOSE 5000 diff --git a/preprocessors/celebrity-detector/celebrity-detector.py b/preprocessors/celebrity-detector/celebrity-detector.py index 4d04242b5..720fdd72b 100644 --- a/preprocessors/celebrity-detector/celebrity-detector.py +++ b/preprocessors/celebrity-detector/celebrity-detector.py @@ -19,21 +19,23 @@ import operator import os -import json import time -import jsonschema import logging import base64 from flask import Flask, request, jsonify import cv2 import numpy as np from datetime import datetime +from utils.validation import Validator app = Flask(__name__) -# extract the required results from the API returned values +# Initialize shared validator once +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/celebrity.schema.json') +# extract the required results from the API returned values def process_results(response, labels): logging.debug(response) if not response["categories"]: @@ -120,30 +122,14 @@ def categorise(): logging.debug("Received request") # load the schema labels = ["other", "indoor", "outdoor", "people"] - with open('./schemas/preprocessors/celebrity.schema.json') \ - as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') \ - as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definitionSchema = json.load(jsonfile) - with open('./schemas/request.schema.json') as jsonfile: - 
first_schema = json.load(jsonfile) - schema_store = { - schema['$id']: schema, - definitionSchema['$id']: definitionSchema - } - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) content = request.get_json() - try: - validator = jsonschema.Draft7Validator(first_schema, resolver=resolver) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: - logging.error(e) + + # request schema validation + ok, _ = VALIDATOR.check_request(content) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 400 + request_uuid = content["request_uuid"] timestamp = time.time() preprocessor_name = "ca.mcgill.a11y.image.preprocessor.celebrityDetector" @@ -195,26 +181,24 @@ def categorise(): } final_data.append(celebrities) data = {"celebrities": final_data} - try: - validator = jsonschema.Draft7Validator(data_schema) - validator.validate(data) - except jsonschema.exceptions.ValidationError as e: - logging.error(e) + + # data schema validation + ok, _ = VALIDATOR.check_data(data) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 + response = { "request_uuid": request_uuid, "timestamp": int(timestamp), "name": preprocessor_name, "data": data } - # validate the results to check if they are in correct format - try: - validator = jsonschema.Draft7Validator(schema, - resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: - logging.error(e) + + # response validation + ok, _ = VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 + logging.debug("Detected " + str(len(final_data)) + "celebrities out of " + str(len(objects)) + "objects") return response diff --git a/preprocessors/clothes-detector/Dockerfile b/preprocessors/clothes-detector/Dockerfile index ae8c19633..b163f71d7 100644 --- a/preprocessors/clothes-detector/Dockerfile +++ b/preprocessors/clothes-detector/Dockerfile @@ -31,6 +31,8 @@ RUN pip3 install 
--upgrade pip && \ COPY /schemas /app/schemas # COPY ./schemas /app/schemas +COPY /utils /app/utils + # good practice to remove the archive RUN wget https://image.a11y.mcgill.ca/models/clothesDetector/yolo.zip && \ unzip yolo.zip && \ diff --git a/preprocessors/clothes-detector/clothes.py b/preprocessors/clothes-detector/clothes.py index b6777681e..b9071f7b0 100644 --- a/preprocessors/clothes-detector/clothes.py +++ b/preprocessors/clothes-detector/clothes.py @@ -14,9 +14,7 @@ # If not, see # . -import json import time -import jsonschema import logging import base64 from flask import Flask, request, jsonify @@ -35,10 +33,15 @@ from yolo.utils.utils import load_classes from predictors.YOLOv3 import YOLOv3Predictor from datetime import datetime +from utils.validation import Validator app = Flask(__name__) logging.basicConfig(level=logging.NOTSET) +# Initialize shared validator +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/clothes.schema.json') + # code referred from # https://medium.com/codex/rgb-to-color-names-in-python-the-robust-way-ec4a9d97a01f @@ -102,30 +105,14 @@ def get_clothes(img): def categorise(): final_data = [] logging.debug("Received request") - with open('./schemas/preprocessors/clothes.schema.json') \ - as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') \ - as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definitionSchema = json.load(jsonfile) - with open('./schemas/request.schema.json') as jsonfile: - first_schema = json.load(jsonfile) - schema_store = { - schema['$id']: schema, - definitionSchema['$id']: definitionSchema - } - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) content = request.get_json() - try: - validator = jsonschema.Draft7Validator(first_schema, resolver=resolver) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: - logging.error(e) + + # request 
schema validation + ok, _ = VALIDATOR.check_request(content) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 400 + request_uuid = content["request_uuid"] timestamp = time.time() preprocessor_name = "ca.mcgill.a11y.image.preprocessor.clothesDetector" @@ -172,26 +159,23 @@ def categorise(): final_data.append(clothes) logging.info(final_data) data = {"clothes": final_data} - try: - validator = jsonschema.Draft7Validator(data_schema) - validator.validate(data) - except jsonschema.exceptions.ValidationError as e: - logging.error(e) + + # data schema validation + ok, _ = VALIDATOR.check_data(data) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 + response = { "request_uuid": request_uuid, "timestamp": int(timestamp), "name": preprocessor_name, "data": data } - # validate the results to check if they are in correct format - try: - validator = jsonschema.Draft7Validator(schema, - resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: - logging.error(e) + # response validation + ok, _ = VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 + logging.debug("Sending response") return response diff --git a/preprocessors/collage-detector/Dockerfile b/preprocessors/collage-detector/Dockerfile index 6dccf794c..6a0857d70 100644 --- a/preprocessors/collage-detector/Dockerfile +++ b/preprocessors/collage-detector/Dockerfile @@ -12,6 +12,7 @@ RUN pip3 install --upgrade pip && \ COPY /schemas /app/schemas COPY /config /app/config +COPY /utils /app/utils COPY /preprocessors/collage-detector/ /app EXPOSE 5000 diff --git a/preprocessors/collage-detector/detect.py b/preprocessors/collage-detector/detect.py index 0b0fb2515..ca94a5dab 100644 --- a/preprocessors/collage-detector/detect.py +++ b/preprocessors/collage-detector/detect.py @@ -19,51 +19,39 @@ import numpy as np import cv2 import base64 -import json -import jsonschema import logging import time 
from datetime import datetime from config.logging_utils import configure_logging +from utils.validation import Validator configure_logging() app = Flask(__name__) logging.basicConfig(level=logging.DEBUG) +# Initialize shared validator +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/collage-detector.schema.json' +) + @app.route('/preprocessor', methods=['POST']) def detect_collage(): logging.debug("Received request") - with open('./schemas/preprocessors/collage-detector.schema.json') \ - as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') \ - as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definitionSchema = json.load(jsonfile) - with open('./schemas/request.schema.json') as jsonfile: - first_schema = json.load(jsonfile) - - schema_store = { - schema['$id']: schema, - definitionSchema['$id']: definitionSchema - } - resolver = jsonschema.RefResolver.from_schema(schema, store=schema_store) + content = request.get_json() - try: - validator = jsonschema.Draft7Validator(first_schema, resolver=resolver) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {e.message}") + + # request schema validation + ok, _ = VALIDATOR.check_request(content) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 400 # check for image if "graphic" not in content: logging.info("Request is not a graphic. 
Skipping...") return "", 204 # No content + request_uuid = content["request_uuid"] timestamp = time.time() name = "ca.mcgill.a11y.image.preprocessor.collageDetector" @@ -73,18 +61,17 @@ def detect_collage(): binary = base64.b64decode(image_b64) image = np.asarray(bytearray(binary), dtype="uint8") img = cv2.imdecode(image, cv2.IMREAD_COLOR) + model = SbRIF(t=0.7) is_collage = model.inference(img) if is_collage: type = {"collage": True} else: type = {"collage": False} - try: - validator = jsonschema.Draft7Validator(data_schema) - validator.validate(type) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for model output") - logging.pii(f"Validation error: {e.message}") + + # data schema validation + ok, _ = VALIDATOR.check_data(type) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 response = { @@ -93,12 +80,10 @@ def detect_collage(): "name": name, "data": type } - try: - validator = jsonschema.Draft7Validator(schema, resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for response") - logging.pii(f"Validation error: {e.message} | Response: {response}") + + # response validation + ok, _ = VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 logging.debug(type) diff --git a/preprocessors/content-categoriser/categoriser.py b/preprocessors/content-categoriser/categoriser.py index 95bb6f20c..28f335588 100644 --- a/preprocessors/content-categoriser/categoriser.py +++ b/preprocessors/content-categoriser/categoriser.py @@ -16,7 +16,6 @@ from flask import Flask, request, jsonify import time -import jsonschema import logging import sys from datetime import datetime @@ -25,7 +24,7 @@ LLMClient, CATEGORISER_PROMPT, POSSIBLE_CATEGORIES - ) +) from utils.validation import Validator import json @@ -55,9 +54,9 @@ def categorise(): # load the content and verify incoming data content = 
request.get_json() - try: - validator.validate_request(content) - except jsonschema.exceptions.ValidationError: + # request schema validation (check_* style) + ok, _ = validator.check_request(content) + if not ok: return jsonify({"error": "Invalid Preprocessor JSON format"}), 400 # check we received a graphic (e.g., not a map or chart request) @@ -92,9 +91,9 @@ def categorise(): # create data json and verify the content-categoriser schema is respected graphic_category_json = {"category": graphic_category} - try: - validator.validate_data(graphic_category_json) - except jsonschema.exceptions.ValidationError: + # data schema validation + ok, _ = validator.check_data(graphic_category_json) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 # create full response & check meets overall preprocessor response schema @@ -105,9 +104,9 @@ def categorise(): "data": graphic_category_json } - try: - validator.validate_response(response) - except jsonschema.exceptions.ValidationError: + # response envelope validation + ok, _ = validator.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 return response @@ -136,7 +135,7 @@ def warmup(): else: return jsonify( {"status": "error", "message": "Warmup failed"} - ), 500 + ), 500 except Exception as e: logging.error(f"Warmup endpoint failed: {str(e)}") return jsonify({"status": "error", "message": str(e)}), 500 diff --git a/preprocessors/depth-map-gen/Dockerfile b/preprocessors/depth-map-gen/Dockerfile index 6c7fd40de..01a9806f8 100644 --- a/preprocessors/depth-map-gen/Dockerfile +++ b/preprocessors/depth-map-gen/Dockerfile @@ -10,7 +10,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y python3- RUN adduser --disabled-password python WORKDIR /app ENV PATH="/home/python/.local/bin:${PATH}" -ENV PYTHONPATH "${PYTHONPATH}:./AdelaiDepth/LeReS/Minist_Test/" +ENV PYTHONPATH="/app:${PYTHONPATH}:./AdelaiDepth/LeReS/Minist_Test/" ENV 
FLASK_APP=depth-map-generator.py ENV TORCH_HOME=/app @@ -23,6 +23,7 @@ RUN pip3 install --upgrade pip && \ COPY /schemas /app/schemas #config for pii COPY /config /app/config +COPY /utils /app/utils #model download, app code RUN git clone https://github.com/aim-uofa/AdelaiDepth && \ diff --git a/preprocessors/depth-map-gen/depth-map-generator.py b/preprocessors/depth-map-gen/depth-map-generator.py index 5424523d1..bda8d6342 100644 --- a/preprocessors/depth-map-gen/depth-map-generator.py +++ b/preprocessors/depth-map-gen/depth-map-generator.py @@ -24,19 +24,23 @@ import torchvision.transforms as transforms from flask import Flask, request, jsonify from collections import OrderedDict -import json import time -import jsonschema import logging import base64 from lib.multi_depth_model_woauxi import RelDepthModel from datetime import datetime from config.logging_utils import configure_logging +from utils.validation import Validator configure_logging() app = Flask(__name__) +# Initialize shared validator +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/depth-map-generator.schema.json' + ) + def parse_args(): parser = argparse.ArgumentParser( @@ -85,33 +89,13 @@ def scale_torch(img): @app.route("/preprocessor", methods=['POST', ]) def depthgenerator(): logging.debug("Received request") - # load the schema - with open('./schemas/preprocessors/depth-map-generator.schema.json') \ - as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') \ - as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definitionSchema = json.load(jsonfile) - with open('./schemas/request.schema.json') as jsonfile: - first_schema = json.load(jsonfile) - # Following 6 lines of code - # refered from - # https://stackoverflow.com/questions/42159346/jsonschema-refresolver-to-resolve-multiple-refs-in-python - schema_store = { - schema['$id']: schema, - definitionSchema['$id']: definitionSchema - 
} - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) + + # incoming request content = request.get_json() - try: - validator = jsonschema.Draft7Validator(first_schema, resolver=resolver) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {e.message}") + + # request schema validation + ok, _ = VALIDATOR.check_request(content) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 400 # check content category from contentCategoriser @@ -131,6 +115,7 @@ def depthgenerator(): if "graphic" not in content: logging.info("Request is not a graphic. Skipping...") return "", 204 # No content + request_uuid = content["request_uuid"] timestamp = time.time() name = "ca.mcgill.a11y.image.preprocessor.depth-map-gen" @@ -184,12 +169,9 @@ def depthgenerator(): logging.pii(f"Inference error: {e}") return jsonify("Depth Model cannot complete"), 500 - try: - validator = jsonschema.Draft7Validator(data_schema) - validator.validate(depth) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for depth data") - logging.pii(f"Validation error: {e.message}") + # data schema validation + ok, _ = VALIDATOR.check_data(depth) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 response = { @@ -198,12 +180,10 @@ def depthgenerator(): "name": name, "data": depth } - try: - validator = jsonschema.Draft7Validator(schema, resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for final response") - logging.pii(f"Validation error: {e.message}") + + # response validation + ok, _ = VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 torch.cuda.empty_cache() diff --git a/preprocessors/graphic-caption/caption.py b/preprocessors/graphic-caption/caption.py index 
14efb3957..4ae75488a 100644 --- a/preprocessors/graphic-caption/caption.py +++ b/preprocessors/graphic-caption/caption.py @@ -17,14 +17,13 @@ from flask import Flask, request, jsonify import sys import time -import jsonschema import logging from datetime import datetime from config.logging_utils import configure_logging from utils.llm import ( LLMClient, GRAPHIC_CAPTION_PROMPT - ) +) from utils.validation import Validator configure_logging() @@ -32,12 +31,11 @@ app = Flask(__name__) DATA_SCHEMA = './schemas/preprocessors/caption.schema.json' - PREPROCESSOR_NAME = "ca.mcgill.a11y.image.preprocessor.graphic-caption" try: llm_client = LLMClient() - validator = Validator(data_schema=DATA_SCHEMA) + VALIDATOR = Validator(data_schema=DATA_SCHEMA) logging.debug("LLM client and validator initialized") except Exception as e: logging.error(f"Failed to initialize clients: {e}") @@ -53,9 +51,9 @@ def categorise(): # load the content and verify incoming data content = request.get_json() - try: - validator.validate_request(content) - except jsonschema.exceptions.ValidationError: + # request schema validation + ok, _ = VALIDATOR.check_request(content) + if not ok: return jsonify({"error": "Invalid Preprocessor JSON format"}), 400 # check we received a graphic (e.g., not a map or chart request) @@ -87,9 +85,9 @@ def categorise(): # create data json and verify the content-categoriser schema is respected graphic_caption_json = {"caption": graphic_caption.strip()} - try: - validator.validate_data(graphic_caption_json) - except jsonschema.exceptions.ValidationError: + # data schema validation + ok, _ = VALIDATOR.check_data(graphic_caption_json) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 # create full response & check meets overall preprocessor response schema @@ -100,13 +98,13 @@ def categorise(): "data": graphic_caption_json } - try: - validator.validate_response(response) - except jsonschema.exceptions.ValidationError: + # response validation + ok, _ = 
VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 # all done; return to orchestrator - return jsonify(response) + return response @app.route("/health", methods=["GET"]) @@ -132,7 +130,7 @@ def warmup(): else: return jsonify( {"status": "error", "message": "Warmup failed"} - ), 500 + ), 500 except Exception as e: logging.error(f"Warmup endpoint failed: {str(e)}") return jsonify({"status": "error", "message": str(e)}), 500 diff --git a/preprocessors/graphic-tagger/Dockerfile b/preprocessors/graphic-tagger/Dockerfile index 7ed71a056..9a5060e46 100644 --- a/preprocessors/graphic-tagger/Dockerfile +++ b/preprocessors/graphic-tagger/Dockerfile @@ -12,6 +12,7 @@ RUN pip3 install --upgrade pip && \ COPY /schemas /app/schemas COPY /config /app/config +COPY /utils /app/utils COPY /preprocessors/graphic-tagger /app EXPOSE 5000 diff --git a/preprocessors/graphic-tagger/azure_api.py b/preprocessors/graphic-tagger/azure_api.py index 1165d2bfd..3632c2432 100644 --- a/preprocessors/graphic-tagger/azure_api.py +++ b/preprocessors/graphic-tagger/azure_api.py @@ -19,24 +19,27 @@ import operator # import numpy as np -import json import time -import jsonschema import logging import base64 import os from flask import Flask, request, jsonify from datetime import datetime from config.logging_utils import configure_logging +from utils.validation import Validator configure_logging() app = Flask(__name__) logging.basicConfig(level=logging.DEBUG) -# extract the required results from the API returned values +# Initialize shared validator once +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/graphic-tagger.schema.json' +) +# extract the required results from the API returned values def process_results(response, labels): if not response["categories"]: return labels[0] @@ -51,10 +54,9 @@ def process_results(response, labels): else: return labels[0] + # this function takes in the image and send the image to Azure to get the # 
output - - def process_image(image, labels): region = "canadacentral" # For example, "westus" @@ -116,30 +118,12 @@ def categorise(): logging.debug("Received request") # load the schema labels = ["other", "indoor", "outdoor", "people"] - with open('./schemas/preprocessors/graphic-tagger.schema.json') \ - as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') \ - as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definitionSchema = json.load(jsonfile) - with open('./schemas/request.schema.json') as jsonfile: - first_schema = json.load(jsonfile) - schema_store = { - schema['$id']: schema, - definitionSchema['$id']: definitionSchema - } - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) content = request.get_json() - try: - validator = jsonschema.Draft7Validator(first_schema, resolver=resolver) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {e.message}") + + # request schema validation + ok, _ = VALIDATOR.check_request(content) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 400 request_uuid = content["request_uuid"] @@ -177,27 +161,22 @@ def categorise(): binary = base64.b64decode(image_b64) pred = process_image(image=binary, labels=labels) type = {"category": pred} - try: - validator = jsonschema.Draft7Validator(data_schema) - validator.validate(type) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for graphic tagger result") - logging.pii(f"Validation error: {e.message}") + + # data validation + ok, _ = VALIDATOR.check_data(type) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 + response = { "request_uuid": request_uuid, "timestamp": int(timestamp), "name": name, "data": type } - # validate the results to check if they are in correct 
format - try: - validator = jsonschema.Draft7Validator(schema, - resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for final response") - logging.pii(f"Validation error: {e.message}") + + # response validation + ok, _ = VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 logging.debug(type) diff --git a/preprocessors/grouping/Dockerfile b/preprocessors/grouping/Dockerfile index a43720de3..53585ee1d 100644 --- a/preprocessors/grouping/Dockerfile +++ b/preprocessors/grouping/Dockerfile @@ -11,6 +11,7 @@ RUN pip3 install --upgrade pip && \ COPY /schemas /app/schemas COPY /config /app/config +COPY /utils /app/utils COPY /preprocessors/grouping/ /app EXPOSE 5000 diff --git a/preprocessors/grouping/grouping.py b/preprocessors/grouping/grouping.py index e7b1c1e58..dd1f75e85 100644 --- a/preprocessors/grouping/grouping.py +++ b/preprocessors/grouping/grouping.py @@ -15,20 +15,24 @@ # . 
from flask import Flask, request, jsonify -import json import time -import jsonschema import logging import collections from math import sqrt from operator import itemgetter from config.logging_utils import configure_logging from datetime import datetime +from utils.validation import Validator configure_logging() app = Flask(__name__) +# Initialize shared validator +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/grouping.schema.json' +) + def calculate_diagonal(x1, y1, x2, y2): # refered from @@ -44,30 +48,12 @@ def readImage(): dimensions = [] ungrouped = [] flag = 0 - with open('./schemas/preprocessors/grouping.schema.json') as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definitionSchema = json.load(jsonfile) - with open('./schemas/request.schema.json') as jsonfile: - first_schema = json.load(jsonfile) - # Following 6 lines refered from - # https://stackoverflow.com/questions/42159346/jsonschema-refresolver-to-resolve-multiple-refs-in-python - schema_store = { - schema['$id']: schema, - definitionSchema['$id']: definitionSchema - } - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) + content = request.get_json() - try: - validator = jsonschema.Draft7Validator(first_schema, resolver=resolver) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {e.message} | Data: {content}") + # request validation + ok, _ = VALIDATOR.check_request(content) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 400 preprocessor = content["preprocessors"] @@ -119,13 +105,11 @@ def readImage(): logging.debug("Number of ungrouped objects " + str(len(ungrouped))) data = {"grouped": final_group, "ungrouped": ungrouped} - try: - validator = 
jsonschema.Draft7Validator(data_schema) - validator.validate(data) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for grouped data") - logging.pii(f"Validation error: {e.message}") + # data validation + ok, _ = VALIDATOR.check_data(data) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 + logging.debug("Sending response") response = { @@ -137,12 +121,9 @@ def readImage(): "data": data } - try: - validator = jsonschema.Draft7Validator(schema, resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for response schema") - logging.pii(f"Validation error: {e.message} | Response: {response}") + # response validation + ok, _ = VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 return response diff --git a/preprocessors/line-charts/Dockerfile b/preprocessors/line-charts/Dockerfile index 6d916110c..a5fd7ca34 100644 --- a/preprocessors/line-charts/Dockerfile +++ b/preprocessors/line-charts/Dockerfile @@ -14,6 +14,7 @@ RUN pip3 install --upgrade pip && \ COPY /schemas /app/schemas COPY /config /app/config +COPY /utils /app/utils COPY /preprocessors/line-charts/ /app EXPOSE 5000 diff --git a/preprocessors/line-charts/charts.py b/preprocessors/line-charts/charts.py index ba26a4e3b..44c5eabb9 100644 --- a/preprocessors/line-charts/charts.py +++ b/preprocessors/line-charts/charts.py @@ -15,13 +15,11 @@ # . 
-import json import time import logging -import jsonschema from flask import Flask, request, jsonify from config.logging_utils import configure_logging - +from utils.validation import Validator from charts_utils import getLowerPointsOnLeft, getHigherPointsOnLeft from charts_utils import getLowerPointsOnRight, getHigherPointsOnRight from datetime import datetime @@ -31,6 +29,11 @@ app = Flask(__name__) logging.basicConfig(level=logging.DEBUG) +# Initialize shared validator +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/line-charts.schema.json' +) + @app.route('/preprocessor', methods=['POST', 'GET']) def get_chart_info(): @@ -39,18 +42,7 @@ def get_chart_info(): functionality to be extended later """ logging.debug("Received request") - # Load schemas - with open('./schemas/preprocessors/line-charts.schema.json') as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definition_schema = json.load(jsonfile) - schema_store = { - data_schema['$id']: data_schema, - schema['$id']: schema, - definition_schema['$id']: definition_schema - } + content = request.get_json() # Check if request is for a chart @@ -58,24 +50,10 @@ def get_chart_info(): logging.info("Not a highcharts charts request. 
Skipping...") return "", 204 - with open('./schemas/request.schema.json') as jsonfile: - request_schema = json.load(jsonfile) - # Validate incoming request - resolver = jsonschema.RefResolver.from_schema( - request_schema, store=schema_store) - try: - validator = jsonschema.Draft7Validator( - request_schema, - resolver=resolver - ) - validator.validate(content) - except jsonschema.exceptions.ValidationError as error: - logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {error.message}") + # request validation (request.schema.json) + ok, _ = VALIDATOR.check_request(content) + if not ok: return jsonify("Invalid Request JSON format"), 400 - # Use response schema to validate response - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) name = 'ca.mcgill.a11y.image.preprocessor.lineChart' request_uuid = content['request_uuid'] @@ -108,12 +86,9 @@ def get_chart_info(): data = {'dataPoints': series_data} - try: - validator = jsonschema.Draft7Validator(data_schema, resolver=resolver) - validator.validate(data) - except jsonschema.exceptions.ValidationError as error: - logging.error("Validation failed for proccessed data") - logging.pii(f"Validation error: {error.message}") + # data validation + ok, _ = VALIDATOR.check_data(data) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 response = { @@ -123,12 +98,9 @@ def get_chart_info(): 'data': data } - try: - validator = jsonschema.Draft7Validator(schema, resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as error: - logging.error("Validation failed for response") - logging.pii(f"Validation error: {error.message} | {response}") + # Validate response (preprocessor-response.schema.json) + ok, _ = VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 logging.debug("Sending response") diff --git a/preprocessors/mmsemseg/Dockerfile 
b/preprocessors/mmsemseg/Dockerfile index e5bda4535..6280418d5 100644 --- a/preprocessors/mmsemseg/Dockerfile +++ b/preprocessors/mmsemseg/Dockerfile @@ -43,6 +43,7 @@ RUN pip install --upgrade pip && \ COPY /schemas /app/schemas COPY /preprocessors/mmsemseg/ /app +COPY /utils /app/utils COPY /config /app/config # Download the model checkpoint diff --git a/preprocessors/mmsemseg/utils.py b/preprocessors/mmsemseg/mmseg_utils.py similarity index 100% rename from preprocessors/mmsemseg/utils.py rename to preprocessors/mmsemseg/mmseg_utils.py diff --git a/preprocessors/mmsemseg/segment.py b/preprocessors/mmsemseg/segment.py index 77689e860..7e41d1be6 100644 --- a/preprocessors/mmsemseg/segment.py +++ b/preprocessors/mmsemseg/segment.py @@ -19,8 +19,6 @@ from flask import Flask, request, jsonify import gc -import json -import jsonschema import base64 import torch @@ -29,13 +27,21 @@ import numpy as np import cv2 -from utils import visualize_result, findContour +from mmseg_utils import visualize_result, findContour from time import time import logging from config.logging_utils import configure_logging from datetime import datetime +from utils.validation import Validator +validator = Validator( + data_schema='./schemas/preprocessors/segmentation.schema.json', + request_schema='./schemas/request.schema.json', + response_schema='./schemas/preprocessor-response.schema.json', + definitions_schema='./schemas/definitions.json' +) + configure_logging() # configuration and checkpoint files BEIT_CONFIG = "/app/config/upernet_beit-base_8x2_640x640_160k_ade20k.py" @@ -120,25 +126,6 @@ def segment(): torch.cuda.empty_cache() dictionary = [] - # load the schemas - with open('./schemas/preprocessors/segmentation.schema.json') as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definitionSchema = json.load(jsonfile) - with 
open('./schemas/request.schema.json') as jsonfile: - first_schema = json.load(jsonfile) - # Following 6 lines refered from - # https://stackoverflow.com/questions/42159346/jsonschema-refresolver-to-resolve-multiple-refs-in-python - schema_store = { - schema['$id']: schema, - definitionSchema['$id']: definitionSchema - } - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) - logging.info("Schemas loaded") - # load the model try: model = init_segmentor(BEIT_CONFIG, BEIT_CHECKPOINT, device='cuda:0') @@ -152,12 +139,8 @@ def segment(): request_json = request.get_json() # validate the request - try: - validator = jsonschema.Draft7Validator(first_schema, resolver=resolver) - validator.validate(request_json) - except jsonschema.exceptions.ValidationError as e: - logging.error("Request validation failed") - logging.pii(f"Validation error: {e.message} | Data: {request_json}") + ok, _ = validator.check_request(request_json) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 400 if "graphic" not in request_json: @@ -180,9 +163,7 @@ def segment(): classifier_1_output = preprocess_output[classifier_1] classifier_1_label = classifier_1_output["category"] if classifier_1_label != "photograph": - logging.info( - "Not photograph content. Skipping...") - + logging.info("Not photograph content. 
Skipping...") return "", 204 if classifier_2 in preprocess_output: # classifier_2_output = preprocess_output[classifier_2] @@ -222,12 +203,8 @@ def segment(): torch.cuda.empty_cache() # validate the data format for the output - try: - validator = jsonschema.Draft7Validator(data_schema) - validator.validate(segment) - except jsonschema.exceptions.ValidationError as e: - logging.error("Data validation failed") - logging.pii(f"Validation error: {e.message} | Data: {segment}") + ok, _ = validator.check_data(segment) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 response = { @@ -238,24 +215,17 @@ def segment(): } # validate the output format - try: - validator = jsonschema.Draft7Validator(schema, resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: - logging.error("Response validation failed") - logging.pii(f"Validation error: {e.message} | Response: {response}") + ok, _ = validator.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 logging.info("Valid response generated") - return response @app.route("/health", methods=["GET"]) def health(): - """ - Health check endpoint to verify if the service is running - """ + """Health check endpoint to verify if the service is running""" return jsonify({ "status": "healthy", "timestamp": datetime.now().isoformat() @@ -264,9 +234,7 @@ def health(): @app.route("/warmup", methods=["GET"]) def warmup(): - """ - Warms up the segmentation model by running a dummy inference. 
- """ + """Warms up the segmentation model by running a dummy inference.""" try: # dummy black image (512×512) dummy_img = np.zeros((512, 512, 3), dtype=np.uint8) @@ -274,14 +242,11 @@ def warmup(): # runs inference_segmentor(): model weight loading/memory allocation model = init_segmentor(BEIT_CONFIG, BEIT_CHECKPOINT, device='cuda:0') _ = inference_segmentor(model, dummy_img) - torch.cuda.empty_cache() - return jsonify({ "status": "warmup successful", "timestamp": datetime.now().isoformat() }), 200 - except Exception as e: logging.pii(f"[WARMUP] Warmup failed: {e}") logging.exception("Warmup failed") diff --git a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py index 26a1ea1c5..aaa6fe4b8 100644 --- a/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py +++ b/preprocessors/multistage-diagram-segmentation/multistage-diagram-segmentation.py @@ -15,7 +15,6 @@ # If not, see # . -import jsonschema import logging import time from flask import Flask, request, jsonify @@ -93,10 +92,9 @@ def process_diagram(): ) return jsonify({"error": "Invalid request URL"}), 403 - # 1. Validate Incoming Request - try: - validator.validate_request(content) - except jsonschema.exceptions.ValidationError: + # 1. Validate Incoming Request via shared Validator + ok, _ = validator.check_request(content) + if not ok: return jsonify({"error": "Invalid Preprocessor JSON format"}), 400 request_uuid = content["request_uuid"] @@ -171,10 +169,9 @@ def process_diagram(): logging.info("Segmentation process did not yield any data.") final_data_json = base_json - # 7. Validate the Generated Data against its specific schema - try: - validator.validate_data(final_data_json) - except jsonschema.exceptions.ValidationError: + # 7. 
Data schema validation + ok, _ = validator.check_data(final_data_json) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 # 8. Construct the Final Response @@ -186,9 +183,8 @@ def process_diagram(): } # 9. Validate Final Response against System Schema - try: - validator.validate_response(response) - except jsonschema.exceptions.ValidationError: + ok, _ = validator.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 logging.info( diff --git a/preprocessors/ner/Dockerfile b/preprocessors/ner/Dockerfile index 6e458acfd..28b8ba2a7 100644 --- a/preprocessors/ner/Dockerfile +++ b/preprocessors/ner/Dockerfile @@ -22,6 +22,7 @@ RUN pip3 install --upgrade pip && \ COPY /schemas /app/schemas COPY /config /app/config +COPY /utils /app/utils COPY /preprocessors/ner/ /app EXPOSE 5000 diff --git a/preprocessors/ner/ner.py b/preprocessors/ner/ner.py index 768381010..1ccf71c67 100644 --- a/preprocessors/ner/ner.py +++ b/preprocessors/ner/ner.py @@ -21,11 +21,11 @@ import shutil import logging import tempfile -import jsonschema from bs4 import BeautifulSoup from flask import Flask, request, jsonify from config.logging_utils import configure_logging from datetime import datetime +from utils.validation import Validator import nltk import clipscore @@ -46,6 +46,8 @@ STANFORD_JAR = '/app/stanford-ner/stanford-ner.jar' STANFORD_MODEL = '/app/stanford-ner/ner-model-english.ser.gz' +VALIDATOR = Validator(data_schema='./schemas/preprocessors/ner.schema.json') + def save_image(my_html, name, out_dir): """ @@ -142,39 +144,13 @@ def find_first_index(word, arr): def main(): logging.debug("Received request") - with open('./schemas/preprocessors/ner.schema.json') as jsonfile: - data_schema = json.load(jsonfile) - - with open('./schemas/preprocessor-response.schema.json') as jsonfile: - schema = json.load(jsonfile) - - with open('./schemas/definitions.json') as jsonfile: - definition_schema = json.load(jsonfile) - - with 
open('./schemas/request.schema.json') as jsonfile: - first_schema = json.load(jsonfile) - - schema_store = { - data_schema['$id']: data_schema, - schema['$id']: schema, - definition_schema['$id']: definition_schema - } - - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) - content = request.get_json() - try: - validator = jsonschema.Draft7Validator(first_schema, resolver=resolver) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {e.message}") + # request schema validation + ok, _ = VALIDATOR.check_request(content) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 400 - # ------ START COMPUTATION ------ # - # check if 'graphic' and 'context' keys are in the content dict if 'graphic' in content and 'context' in content: name_ = "1" @@ -233,6 +209,11 @@ def main(): timestamp = time.time() name = "ca.mcgill.a11y.image.preprocessor.ner" + # data schema validation + ok, _ = VALIDATOR.check_data(data) + if not ok: + return jsonify("Invalid Preprocessor JSON format"), 500 + response = { 'request_uuid': request_uuid, 'timestamp': int(timestamp), @@ -240,20 +221,9 @@ def main(): 'data': data } - try: - validator = jsonschema.Draft7Validator(data_schema, resolver=resolver) - validator.validate(response['data']) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for response data") - logging.pii(f"Validation error: {e.message}") - return jsonify("Invalid Preprocessor JSON format"), 500 - - try: - validator = jsonschema.Draft7Validator(schema, resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for response") - logging.pii(f"Validation error: {e.message}") + # full response validation + ok, _ = VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON 
format"), 500 logging.debug("Sending response") diff --git a/preprocessors/object-depth-calculator/Dockerfile b/preprocessors/object-depth-calculator/Dockerfile index 7ce833bbf..b09e438d8 100644 --- a/preprocessors/object-depth-calculator/Dockerfile +++ b/preprocessors/object-depth-calculator/Dockerfile @@ -11,6 +11,7 @@ RUN pip3 install --upgrade pip && \ COPY /schemas /app/schemas COPY /config /app/config +COPY /utils /app/utils COPY /preprocessors/object-depth-calculator/ /app EXPOSE 5000 diff --git a/preprocessors/object-depth-calculator/object-depth-calculator.py b/preprocessors/object-depth-calculator/object-depth-calculator.py index f91777f2e..778ca5342 100644 --- a/preprocessors/object-depth-calculator/object-depth-calculator.py +++ b/preprocessors/object-depth-calculator/object-depth-calculator.py @@ -17,50 +17,32 @@ import cv2 import numpy as np from flask import Flask, request, jsonify -import json import time -import jsonschema import base64 import logging from datetime import datetime from config.logging_utils import configure_logging +from utils.validation import Validator configure_logging() app = Flask(__name__) logging.basicConfig(level=logging.DEBUG) +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/object-depth-calculator.schema.json' +) + @app.route("/preprocessor", methods=['POST', ]) def objectdepth(): logging.debug("Received request") - # load the schema - with open('./schemas/preprocessors/object-depth-calculator.schema.json') \ - as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') \ - as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definitionSchema = json.load(jsonfile) - with open('./schemas/request.schema.json') as jsonfile: - first_schema = json.load(jsonfile) - # Following 6 lines of code - # refered from - # https://stackoverflow.com/questions/42159346/jsonschema-refresolver-to-resolve-multiple-refs-in-python - 
schema_store = { - schema['$id']: schema, - definitionSchema['$id']: definitionSchema - } - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) + content = request.get_json() - try: - validator = jsonschema.Draft7Validator(first_schema, resolver=resolver) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {e.message}") + + # request schema validation + ok, _ = VALIDATOR.check_request(content) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 400 # check for depth-map @@ -88,13 +70,8 @@ def objectdepth(): "timestamp": int(time.time()), "renderings": [] } - try: - validator = jsonschema.Draft7Validator( - schema, resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as error: - logging.error("Validation failed for response without dimensions") - logging.pii(f"Validation error: {error.message}") + ok, _ = VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 logging.debug("Sending response") return response @@ -146,26 +123,23 @@ def objectdepth(): obj_depth.append(dictionary) obj_depth_output = {"objects": obj_depth} - try: - validator = jsonschema.Draft7Validator(data_schema) - validator.validate(obj_depth_output) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for object depth output") - logging.pii(f"Validation error: {e.message}") + # data schema validation + ok, _ = VALIDATOR.check_data(obj_depth_output) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 + response = { "request_uuid": request_uuid, "timestamp": int(timestamp), "name": name, "data": {"objects": obj_depth} } - try: - validator = jsonschema.Draft7Validator(schema, resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: - 
logging.error("Validation failed for final response") - logging.pii(f"Validation error: {e.message}") + + # envelope validation + ok, _ = VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 + return response diff --git a/preprocessors/object-detection-azure/Dockerfile b/preprocessors/object-detection-azure/Dockerfile index badfdaa5c..499bdc4db 100644 --- a/preprocessors/object-detection-azure/Dockerfile +++ b/preprocessors/object-detection-azure/Dockerfile @@ -14,6 +14,7 @@ RUN pip3 install --upgrade pip && \ COPY /schemas /app/schemas COPY /config /app/config +COPY /utils /app/utils COPY /preprocessors/object-detection-azure /app EXPOSE 5000 diff --git a/preprocessors/object-detection-azure/objdetect.py b/preprocessors/object-detection-azure/objdetect.py index 463762aa9..2ebc10359 100644 --- a/preprocessors/object-detection-azure/objdetect.py +++ b/preprocessors/object-detection-azure/objdetect.py @@ -15,37 +15,25 @@ # . import requests # pip3 install requests -import json import time -import jsonschema - import logging import base64 import os from flask import Flask, request, jsonify from datetime import datetime from config.logging_utils import configure_logging +from utils.validation import Validator configure_logging() +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/object-detection.schema.json' +) app = Flask(__name__) logging.basicConfig(level=logging.DEBUG) -# load the schema -with open('./schemas/preprocessors/object-detection.schema.json') \ - as jsonfile: - data_schema = json.load(jsonfile) -with open('./schemas/preprocessor-response.schema.json') \ - as jsonfile: - schema = json.load(jsonfile) -with open('./schemas/definitions.json') as jsonfile: - definitionSchema = json.load(jsonfile) -with open('./schemas/request.schema.json') as jsonfile: - first_schema = json.load(jsonfile) # normalise the xy coordinates - - def process_results(result): send = [] image_height = 
result["metadata"]["height"] @@ -128,21 +116,14 @@ def process_image(image): @app.route("/preprocessor", methods=['POST', ]) def categorise(): logging.debug("Received request") - schema_store = { - schema['$id']: schema, - definitionSchema['$id']: definitionSchema - } - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) content = request.get_json() - try: - validator = jsonschema.Draft7Validator(first_schema, resolver=resolver) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {e.message}") + + # request schema validation + ok, _ = VALIDATOR.check_request(content) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 400 + request_uuid = content["request_uuid"] timestamp = time.time() name = "ca.mcgill.a11y.image.preprocessor.objectDetection" @@ -178,28 +159,24 @@ def categorise(): binary = base64.b64decode(image_b64) pred = process_image(image=binary) type = {"objects": pred} - try: - validator = jsonschema.Draft7Validator(data_schema) - validator.validate(type) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for processed image data") - logging.pii(f"Validation error: {e.message}") + + # data schema validation + ok, _ = VALIDATOR.check_data(type) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 + response = { "request_uuid": request_uuid, "timestamp": int(timestamp), "name": name, "data": type } - # validate the output with schema - try: - validator = jsonschema.Draft7Validator(schema, - resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for final response") - logging.pii(f"Validation error: {e.message}") + + # validate full response + ok, _ = VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 + 
logging.debug("Sending response") return response diff --git a/preprocessors/ocr/Dockerfile b/preprocessors/ocr/Dockerfile index 3743ce402..c76a81878 100644 --- a/preprocessors/ocr/Dockerfile +++ b/preprocessors/ocr/Dockerfile @@ -11,6 +11,7 @@ RUN pip3 install --upgrade pip && \ COPY /schemas /app/schemas COPY /config /app/config +COPY /utils /app/utils COPY /preprocessors/ocr/ /app EXPOSE 5000 diff --git a/preprocessors/ocr/ocr.py b/preprocessors/ocr/ocr.py index 661db644b..4b4f9e55d 100644 --- a/preprocessors/ocr/ocr.py +++ b/preprocessors/ocr/ocr.py @@ -15,10 +15,8 @@ # . -import json import time import logging -import jsonschema import os import io import base64 @@ -33,12 +31,15 @@ process_azure_read_v4_preview ) from config.logging_utils import configure_logging +from utils.validation import Validator configure_logging() app = Flask(__name__) logging.basicConfig(level=logging.DEBUG) +VALIDATOR = Validator(data_schema='./schemas/preprocessors/ocr.schema.json') + @app.route('/preprocessor', methods=['POST', 'GET']) def get_ocr_text(): @@ -47,18 +48,6 @@ def get_ocr_text(): """ logging.debug("Received request") - # Load schemas - with open('./schemas/preprocessors/ocr.schema.json') as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definition_schema = json.load(jsonfile) - schema_store = { - data_schema['$id']: data_schema, - schema['$id']: schema, - definition_schema['$id']: definition_schema - } content = request.get_json() # Check if request is for a map @@ -66,24 +55,11 @@ def get_ocr_text(): logging.info("Map request. 
Skipping...") return "", 204 - with open('./schemas/request.schema.json') as jsonfile: - request_schema = json.load(jsonfile) # Validate incoming request - resolver = jsonschema.RefResolver.from_schema( - request_schema, store=schema_store) - try: - validator = jsonschema.Draft7Validator( - request_schema, - resolver=resolver - ) - validator.validate(content) - except jsonschema.exceptions.ValidationError as error: - logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {error.message}") + ok, _ = VALIDATOR.check_request(content) + if not ok: return jsonify("Invalid Request JSON format"), 400 - # Use response schema to validate response - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) + # Get OCR text response width = content['dimensions'][0] height = content['dimensions'][1] @@ -105,12 +81,9 @@ def get_ocr_text(): timestamp = int(time.time()) data = {'lines': ocr_result, 'cloud_service': cld_srv_optn} - try: - validator = jsonschema.Draft7Validator(data_schema, resolver=resolver) - validator.validate(data) - except jsonschema.exceptions.ValidationError as error: - logging.error("Validation failed for processed OCR data") - logging.pii(f"Validation error: {error.message}") + # data schema validation + ok, _ = VALIDATOR.check_data(data) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 response = { @@ -120,12 +93,9 @@ def get_ocr_text(): 'data': data } - try: - validator = jsonschema.Draft7Validator(schema, resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as error: - logging.error("Validation failed for final response") - logging.pii(f"Validation error: {error.message}") + # full response validation + ok, _ = VALIDATOR.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 logging.debug("Sending response") diff --git a/preprocessors/openstreetmap/Dockerfile b/preprocessors/openstreetmap/Dockerfile 
index 021e7cead..9bee0dd8d 100644 --- a/preprocessors/openstreetmap/Dockerfile +++ b/preprocessors/openstreetmap/Dockerfile @@ -12,6 +12,7 @@ RUN pip3 install --upgrade pip && \ COPY ./schemas /app/schemas COPY ./preprocessors/openstreetmap/ /app COPY ./config /app/config +COPY ./utils /app/utils EXPOSE 5000 diff --git a/preprocessors/openstreetmap/main.py b/preprocessors/openstreetmap/main.py index e9a4fac30..5487f1ae9 100644 --- a/preprocessors/openstreetmap/main.py +++ b/preprocessors/openstreetmap/main.py @@ -1,6 +1,4 @@ from flask import Flask, jsonify, request -import jsonschema -import json import logging from datetime import datetime from osm_service import ( @@ -13,10 +11,10 @@ get_amenities, enlist_POIs, OSM_preprocessor, - validate, get_coordinates, ) from config.logging_utils import configure_logging +from utils.validation import Validator configure_logging() @@ -24,6 +22,11 @@ app.config['JSON_SORT_KEYS'] = False logging.basicConfig(level=logging.DEBUG) +# Shared validator for this component +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/openstreetmap.schema.json' +) + @app.route('/preprocessor', methods=['POST', ]) def get_map_data(): @@ -31,46 +34,24 @@ def get_map_data(): Gets map data from OpenStreetMap """ logging.debug("Received request") - # Load schemas - with open('./schemas/preprocessors/openstreetmap.schema.json') as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definition_schema = json.load(jsonfile) - schema_store = { - data_schema['$id']: data_schema, - schema['$id']: schema, - definition_schema['$id']: definition_schema - } + try: content = request.get_json() logging.debug("Validating request") - with open('./schemas/request.schema.json') as jsonfile: - request_schema = json.load(jsonfile) - - # Validate incoming request - resolver = 
jsonschema.RefResolver.from_schema( - request_schema, store=schema_store) - - validated = validate( - schema=request_schema, - data=content, - resolver=resolver, - json_message="Invalid Request JSON format", - error_code=400 - ) - - if validated is not None: - return validated - except jsonschema.exceptions.ValidationError as e: + + # Validate incoming request (request schema) + ok, _ = VALIDATOR.check_request(content) + if not ok: + return jsonify("Invalid Request JSON format"), 400 + except Exception as e: logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {e.message}") + logging.pii(f"Validation error: {e}") return jsonify("Invalid Request JSON format"), 400 time_stamp = int(get_timestamp()) name = "ca.mcgill.a11y.image.preprocessor.openstreetmap" request_uuid = content["request_uuid"] + # Check if this request is for an openstreetmap if 'coordinates' not in content and 'placeID' not in content: logging.info("Not map content. Skipping...") @@ -97,12 +78,14 @@ def get_map_data(): } OSM_data = get_streets(bbox_coordinates) amenity = get_amenities(bbox_coordinates) + # initialize empty response response = { "request_uuid": request_uuid, "timestamp": time_stamp, "name": name - } + } + if OSM_data is not None: processed_OSM_data = process_streets_data(OSM_data, bbox_coordinates) if processed_OSM_data is None: @@ -120,37 +103,34 @@ def get_map_data(): "points_of_interest": POIs, "streets": streets } - logging.debug("Validating response data") - validated = validate( - schema=data_schema, - data=response["data"], - resolver=resolver, - json_message='Invalid Preprocessor JSON format', - error_code=500) - if validated is not None: - return validated - """ - # 'streets', 'points_of_interest' and 'bounds' are - # required fields as per the schema - elif amenity is not None and len(amenity) != 0: - response = { - "request_uuid": request_uuid, - "timestamp": time_stamp, - "name": name, - "data": { - "bounds": header_info, - 
"points_of_interest": amenity - } + + # Validate data payload (component schema) + ok, _ = VALIDATOR.check_data(response["data"]) + if not ok: + return jsonify('Invalid Preprocessor JSON format'), 500 + + """ + # 'streets', 'points_of_interest' and 'bounds' are + # required fields as per the schema + elif amenity is not None and len(amenity) != 0: + response = { + "request_uuid": request_uuid, + "timestamp": time_stamp, + "name": name, + "data": { + "bounds": header_info, + "points_of_interest": amenity } - else: - response = { - "request_uuid": request_uuid, - "timestamp": time_stamp, - "name": name, - "data": { - "bounds": header_info - } + } + else: + response = { + "request_uuid": request_uuid, + "timestamp": time_stamp, + "name": name, + "data": { + "bounds": header_info } + } elif OSM_data is None and amenity is not None: response = { "request_uuid": request_uuid, @@ -162,18 +142,16 @@ def get_map_data(): } } """ + if "data" not in response: logging.debug("Map data is empty for location. 
Skipping...") return "", 204 - logging.debug("Validating response") - validated = validate( - schema=schema, - data=response, - resolver=resolver, - json_message='Invalid Preprocessor JSON format', - error_code=500) - if validated is not None: - return validated + + # Validate final response + ok, _ = VALIDATOR.check_response(response) + if not ok: + return jsonify('Invalid Preprocessor JSON format'), 500 + logging.debug("Sending final response") return response diff --git a/preprocessors/openstreetmap/osm_service.py b/preprocessors/openstreetmap/osm_service.py index 2084744a9..78f7a5f2d 100644 --- a/preprocessors/openstreetmap/osm_service.py +++ b/preprocessors/openstreetmap/osm_service.py @@ -985,6 +985,8 @@ def compute_street_length(processed_OSM_data): def validate(schema, data, resolver, json_message, error_code): """ + DEPRECATED: use utils.validation.Validator instead + Validate a piece of data against a schema Args: schema: a JSON schema to check against diff --git a/preprocessors/resize-graphic/Dockerfile b/preprocessors/resize-graphic/Dockerfile index 05c170b5e..cbfa60513 100644 --- a/preprocessors/resize-graphic/Dockerfile +++ b/preprocessors/resize-graphic/Dockerfile @@ -25,6 +25,7 @@ RUN pip3 install --upgrade pip && \ # Copy the schema and config files COPY /schemas /usr/src/app/schemas COPY /config /usr/src/app/config +COPY /utils /usr/src/app/utils # Copy your preprocessor application code COPY /preprocessors/resize-graphic /usr/src/app diff --git a/preprocessors/resize-graphic/resize-graphic.py b/preprocessors/resize-graphic/resize-graphic.py index c4e21d52c..9eed629d3 100644 --- a/preprocessors/resize-graphic/resize-graphic.py +++ b/preprocessors/resize-graphic/resize-graphic.py @@ -14,8 +14,6 @@ # If not, see # . 
-import jsonschema -import json import logging import time import base64 @@ -25,6 +23,7 @@ from datetime import datetime from config.logging_utils import configure_logging from config.process_image import process_image +from utils.validation import Validator configure_logging() @@ -33,26 +32,9 @@ # Preprocessor Name PREPROCESSOR_NAME = "ca.mcgill.a11y.image.request" # Required for pseudo -# Load schemas once at startup -with open('./schemas/preprocessors/modify-request.schema.json') as f: - DATA_SCHEMA = json.load(f) -with open('./schemas/preprocessor-response.schema.json') as f: - RESPONSE_SCHEMA = json.load(f) -with open('./schemas/definitions.json') as f: - DEFINITIONS_SCHEMA = json.load(f) -with open('./schemas/request.schema.json') as f: - REQUEST_SCHEMA = json.load(f) - -# Build resolver store using loaded schemas -# Following 7 lines of code are referred from -# https://stackoverflow.com/questions/42159346/jsonschema-refresolver-to-resolve-multiple-refs-in-python -SCHEMA_STORE = { - RESPONSE_SCHEMA['$id']: RESPONSE_SCHEMA, - DEFINITIONS_SCHEMA['$id']: DEFINITIONS_SCHEMA - } -RESOLVER = jsonschema.RefResolver.from_schema( - RESPONSE_SCHEMA, store=SCHEMA_STORE - ) +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/modify-request.schema.json' +) @app.route("/preprocessor", methods=['POST']) @@ -71,15 +53,9 @@ def resize_graphic(): return jsonify({"message": "No graphic content"}), 204 # 1. 
Validate Incoming Request - try: - # Validate input against REQUEST_SCHEMA - validator = jsonschema.Draft7Validator( - REQUEST_SCHEMA, resolver=RESOLVER - ) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {e.message} | Data: {content}") + ok, _ = VALIDATOR.check_request(content) + if not ok: + logging.error("Request validation failed.") return jsonify({"error": "Request not in the appropriate format"}), 400 request_uuid = content["request_uuid"] @@ -91,15 +67,18 @@ def resize_graphic(): graphic_data = content["graphic"] if ',' in graphic_data: graphic_data = graphic_data.split(',', 1)[1] + try: new_graphic = process_image( graphic_data, (max_size, max_size), "PNG" ) - except Exception as e: - logging.error(f"Failed to process image: {str(e)}") + except Exception as err: + logging.error("Failed to process image") + logging.debug(f"[image.process] {err}") return jsonify({"error": "Failed to process image"}), 422 + # Convert image to base64 data URL format buffer = BytesIO() new_graphic.save(buffer, format='PNG') @@ -107,19 +86,12 @@ def resize_graphic(): encoded_data = base64.b64encode(buffer.getvalue()).decode('utf-8') new_b64_graphic = f"data:image/png;base64,{encoded_data}" - data = { - "graphic": new_b64_graphic - } + data = {"graphic": new_b64_graphic} - # 3. Check modification - try: - validator = jsonschema.Draft7Validator(DATA_SCHEMA) - validator.validate(data) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for request modification") - logging.pii( - f"Validation error: {e.message} | Data: {data}" - ) + # 3. Validate component data payload + ok, _ = VALIDATOR.check_data(data) + if not ok: + logging.error("Data validation failed for request modification.") return jsonify({"error": "Request not in the appropriate format"}), 400 # 4. 
Construct and check response @@ -130,22 +102,12 @@ def resize_graphic(): "data": data } - try: - validator = jsonschema.Draft7Validator( - RESPONSE_SCHEMA, resolver=RESOLVER - ) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed. Are schemas out of date?") - logging.pii( - f"Validation error: {e.message} | Response: {response}" - ) + ok, _ = VALIDATOR.check_response(response) + if not ok: + logging.error("Response validation failed. Are schemas out of date?") return jsonify({"error": "Failed to Create Response"}), 500 - logging.info( - f"Modified 'graphic' in request {request_uuid}." - ) - # logging.pii(response) + logging.info(f"Modified 'graphic' in request {request_uuid}.") return jsonify(response), 200 diff --git a/preprocessors/sorting/Dockerfile b/preprocessors/sorting/Dockerfile index 465713bd9..8201050b8 100644 --- a/preprocessors/sorting/Dockerfile +++ b/preprocessors/sorting/Dockerfile @@ -12,6 +12,7 @@ RUN pip3 install --upgrade pip && \ COPY /schemas /app/schemas COPY /config /app/config +COPY /utils /app/utils COPY /preprocessors/sorting/ /app EXPOSE 5000 diff --git a/preprocessors/sorting/sorting.py b/preprocessors/sorting/sorting.py index c0a9cbca3..4b63509f3 100644 --- a/preprocessors/sorting/sorting.py +++ b/preprocessors/sorting/sorting.py @@ -15,13 +15,12 @@ # . 
from flask import Flask, request, jsonify -import json import time -import jsonschema import logging from math import sqrt from datetime import datetime from config.logging_utils import configure_logging +from utils.validation import Validator configure_logging() @@ -29,9 +28,12 @@ app = Flask(__name__) logging.basicConfig(level=logging.DEBUG) -# this function determines the size of bounding box +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/sorting.schema.json' +) +# this function determines the size of bounding box def calculate_diagonal(x1, y1, x2, y2): diag = sqrt((x2 - x1)**2 + (y2 - y1)**2) return diag @@ -50,31 +52,13 @@ def readImage(): left_id = [] small_id = [] centroid = [] - # loading schemas to check of the received and returned outputs are correct - with open('./schemas/preprocessors/sorting.schema.json') as jsonfile: - data_schema = json.load(jsonfile) - with open('./schemas/preprocessor-response.schema.json') as jsonfile: - schema = json.load(jsonfile) - with open('./schemas/definitions.json') as jsonfile: - definition_schema = json.load(jsonfile) - with open('./schemas/request.schema.json') as jsonfile: - first_schema = json.load(jsonfile) - # Following 6 lines of code are refered from - # https://stackoverflow.com/questions/42159346/jsonschema-refresolver-to-resolve-multiple-refs-in-python - schema_store = { - schema['$id']: schema, - definition_schema['$id']: definition_schema - } - resolver = jsonschema.RefResolver.from_schema( - schema, store=schema_store) + content = request.get_json() - # check if received input is correct - try: - validator = jsonschema.Draft7Validator(first_schema, resolver=resolver) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: + # request schema validation + ok, err = VALIDATOR.check_request(content) + if not ok: logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {e.message}") + logging.debug(f"[request.validation] {err}") return 
jsonify("Invalid Preprocessor JSON format"), 400 preprocessor = content["preprocessors"] @@ -83,6 +67,7 @@ def readImage(): logging.info("Object detection output not " "available. Skipping...") return "", 204 + oDpreprocessor = \ preprocessor["ca.mcgill.a11y.image.preprocessor.objectDetection"] objects = oDpreprocessor["objects"] @@ -111,13 +96,12 @@ def readImage(): name = "ca.mcgill.a11y.image.preprocessor.sorting" data = {"leftToRight": left_id, "topToBottom": top_id, "smallToBig": small_id} - # verify the output format - try: - validator = jsonschema.Draft7Validator(data_schema) - validator.validate(data) - except jsonschema.exceptions.ValidationError as e: + + # data schema validation + ok, err = VALIDATOR.check_data(data) + if not ok: logging.error("Validation failed for processed data") - logging.pii(f"Validation error: {e.message}") + logging.debug(f"[data.validation] {err}") return jsonify("Invalid Preprocessor JSON format"), 500 response = { @@ -126,13 +110,12 @@ def readImage(): "name": name, "data": data } - try: - validator = jsonschema.Draft7Validator( - schema, resolver=resolver) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: + + # response schema validation + ok, err = VALIDATOR.check_response(response) + if not ok: logging.error("Validation failed for final response") - logging.pii(f"Validation error: {e.message}") + logging.debug(f"[response.validation] {err}") return jsonify("Invalid Preprocessor JSON format"), 500 logging.debug("Sending response") diff --git a/preprocessors/text-followup/text-followup.py b/preprocessors/text-followup/text-followup.py index 81e7a639e..dcc8b1d7c 100644 --- a/preprocessors/text-followup/text-followup.py +++ b/preprocessors/text-followup/text-followup.py @@ -17,7 +17,6 @@ from flask import Flask, request, jsonify import json import time -import jsonschema import logging import os import sys @@ -90,9 +89,8 @@ def followup(): # load the content and verify incoming data content = 
request.get_json() - try: - validator.validate_request(content) - except jsonschema.exceptions.ValidationError: + ok, _ = validator.check_request(content) + if not ok: return jsonify({"error": "Invalid Preprocessor JSON format"}), 400 # check we received a graphic (e.g., not a map or chart request) @@ -237,9 +235,9 @@ def followup(): ) # check if LLM returned valid json that follows schema - try: - validator.validate_data(followup_response_json) - except jsonschema.exceptions.ValidationError: + # validate data + ok, _ = validator.check_data(followup_response_json) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 # create full response & check meets overall preprocessor response schema @@ -250,9 +248,8 @@ def followup(): "data": followup_response_json } - try: - validator.validate_response(response) - except jsonschema.exceptions.ValidationError: + ok, _ = validator.check_response(response) + if not ok: return jsonify("Invalid Preprocessor JSON format"), 500 logging.debug("full response length: " + str(len(response))) diff --git a/preprocessors/yolo/Dockerfile b/preprocessors/yolo/Dockerfile index 799f9dc3e..197f3fa1f 100644 --- a/preprocessors/yolo/Dockerfile +++ b/preprocessors/yolo/Dockerfile @@ -15,6 +15,7 @@ RUN pip3 install --upgrade pip && \ COPY /schemas /usr/src/app/schemas COPY /config /usr/src/app/config +COPY /utils /usr/src/app/utils # Create model directory RUN mkdir -p /usr/src/app/models diff --git a/preprocessors/yolo/detect.py b/preprocessors/yolo/detect.py index 83cfea757..943df31c4 100644 --- a/preprocessors/yolo/detect.py +++ b/preprocessors/yolo/detect.py @@ -16,19 +16,18 @@ # . 
from datetime import datetime -import json import time import logging import os import traceback from flask import Flask, request, jsonify -import jsonschema import base64 import io from PIL import Image from ultralytics import YOLO import torch from config.logging_utils import configure_logging +from utils.validation import Validator # Create Flask app app = Flask(__name__) @@ -51,26 +50,9 @@ else: device, device_name = 'cpu', 'CPU' -# Load schemas once at startup -with open('./schemas/preprocessors/object-detection.schema.json') as f: - DATA_SCHEMA = json.load(f) -with open('./schemas/preprocessor-response.schema.json') as f: - RESPONSE_SCHEMA = json.load(f) -with open('./schemas/definitions.json') as f: - DEFINITIONS_SCHEMA = json.load(f) -with open('./schemas/request.schema.json') as f: - REQUEST_SCHEMA = json.load(f) - -# Build resolver store using loaded schemas -# Following 7 lines of code are referred from -# https://stackoverflow.com/questions/42159346/jsonschema-refresolver-to-resolve-multiple-refs-in-python -SCHEMA_STORE = { - RESPONSE_SCHEMA['$id']: RESPONSE_SCHEMA, - DEFINITIONS_SCHEMA['$id']: DEFINITIONS_SCHEMA - } -RESOLVER = jsonschema.RefResolver.from_schema( - RESPONSE_SCHEMA, store=SCHEMA_STORE - ) +VALIDATOR = Validator( + data_schema='./schemas/preprocessors/object-detection.schema.json' +) def decode_image(graphic_data): @@ -142,15 +124,11 @@ def format_detection_results(results): def detect(): # Get JSON content from the request content = request.get_json() - try: - # Validate input against REQUEST_SCHEMA - validator = jsonschema.Draft7Validator( - REQUEST_SCHEMA, resolver=RESOLVER - ) - validator.validate(content) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for incoming request") - logging.pii(f"Validation error: {e.message} | Data: {content}") + # request schema validation + ok, err = VALIDATOR.check_request(content) + if not ok: + logging.error("Request validation failed.") + 
logging.debug(f"[request.validation] {err}") return jsonify("Invalid Preprocessor JSON format"), 400 # Check if there is graphic content to process @@ -199,12 +177,10 @@ def detect(): return "", 204 # Validate YOLO output against the object detection data schema - try: - validator = jsonschema.Draft7Validator(DATA_SCHEMA) - validator.validate(objects) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for detection data") - logging.pii(f"Validation error: {e.message} | Data: {objects}") + ok, err = VALIDATOR.check_data(objects) + if not ok: + logging.error("Validation failed for detection data.") + logging.debug(f"[data.validation] {err}") return jsonify("Invalid Preprocessor JSON format"), 500 # Create full response following preprocessor response schema @@ -214,14 +190,11 @@ def detect(): "name": name, "data": objects } - try: - validator = jsonschema.Draft7Validator( - RESPONSE_SCHEMA, resolver=RESOLVER - ) - validator.validate(response) - except jsonschema.exceptions.ValidationError as e: - logging.error("Validation failed for full response") - logging.pii(f"Validation error: {e.message} | Response: {response}") + # response schema validation + ok, err = VALIDATOR.check_response(response) + if not ok: + logging.error("Response validation failed. Are schemas out of date?") + logging.debug(f"[response.validation] {err}") return jsonify("Invalid Preprocessor JSON format"), 500 logging.pii(response) diff --git a/utils/validation/validator.py b/utils/validation/validator.py index 51ccc4cde..e59a7bb3c 100644 --- a/utils/validation/validator.py +++ b/utils/validation/validator.py @@ -1,6 +1,16 @@ """ Simple validator module for IMAGE project components. Provides a lightweight validator class for JSON schema validation. 
+ +validate_*: +- raises jsonschema.ValidationError on failure (try/except style) +- logs failures +check_*: +- calls validate_* +- catches the exception and instead returns a (ok, err) tuple +- avoids duplicate “Validation failed” lines (since validate_* logs). + +In the route you just map failure -> HTTP status. """ import json @@ -10,6 +20,7 @@ PREPROCESSOR_RESPONSE_SCHEMA = './schemas/preprocessor-response.schema.json' DEFINITIONS_SCHEMA = './schemas/definitions.json' REQUEST_SCHEMA = './schemas/request.schema.json' +HANDLER_RESPONSE_SCHEMA = './schemas/handler-response.schema.json' class Validator: @@ -29,6 +40,23 @@ def __init__( self._load_schemas() self._setup_resolver() + self._compile() + + def _compile(self): + """ + Create and cache Draft7Validator instances once. + To avoid re-creating validator objects on every call to + validate_request/validate_data/validate_response + """ + self._v_request = jsonschema.Draft7Validator( + self.request_schema, + resolver=self.resolver + ) + self._v_data = jsonschema.Draft7Validator(self.data_schema) + self._v_response = jsonschema.Draft7Validator( + self.response_schema, + resolver=self.resolver + ) def _load_schemas(self): """Load all required schemas.""" @@ -64,26 +92,21 @@ def _setup_resolver(self): } self.resolver = jsonschema.RefResolver.from_schema( - self.response_schema, store=schema_store + self.response_schema, + store=schema_store ) logging.debug("Schema resolver initialized") def validate_request(self, data): """ Validate request data. - Args: data: Request data to validate - Raises: - jsonschema.exceptions.ValidationError: If validation fails + jsonschema.exceptions.ValidationError: If validation fails. 
""" try: - validator = jsonschema.Draft7Validator( - self.request_schema, - resolver=self.resolver - ) - validator.validate(data) + self._v_request.validate(data) logging.debug("Request validation successful") except jsonschema.exceptions.ValidationError as e: logging.error("Validation failed for incoming request") @@ -93,16 +116,13 @@ def validate_request(self, data): def validate_data(self, data): """ Validate processed data. - Args: data: Processed data to validate - Raises: jsonschema.exceptions.ValidationError: If validation fails """ try: - validator = jsonschema.Draft7Validator(self.data_schema) - validator.validate(data) + self._v_data.validate(data) logging.debug("Data validation successful") except jsonschema.exceptions.ValidationError as e: logging.error("Validation failed for output data") @@ -111,22 +131,49 @@ def validate_data(self, data): def validate_response(self, data): """ - Validate final response. - + Validate final response Args: - data: Response data to validate - + data: Full response to validate Raises: - jsonschema.exceptions.ValidationError: If validation fails + jsonschema.exceptions.ValidationError: If validation fails. """ try: - validator = jsonschema.Draft7Validator( - self.response_schema, - resolver=self.resolver - ) - validator.validate(data) + self._v_response.validate(data) logging.debug("Response validation successful") except jsonschema.exceptions.ValidationError as e: logging.error("Validation failed for full response") logging.pii(f"Validation error: {e.message} | Response: {data}") raise + + def check_request(self, data): + """ + Validate request data; return (ok, err). + Logs on failure via validate_request(). + """ + try: + self.validate_request(data) + return True, None + except jsonschema.exceptions.ValidationError as e: + return False, str(e) + + def check_data(self, data): + """ + Validate component data payload; return (ok, err). + Logs on failure via validate_data(). 
+ """ + try: + self.validate_data(data) + return True, None + except jsonschema.exceptions.ValidationError as e: + return False, str(e) + + def check_response(self, data): + """ + Validate final response envelope; return (ok, err). + Logs on failure via validate_response(). + """ + try: + self.validate_response(data) + return True, None + except jsonschema.exceptions.ValidationError as e: + return False, str(e)