From a44703bca66b391f777e8f3257312c1494dbf45e Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Fri, 12 Sep 2025 07:47:43 +0100 Subject: [PATCH 01/16] Removed old code + minor refactoring + added extra input handles to API. --- medcat-service/.gitignore | 13 +- medcat-service/README.md | 65 ++- medcat-service/env/app.env | 3 +- medcat-service/env/app_deid.env | 5 +- medcat-service/env/general.env | 2 +- medcat-service/env/medcat.env | 25 +- medcat-service/env/medcat_deid.env | 25 +- medcat-service/export_env_vars.sh | 26 +- medcat-service/medcat_service/config.py | 108 ++++- medcat-service/medcat_service/main.py | 4 +- .../nlp_processor/medcat_processor.py | 428 +++++------------- .../medcat_service/routers/legacy.py | 36 -- .../medcat_service/routers/process.py | 26 +- medcat-service/medcat_service/test/common.py | 6 +- medcat-service/medcat_service/types.py | 8 +- 15 files changed, 329 insertions(+), 451 deletions(-) delete mode 100644 medcat-service/medcat_service/routers/legacy.py diff --git a/medcat-service/.gitignore b/medcat-service/.gitignore index 79caf4bf0..10609cd52 100644 --- a/medcat-service/.gitignore +++ b/medcat-service/.gitignore @@ -1,10 +1,13 @@ +# IDE envs and system folders .DS_Store +**/.DS_Store .idea .vscode +.venv +.ruff_cache +**__pycache__ venv venv-test -tmp_cat.log -__pycache__ .mypy_cache *.pyc .pyc @@ -21,7 +24,11 @@ models/examples/example-deid-model-pack tmp/* # env folder -env/* +.env +*.env +**/.env +**/*.env # log files +tmp_cat.log medcat.log diff --git a/medcat-service/README.md b/medcat-service/README.md index dbd630f4e..fc1517160 100644 --- a/medcat-service/README.md +++ b/medcat-service/README.md @@ -1,26 +1,28 @@ -# Introduction +# MedCAT Service -This project implements the [MedCAT](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/) NLP application as a service behind a REST API. The general idea is to be able send the text to MedCAT NLP service and receive back the annotations. 
The REST API is built using [Flask](https://flask.palletsprojects.com/). +## Introduction + +This project implements the [MedCAT](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/) NLP application as a service behind a REST API. The general idea is to be able send the text to MedCAT NLP service and receive back the annotations. The REST API is built using [FastAPI](https://github.com/fastapi). Git Branches: - - devel: development branch, latest updates and features, might be unstable. - - main: stable releases - - legacy: branch for old MedCAT version (pre v1.0, new models wont work, only v0.x models will) +- devel: development branch, latest updates and features, might be unstable. +- main: stable releases +- legacy: branch for old MedCAT version (pre v1.0, new models wont work, only v0.x models will) Feel free to ask questions on the github issue tracker or on our [discourse website](https://discourse.cogstack.org) which is frequently used by our development team! -# API specification +## API specification The API definition follows the one defined in [CogStack GATE NLP Service](https://github.com/CogStack/gate-nlp-service/). Currently, there are 3 endpoints defined, that consume and return data in JSON format: + - *GET* `/api/info` - displays general information about the MedCAT application, - *POST* `/api/process` - processes the provided documents and returns back the annotations, - *POST* `/api/process_bulk` - processes the provided list of documents and returns back the annotations. The full specification is available is [OpenAPI](https://github.com/CogStack/gate-nlp-service/tree/devel/api-specs) specification. - -# Running the application +## Running the application The application can be run either as a standalone Python application or as running inside the Docker container (recommended). 
@@ -29,12 +31,13 @@ The application can be run either as a standalone Python application or as runni Please note that prior running the application a number of requirements need to installed (see: `requirements.txt`). There are two scripts provided implementing starting the application: + - `start_service_debug.sh` - starts the application in the development mode - `start_service_production.sh` - starts the application in 'production' mode and using `gunicorn` server. ## Running in a Docker container -The recommended way to run the application is to use the provided Docker image. The Docker image can be either downloaded from the Docker Hub (`cogstacksystems/medcat-service:latest`) or build manually using the provided `Dockerfile`. +The recommended way to run the application is to use the provided Docker image. The Docker image can be either downloaded from the Docker Hub (`cogstacksystems/medcat-service:latest`) or build manually using the provided `Dockerfile`. Please note that by default the built docker image will run the Flask application in 'production' mode running `start_service_production.sh` script. To build the Docker image manually: @@ -44,20 +47,21 @@ To build the Docker image manually: To run the container using the built image: ``` -docker run -it -p 5000:5000 \ - --env-file=envs/env_app --env-file=envs/env_medcat \ - -v :/cat/models:ro \ - cogstacksystems/medcat-service:latest + docker run -it -p 5000:5000 \ + --env-file=envs/env_app --env-file=envs/env_medcat \ + -v :/cat/models:ro \ + cogstacksystems/medcat-service:latest ``` -By default the MedCAT service will be running on port `5000`. MedCAT models will be mounted from local directory `` into the container at `/cat/models`. +By default the MedCAT service will be running on port `5000`. MedCAT models will be mounted from local directory `` into the container at `/cat/models`. 
### GPU support If you have a gpu and wish to use it, please change the `docker/docker-compose.yml` file, use the `cogstacksystems/medcat-service-gpu:latest` image or change the `build:` directive to build `../Dockerfile_gpu`. ### IMPORTANT ! -If you wish to run this docker service manually, use the docker/docker-compose.yml file, execute `docker compose up -d` whilst in the `docker` folder. + +If you wish to run this docker service manually, use the docker/docker-compose.yml file, execute `docker compose up -d` whilst in the `docker` folder. Alternatively, an example script `./docker/run_example_medmen.sh` was provided to run the Docker container with MedCAT service. The script will download an example model (using the `./scripts/download_medmen.sh` script),it will use an example environment configuration, then it will build and start the service using the provided Docker Compose file, the service WONT WORK without the model being present. @@ -65,7 +69,8 @@ All models should be mounted from the `models/` folder.
-### Manual docker start-up steps: +### Manual docker start-up steps + ``` 1. cd ./models/ 2. bash ./download_medmen.sh @@ -73,7 +78,9 @@ All models should be mounted from the `models/` folder. 4. docker compose up -d DONE! ``` + Or, if you wish to use the above mentioned script ( the sample model is downloaded via script, you don't need to do anything): + ``` 1. cd ./docker/ 2. bash ./run_example_medmen.sh @@ -83,6 +90,7 @@ Or, if you wish to use the above mentioned script ( the sample model is download # API Example use Assuming that the application is running on the `localhost` with the API exposed on port `5000`, one can run: + ``` curl -XPOST http://localhost:5000/api/process \ -H 'Content-Type: application/json' \ @@ -106,9 +114,9 @@ and the received result: Additional DE-ID query sample (make sure you have a de-id model loaded): -curl -XPOST http://localhost:5555/api/process \ +curl -XPOST \ -H 'Content-Type: application/json' \ - -d '{"content":{"text":"Patient Information: Full Name: John Michael Doe \n Gender: Male \n Date of Birth: January 15, 1975 (Age: 49) \n Patient ID: 567890123 \n Address: 1234 Elm Street, Springfield, IL 62701 \n Phone Number: (555) 123-4567 \n Email: johnmdoe@example.com \n Emergency Contact: Jane Doe (Wife) \n Phone: (555) 987-6543 \n Relationship: Spouse"}}' + -d '{"content":{"text":"Patient Information: Full Name: John Michael Doe \n Gender: Male \n Date of Birth: January 15, 1975 (Age: 49) \n Patient ID: 567890123 \n Address: 1234 Elm Street, Springfield, IL 62701 \n Phone Number: (555) 123-4567 \n Email: \n Emergency Contact: Jane Doe (Wife) \n Phone: (555) 987-6543 \n Relationship: Spouse"}}' Make sure you have the following option enabled in `envs/env_medcat` , `DEID_MODE=True`. 
@@ -120,7 +128,7 @@ curl -XPOST http://localhost:5000/api/process_bulk \ -d '{"content": [{"text":"The patient was diagnosed with leukemia."}, {"text": "The patient was diagnosed with cancer."}] }' ``` -example bulk result : +example bulk result : ``` { @@ -275,14 +283,18 @@ As the changes from MedCAT intoduced dictionary annotation/entity output. The mode in which annotation entities should be outputted in the JSON response, by default this was outputted as a "list" of dicts in older versions, so the output would be : + ``` {"annotations": [{"id": "0", "cui" : "C1X..", ..}, {"id":"1", "cui": "...."}]} ``` + newer versions of MedCAT (1.2+) output entities as a dict, where the id of the entity is a key and the rest of the data is a value, so for "dict", the output is + ``` {"annotations": [{"0": {"cui": "C0027361", "id": 0,.....}, "1": {"cui": "C001111", "id": 1......}}]} ``` + This setting can be configured in the ```./env/env_medcat``` file, using the ```ANNOTATIONS_ENTITY_OUTPUT_MODE``` variable. By default, the output of these entities is set to respect the output of the MedCAT package, hence the latter will be used. Please change the above mentioned env variable and make sure your CogStack-Nifi annotation script is adapted accordingly.
@@ -290,33 +302,37 @@ Please note that the returned NLP annotations will depend on the underlying mode

-# Configuration +## Configuration In the current implementation, configuration for both MedCAT Service application and MedCAT NLP library is based on environment variables. These will be provided usually in two files in `env` directory: + - `env_app` - configuration of MedCAT Service app, - `env_medcat` - configuration of MedCAT library. Both files allow tailoring MedCAT for specific use-cases. When running MedCAT Service, these variables need to be loaded into the current working environment. ## spaCy models + When using MedCAT for a different language than English, it can be useful to use a different spaCy model. A spaCy model can be included in the MedCAT model pack, but when not using this functionality, it can be useful to install models in the Docker image. This can be done by setting a build-time variable. See the `SPACY_MODELS` variable in [Dockerfile](Dockerfile) for default value and usage. -## MedCAT Service +## Service Environment vars + MedCAT Service application are defined in `envs/env_app` file. The following environment variables are available for tailoring the MedCAT Service `gunicorn` server: + - `SERVER_HOST` - specifies the host address (default: `0.0.0.0`), - `SERVER_PORT` - the port number used (default: `5000`), - `SERVER_WORKERS` - the number of workers serving the Flask app working in parallel (default: `1` ; only used in production server). - `SERVER_WORKER_TIMEOUT` - the max timeout (in sec) for receiving response from worker (default: `300` ; only used with production server). 
The following environment variables are available for tailoring the MedCAT Service wrapper: -- `APP_MODEL_NAME` - an informative name of the model used by MedCAT (optional), + +- `APP_MODEL_NAME` - an informative name of the model used by MedCAT (optional), - `APP_MODEL_CDB_PATH` - the path to the model's concept database, - `APP_MODEL_VOCAB_PATH` - the path to the model's vocabulary, - `APP_MODEL_META_PATH_LIST` - the list of paths to meta-annotation models, each separated by `:` character (optional), - `APP_BULK_NPROC` - the number of threads used in bulk processing (default: `8`), -- `APP_TRAINING_MODE` - whether to run the application with MedCAT in training mode (default: `False`). - `APP_MEDCAT_MODEL_PACK` - MedCAT Model Pack path, if this parameter has a value IT WILL BE LOADED FIRST OVER EVERYTHING ELSE (CDB, Vocab, MetaCATs, etc.) declared above. ## Performance Tuning @@ -325,6 +341,7 @@ Theres a range of factors that might impact the performance of this service, the The main settings that can be used to improve the performance when querying large amounts of documents are : `SERVER_WORKERS` (number of flask web workers that chan handle parallel requests) and `APP_BULK_NPROC` (threads for annotation processing). ## MedCAT library -MedCAT parameters are defined in selected `envs/env_medcat*` file. + +MedCAT parameters are defined in selected `envs/medcat*` file. For details on available MedCAT parameters please refer to [the official GitHub repository](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/). 
diff --git a/medcat-service/env/app.env b/medcat-service/env/app.env index c9f83b23f..3e07246d8 100755 --- a/medcat-service/env/app.env +++ b/medcat-service/env/app.env @@ -13,6 +13,7 @@ APP_MODEL_VOCAB_PATH=/cat/models/medmen/vocab.dat # optionally, can include multiple models for meta tasks, separated using ':' APP_MODEL_META_PATH_LIST=/cat/models/medmen/Status +APP_MODEL_REL_PATH_LIST= # MedCAT Model Pack path # IMPORTANT: if this parameter has value IT WILL BE LOADED FIRST OVER EVERYTHING ELSE (CDB, Vocab, MetaCATs, etc.) declared above. @@ -39,4 +40,4 @@ APP_TORCH_THREADS=8 # GPU SETTING # CAUTION, use only if you are using the GPU docker image. -APP_CUDA_DEVICE_COUNT=1 \ No newline at end of file +APP_CUDA_DEVICE_COUNT=1 diff --git a/medcat-service/env/app_deid.env b/medcat-service/env/app_deid.env index 4a11422f6..3e07246d8 100755 --- a/medcat-service/env/app_deid.env +++ b/medcat-service/env/app_deid.env @@ -13,11 +13,12 @@ APP_MODEL_VOCAB_PATH=/cat/models/medmen/vocab.dat # optionally, can include multiple models for meta tasks, separated using ':' APP_MODEL_META_PATH_LIST=/cat/models/medmen/Status +APP_MODEL_REL_PATH_LIST= # MedCAT Model Pack path # IMPORTANT: if this parameter has value IT WILL BE LOADED FIRST OVER EVERYTHING ELSE (CDB, Vocab, MetaCATs, etc.) declared above. # Respect the same paths as above : /cat/models/model_pack_name.zip -APP_MEDCAT_MODEL_PACK=/cat/models/de_id_base.zip +APP_MEDCAT_MODEL_PACK= # optionally, an filter the reported concepts by CUIs # APP_MODEL_CUI_FILTER_PATH=/cat/models/cui_filter.txt @@ -39,4 +40,4 @@ APP_TORCH_THREADS=8 # GPU SETTING # CAUTION, use only if you are using the GPU docker image. 
-APP_CUDA_DEVICE_COUNT=1 \ No newline at end of file +APP_CUDA_DEVICE_COUNT=1 diff --git a/medcat-service/env/general.env b/medcat-service/env/general.env index ef7d85c46..d3f7612c3 100644 --- a/medcat-service/env/general.env +++ b/medcat-service/env/general.env @@ -5,4 +5,4 @@ # remove if this causes issues on any other platform # possible values: amd64, arm64 CPU_ARCHITECTURE=amd64 -DOCKER_DEFAULT_PLATFORM=linux/${CPU_ARCHITECTURE:-amd64} \ No newline at end of file +DOCKER_DEFAULT_PLATFORM=linux/${CPU_ARCHITECTURE:-amd64} diff --git a/medcat-service/env/medcat.env b/medcat-service/env/medcat.env index eb46fd382..446177197 100755 --- a/medcat-service/env/medcat.env +++ b/medcat-service/env/medcat.env @@ -1,25 +1,10 @@ -TYPE=NOT_UMLS - # IMPORTANT : log level set # CRITICAL - 50, ERROR - 40, WARNING - 30, INFO - 20, DEBUG - 10, NOTSET - 0 -LOG_LEVEL=40 - -NESTED_ENTITIES=False - -CNTX_SPAN=9 -CNTX_SPAN_SHORT=3 -MIN_CUI_COUNT=30000 -MIN_CUI_COUNT_STRICT=-1 -MIN_ACC=0.2 -MIN_ACC_TH=0.2 - -LEARNING_RATE=0.1 -ANNEAL=False -KEEP_PUNCT=":|." +MEDCAT_LOG_LEVEL=40 # can be left empty if you want to use the CDB spacy model, otherwise, possible values are : en_core_sci_lg, en_core_sci_md, etc... # please note that only SpaCy models that are installed on the docker container are usable (take a look at the .Dockerfile and install any other custom models you may want to use) -SPACY_MODEL= +MEDCAT_SPACY_MODEL= # IMPORTANT: # Mode in which annotation entities should be outputted in the JSON response, @@ -30,7 +15,7 @@ SPACY_MODEL= # Be mindful of this option as it can affect other services that rely directly on the responses of the service # (the NiFi groovy scripts and annotation ingester are two such services that process the output, and so they might require further customisation) # POSSIBLE VALUES: [list, dict], if left empty then "dict" is the default. 
-ANNOTATIONS_ENTITY_OUTPUT_MODE=dict +MEDCAT_ANNOTATIONS_ENTITY_OUTPUT_MODE=dict -DEID_MODE=False -DEID_REDACT=False \ No newline at end of file +MEDCAT_DEID_MODE=False +MEDCAT_DEID_REDACT=False diff --git a/medcat-service/env/medcat_deid.env b/medcat-service/env/medcat_deid.env index 21c07a3eb..ebecb559a 100755 --- a/medcat-service/env/medcat_deid.env +++ b/medcat-service/env/medcat_deid.env @@ -1,25 +1,10 @@ -TYPE=NOT_UMLS - # IMPORTANT : log level set # CRITICAL - 50, ERROR - 40, WARNING - 30, INFO - 20, DEBUG - 10, NOTSET - 0 -LOG_LEVEL=40 - -NESTED_ENTITIES=False - -CNTX_SPAN=9 -CNTX_SPAN_SHORT=3 -MIN_CUI_COUNT=30000 -MIN_CUI_COUNT_STRICT=-1 -MIN_ACC=0.2 -MIN_ACC_TH=0.2 - -LEARNING_RATE=0.1 -ANNEAL=False -KEEP_PUNCT=:|. +MEDCAT_LOG_LEVEL=40 # can be left empty if you want to use the CDB spacy model, otherwise, possible values are : en_core_sci_lg, en_core_sci_md, etc... # please note that only SpaCy models that are installed on the docker container are usable (take a look at the .Dockerfile and install any other custom models you may want to use) -SPACY_MODEL= +MEDCAT_SPACY_MODEL= # IMPORTANT: # Mode in which annotation entities should be outputted in the JSON response, @@ -30,7 +15,7 @@ SPACY_MODEL= # Be mindful of this option as it can affect other services that rely directly on the responses of the service # (the NiFi groovy scripts and annotation ingester are two such services that process the output, and so they might require further customisation) # POSSIBLE VALUES: [list, dict], if left empty then "dict" is the default. 
-ANNOTATIONS_ENTITY_OUTPUT_MODE=dict +MEDCAT_ANNOTATIONS_ENTITY_OUTPUT_MODE=dict -DEID_MODE=True -DEID_REDACT=True \ No newline at end of file +MEDCAT_DEID_MODE=True +MEDCAT_DEID_REDACT=True diff --git a/medcat-service/export_env_vars.sh b/medcat-service/export_env_vars.sh index f8f1f48d4..a22455b7f 100644 --- a/medcat-service/export_env_vars.sh +++ b/medcat-service/export_env_vars.sh @@ -1,4 +1,10 @@ -#!/bin/bash +#!/usr/bin/env bash + +# Enable strict mode (without -e to avoid exit-on-error) +set -uo pipefail + + +echo "🔧 Running $(basename "${BASH_SOURCE[0]}")..." set -a @@ -10,8 +16,20 @@ env_files=("env/general.env" ) -for env_file in ${env_files[@]}; do - source $env_file +for env_file in "${env_files[@]}"; do + if [ -f "$env_file" ]; then + echo "✅ Sourcing $env_file" + # shellcheck disable=SC1090 + source "$env_file" + else + echo "⚠️ Skipping missing env file: $env_file" + fi done -set +a \ No newline at end of file + +# Disable auto-export +set +a + +# Restore safe defaults for interactive/dev shell +set +u +set +o pipefail diff --git a/medcat-service/medcat_service/config.py b/medcat-service/medcat_service/config.py index 0a9e9dec4..ea727c116 100644 --- a/medcat-service/medcat_service/config.py +++ b/medcat-service/medcat_service/config.py @@ -1,17 +1,111 @@ -from pydantic import Field -from pydantic_settings import BaseSettings +from typing import Any, Optional, Tuple, Union +from pydantic import AliasChoices, Field, field_validator +from pydantic_settings import BaseSettings, SettingsConfigDict +import logging +import torch + +def _coerce_loglevel(v: Any) -> int: + """ + Accept int or common strings like 'INFO', 'debug', etc. 
+ """ + if isinstance(v, int): + return v + if isinstance(v, str): + name = v.strip().upper() + # Map name to logging level; default INFO if unknown + return getattr(logging, name, logging.INFO) + return logging.INFO class Settings(BaseSettings): - class Config: - frozen = True + + model_config = SettingsConfigDict( + frozen=True, + env_prefix="", # no prefix; we specify full env names via alias + case_sensitive=False, + populate_by_name=True + ) app_root_path: str = Field( - default="/", description="The Root Path for the FastAPI App", examples=["/medcat-service"] + default="/", + description="The Root Path for the FastAPI App", + examples=["/medcat-service"], ) - deid_mode: bool = Field(default=False, description="Enable DEID mode") + deid_mode: bool = Field(default=False, + validation_alias=AliasChoices("deid_mode", "MEDCAT_DEID_MODE"), + description="Enable DEID mode" + ) deid_redact: bool = Field( default=True, - description="Enable DEID redaction. Returns text like [***] instead of [ANNOTATION]", + validation_alias=AliasChoices("deid_redact", "MEDCAT_DEID_REDACT"), + description="Enable DEID redaction. 
Returns text like [***] instead of [ANNOTATION]" ) + + # Model paths + model_cdb_path: Optional[str] = Field("/cat/models/medmen/cdb.dat", alias="APP_MODEL_CDB_PATH") + model_vocab_path: Optional[str] = Field("/cat/models/medmen/vocab.dat", alias="APP_MODEL_VOCAB_PATH") + model_meta_path_list: Union[str, Tuple[str, ...]] = Field(default=(), alias="APP_MODEL_META_PATH_LIST") + model_rel_path_list: Union[str, Tuple[str, ...]] = Field(default=(), alias="APP_MODEL_REL_PATH_LIST") + medcat_model_pack: Optional[str] = Field("", alias="APP_MEDCAT_MODEL_PACK") + model_cui_filter_path: Optional[str] = Field("", alias="APP_MODEL_CUI_FILTER_PATH") + spacy_model: str = Field("", alias="MEDCAT_SPACY_MODEL") + + # ---- App logging & MedCAT logging ---- + app_log_level: int = Field(default="INFO", alias="APP_LOG_LEVEL") + medcat_log_level: int = Field(default="INFO", alias="MEDCAT_LOG_LEVEL") + + # ---- App identity / model basics ---- + app_name: str = Field(default="MedCAT", alias="APP_NAME") + app_model_language: str = Field(default="en", alias="APP_MODEL_LANGUAGE") + app_model_name: str = Field(default="unknown", alias="APP_MODEL_NAME") + + # ---- Performance knobs ---- + bulk_nproc: int = Field(8, alias="APP_BULK_NPROC") + torch_threads: int = Field(-1, alias="APP_TORCH_THREADS") + + # ---- Output formatting ---- + # e.g. 
"dict" | "list" | "json" (service currently uses "dict" default) + annotations_entity_output_mode: str = Field(default="dict", alias="MEDCAT_ANNOTATIONS_ENTITY_OUTPUT_MODE") + + # ---- Normalizers --------------------------------------------------------- + @field_validator("app_log_level", "medcat_log_level", mode="before") + @classmethod + def _val_log_levels(cls, v: Any) -> int: + return _coerce_loglevel(v) + + @field_validator("annotations_entity_output_mode", mode="after") + @classmethod + def _lower_mode(cls, v: str) -> str: + return v.lower().strip() + + @field_validator("model_meta_path_list", "model_rel_path_list", mode="before") + @classmethod + def _split_paths(cls, v): + if not v: + return () + if isinstance(v, str): + return tuple(p.strip() for p in v.split(":") if p.strip()) + if isinstance(v, (list, tuple)): + return tuple(v) + return () + + @classmethod + def env_name(cls, field: str) -> str: + """Return the env var name (alias) for a given field name.""" + return cls.model_fields[field].alias or field + + @field_validator("bulk_nproc", mode="before") + def adjust_bulk_nproc(cls, num_procs: int) -> int: + """ This method is used to adjust the number of processes to use for bulk processing. + Set number of processes to 1 if MPS (Apple Sillicon) is available, as MPS does not support multiprocessing. 
+ + Args: + num_procs (int): number of processes requested + + Returns: + int: number of processes to use + """ + if torch.backends.mps.is_available(): + return 1 + return num_procs diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py index f21887827..c80b2fbe1 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -1,11 +1,12 @@ import gradio as gr import uvicorn +import logging from fastapi import FastAPI, Request from fastapi.responses import JSONResponse from medcat_service.demo.gradio_demo import io from medcat_service.dependencies import get_settings -from medcat_service.routers import admin, health, legacy, process +from medcat_service.routers import admin, health, process from medcat_service.types import HealthCheckFailedException settings = get_settings() @@ -28,7 +29,6 @@ app.include_router(admin.router) app.include_router(health.router) app.include_router(process.router) -app.include_router(legacy.router) gr.mount_gradio_app(app, io, path="/demo") diff --git a/medcat-service/medcat_service/nlp_processor/medcat_processor.py b/medcat-service/medcat_service/nlp_processor/medcat_processor.py index 5b5b33b2f..e6d075b8c 100644 --- a/medcat-service/medcat_service/nlp_processor/medcat_processor.py +++ b/medcat-service/medcat_service/nlp_processor/medcat_processor.py @@ -6,13 +6,15 @@ from datetime import datetime, timezone import numpy as np -import simplejson as json +import torch from medcat.cat import CAT from medcat.cdb import CDB from medcat.components.addons.meta_cat import MetaCATAddon +from medcat.components.addons.relation_extraction.rel_cat import RelCATAddon from medcat.components.ner.trf.deid import DeIdModel from medcat.config import Config from medcat.config.config_meta_cat import ConfigMetaCAT +from medcat.config.config_rel_cat import ConfigRelCAT from medcat.vocab import Vocab from medcat_service.config import Settings @@ -26,48 +28,45 @@ class MedCatProcessor: """ 
def __init__(self, settings: Settings): - app_log_level = os.getenv("APP_LOG_LEVEL", logging.INFO) - medcat_log_level = os.getenv("LOG_LEVEL", logging.INFO) + + self.service_settings = settings self.log = logging.getLogger(self.__class__.__name__) - self.log.setLevel(level=app_log_level) + self.log.setLevel(level=self.service_settings.app_log_level) - self.log.debug("APP log level set to : " + str(app_log_level)) - self.log.debug("MedCAT log level set to : " + str(medcat_log_level)) + self.log.debug("APP log level set to : " + str(self.service_settings.app_log_level)) + self.log.debug("MedCAT log level set to : " + str(self.service_settings.medcat_log_level)) self.log.info("Initializing MedCAT processor ...") self._is_ready_flag = False - self.app_name = os.getenv("APP_NAME", "MedCAT") - self.app_lang = os.getenv("APP_MODEL_LANGUAGE", "en") self.app_version = MedCatProcessor._get_medcat_version() - self.app_model = os.getenv("APP_MODEL_NAME", "unknown") - self.entity_output_mode = os.getenv( - "ANNOTATIONS_ENTITY_OUTPUT_MODE", "dict").lower() - - self.bulk_nproc = int(os.getenv("APP_BULK_NPROC", 8)) - self.torch_threads = int(os.getenv("APP_TORCH_THREADS", -1)) - self.DEID_MODE = settings.deid_mode - self.DEID_REDACT = settings.deid_redact + self.model_card_info = ModelCardInfo( - ontologies=None, meta_cat_model_names=[], model_last_modified_on=None) + ontologies=None, + meta_cat_model_names=[], + rel_cat_model_names=[], + model_last_modified_on=None) + + # disale torch gradients, we don't need them for inference + # this should also reduce memory consumption + torch.set_grad_enabled(False) + self.log.info("Torch autograd disabled (inference mode only)") # this is available to constrain torch threads when there # isn't a GPU # You probably want to set to 1 # Not sure what happens if torch is using a cuda device - if self.torch_threads > 0: - import torch - torch.set_num_threads(self.torch_threads) - self.log.info("Torch threads set to " + str(self.torch_threads)) + 
if self.service_settings.torch_threads > 0: + torch.set_num_threads(self.service_settings.torch_threads) + self.log.info("Torch threads set to " + str(self.service_settings.torch_threads)) - self.cat = self._create_cat() - self.cat.train = os.getenv("APP_TRAINING_MODE", False) + self.cat: DeIdModel | CAT = self._create_cat() self._is_ready_flag = self._check_medcat_readiness() @staticmethod - def _get_timestamp(): + def _get_timestamp() -> str: """ Returns the current timestamp in ISO 8601 format. Formatted as "yyyy-MM-dd"T"HH:mm:ss.SSSXXX". :return: timestamp string @@ -109,10 +108,10 @@ def get_app_info(self) -> ServiceInfo: Returns: dict: Application information stored as KVPs. """ - return ServiceInfo(service_app_name=self.app_name, - service_language=self.app_lang, + return ServiceInfo(service_app_name=self.service_settings.app_name, + service_language=self.service_settings.app_model_language, service_version=self.app_version, - service_model=self.app_model, + service_model=self.service_settings.app_model_name, model_card_info=self.model_card_info ) @@ -125,7 +124,7 @@ def process_entities(self, entities, *args, **kwargs): self._fix_floats(entities) - if self.entity_output_mode == "list": + if self.service_settings.annotations_entity_output_mode == "list": entities = list(entities.values()) yield entities @@ -163,9 +162,9 @@ def process_content(self, content, *args, **kwargs): start_time_ns = time.time_ns() - if self.DEID_MODE: + if self.service_settings.deid_mode and isinstance(self.cat, DeIdModel): entities = self.cat.get_entities(text) - text = self.cat.deid_text(text, redact=self.DEID_REDACT) + text = self.cat.deid_text(text, redact=self.service_settings.deid_redact) else: if text is not None and len(text.strip()) > 0: entities = self.cat.get_entities(text) @@ -174,11 +173,15 @@ def process_content(self, content, *args, **kwargs): elapsed_time = (time.time_ns() - start_time_ns) / 10e8 # nanoseconds to seconds - if kwargs.get("meta_anns_filters"): - 
meta_anns_filters = kwargs.get("meta_anns_filters") - entities = [e for e in entities['entities'].values() if - all(e['meta_anns'][task]['value'] in filter_values - for task, filter_values in meta_anns_filters)] + meta_anns_filters = kwargs.get("meta_anns_filters") + if meta_anns_filters: + if isinstance(entities, dict): + entities = [ e for e in entities['entities'].values() + if isinstance(e, dict) and all( + task in e.get('meta_anns', {}) and e['meta_anns'][task]['value'] in filter_values + for task, filter_values in meta_anns_filters + ) + ] entities = list(self.process_entities(entities, **kwargs)) @@ -210,17 +213,19 @@ def process_content_bulk(self, content): start_time_ns = time.time_ns() try: + text_input = MedCatProcessor._generate_input_doc(content, invalid_doc_ids) - if self.DEID_MODE: + if self.service_settings.deid_mode and isinstance(self.cat, DeIdModel): text_to_deid_from_tuple = (x[1] for x in text_input) - ann_res = self.cat.deid_multi_text(list(text_to_deid_from_tuple), - redact=self.DEID_REDACT, n_process=self.bulk_nproc) - else: + ann_res = self.cat.deid_multi_texts(list(text_to_deid_from_tuple), + redact=self.service_settings.deid_redact, + n_process=self.service_settings.bulk_nproc) + elif isinstance(self.cat, CAT): ann_res = { ann_id: res for ann_id, res in self.cat.get_entities_multi_texts( - text_input, n_process=self.bulk_nproc) + text_input, n_process=self.service_settings.bulk_nproc) } except Exception as e: self.log.error("Unable to process data", exc_info=e) @@ -229,34 +234,7 @@ def process_content_bulk(self, content): return self._generate_result(content, ann_res, elapsed_time) - def retrain_medcat(self, content, replace_cdb): - """Retrains Medcat and redeploys model. - - Args: - content: Training data for retraining. - replace_cdb: Whether to replace the existing CDB. - - Returns: - dict: Results containing precision, recall, F1 scores and error dictionaries. 
- """ - - with open("/cat/models/data.json", "w") as f: - json.dump(content, f) - - DATA_PATH = "/cat/models/data.json" - CDB_PATH = "/cat/models/cdb.dat" - VOCAB_PATH = "/cat/models/vocab.dat" - - self.log.info("Retraining Medcat Started...") - - p, r, f1, tp_dict, fp_dict, fn_dict = MedCatProcessor._retrain_supervised( - self, CDB_PATH, DATA_PATH, VOCAB_PATH) - - self.log.info("Retraining Medcat Completed...") - - return {"results": [p, r, f1, tp_dict, fp_dict, fn_dict]} - - def _populate_model_card_info(self, config: Config): + def _populate_model_card_info(self, config: Config) -> None: """Populates model card information from config. Args: @@ -267,15 +245,16 @@ def _populate_model_card_info(self, config: Config): self.model_card_info.meta_cat_model_names = [ cnf.general.category_name or "None" for cnf in config.components.addons if (isinstance(cnf, ConfigMetaCAT))] + self.model_card_info.rel_cat_model_names = [ + str(cnf.general.labels2idx.values()) or "None" for cnf in config.components.addons + if (isinstance(cnf, ConfigRelCAT))] self.model_card_info.model_last_modified_on = config.meta.last_saved - # helper MedCAT methods - # - def _create_cat(self): + def _create_cat(self) -> DeIdModel | CAT: """Loads MedCAT resources and creates CAT instance. Returns: - CAT: Initialized MedCAT instance. + DeIdModel | CAT: Initialized MedCAT instance. Raises: ValueError: If required environment variables are not set. 
@@ -283,98 +262,87 @@ def _create_cat(self): """ cat, cdb, vocab, config = None, None, None, None - # Load CUIs to keep if provided - if os.getenv("APP_MODEL_CUI_FILTER_PATH", None) is not None: + # ---- CUI filter ---- + cuis_to_keep: list[str] = [] + + if self.service_settings.model_cui_filter_path: self.log.debug("Loading CUI filter ...") - with open(os.getenv("APP_MODEL_CUI_FILTER_PATH")) as cui_file: - all_lines = (line.rstrip() for line in cui_file) - # filter blank lines - cuis_to_keep = [line for line in all_lines if line] - - model_pack_path = os.getenv("APP_MEDCAT_MODEL_PACK", "").strip() + with open(self.service_settings.model_cui_filter_path) as cui_file: + cuis_to_keep = [line.strip() for line in cui_file if line.strip()] - if model_pack_path != "": + # ---- Path 1: model pack ---- + if self.service_settings.medcat_model_pack: self.log.info("Loading model pack...") - cat = CAT.load_model_pack(model_pack_path) - - if self.DEID_MODE: - cat = DeIdModel.load_model_pack(model_pack_path) + if self.service_settings.deid_mode: + cat = DeIdModel.load_model_pack(self.service_settings.medcat_model_pack) + else: + cat = CAT.load_model_pack(self.service_settings.medcat_model_pack) - # Apply CUI filter if provided - if os.getenv("APP_MODEL_CUI_FILTER_PATH", None) is not None: + if cuis_to_keep: self.log.debug("Applying CUI filter ...") cat.cdb.filter_by_cui(cuis_to_keep) + + cat.config.general.log_level = self.service_settings.medcat_log_level - if self.app_model.lower() in ["", "unknown", "medmen"] and cat.config.meta.hash is not None: - self.app_model = cat.config.meta.hash + if not self.service_settings.app_model_name and cat.config.meta.hash: + self.service_settings.app_model_name = cat.config.meta.hash self._populate_model_card_info(cat.config) - return cat - else: - self.log.info("APP_MEDCAT_MODEL_PACK not set, skipping....") - # Vocabulary and Concept Database are mandatory - if os.getenv("APP_MODEL_VOCAB_PATH", None) is None and cat is None: + 
self.log.info(f"{Settings.env_name('medcat_model_pack')} not set, skipping...") + + # ---- Path 2: vocab + cdb ---- + if not self.service_settings.model_vocab_path: raise ValueError( - "Vocabulary (env: APP_MODEL_VOCAB_PATH) not specified") - else: - self.log.debug("Loading VOCAB ...") - vocab = Vocab.load(os.getenv("APP_MODEL_VOCAB_PATH")) + f"Vocabulary (env {Settings.env_name('model_vocab_path')}) not specified" + ) + self.log.debug("Loading VOCAB ...") + vocab = Vocab.load(self.service_settings.model_vocab_path) - if os.getenv("APP_MODEL_CDB_PATH", None) is None and cat is None: - raise Exception( - "Concept database (env: APP_MODEL_CDB_PATH) not specified") - else: - self.log.debug("Loading CDB ...") - cdb = CDB.load(os.getenv("APP_MODEL_CDB_PATH")) + if not self.service_settings.model_cdb_path: + raise ValueError( + f"Concept database (env {Settings.env_name('model_cdb_path')}) not specified" + ) + self.log.debug("Loading CDB ...") + cdb = CDB.load(self.service_settings.model_cdb_path) - spacy_model = os.getenv("SPACY_MODEL", "") + # ---- SpaCy model ---- + if self.service_settings.spacy_model: + cdb.config.general.nlp.provider = "spacy" + cdb.config.general.nlp.modelname = self.service_settings.spacy_model - if spacy_model != "": - cdb.config.general.nlp.modelname = spacy_model + elif not cdb.config.general.nlp.modelname: + raise ValueError( + f"No {Settings.env_name('spacy_model')} env var declared and " + "CDB has no spaCy model configured" + ) else: - logging.warning("SPACY_MODEL environment var not set" + - ", attempting to load the spacy model found within the CDB : " - + cdb.config.general.nlp.modelname) - - if cdb.config.general.nlp.modelname == "": - raise ValueError("No SPACY_MODEL env var declared, the CDB loaded does not have a\ - spacy_model set in the config variable! 
\ - To solve this declare the SPACY_MODEL in the env_medcat file.") - - if cat is None: - # this is redundant as the config is already in the CDB - config = cdb.config + self.log.warning( + f"{Settings.env_name('spacy_model')} not set, using spaCy model from CDB: " + f"{cdb.config.general.nlp.modelname}" + ) - # Apply CUI filter if provided - if os.getenv("APP_MODEL_CUI_FILTER_PATH", None) is not None: + if cuis_to_keep: self.log.debug("Applying CUI filter ...") cdb.filter_by_cui(cuis_to_keep) - # Meta-annotation models are optional - meta_models = [] - if os.getenv("APP_MODEL_META_PATH_LIST", None) is not None: - self.log.debug("Loading META annotations ...") - for model_path in os.getenv("APP_MODEL_META_PATH_LIST").split(":"): - m = MetaCATAddon.deserialise_from(model_path) - meta_models.append(m) - - # if cat: - # meta_models.extend(cat._meta_cats) + cat = CAT(cdb=cdb, config=cdb.config, vocab=vocab) + cat.config.general.log_level = self.service_settings.medcat_log_level - if self.app_model.lower() in [None, "unknown"] and cdb.config.meta.hash is not None: - self.app_model = cdb.config.meta.hash + # ---- CAT add-ons ---- + for meta_model_path in self.service_settings.model_meta_path_list: + self.log.debug("Loading META annotations from %s", meta_model_path) + cat.add_addon(MetaCATAddon.deserialise_from(meta_model_path)) - config.general.log_level = os.getenv("LOG_LEVEL", logging.INFO) + for rel_model_path in self.service_settings.model_rel_path_list: + self.log.debug("Loading RELATION annotations from %s", rel_model_path) + cat.add_addon(RelCATAddon.deserialise_from(rel_model_path)) - cat = CAT(cdb=cdb, config=config, vocab=vocab) - # add MetaCATs - for mc in meta_models: - cat.add_addon(mc) + if not self.service_settings.app_model_name and cat.config.meta.hash: + self.service_settings.app_model_name = cat.config.meta.hash self._populate_model_card_info(cat.config) - return cat # helper generator functions to avoid multiple copies of data @@ -413,7 +381,7 @@ def 
_generate_result(self, in_documents, annotations, elapsed_time): for i in range(len(in_documents)): in_ct = in_documents[i] - if not self.DEID_MODE and i in annotations.keys(): + if not self.service_settings.deid_mode and i in annotations.keys(): # generate output for valid annotations entities = list(self.process_entities(annotations.get(i))) @@ -426,7 +394,7 @@ def _generate_result(self, in_documents, annotations, elapsed_time): elapsed_time=elapsed_time, footer=in_ct.get("footer"), ) - elif self.DEID_MODE: + elif self.service_settings.deid_mode: out_res = ProcessResult( # TODO: DEID mode is passing the resulting text in the annotations field here but shouldnt. text=str(annotations[i]), @@ -453,7 +421,7 @@ def _generate_result(self, in_documents, annotations, elapsed_time): yield out_res @staticmethod - def _get_medcat_version(): + def _get_medcat_version() -> str: """Returns the version string of the MedCAT module as reported by pip. Returns: @@ -469,182 +437,6 @@ def _get_medcat_version(): except Exception: raise Exception("Cannot read the MedCAT library version") - def _retrain_supervised(self, cdb_path, data_path, vocab_path, cv=1, nepochs=1, - test_size=0.1, lr=1, groups=None, **kwargs): - """Retrains MedCAT model using supervised learning. - - Args: - cdb_path (str): Path to concept database. - data_path (str): Path to training data. - vocab_path (str): Path to vocabulary. - cv (int, optional): Number of cross-validation folds. Defaults to 1. - nepochs (int, optional): Number of training epochs. Defaults to 1. - test_size (float, optional): Size of test set. Defaults to 0.1. - lr (float, optional): Learning rate. Defaults to 1. - groups (list, optional): Training groups. Defaults to None. - **kwargs: Additional keyword arguments. - - Returns: - tuple: Precision, recall, F1 score, and error dictionaries. 
- """ - - data = json.load(open(data_path)) - correct_ids = MedCatProcessor._prepareDocumentsForPeformanceAnalysis( - data) - - cat = MedCatProcessor._create_cat(self) - - f1_base = MedCatProcessor._computeF1forDocuments( - self, data, self.cat, correct_ids)[2] - self.log.info("Base model F1: " + str(f1_base)) - - cat.train = True - cat.spacy_cat.MIN_ACC = os.getenv("MIN_ACC", 0.20) - cat.spacy_cat.MIN_ACC_TH = os.getenv("MIN_ACC_TH", 0.20) - - self.log.info("Starting supervised training...") - - try: - cat.train_supervised(data_path=data_path, lr=1, - test_size=0.1, use_groups=None, nepochs=3) - except Exception: - self.log.info("Did not complete all supervised training") - - p, r, f1, tp_dict, fp_dict, fn_dict = MedCatProcessor._computeF1forDocuments( - self, data, cat, correct_ids) - - self.log.info("Trained model F1: " + str(f1)) - - if MedCatProcessor._checkmodelimproved(f1, f1_base): - self.log.info("Model will be saved...") - - cat.cdb.save_dict("/cat/models/cdb_new.dat") - - self.log.info("Completed Retraining Medcat...") - return p, r, f1, tp_dict, fp_dict, fn_dict - - def _computeF1forDocuments(self, data, cat, correct_ids): - """Computes F1 score and related metrics for documents. - - Args: - data (dict): Input data containing projects and documents. - cat (CAT): MedCAT instance. - correct_ids (dict): Dictionary of correct annotations. - - Returns: - tuple: Precision, recall, F1 score, and error dictionaries. 
- """ - - true_positives_dict, false_positives_dict, false_negatives_dict = {}, {}, {} - true_positive_no, false_positive_no, false_negative_no = 0, 0, 0 - - for project in data["projects"]: - - predictions = {} - documents = project["documents"] - true_positives_dict[project["id"]] = {} - false_positives_dict[project["id"]] = {} - false_negatives_dict[project["id"]] = {} - - for document in documents: - true_positives_dict[project["id"]][document["id"]] = {} - false_positives_dict[project["id"]][document["id"]] = {} - false_negatives_dict[project["id"]][document["id"]] = {} - - results = cat.get_entities(document["text"]) - predictions[document["id"]] = [ - [a["start"], a["end"], a["cui"]] for a in results] - - tps, fps, fns = MedCatProcessor._getAccuraciesforDocument( - predictions[document["id"]], - correct_ids[project["id"]][document["id"]] - ) - true_positive_no += len(tps) - false_positive_no += len(fps) - false_negative_no += len(fns) - - true_positives_dict[project["id"]][document["id"]] = tps - false_positives_dict[project["id"]][document["id"]] = fps - false_negatives_dict[project["id"]][document["id"]] = fns - - if (true_positive_no + false_positive_no) == 0: - precision = 0 - else: - precision = true_positive_no / \ - (true_positive_no + false_positive_no) - if (true_positive_no + false_negative_no) == 0: - recall = 0 - else: - recall = true_positive_no / (true_positive_no + false_negative_no) - if (precision + recall) == 0: - f1 = 0 - else: - f1 = 2*((precision*recall) / (precision + recall)) - - return precision, recall, f1, true_positives_dict, false_positives_dict, false_negatives_dict - - @staticmethod - def _prepareDocumentsForPeformanceAnalysis(data): - """Prepares documents for performance analysis. - - Args: - data (dict): Input data containing projects and documents. - - Returns: - dict: Dictionary of correct annotations by project and document. 
- """ - correct_ids = {} - for project in data["projects"]: - correct_ids[project["id"]] = {} - - for document in project["documents"]: - for entry in document["annotations"]: - if entry["correct"]: - if document["id"] not in correct_ids[project["id"]]: - correct_ids[project["id"]][document["id"]] = [] - correct_ids[project["id"]][document["id"]].append( - [entry["start"], entry["end"], entry["cui"]]) - - return correct_ids - - @staticmethod - def _getAccuraciesforDocument(prediction, correct_ids): - """Computes accuracy metrics for a single document. - - Args: - prediction (list): List of predicted annotations. - correct_ids (list): List of correct annotations. - - Returns: - tuple: True positives, false positives, and false negatives. - """ - - tup1 = list(map(tuple, correct_ids)) - tup2 = list(map(tuple, prediction)) - - true_positives = list(map(list, set(tup1).intersection(tup2))) - false_positives = list(map(list, set(tup1).difference(tup2))) - false_negatives = list(map(list, set(tup2).difference(tup1))) - - return true_positives, false_positives, false_negatives - - @staticmethod - def _checkmodelimproved(f1_model_a, f1_model_b): - """Checks if model performance has improved. - - Args: - f1_model_a (float): F1 score of first model. - f1_model_b (float): F1 score of second model. - - Returns: - bool: True if first model has better F1 score, False otherwise. 
- """ - - if f1_model_a > f1_model_b: - return True - else: - return False - # NOTE: numpy uses np.float32 and those are not json serialisable # so we need to fix that diff --git a/medcat-service/medcat_service/routers/legacy.py b/medcat-service/medcat_service/routers/legacy.py deleted file mode 100644 index b2bbe1c8f..000000000 --- a/medcat-service/medcat_service/routers/legacy.py +++ /dev/null @@ -1,36 +0,0 @@ -import logging -import traceback - -from fastapi import APIRouter, HTTPException, Request -from fastapi.responses import JSONResponse - -from medcat_service.dependencies import MedCatProcessorDep - -router = APIRouter(tags=["Legacy"]) -log = logging.getLogger(__name__) - - -@router.post("/api/retrain_medcat") -async def retrain_medcat(request: Request, medcat_processor: MedCatProcessorDep) -> JSONResponse: - """ - Deprecated API. - - Retrain Medcat. - - This has been migrated from Flask to FastAPI without full testing. Contact CogStack if any issues are found. - """ - - payload = await request.json() - - if payload is None or "content" not in payload or payload["content"] is None: - raise HTTPException(status_code=400, detail="Input Payload should be JSON with 'content'") - - try: - result = medcat_processor.retrain_medcat(payload["content"], payload["replace_cdb"]) - app_info = medcat_processor.get_app_info() - return JSONResponse(status_code=200, content={"result": result, "annotations": payload["content"], - "medcat_info": app_info}) - - except Exception as e: - log.error(traceback.format_exc()) - raise HTTPException(status_code=500, detail=f"Internal processing error {e}") diff --git a/medcat-service/medcat_service/routers/process.py b/medcat-service/medcat_service/routers/process.py index 6760f89b5..0318e5af5 100644 --- a/medcat-service/medcat_service/routers/process.py +++ b/medcat-service/medcat_service/routers/process.py @@ -1,7 +1,9 @@ import logging -from typing import Annotated +from typing import Annotated, Union from fastapi import APIRouter, 
Body +from fastapi.exceptions import RequestValidationError +from pydantic import ValidationError from medcat_service.dependencies import MedCatProcessorDep from medcat_service.types import BulkProcessAPIInput, BulkProcessAPIResponse, ProcessAPIInput, ProcessAPIResponse @@ -12,9 +14,9 @@ @router.post("/api/process") -def process( +async def process( payload: Annotated[ - ProcessAPIInput, + Union[ProcessAPIInput, dict], Body( openapi_examples={ "normal": { @@ -46,9 +48,19 @@ def process( Returns the annotations extracted from a provided single document """ try: - process_result = medcat_processor.process_content( - payload.content.model_dump(), meta_anns_filters=payload.meta_anns_filters - ) + if isinstance(payload, ProcessAPIInput): + content = payload.content.model_dump() + meta_filters = payload.meta_anns_filters + elif isinstance(payload, dict): + try: + validated = ProcessAPIInput.model_validate(payload) + content = validated.content.model_dump() + meta_filters = validated.meta_anns_filters + except ValidationError as ve: + log.error("Invalid payload", exc_info=ve) + raise RequestValidationError(errors=ve.errors()) + + process_result = medcat_processor.process_content(content, meta_anns_filters=meta_filters) app_info = medcat_processor.get_app_info() return ProcessAPIResponse(result=process_result, medcat_info=app_info) except Exception as e: @@ -57,7 +69,7 @@ def process( @router.post("/api/process_bulk") -def process_bulk(payload: BulkProcessAPIInput, medcat_processor: MedCatProcessorDep) -> BulkProcessAPIResponse: +async def process_bulk(payload: BulkProcessAPIInput, medcat_processor: MedCatProcessorDep) -> BulkProcessAPIResponse: """ Returns the annotations extracted from the provided set of documents """ diff --git a/medcat-service/medcat_service/test/common.py b/medcat-service/medcat_service/test/common.py index e72d9dcb5..4f1747868 100644 --- a/medcat-service/medcat_service/test/common.py +++ b/medcat-service/medcat_service/test/common.py @@ -84,7 +84,5 
@@ def setup_medcat_processor(): "OS ENV: APP_BULK_NPROC: not set -- setting to default: 8") os.environ["APP_BULK_NPROC"] = "8" - os.environ["APP_TRAINING_MODE"] = "False" - - os.environ["DEID_MODE"] = "False" - os.environ["DEID_REDACT"] = "False" + os.environ["MEDCAT_DEID_MODE"] = "False" + os.environ["MEDCAT_DEID_REDACT"] = "False" diff --git a/medcat-service/medcat_service/types.py b/medcat-service/medcat_service/types.py index c723fa314..3baf40549 100644 --- a/medcat-service/medcat_service/types.py +++ b/medcat-service/medcat_service/types.py @@ -33,6 +33,7 @@ class NoProtectedBaseModel(BaseModel, protected_namespaces=()): class ModelCardInfo(NoProtectedBaseModel): ontologies: Union[str, List[str], None] meta_cat_model_names: list[str] + rel_cat_model_names: list[str] model_last_modified_on: Optional[datetime] @@ -46,8 +47,11 @@ class ServiceInfo(NoProtectedBaseModel): class ProcessAPIInputContent(BaseModel): text: str = Field(examples=["Patient had been diagnosed with acute Kidney Failure the week before"]) - footer: Optional[str] = None + footer: Optional[Union[str, Dict[str, Any]]] = None + class Config: + extra = "forbid" # Forbid extra fields not defined in the model + class ProcessAPIInput(BaseModel): content: ProcessAPIInputContent @@ -79,7 +83,7 @@ class ProcessResult(BaseModel): success: bool timestamp: str elapsed_time: float - footer: Optional[str] = None + footer: Optional[Union[str, Dict[str, Any]]] = None class ProcessErrorsResult(BaseModel): From 7cd6cc550c20f58185add62d7b37ab4d20745bd1 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Fri, 12 Sep 2025 11:54:02 +0100 Subject: [PATCH 02/16] Lint/type fixes + pyproject settings. 
--- medcat-service/medcat_service/config.py | 14 ++++--- medcat-service/medcat_service/main.py | 1 - .../nlp_processor/medcat_processor.py | 41 ++++++++++++------- medcat-service/{.ruff.toml => pyproject.toml} | 12 ++++-- 4 files changed, 43 insertions(+), 25 deletions(-) rename medcat-service/{.ruff.toml => pyproject.toml} (53%) diff --git a/medcat-service/medcat_service/config.py b/medcat-service/medcat_service/config.py index ea727c116..2a2fa7229 100644 --- a/medcat-service/medcat_service/config.py +++ b/medcat-service/medcat_service/config.py @@ -1,8 +1,10 @@ +import logging from typing import Any, Optional, Tuple, Union + +import torch from pydantic import AliasChoices, Field, field_validator from pydantic_settings import BaseSettings, SettingsConfigDict -import logging -import torch + def _coerce_loglevel(v: Any) -> int: """ @@ -21,7 +23,7 @@ class Settings(BaseSettings): model_config = SettingsConfigDict( frozen=True, - env_prefix="", # no prefix; we specify full env names via alias + env_prefix="", # no prefix; we specify full env names via alias case_sensitive=False, populate_by_name=True ) @@ -52,8 +54,8 @@ class Settings(BaseSettings): spacy_model: str = Field("", alias="MEDCAT_SPACY_MODEL") # ---- App logging & MedCAT logging ---- - app_log_level: int = Field(default="INFO", alias="APP_LOG_LEVEL") - medcat_log_level: int = Field(default="INFO", alias="MEDCAT_LOG_LEVEL") + app_log_level: int = Field(default=logging.INFO, alias="APP_LOG_LEVEL") + medcat_log_level: int = Field(default=logging.INFO, alias="MEDCAT_LOG_LEVEL") # ---- App identity / model basics ---- app_name: str = Field(default="MedCAT", alias="APP_NAME") @@ -73,7 +75,7 @@ class Settings(BaseSettings): @classmethod def _val_log_levels(cls, v: Any) -> int: return _coerce_loglevel(v) - + @field_validator("annotations_entity_output_mode", mode="after") @classmethod def _lower_mode(cls, v: str) -> str: diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py 
index c80b2fbe1..b40684dd6 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -1,6 +1,5 @@ import gradio as gr import uvicorn -import logging from fastapi import FastAPI, Request from fastapi.responses import JSONResponse diff --git a/medcat-service/medcat_service/nlp_processor/medcat_processor.py b/medcat-service/medcat_service/nlp_processor/medcat_processor.py index e6d075b8c..25ff77ba7 100644 --- a/medcat-service/medcat_service/nlp_processor/medcat_processor.py +++ b/medcat-service/medcat_service/nlp_processor/medcat_processor.py @@ -1,7 +1,6 @@ #!/usr/bin/env python import logging -import os import time from datetime import datetime, timezone @@ -176,9 +175,13 @@ def process_content(self, content, *args, **kwargs): meta_anns_filters = kwargs.get("meta_anns_filters") if meta_anns_filters: if isinstance(entities, dict): - entities = [ e for e in entities['entities'].values() - if isinstance(e, dict) and all( - task in e.get('meta_anns', {}) and e['meta_anns'][task]['value'] in filter_values + entities = [ + e + for e in entities["entities"].values() + if isinstance(e, dict) + and all( + task in e.get("meta_anns", {}) + and e["meta_anns"][task]["value"] in filter_values for task, filter_values in meta_anns_filters ) ] @@ -218,9 +221,11 @@ def process_content_bulk(self, content): if self.service_settings.deid_mode and isinstance(self.cat, DeIdModel): text_to_deid_from_tuple = (x[1] for x in text_input) - ann_res = self.cat.deid_multi_texts(list(text_to_deid_from_tuple), - redact=self.service_settings.deid_redact, - n_process=self.service_settings.bulk_nproc) + ann_res = self.cat.deid_multi_texts( + list(text_to_deid_from_tuple), + redact=self.service_settings.deid_redact, + n_process=self.service_settings.bulk_nproc, + ) elif isinstance(self.cat, CAT): ann_res = { ann_id: res for ann_id, res in @@ -260,11 +265,13 @@ def _create_cat(self) -> DeIdModel | CAT: ValueError: If required environment variables are not set. 
Exception: If concept database path is not specified. """ - cat, cdb, vocab, config = None, None, None, None + + cdb, vocab = None, None + cat: DeIdModel | CAT # ---- CUI filter ---- cuis_to_keep: list[str] = [] - + if self.service_settings.model_cui_filter_path: self.log.debug("Loading CUI filter ...") with open(self.service_settings.model_cui_filter_path) as cui_file: @@ -281,11 +288,13 @@ def _create_cat(self) -> DeIdModel | CAT: if cuis_to_keep: self.log.debug("Applying CUI filter ...") cat.cdb.filter_by_cui(cuis_to_keep) - + cat.config.general.log_level = self.service_settings.medcat_log_level if not self.service_settings.app_model_name and cat.config.meta.hash: - self.service_settings.app_model_name = cat.config.meta.hash + self.service_settings = self.service_settings.model_copy( + update={"app_model_name": cat.config.meta.hash} + ) self._populate_model_card_info(cat.config) return cat @@ -318,7 +327,7 @@ def _create_cat(self) -> DeIdModel | CAT: "CDB has no spaCy model configured" ) else: - self.log.warning( + self.log.warning( f"{Settings.env_name('spacy_model')} not set, using spaCy model from CDB: " f"{cdb.config.general.nlp.modelname}" ) @@ -340,7 +349,9 @@ def _create_cat(self) -> DeIdModel | CAT: cat.add_addon(RelCATAddon.deserialise_from(rel_model_path)) if not self.service_settings.app_model_name and cat.config.meta.hash: - self.service_settings.app_model_name = cat.config.meta.hash + self.service_settings = self.service_settings.model_copy( + update={"app_model_name": cat.config.meta.hash} + ) self._populate_model_card_info(cat.config) return cat @@ -394,12 +405,12 @@ def _generate_result(self, in_documents, annotations, elapsed_time): elapsed_time=elapsed_time, footer=in_ct.get("footer"), ) - elif self.service_settings.deid_mode: + elif self.service_settings.deid_mode: out_res = ProcessResult( # TODO: DEID mode is passing the resulting text in the annotations field here but shouldnt. 
text=str(annotations[i]), # TODO: DEID bulk mode should also be able to return the list of annotations found, - # to match the features of the singular api. CU-869a6wc6z + # to match the features of the singular api, this needs to be matched by MedCAT. CU-869a6wc6z annotations=[], success=True, timestamp=self._get_timestamp(), diff --git a/medcat-service/.ruff.toml b/medcat-service/pyproject.toml similarity index 53% rename from medcat-service/.ruff.toml rename to medcat-service/pyproject.toml index 4a52c445a..be23b6bc7 100644 --- a/medcat-service/.ruff.toml +++ b/medcat-service/pyproject.toml @@ -1,8 +1,9 @@ +[tool.ruff] line-length = 120 indent-width = 4 -[lint] -# 1. Enable flake8-bugbear (`B`) rules, in addition to the defaults. +[tool.ruff.lint] +# Enable flake8-bugbear (`B`) rules, in addition to the defaults. select = [ # pycodestyle "E", @@ -16,4 +17,9 @@ select = [ "SIM", # isort "I", -] \ No newline at end of file +] + +[tool.mypy] +plugins = ["pydantic.mypy"] +ignore_missing_imports = true +strict = false From bdaeaf7dced578e83e8a08aa59639ecf957a68a8 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Fri, 12 Sep 2025 12:03:36 +0100 Subject: [PATCH 03/16] Updated types. --- medcat-service/medcat_service/types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/medcat-service/medcat_service/types.py b/medcat-service/medcat_service/types.py index 3baf40549..55beecb42 100644 --- a/medcat-service/medcat_service/types.py +++ b/medcat-service/medcat_service/types.py @@ -50,8 +50,8 @@ class ProcessAPIInputContent(BaseModel): footer: Optional[Union[str, Dict[str, Any]]] = None class Config: - extra = "forbid" # Forbid extra fields not defined in the model - + extra = "forbid" + class ProcessAPIInput(BaseModel): content: ProcessAPIInputContent From 6428a163b5d4415308a163aac1ae66acf31a18d7 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Fri, 12 Sep 2025 16:47:32 +0100 Subject: [PATCH 04/16] MedCAT v2 version update to 2.1.0. 
--- medcat-service/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-service/requirements.txt b/medcat-service/requirements.txt index 460cb9d53..becf7bb77 100644 --- a/medcat-service/requirements.txt +++ b/medcat-service/requirements.txt @@ -2,7 +2,7 @@ gunicorn==23.0.0 setuptools==78.1.1 simplejson==3.19.3 setuptools-rust==1.11.0 -medcat[meta-cat,spacy,deid]~=2.0.0b +medcat[meta-cat,spacy,deid]~=2.1.0 # pinned because of issues with de-id models and past models (it will not do any de-id) transformers>=4.34.0,<5.0.0 requests==2.32.4 From 6a6eef4815d4757eab43305ddac399c1c3668063 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Fri, 12 Sep 2025 17:15:41 +0100 Subject: [PATCH 05/16] Updated test scripts. --- medcat-service/run_tests.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/medcat-service/run_tests.sh b/medcat-service/run_tests.sh index 284ff2a7c..7a714e449 100644 --- a/medcat-service/run_tests.sh +++ b/medcat-service/run_tests.sh @@ -10,15 +10,15 @@ python3 -m spacy download en_core_web_lg # download the test MedCAT model bash ./scripts/download_medmen.sh -export APP_CDB_MODEL="$PWD/models/medmen/cdb.dat" -export APP_VOCAB_MODEL="$PWD/models/medmen/vocab.dat" +export APP_MODEL_CDB_PATH="$PWD/models/medmen/cdb.dat" +export APP_MODEL_VOCAB_PATH="$PWD/models/medmen/vocab.dat" # proceed with the tests # echo "Starting the tests ..." # run the python tests -python -m unittest discover -s medcat_service/test +python3 -m unittest discover -s medcat_service/test if [ "$?" -ne "0" ]; then echo "Error: one or more tests failed" exit 1 From db6a131b11aaef7cb1b2c417da2cf99cd6931172 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Fri, 12 Sep 2025 17:36:12 +0100 Subject: [PATCH 06/16] Updated test for meta_ann. 
--- medcat-service/medcat_service/test/test_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-service/medcat_service/test/test_service.py b/medcat-service/medcat_service/test/test_service.py index 6d3bab2f2..92362fe99 100644 --- a/medcat-service/medcat_service/test/test_service.py +++ b/medcat-service/medcat_service/test/test_service.py @@ -127,7 +127,7 @@ def testProcessMetaAnnsFilter(self): a_client = TestClient(app, raise_server_exceptions=False) response = a_client.post(self.ENDPOINT_PROCESS_SINGLE, json=payload) self.assertEqual( - response.status_code, 500, + response.status_code, 200, """ This test currently shows that there is a bug with the meta anns filter. Correct version should return 200. From 6dc73f88b60d4e7fe50d794d57a410bb3f8dc1f6 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Mon, 15 Sep 2025 23:06:19 +0100 Subject: [PATCH 07/16] Bugfix: fixed app crash in bulk processing mode when loading large models (docker) + deid bulk processing OOB anns. 
--- medcat-service/README.md | 66 ++++++++++++------- medcat-service/docker/docker-compose-dev.yml | 4 +- .../docker/docker-compose-gpu-dev.yml | 2 + medcat-service/docker/docker-compose-gpu.yml | 6 +- medcat-service/docker/docker-compose.yml | 6 +- medcat-service/env/app.env | 3 + medcat-service/env/app_deid.env | 5 +- medcat-service/env/general.env | 2 + medcat-service/env/medcat_deid.env | 2 +- medcat-service/medcat_service/main.py | 6 +- .../nlp_processor/medcat_processor.py | 11 +++- medcat-service/start_service_production.sh | 13 +++- 12 files changed, 86 insertions(+), 40 deletions(-) diff --git a/medcat-service/README.md b/medcat-service/README.md index abc9732aa..95b89a9de 100644 --- a/medcat-service/README.md +++ b/medcat-service/README.md @@ -38,7 +38,7 @@ To build the Docker image manually: To run the container using the built image: -``` +```bash docker run -it -p 5000:5000 \ --env-file=envs/env_app --env-file=envs/env_medcat \ -v :/cat/models:ro \ @@ -51,39 +51,37 @@ By default the MedCAT service will be running on port `5000`. MedCAT models will If you have a gpu and wish to use it, please change the `docker/docker-compose.yml` file, use the `cogstacksystems/medcat-service-gpu:latest` image or change the `build:` directive to build `../Dockerfile_gpu`. -### IMPORTANT ! +### IMPORTANT If you wish to run this docker service manually, use the docker/docker-compose.yml file, execute `docker compose up -d` whilst in the `docker` folder. -Alternatively, an example script `./docker/run_example_medmen.sh` was provided to run the Docker container with MedCAT service. The script will download an example model (using the `./scripts/download_medmen.sh` script),it will use an example environment configuration, then it will build and start the service using the provided Docker Compose file, the service WONT WORK without the model being present. 
+Alternatively, an example script `./docker/run_example_medmen.sh` was provided to run the Docker container with MedCAT service. The script will download an example model (using the `./scripts/download_medmen.sh` script), it will use an example environment configuration, then it will build and start the service using the provided Docker Compose file, the service ***WON'T WORK*** without the model being present. 
 
 All models should be mounted from the `models/` folder. 
 
-
- ### Manual docker start-up steps -``` +```bash 1. cd ./models/ 2. bash ./download_medmen.sh 3. cd ../docker/ 4. docker compose up -d - DONE! + 5. echo "DONE!" ``` Or, if you wish to use the above mentioned script ( the sample model is downloaded via script, you don't need to do anything): -``` +```bash 1. cd ./docker/ 2. bash ./run_example_medmen.sh DONE! ``` -# API Example use +## API Example use Assuming that the application is running on the `localhost` with the API exposed on port `5000`, one can run: -``` +```bash curl -XPOST http://localhost:5000/api/process \ -H 'Content-Type: application/json' \ -d '{"content":{"text":"The patient was diagnosed with leukemia."}}' @@ -91,7 +89,7 @@ curl -XPOST http://localhost:5000/api/process \ and the received result: -``` +```json { "result": {"text": "The patient was diagnosed with leukemia.", @@ -106,15 +104,17 @@ and the received result: Additional DE-ID query sample (make sure you have a de-id model loaded): +```bash curl -XPOST \ -H 'Content-Type: application/json' \ -d '{"content":{"text":"Patient Information: Full Name: John Michael Doe \n Gender: Male \n Date of Birth: January 15, 1975 (Age: 49) \n Patient ID: 567890123 \n Address: 1234 Elm Street, Springfield, IL 62701 \n Phone Number: (555) 123-4567 \n Email: \n Emergency Contact: Jane Doe (Wife) \n Phone: (555) 987-6543 \n Relationship: Spouse"}}' +``` -Make sure you have the following option enabled in `envs/env_medcat` , `DEID_MODE=True`. +Make sure you have the following option enabled in `envs/(medcat|medcat_deid).env` , `DEID_MODE=True`. 
process_bulk example : -``` +```bash curl -XPOST http://localhost:5000/api/process_bulk \ -H 'Content-Type: application/json' \ -d '{"content": [{"text":"The patient was diagnosed with leukemia."}, {"text": "The patient was diagnosed with cancer."}] }' @@ -122,7 +122,7 @@ curl -XPOST http://localhost:5000/api/process_bulk \ example bulk result : -``` +```json { "result": [ { @@ -270,36 +270,35 @@ example bulk result : ``` -IMPORTANT info regarding annotation output style
+IMPORTANT info regarding annotation output style

As the changes from MedCAT introduced dictionary annotation/entity output. The mode in which annotation entities should be outputted in the JSON response, by default this was outputted as a "list" of dicts in older versions, so the output would be :
- ```
+ ```json
{"annotations": [{"id": "0", "cui" : "C1X..", ..}, {"id":"1", "cui": "...."}]}
```
newer versions of MedCAT (1.2+) output entities as a dict, where the id of the entity is a key and the rest of the data is a value, so for "dict", the output is
- ```
+ ```json
{"annotations": [{"0": {"cui": "C0027361", "id": 0,.....}, "1": {"cui": "C001111", "id": 1......}}]}
```
-This setting can be configured in the ```./env/env_medcat``` file, using the ```ANNOTATIONS_ENTITY_OUTPUT_MODE``` variable.
+This setting can be configured in the ```./env/medcat.env``` file, using the ```MEDCAT_ANNOTATIONS_ENTITY_OUTPUT_MODE``` variable.

By default, the output of these entities is set to respect the output of the MedCAT package, hence the latter will be used.

Please change the above mentioned env variable and make sure your CogStack-Nifi annotation script is adapted accordingly.
-
+ Please note that the returned NLP annotations will depend on the underlying model used. For evaluation, we can only provide a very basic model trained on [MedMentions](https://github.com/chanzuckerberg/MedMentions). Models utilising [SNOMED CT](https://www.england.nhs.uk/digitaltechnology/digital-primary-care/snomed-ct/) or [UMLS](https://www.nlm.nih.gov/research/umls/index.html) may require applying for licenses from the copyright holders. -
-
## Configuration

-In the current implementation, configuration for both MedCAT Service application and MedCAT NLP library is based on environment variables. These will be provided usually in two files in `env` directory:
+In the current implementation, configuration for both MedCAT Service application and MedCAT NLP library is based on environment variables. These will be provided usually in the files in the `env` directory:

-- `env_app` - configuration of MedCAT Service app,
-- `env_medcat` - configuration of MedCAT library.
+- `env/(app|app_deid).env` - configuration of MedCAT Service app,
+- `env/(medcat|medcat_deid).env` - configuration of MedCAT library.
+- `env/general.env` - configuration of general Docker-related variables (CPU architecture, shared memory size, etc.); this is part of all of the major services across CogStack, and is also set in the master repo in NiFi.

Both files allow tailoring MedCAT for specific use-cases.
When running MedCAT Service, these variables need to be loaded into the current working environment.
@@ -309,7 +308,7 @@ When using MedCAT for a different language than English, it can be useful to use

## Service Environment vars

-MedCAT Service application are defined in `envs/env_app` file.
+MedCAT Service application settings are defined in the `envs/(app|app_deid).env` file.

The following environment variables are available for tailoring the MedCAT Service `gunicorn` server:

@@ -327,6 +326,23 @@ The following environment variables are available for tailoring the MedCAT Servi
- `APP_BULK_NPROC` - the number of threads used in bulk processing (default: `8`),
- `APP_MEDCAT_MODEL_PACK` - MedCAT Model Pack path, if this parameter has a value IT WILL BE LOADED FIRST OVER EVERYTHING ELSE (CDB, Vocab, MetaCATs, etc.) declared above.

+### Shared Memory (`DOCKER_SHM_SIZE`)
+
+The MedCAT service uses PyTorch multiprocessing and memory-mapped models, which rely on Linux shared memory (`/dev/shm`). 
+By default, Docker limits this to **64 MB**, which is insufficient for NLP models. + +Use the environment variable `DOCKER_SHM_SIZE` to control the size of shared memory inside the container. +You can set this variable in the `env/general.env` file. + +- **Recommended**: `8g` for bulk inference (`APP_BULK_NPROC > 1`) +- **Minimum**: `1g` for single-process inference (`APP_BULK_NPROC=1`) + +Example: + +```env +DOCKER_SHM_SIZE=8g +``` + ## Performance Tuning Theres a range of factors that might impact the performance of this service, the most obvious being the size of the processed documents (amount of text per document) as well as the resources of the machine on which the service operates. diff --git a/medcat-service/docker/docker-compose-dev.yml b/medcat-service/docker/docker-compose-dev.yml index 6c3e64e30..51dec8385 100644 --- a/medcat-service/docker/docker-compose-dev.yml +++ b/medcat-service/docker/docker-compose-dev.yml @@ -18,6 +18,7 @@ services: env_file: - ../env/app.env - ../env/medcat.env + shm_size: "${DOCKER_SHM_SIZE:-1g}" volumes: - ../models:/cat/models/:rw ports: @@ -44,13 +45,14 @@ services: env_file: - ../env/app_deid.env - ../env/medcat_deid.env + shm_size: "${DOCKER_SHM_SIZE:-1g}" volumes: - ../models:/cat/models/:rw ports: - "5556:5000" networks: - cognet - + networks: cognet: driver: bridge diff --git a/medcat-service/docker/docker-compose-gpu-dev.yml b/medcat-service/docker/docker-compose-gpu-dev.yml index a2a74e7c8..3882c0328 100644 --- a/medcat-service/docker/docker-compose-gpu-dev.yml +++ b/medcat-service/docker/docker-compose-gpu-dev.yml @@ -18,6 +18,7 @@ services: env_file: - ../env/app.env - ../env/medcat.env + shm_size: "${DOCKER_SHM_SIZE:-1g}" volumes: - ../models:/cat/models/:rw ports: @@ -53,6 +54,7 @@ services: env_file: - ../env/app_deid.env - ../env/medcat_deid.env + shm_size: "${DOCKER_SHM_SIZE:-1g}" volumes: - ../models:/cat/models/:rw ports: diff --git a/medcat-service/docker/docker-compose-gpu.yml 
b/medcat-service/docker/docker-compose-gpu.yml index ceb7af852..98ad8e33d 100755 --- a/medcat-service/docker/docker-compose-gpu.yml +++ b/medcat-service/docker/docker-compose-gpu.yml @@ -4,7 +4,7 @@ services: ### Multiple images available: ## default image, only CPU support: cogstacksystems/medcat-service:latest ## GPU support: cogstacksystems/medcat-service-gpu:latest - image: cogstacksystems/medcat-service-gpu:latest + image: ${MEDCAT_SERVICE_DOCKER_IMAGE:-cogstacksystems/medcat-service-gpu:${MEDCAT_SERVICE_VERSION:-latest}} restart: always environment: - http_proxy=$HTTP_PROXY @@ -13,6 +13,7 @@ services: env_file: - ../env/app.env - ../env/medcat.env + shm_size: "${DOCKER_SHM_SIZE:-1g}" volumes: - ../models:/cat/models/:rw ports: @@ -34,7 +35,7 @@ services: ### Multiple images available: ## default image, only CPU support: cogstacksystems/medcat-service:latest ## GPU support: cogstacksystems/medcat-service-gpu:latest - image: cogstacksystems/medcat-service-gpu:latest + image: ${MEDCAT_SERVICE_DOCKER_IMAGE:-cogstacksystems/medcat-service-gpu:${MEDCAT_SERVICE_VERSION:-latest}} restart: always environment: - http_proxy=$HTTP_PROXY @@ -43,6 +44,7 @@ services: env_file: - ../env/app_deid.env - ../env/medcat_deid.env + shm_size: "${DOCKER_SHM_SIZE:-1g}" volumes: - ../models:/cat/models/:rw ports: diff --git a/medcat-service/docker/docker-compose.yml b/medcat-service/docker/docker-compose.yml index 4d08728b5..46a49801b 100755 --- a/medcat-service/docker/docker-compose.yml +++ b/medcat-service/docker/docker-compose.yml @@ -4,7 +4,7 @@ services: ### Multiple images available: ## default image, only CPU support: cogstacksystems/medcat-service:latest ## GPU support: cogstacksystems/medcat-service-gpu:latest - image: cogstacksystems/medcat-service:latest + image: ${MEDCAT_SERVICE_DOCKER_IMAGE:-cogstacksystems/medcat-service:${MEDCAT_SERVICE_VERSION:-latest}} restart: always environment: - http_proxy=$HTTP_PROXY @@ -13,6 +13,7 @@ services: env_file: - ../env/app.env - 
../env/medcat.env + shm_size: "${DOCKER_SHM_SIZE:-1g}" volumes: - ../models:/cat/models/:rw ports: @@ -34,13 +35,14 @@ services: env_file: - ../env/app_deid.env - ../env/medcat_deid.env + shm_size: "${DOCKER_SHM_SIZE:-1g}" volumes: - ../models:/cat/models/:rw ports: - "5556:5000" networks: - cognet - + networks: cognet: driver: bridge diff --git a/medcat-service/env/app.env b/medcat-service/env/app.env index 3e07246d8..cb68c397c 100755 --- a/medcat-service/env/app.env +++ b/medcat-service/env/app.env @@ -1,5 +1,8 @@ # Additional settings +MEDCAT_SERVICE_IMAGE_RELEASE_VERSION=latest +MEDCAT_SERVICE_DOCKER_IMAGE=cogstacksystems/medcat-service:${MEDCAT_SERVICE_IMAGE_RELEASE_VERSION:-latest} + # IMPORTANT : log level set # CRITICAL - 50, ERROR - 40, WARNING - 30, INFO - 20, DEBUG - 10, NOTSET - 0 APP_LOG_LEVEL=INFO diff --git a/medcat-service/env/app_deid.env b/medcat-service/env/app_deid.env index 3e07246d8..56607c72b 100755 --- a/medcat-service/env/app_deid.env +++ b/medcat-service/env/app_deid.env @@ -1,5 +1,8 @@ # Additional settings +MEDCAT_SERVICE_IMAGE_RELEASE_VERSION=latest +MEDCAT_SERVICE_DOCKER_IMAGE=cogstacksystems/medcat-service:${MEDCAT_SERVICE_IMAGE_RELEASE_VERSION:-latest} + # IMPORTANT : log level set # CRITICAL - 50, ERROR - 40, WARNING - 30, INFO - 20, DEBUG - 10, NOTSET - 0 APP_LOG_LEVEL=INFO @@ -40,4 +43,4 @@ APP_TORCH_THREADS=8 # GPU SETTING # CAUTION, use only if you are using the GPU docker image. 
-APP_CUDA_DEVICE_COUNT=1 +APP_CUDA_DEVICE_COUNT=-1 diff --git a/medcat-service/env/general.env b/medcat-service/env/general.env index d3f7612c3..dbefe43f8 100644 --- a/medcat-service/env/general.env +++ b/medcat-service/env/general.env @@ -6,3 +6,5 @@ # possible values: amd64, arm64 CPU_ARCHITECTURE=amd64 DOCKER_DEFAULT_PLATFORM=linux/${CPU_ARCHITECTURE:-amd64} + +DOCKER_SHM_SIZE=1g diff --git a/medcat-service/env/medcat_deid.env b/medcat-service/env/medcat_deid.env index ebecb559a..cd5bc3875 100755 --- a/medcat-service/env/medcat_deid.env +++ b/medcat-service/env/medcat_deid.env @@ -1,6 +1,6 @@ # IMPORTANT : log level set # CRITICAL - 50, ERROR - 40, WARNING - 30, INFO - 20, DEBUG - 10, NOTSET - 0 -MEDCAT_LOG_LEVEL=40 +MEDCAT_LOG_LEVEL=10 # can be left empty if you want to use the CDB spacy model, otherwise, possible values are : en_core_sci_lg, en_core_sci_md, etc... # please note that only SpaCy models that are installed on the docker container are usable (take a look at the .Dockerfile and install any other custom models you may want to use) diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py index b40684dd6..945d6255b 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -1,5 +1,4 @@ import gradio as gr -import uvicorn from fastapi import FastAPI, Request from fastapi.responses import JSONResponse @@ -38,4 +37,7 @@ async def healthcheck_failed_exception_handler(request: Request, exc: HealthChec if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8000) + # Only run this when directly executing `python main.py` for local dev. 
+ import os + import uvicorn + uvicorn.run("medcat_service.main:app", host="0.0.0.0", port=int(os.environ.get("SERVER_PORT", 8000))) diff --git a/medcat-service/medcat_service/nlp_processor/medcat_processor.py b/medcat-service/medcat_service/nlp_processor/medcat_processor.py index 25ff77ba7..0b2a426c3 100644 --- a/medcat-service/medcat_service/nlp_processor/medcat_processor.py +++ b/medcat-service/medcat_service/nlp_processor/medcat_processor.py @@ -31,7 +31,11 @@ def __init__(self, settings: Settings): self.service_settings = settings self.log = logging.getLogger(self.__class__.__name__) - self.log.setLevel(level=self.service_settings.app_log_level) + if not self.log.handlers: + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")) + self.log.addHandler(handler) + self.log.setLevel(self.service_settings.app_log_level) self.log.debug("APP log level set to : " + str(self.service_settings.app_log_level)) self.log.debug("MedCAT log level set to : " + str(self.service_settings.medcat_log_level)) @@ -58,7 +62,7 @@ def __init__(self, settings: Settings): # Not sure what happens if torch is using a cuda device if self.service_settings.torch_threads > 0: torch.set_num_threads(self.service_settings.torch_threads) - self.log.info("Torch threads set to " + str(self.service_settings.torch_threads)) + self.log.info("Torch threads set to " + str(self.service_settings.torch_threads)) self.cat: DeIdModel | CAT = self._create_cat() @@ -406,9 +410,10 @@ def _generate_result(self, in_documents, annotations, elapsed_time): footer=in_ct.get("footer"), ) elif self.service_settings.deid_mode: + out_text = str(annotations[i]) if i < len(annotations) else str(in_ct["text"]) out_res = ProcessResult( # TODO: DEID mode is passing the resulting text in the annotations field here but shouldnt. 
- text=str(annotations[i]), + text=out_text, # TODO: DEID bulk mode should also be able to return the list of annotations found, # to match the features of the singular api, this needs to be matched by MedCAT. CU-869a6wc6z annotations=[], diff --git a/medcat-service/start_service_production.sh b/medcat-service/start_service_production.sh index 0ec8a5fdf..5009c521f 100644 --- a/medcat-service/start_service_production.sh +++ b/medcat-service/start_service_production.sh @@ -40,8 +40,15 @@ SERVER_ACCESS_LOG_FORMAT="%(t)s [ACCESS] %(h)s \"%(r)s\" %(s)s \"%(f)s\" \"%(a)s # # Using Gunicorn, even though FastAPI recommends Uvicorn, to keep support for the post_fork config echo "Starting up the service using gunicorn server ..." -gunicorn --bind $SERVER_HOST:$SERVER_PORT --workers=$SERVER_WORKERS --threads=$SERVER_THREADS --timeout=$SERVER_WORKER_TIMEOUT \ - --access-logformat="$SERVER_ACCESS_LOG_FORMAT" --access-logfile=- --log-file=- --log-level info \ - --config /cat/config.py \ +exec gunicorn \ + --bind "$SERVER_HOST:$SERVER_PORT" \ + --workers="$SERVER_WORKERS" \ + --threads="$SERVER_THREADS" \ + --timeout="$SERVER_WORKER_TIMEOUT" \ + --access-logformat="$SERVER_ACCESS_LOG_FORMAT" \ + --access-logfile=- \ + --error-logfile=- \ + --log-level info \ + --config /cat/config.py \ --worker-class uvicorn.workers.UvicornWorker \ medcat_service.main:app From 393d18298ce91ce12f5f4961a1b3638f6de31b4f Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Wed, 17 Sep 2025 16:47:36 +0100 Subject: [PATCH 08/16] Bugfix: memory leak when caching app processor state (CU-869ah00vt). 
--- medcat-service/medcat_service/dependencies.py | 4 ++-- medcat-service/medcat_service/main.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/medcat-service/medcat_service/dependencies.py b/medcat-service/medcat_service/dependencies.py index f9cbab061..d130966b1 100644 --- a/medcat-service/medcat_service/dependencies.py +++ b/medcat-service/medcat_service/dependencies.py @@ -10,14 +10,14 @@ log = logging.getLogger(__name__) -@lru_cache +@lru_cache(maxsize=1) def get_settings() -> Settings: settings = Settings() log.debug("Using settings: %s", settings) return settings -@lru_cache +@lru_cache(maxsize=1) def get_medcat_processor(settings: Annotated[Settings, Depends(get_settings)]) -> MedCatProcessor: log.debug("Creating new Medcat Processsor using settings: %s", settings) return MedCatProcessor(settings) diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py index 945d6255b..98976edaa 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -2,8 +2,10 @@ from fastapi import FastAPI, Request from fastapi.responses import JSONResponse +from medcat_service.config import Settings from medcat_service.demo.gradio_demo import io from medcat_service.dependencies import get_settings +from medcat_service.nlp_processor.medcat_processor import MedCatProcessor from medcat_service.routers import admin, health, process from medcat_service.types import HealthCheckFailedException @@ -28,6 +30,16 @@ app.include_router(health.router) app.include_router(process.router) + +@app.on_event("startup") +def load_medcat(): + app.state.settings = Settings() + app.state.medcat = MedCatProcessor(app.state.settings) + +@app.on_event("shutdown") +def unload_medcat(): + app.state.medcat.close() + gr.mount_gradio_app(app, io, path="/demo") From f573b4d993bfaf10b4447bf6fc52410ddde16395 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Wed, 17 Sep 2025 16:57:28 +0100 Subject: [PATCH 09/16] 
Undo state changes (Not Implemented yet). --- medcat-service/medcat_service/main.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py index 98976edaa..8947b1f28 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -30,16 +30,6 @@ app.include_router(health.router) app.include_router(process.router) - -@app.on_event("startup") -def load_medcat(): - app.state.settings = Settings() - app.state.medcat = MedCatProcessor(app.state.settings) - -@app.on_event("shutdown") -def unload_medcat(): - app.state.medcat.close() - gr.mount_gradio_app(app, io, path="/demo") From 9493de46bfa7ef7829f25a8d45e71453e3590bf1 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Wed, 17 Sep 2025 17:06:33 +0100 Subject: [PATCH 10/16] Linting. --- medcat-service/medcat_service/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py index 8947b1f28..945d6255b 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -2,10 +2,8 @@ from fastapi import FastAPI, Request from fastapi.responses import JSONResponse -from medcat_service.config import Settings from medcat_service.demo.gradio_demo import io from medcat_service.dependencies import get_settings -from medcat_service.nlp_processor.medcat_processor import MedCatProcessor from medcat_service.routers import admin, health, process from medcat_service.types import HealthCheckFailedException From 1c75c6805a378acd9fff42b31f423572c0125b33 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Thu, 18 Sep 2025 09:55:46 +0100 Subject: [PATCH 11/16] Added gunicorn settings to app env files. 
--- medcat-service/env/app.env | 2 ++ medcat-service/env/app_deid.env | 4 +++- medcat-service/start_service_production.sh | 11 +++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/medcat-service/env/app.env b/medcat-service/env/app.env index cb68c397c..cc8fe55be 100755 --- a/medcat-service/env/app.env +++ b/medcat-service/env/app.env @@ -36,6 +36,8 @@ SERVER_PORT=5000 SERVER_WORKERS=1 SERVER_WORKER_TIMEOUT=300 SERVER_THREADS=1 +SERVER_GUNICORN_MAX_REQUESTS=1000 +SERVER_GUNICORN_MAX_REQUESTS_JITTER=50 # set the number of torch threads, this should be used ONLY if you are using CPUs and the default image # set to -1 or 0 if you are using GPU diff --git a/medcat-service/env/app_deid.env b/medcat-service/env/app_deid.env index 56607c72b..09cb6313c 100755 --- a/medcat-service/env/app_deid.env +++ b/medcat-service/env/app_deid.env @@ -21,7 +21,7 @@ APP_MODEL_REL_PATH_LIST= # MedCAT Model Pack path # IMPORTANT: if this parameter has value IT WILL BE LOADED FIRST OVER EVERYTHING ELSE (CDB, Vocab, MetaCATs, etc.) declared above. 
# Respect the same paths as above : /cat/models/model_pack_name.zip -APP_MEDCAT_MODEL_PACK= +APP_MEDCAT_MODEL_PACK=/cat/models/medcat_v2_deid_model_691c3f6a6e5400e7_686dfbf9c3c664e0.zip # optionally, an filter the reported concepts by CUIs # APP_MODEL_CUI_FILTER_PATH=/cat/models/cui_filter.txt @@ -36,6 +36,8 @@ SERVER_PORT=5000 SERVER_WORKERS=1 SERVER_WORKER_TIMEOUT=300 SERVER_THREADS=1 +SERVER_GUNICORN_MAX_REQUESTS=1000 +SERVER_GUNICORN_MAX_REQUESTS_JITTER=50 # set the number of torch threads, this should be used ONLY if you are using CPUs and the default image # set to -1 or 0 if you are using GPU diff --git a/medcat-service/start_service_production.sh b/medcat-service/start_service_production.sh index 5009c521f..294bb417a 100644 --- a/medcat-service/start_service_production.sh +++ b/medcat-service/start_service_production.sh @@ -33,6 +33,15 @@ if [ -z ${SERVER_WORKER_TIMEOUT+x} ]; then echo "SERVER_WORKER_TIMEOUT is unset -- setting to default (sec): $SERVER_WORKER_TIMEOUT"; fi +if [ -z ${SERVER_GUNICORN_MAX_REQUESTS+x} ]; then + SERVER_WORKER_TIMSERVER_GUNICORN_MAX_REQUESTSEOUT=3600; + echo "SERVER_GUNICORN_MAX_REQUESTS is unset -- setting to default (sec): $SERVER_GUNICORN_MAX_REQUESTS"; +fi + +if [ -z ${SERVER_GUNICORN_MAX_REQUESTS_JITTER+x} ]; then + SERVER_GUNICORN_MAX_REQUESTS_JITTER=50; + echo "SERVER_GUNICORN_MAX_REQUESTS_JITTER is unset -- setting to default (sec): $SERVER_GUNICORN_MAX_REQUESTS_JITTER"; +fi SERVER_ACCESS_LOG_FORMAT="%(t)s [ACCESS] %(h)s \"%(r)s\" %(s)s \"%(f)s\" \"%(a)s\"" @@ -50,5 +59,7 @@ exec gunicorn \ --error-logfile=- \ --log-level info \ --config /cat/config.py \ + --max-requests="$SERVER_GUNICORN_MAX_REQUESTS" \ + --max-requests-jitter="$SERVER_GUNICORN_MAX_REQUESTS_JITTER" \ --worker-class uvicorn.workers.UvicornWorker \ medcat_service.main:app From d40b261e5d0c484b5697c5d67e612d24e549ba89 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Thu, 18 Sep 2025 15:53:55 +0100 Subject: [PATCH 12/16] Env file update + bash prod startup 
script update. --- medcat-service/env/app.env | 2 +- medcat-service/env/app_deid.env | 2 +- medcat-service/start_service_production.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/medcat-service/env/app.env b/medcat-service/env/app.env index cc8fe55be..9a7641611 100755 --- a/medcat-service/env/app.env +++ b/medcat-service/env/app.env @@ -45,4 +45,4 @@ APP_TORCH_THREADS=8 # GPU SETTING # CAUTION, use only if you are using the GPU docker image. -APP_CUDA_DEVICE_COUNT=1 +APP_CUDA_DEVICE_COUNT=-1 \ No newline at end of file diff --git a/medcat-service/env/app_deid.env b/medcat-service/env/app_deid.env index 09cb6313c..e59c7ad2f 100755 --- a/medcat-service/env/app_deid.env +++ b/medcat-service/env/app_deid.env @@ -21,7 +21,7 @@ APP_MODEL_REL_PATH_LIST= # MedCAT Model Pack path # IMPORTANT: if this parameter has value IT WILL BE LOADED FIRST OVER EVERYTHING ELSE (CDB, Vocab, MetaCATs, etc.) declared above. # Respect the same paths as above : /cat/models/model_pack_name.zip -APP_MEDCAT_MODEL_PACK=/cat/models/medcat_v2_deid_model_691c3f6a6e5400e7_686dfbf9c3c664e0.zip +APP_MEDCAT_MODEL_PACK= # optionally, an filter the reported concepts by CUIs # APP_MODEL_CUI_FILTER_PATH=/cat/models/cui_filter.txt diff --git a/medcat-service/start_service_production.sh b/medcat-service/start_service_production.sh index 294bb417a..0f5b1437a 100644 --- a/medcat-service/start_service_production.sh +++ b/medcat-service/start_service_production.sh @@ -34,7 +34,7 @@ if [ -z ${SERVER_WORKER_TIMEOUT+x} ]; then fi if [ -z ${SERVER_GUNICORN_MAX_REQUESTS+x} ]; then - SERVER_WORKER_TIMSERVER_GUNICORN_MAX_REQUESTSEOUT=3600; + SERVER_GUNICORN_MAX_REQUESTS=1000; echo "SERVER_GUNICORN_MAX_REQUESTS is unset -- setting to default (sec): $SERVER_GUNICORN_MAX_REQUESTS"; fi From 9cbe7fdbcb9c684ec01691197b3cc67363c37482 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Thu, 18 Sep 2025 15:55:04 +0100 Subject: [PATCH 13/16] Lifespan usage instead of cache for mc-processor. 
--- .../medcat_service/demo/gradio_demo.py | 5 ++- medcat-service/medcat_service/dependencies.py | 23 +++++++---- medcat-service/medcat_service/main.py | 39 +++++++++++++------ 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index fbe7db96a..280309570 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -2,8 +2,9 @@ import gradio as gr from pydantic import BaseModel +import logging -from medcat_service.dependencies import get_medcat_processor, get_settings +from medcat_service.dependencies import get_global_processor from medcat_service.types import ProcessAPIInputContent from medcat_service.types_entities import Entity @@ -96,7 +97,7 @@ def convert_display_model_to_list_of_lists(entity_display_model: list[EntityAnno def process_input(input_text: str): - processor = get_medcat_processor(get_settings()) + processor = get_global_processor() input = ProcessAPIInputContent(text=input_text) result = processor.process_content(input.model_dump()) diff --git a/medcat-service/medcat_service/dependencies.py b/medcat-service/medcat_service/dependencies.py index d130966b1..bfe295848 100644 --- a/medcat-service/medcat_service/dependencies.py +++ b/medcat-service/medcat_service/dependencies.py @@ -1,26 +1,33 @@ import logging -from functools import lru_cache from typing import Annotated -from fastapi import Depends +from fastapi import Depends, Request from medcat_service.config import Settings from medcat_service.nlp_processor.medcat_processor import MedCatProcessor log = logging.getLogger(__name__) +def get_medcat_processor(request: Request) -> MedCatProcessor: + proc = getattr(request.app.state, "medcat", None) + if proc is None: + raise RuntimeError("MedCatProcessor is not initialised on app.state") + return proc -@lru_cache(maxsize=1) def get_settings() -> Settings: settings = Settings() 
log.debug("Using settings: %s", settings) return settings +processor_singleton: MedCatProcessor | None = None -@lru_cache(maxsize=1) -def get_medcat_processor(settings: Annotated[Settings, Depends(get_settings)]) -> MedCatProcessor: - log.debug("Creating new Medcat Processsor using settings: %s", settings) - return MedCatProcessor(settings) +def set_global_processor(proc: MedCatProcessor): + global processor_singleton + processor_singleton = proc +def get_global_processor() -> MedCatProcessor: + if processor_singleton is None: + raise RuntimeError("MedCatProcessor has not been initialised yet") + return processor_singleton -MedCatProcessorDep = Annotated[MedCatProcessor, Depends(get_medcat_processor)] +MedCatProcessorDep = Annotated[MedCatProcessor, Depends(get_medcat_processor)] \ No newline at end of file diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py index 945d6255b..8e4f03f6f 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -1,28 +1,45 @@ import gradio as gr -from fastapi import FastAPI, Request +import logging + +from contextlib import asynccontextmanager +from fastapi import Depends, FastAPI, Request from fastapi.responses import JSONResponse +from medcat_service.config import Settings from medcat_service.demo.gradio_demo import io -from medcat_service.dependencies import get_settings + +from medcat_service.dependencies import set_global_processor +from medcat_service.nlp_processor.medcat_processor import MedCatProcessor from medcat_service.routers import admin, health, process from medcat_service.types import HealthCheckFailedException -settings = get_settings() -app = FastAPI( - title="MedCAT Service", - summary="MedCAT Service", - contact={ +@asynccontextmanager +async def lifespan(app: FastAPI): + + log = logging.getLogger(__name__) + settings = Settings() + medcat = MedCatProcessor(settings) + log.debug("Using settings: %s", settings) + + app.state.settings = 
settings + app.state.medcat = medcat + app.state.title="MedCAT Service", + app.state.summary="MedCAT Service", + app.state.contact={ "name": "CogStack Org", "url": "https://cogstack.org/", "email": "contact@cogstack.org", }, - license_info={ + app.state.license_info={ "name": "Apache 2.0", "identifier": "Apache-2.0", }, - root_path=settings.app_root_path, -) + app.state.root_path=settings.app_root_path + set_global_processor(medcat) + yield + +app = FastAPI(lifespan=lifespan) app.include_router(admin.router) app.include_router(health.router) @@ -30,12 +47,10 @@ gr.mount_gradio_app(app, io, path="/demo") - @app.exception_handler(HealthCheckFailedException) async def healthcheck_failed_exception_handler(request: Request, exc: HealthCheckFailedException): return JSONResponse(status_code=503, content=exc.reason.model_dump()) - if __name__ == "__main__": # Only run this when directly executing `python main.py` for local dev. import os From ea8f55d3364de3dffbdd1e89847ede6f75eb09a6 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Thu, 18 Sep 2025 19:52:04 +0100 Subject: [PATCH 14/16] Singleton implementation. 
--- .../medcat_service/demo/gradio_demo.py | 1 - medcat-service/medcat_service/dependencies.py | 43 +++++++++++++------ medcat-service/medcat_service/main.py | 23 +++++----- 3 files changed, 44 insertions(+), 23 deletions(-) diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index 280309570..0ef5fe797 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -2,7 +2,6 @@ import gradio as gr from pydantic import BaseModel -import logging from medcat_service.dependencies import get_global_processor from medcat_service.types import ProcessAPIInputContent diff --git a/medcat-service/medcat_service/dependencies.py b/medcat-service/medcat_service/dependencies.py index bfe295848..ce4ba5626 100644 --- a/medcat-service/medcat_service/dependencies.py +++ b/medcat-service/medcat_service/dependencies.py @@ -1,5 +1,5 @@ import logging -from typing import Annotated +from typing import Annotated, Optional from fastapi import Depends, Request @@ -8,26 +8,45 @@ log = logging.getLogger(__name__) -def get_medcat_processor(request: Request) -> MedCatProcessor: - proc = getattr(request.app.state, "medcat", None) - if proc is None: - raise RuntimeError("MedCatProcessor is not initialised on app.state") - return proc +processor_singleton: Optional[MedCatProcessor] = None +settings_singleton: Optional[Settings] = None + + +def get_settings(request: Request) -> Settings: + _settings = request.app.state.settings + log.debug("Using settings: %s", _settings) + return _settings + + +def set_global_settings(settings: Settings) -> None: + global settings_singleton + settings_singleton = settings + -def get_settings() -> Settings: - settings = Settings() - log.debug("Using settings: %s", settings) - return settings +def get_global_settings() -> Settings: + if settings_singleton is None: + raise RuntimeError("Settings have not been initialised yet") + return 
settings_singleton -processor_singleton: MedCatProcessor | None = None def set_global_processor(proc: MedCatProcessor): global processor_singleton processor_singleton = proc + +def get_medcat_processor(request: Request) -> MedCatProcessor: + proc = getattr(request.app.state, "medcat", None) + log.debug("Getting MedCatProcessor from app.state: %s", proc) + if proc is None: + raise RuntimeError("MedCatProcessor is not initialised on app.state") + return proc + + def get_global_processor() -> MedCatProcessor: if processor_singleton is None: raise RuntimeError("MedCatProcessor has not been initialised yet") return processor_singleton -MedCatProcessorDep = Annotated[MedCatProcessor, Depends(get_medcat_processor)] \ No newline at end of file + +SettingsDep = Annotated[Settings, Depends(get_settings)] +MedCatProcessorDep = Annotated[MedCatProcessor, Depends(get_medcat_processor)] diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py index 8e4f03f6f..7b27e717b 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -1,13 +1,12 @@ -import gradio as gr import logging - from contextlib import asynccontextmanager -from fastapi import Depends, FastAPI, Request + +import gradio as gr +from fastapi import FastAPI, Request from fastapi.responses import JSONResponse from medcat_service.config import Settings from medcat_service.demo.gradio_demo import io - from medcat_service.dependencies import set_global_processor from medcat_service.nlp_processor.medcat_processor import MedCatProcessor from medcat_service.routers import admin, health, process @@ -20,23 +19,25 @@ async def lifespan(app: FastAPI): log = logging.getLogger(__name__) settings = Settings() medcat = MedCatProcessor(settings) - log.debug("Using settings: %s", settings) app.state.settings = settings app.state.medcat = medcat - app.state.title="MedCAT Service", - app.state.summary="MedCAT Service", - app.state.contact={ + app.state.title = 
"MedCAT Service", + app.state.summary = "MedCAT Service", + app.state.contact = { "name": "CogStack Org", "url": "https://cogstack.org/", "email": "contact@cogstack.org", }, - app.state.license_info={ + app.state.license_info = { "name": "Apache 2.0", "identifier": "Apache-2.0", }, - app.state.root_path=settings.app_root_path + app.state.root_path = settings.app_root_path + set_global_processor(medcat) + log.debug("MedCAT Service lifespan setup complete") + yield app = FastAPI(lifespan=lifespan) @@ -47,6 +48,7 @@ async def lifespan(app: FastAPI): gr.mount_gradio_app(app, io, path="/demo") + @app.exception_handler(HealthCheckFailedException) async def healthcheck_failed_exception_handler(request: Request, exc: HealthCheckFailedException): return JSONResponse(status_code=503, content=exc.reason.model_dump()) @@ -54,5 +56,6 @@ async def healthcheck_failed_exception_handler(request: Request, exc: HealthChec if __name__ == "__main__": # Only run this when directly executing `python main.py` for local dev. import os + import uvicorn uvicorn.run("medcat_service.main:app", host="0.0.0.0", port=int(os.environ.get("SERVER_PORT", 8000))) From 8d2947f82a7e46dd45390a34bcda0228f9bb83f1 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Fri, 19 Sep 2025 08:27:35 +0100 Subject: [PATCH 15/16] Updated tests. 
--- medcat-service/medcat_service/main.py | 14 +++++++-- medcat-service/medcat_service/test/common.py | 11 +++++++ .../medcat_service/test/test_admin.py | 3 +- .../medcat_service/test/test_deid.py | 29 +++++++++++-------- .../medcat_service/test/test_service.py | 13 ++++++++- 5 files changed, 54 insertions(+), 16 deletions(-) diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py index 7b27e717b..c83ae9457 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -17,8 +17,18 @@ async def lifespan(app: FastAPI): log = logging.getLogger(__name__) - settings = Settings() - medcat = MedCatProcessor(settings) + log.debug("Starting MedCAT Service lifespan setup") + + # allow overriding settings and medcat processor for testing + settings = getattr(app.state, "settings", None) + if settings is None: + settings = Settings() + app.state.settings = settings + + medcat = getattr(app.state, "medcat", None) + if medcat is None: + medcat = MedCatProcessor(settings) + app.state.medcat = medcat app.state.settings = settings app.state.medcat = medcat diff --git a/medcat-service/medcat_service/test/common.py b/medcat-service/medcat_service/test/common.py index 4f1747868..2f3efd5a7 100644 --- a/medcat-service/medcat_service/test/common.py +++ b/medcat-service/medcat_service/test/common.py @@ -3,9 +3,20 @@ import logging import os +from medcat_service.config import Settings + log = logging.getLogger(__name__) +def get_settings_override_deid(): + return Settings( + deid_mode=True, + deid_redact=True, + APP_LOG_LEVEL=10, + MEDCAT_LOG_LEVEL=10 + ) # type: ignore + + def get_example_short_document(): """ Returns an example short document to be processed with possibly minimal set of annotations to be validated diff --git a/medcat-service/medcat_service/test/test_admin.py b/medcat-service/medcat_service/test/test_admin.py index b4b82b048..22cbebd8b 100644 --- a/medcat-service/medcat_service/test/test_admin.py +++ 
b/medcat-service/medcat_service/test/test_admin.py @@ -11,7 +11,8 @@ class TestAdminApi(unittest.TestCase): def setUp(self): setup_medcat_processor() - self.client = TestClient(app) + self._client_ctx = TestClient(app) + self.client = self._client_ctx.__enter__() def testGetInfo(self): response = self.client.get(self.ENDPOINT_INFO_ENDPOINT) diff --git a/medcat-service/medcat_service/test/test_deid.py b/medcat-service/medcat_service/test/test_deid.py index 24fe8063a..b24a6c340 100644 --- a/medcat-service/medcat_service/test/test_deid.py +++ b/medcat-service/medcat_service/test/test_deid.py @@ -4,13 +4,8 @@ from fastapi.testclient import TestClient import medcat_service.test.common as common -from medcat_service.config import Settings -from medcat_service.dependencies import get_settings from medcat_service.main import app - - -def get_settings_override(): - return Settings(deid_mode=True, deid_redact=True) +from medcat_service.nlp_processor.medcat_processor import MedCatProcessor class TestMedcatServiceDeId(unittest.TestCase): @@ -25,14 +20,27 @@ def setUpClass(cls): if "APP_MEDCAT_MODEL_PACK" not in os.environ: os.environ["APP_MEDCAT_MODEL_PACK"] = "./models/examples/example-deid-model-pack.zip" - app.dependency_overrides[get_settings] = get_settings_override - cls.client = TestClient(app) + test_settings = common.get_settings_override_deid() + app.state.settings = test_settings + app.state.medcat = MedCatProcessor(test_settings) + + cls._client_ctx = TestClient(app) + cls.client = cls._client_ctx.__enter__() + + @classmethod + def tearDownClass(cls): + # exit context so shutdown runs + cls._client_ctx.__exit__(None, None, None) + app.dependency_overrides.clear() + + def test_settings_override_applied(self): + assert app.state.settings.deid_mode is True + assert app.state.settings.deid_redact is True def test_deid_process_api(self): payload = common.create_payload_content_from_doc_single( "John had been diagnosed with acute Kidney Failure the week before" ) - 
app.dependency_overrides[get_settings] = get_settings_override response = self.client.post(self.ENDPOINT_PROCESS_SINGLE, json=payload) self.assertEqual(response.status_code, 200) @@ -54,13 +62,11 @@ def test_deid_process_api(self): self.assertEqual(ann["pretty_name"], expected["pretty_name"]) self.assertEqual(ann["source_value"], expected["source_value"]) self.assertEqual(ann["cui"], expected["cui"]) - app.dependency_overrides = {} def test_deid_process_bulk_api(self): payload = common.create_payload_content_from_doc_bulk([ "John had been diagnosed with acute Kidney Failure the week before" ]) - app.dependency_overrides[get_settings] = get_settings_override response = self.client.post(self.ENDPOINT_PROCESS_BULK, json=payload) self.assertEqual(response.status_code, 200) @@ -87,4 +93,3 @@ def test_deid_process_bulk_api(self): # self.assertEqual(ann["pretty_name"], expected["pretty_name"]) # self.assertEqual(ann["source_value"], expected["source_value"]) # self.assertEqual(ann["cui"], expected["cui"]) - app.dependency_overrides = {} diff --git a/medcat-service/medcat_service/test/test_service.py b/medcat-service/medcat_service/test/test_service.py index 92362fe99..f3160f47c 100644 --- a/medcat-service/medcat_service/test/test_service.py +++ b/medcat-service/medcat_service/test/test_service.py @@ -6,7 +6,9 @@ from fastapi.testclient import TestClient import medcat_service.test.common as common +from medcat_service.config import Settings from medcat_service.main import app +from medcat_service.nlp_processor.medcat_processor import MedCatProcessor class TestMedcatService(unittest.TestCase): @@ -31,7 +33,11 @@ def setUpClass(cls): """ cls._setup_logging(cls) common.setup_medcat_processor() - cls.client = TestClient(app) + test_settings = Settings() + app.state.settings = test_settings + app.state.medcat = MedCatProcessor(test_settings) + cls._client_ctx = TestClient(app) + cls.client = cls._client_ctx.__enter__() @staticmethod def _setup_logging(cls): @@ -39,6 +45,11 @@ 
def _setup_logging(cls): logging.basicConfig(format=log_format, level=logging.INFO) cls.log = logging.getLogger(__name__) + @classmethod + def tearDownClass(cls): + # exit context so shutdown runs + cls._client_ctx.__exit__(None, None, None) + # unit test helper methods # def _testProcessSingleDoc(self, doc): From 4534172d8ff75ccf6683c59eb0dd2b9c036873f9 Mon Sep 17 00:00:00 2001 From: vladd-bit Date: Fri, 19 Sep 2025 09:44:24 +0100 Subject: [PATCH 16/16] Cleanup. --- medcat-service/medcat_service/main.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py index c83ae9457..0f4d5f9c3 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -7,7 +7,7 @@ from medcat_service.config import Settings from medcat_service.demo.gradio_demo import io -from medcat_service.dependencies import set_global_processor +from medcat_service.dependencies import set_global_processor, set_global_settings from medcat_service.nlp_processor.medcat_processor import MedCatProcessor from medcat_service.routers import admin, health, process from medcat_service.types import HealthCheckFailedException @@ -30,8 +30,6 @@ async def lifespan(app: FastAPI): medcat = MedCatProcessor(settings) app.state.medcat = medcat - app.state.settings = settings - app.state.medcat = medcat app.state.title = "MedCAT Service", app.state.summary = "MedCAT Service", app.state.contact = { @@ -45,6 +43,7 @@ async def lifespan(app: FastAPI): }, app.state.root_path = settings.app_root_path + set_global_settings(settings) set_global_processor(medcat) log.debug("MedCAT Service lifespan setup complete")