Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion medcat-service/env/app.env
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,13 @@ SERVER_PORT=5000
SERVER_WORKERS=1
SERVER_WORKER_TIMEOUT=300
SERVER_THREADS=1
SERVER_GUNICORN_MAX_REQUESTS=1000
SERVER_GUNICORN_MAX_REQUESTS_JITTER=50

# set the number of torch threads, this should be used ONLY if you are using CPUs and the default image
# set to -1 or 0 if you are using GPU
APP_TORCH_THREADS=8

# GPU SETTING
# CAUTION, use only if you are using the GPU docker image.
APP_CUDA_DEVICE_COUNT=1
APP_CUDA_DEVICE_COUNT=-1
2 changes: 2 additions & 0 deletions medcat-service/env/app_deid.env
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ SERVER_PORT=5000
SERVER_WORKERS=1
SERVER_WORKER_TIMEOUT=300
SERVER_THREADS=1
SERVER_GUNICORN_MAX_REQUESTS=1000
SERVER_GUNICORN_MAX_REQUESTS_JITTER=50

# set the number of torch threads, this should be used ONLY if you are using CPUs and the default image
# set to -1 or 0 if you are using GPU
Expand Down
4 changes: 2 additions & 2 deletions medcat-service/medcat_service/demo/gradio_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import gradio as gr
from pydantic import BaseModel

from medcat_service.dependencies import get_medcat_processor, get_settings
from medcat_service.dependencies import get_global_processor
from medcat_service.types import ProcessAPIInputContent
from medcat_service.types_entities import Entity

Expand Down Expand Up @@ -96,7 +96,7 @@ def convert_display_model_to_list_of_lists(entity_display_model: list[EntityAnno


def process_input(input_text: str):
processor = get_medcat_processor(get_settings())
processor = get_global_processor()
input = ProcessAPIInputContent(text=input_text)

result = processor.process_content(input.model_dump())
Expand Down
50 changes: 38 additions & 12 deletions medcat-service/medcat_service/dependencies.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,52 @@
import logging
from functools import lru_cache
from typing import Annotated
from typing import Annotated, Optional

from fastapi import Depends
from fastapi import Depends, Request

from medcat_service.config import Settings
from medcat_service.nlp_processor.medcat_processor import MedCatProcessor

log = logging.getLogger(__name__)

processor_singleton: Optional[MedCatProcessor] = None
settings_singleton: Optional[Settings] = None

@lru_cache
def get_settings() -> Settings:
    """Build the service settings once and hand back the same object afterwards.

    ``lru_cache`` memoises this zero-argument call, so ``Settings()`` is
    constructed (and the debug line emitted) only on the first invocation.
    """
    loaded = Settings()
    log.debug("Using settings: %s", loaded)
    return loaded

def get_settings(request: Request) -> Settings:
    """Return the Settings instance stored on the application state.

    Args:
        request: The incoming request; ``request.app.state.settings`` is read.

    Returns:
        The settings object found on ``request.app.state``.

    Raises:
        RuntimeError: If no settings have been placed on ``app.state``.
    """
    # Look the attribute up defensively, the same way get_medcat_processor
    # does, so a missing value yields a clear error instead of a bare
    # AttributeError from the state object.
    _settings = getattr(request.app.state, "settings", None)
    if _settings is None:
        raise RuntimeError("Settings are not initialised on app.state")
    log.debug("Using settings: %s", _settings)
    return _settings

@lru_cache
def get_medcat_processor(settings: Annotated[Settings, Depends(get_settings)]) -> MedCatProcessor:
    """Create a MedCatProcessor for ``settings``.

    The call is wrapped in ``lru_cache``, so a repeated call whose argument
    hashes and compares equal to an earlier one gets the cached processor.
    """
    log.debug("Creating new Medcat Processsor using settings: %s", settings)
    processor = MedCatProcessor(settings)
    return processor

def set_global_settings(settings: Settings) -> None:
    """Store ``settings`` in the module-level ``settings_singleton`` slot."""
    # Rebind the module global rather than creating a function-local name.
    global settings_singleton
    settings_singleton = settings


def get_global_settings() -> Settings:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this needed? Doesn't seem to be called anywhere?

if settings_singleton is None:
raise RuntimeError("Settings have not been initialised yet")
return settings_singleton


def set_global_processor(proc: MedCatProcessor) -> None:
    """Store ``proc`` in the module-level ``processor_singleton`` slot.

    Args:
        proc: The processor instance that get_global_processor should return.
    """
    # Rebind the module global rather than creating a function-local name.
    global processor_singleton
    processor_singleton = proc


def get_medcat_processor(request: Request) -> MedCatProcessor:
    """Fetch the MedCatProcessor kept on ``request.app.state.medcat``.

    Raises:
        RuntimeError: If the attribute is absent or ``None``.
    """
    processor = getattr(request.app.state, "medcat", None)
    log.debug("Getting MedCatProcessor from app.state: %s", processor)
    if processor is not None:
        return processor
    raise RuntimeError("MedCatProcessor is not initialised on app.state")


def get_global_processor() -> MedCatProcessor:
    """Return the processor held in the module-level ``processor_singleton``.

    Raises:
        RuntimeError: If ``processor_singleton`` is still ``None``.
    """
    if processor_singleton is not None:
        return processor_singleton
    raise RuntimeError("MedCatProcessor has not been initialised yet")


SettingsDep = Annotated[Settings, Depends(get_settings)]
MedCatProcessorDep = Annotated[MedCatProcessor, Depends(get_medcat_processor)]
47 changes: 37 additions & 10 deletions medcat-service/medcat_service/main.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,55 @@
import logging
from contextlib import asynccontextmanager

import gradio as gr
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

from medcat_service.config import Settings
from medcat_service.demo.gradio_demo import io
from medcat_service.dependencies import get_settings
from medcat_service.dependencies import set_global_processor, set_global_settings
from medcat_service.nlp_processor.medcat_processor import MedCatProcessor
from medcat_service.routers import admin, health, process
from medcat_service.types import HealthCheckFailedException

settings = get_settings()

app = FastAPI(
title="MedCAT Service",
summary="MedCAT Service",
contact={
@asynccontextmanager
async def lifespan(app: FastAPI):
Copy link
Collaborator

@alhendrickson alhendrickson Sep 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the more general comment:

Is this really the way to go?

I really don't understand why Mart's suggestion wouldn't have worked: make a single global settings object, and then stick with the lru_cache from before. I don't see how that could ever create a new Settings object and trigger the cache with a new input.

The reason I'm really hesitant is that your change here is basically saying "Following FastAPI documentation causes memory leaks", which I don't think is correct. It's also saying "Don't use FastAPI dependencies" in this project, which also doesn't seem right - it's really not a unique project...

_def_settings: Optional[Settings] = None

def get_settings() -> Settings:
    """Return the process-wide Settings instance, creating it on first use."""
    global _def_settings
    if _def_settings is None:
        _def_settings = Settings()
    # Return the cached instance; the bare name `settings` is not defined here.
    return _def_settings

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not suggesting that the FastAPI deps and guides are wrong or that the cache can't work. Since we already converted the MedCAT processor to a singleton and got rid of the cache, I opted to do the same for settings (as per Mart's suggestion), that's all; but if we want to revert to the cache for settings only, that's fine. We cannot have both a singleton and @lru_cache: that would not be consistent and would not add any value. Regarding the lifespan, I opted for it because we get explicit control over when and where the processor gets created (there is the potential problem of having the model loaded as soon as the app starts, though).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To clarify - I mean, taking a step back, is it not possible to keep it all basically how it was before, just with the settings being explicitly global?

dependencies.py

_def_settings: Optional[Settings] = None

def get_settings() -> Settings:
    """Return the process-wide Settings instance, creating it on first use."""
    global _def_settings
    if _def_settings is None:
        _def_settings = Settings()
    # Return the cached instance; the bare name `settings` is not defined here.
    return _def_settings
...
@lru_cache
def get_medcat_processor(settings: Annotated[Settings, Depends(get_settings)]) -> MedCatProcessor:
    log.debug("Creating new Medcat Processsor using settings: %s", settings)
    return MedCatProcessor(settings)

Then there are no changes needed anywhere else (I think?)


log = logging.getLogger(__name__)
log.debug("Starting MedCAT Service lifespan setup")

# allow overriding settings and medcat processor for testing
settings = getattr(app.state, "settings", None)
if settings is None:
settings = Settings()
app.state.settings = settings

medcat = getattr(app.state, "medcat", None)
if medcat is None:
medcat = MedCatProcessor(settings)
app.state.medcat = medcat

app.state.title = "MedCAT Service",
app.state.summary = "MedCAT Service",
app.state.contact = {
"name": "CogStack Org",
"url": "https://cogstack.org/",
"email": "[email protected]",
},
license_info={
app.state.license_info = {
"name": "Apache 2.0",
"identifier": "Apache-2.0",
},
root_path=settings.app_root_path,
)
app.state.root_path = settings.app_root_path

set_global_settings(settings)
set_global_processor(medcat)
log.debug("MedCAT Service lifespan setup complete")

yield

app = FastAPI(lifespan=lifespan)

app.include_router(admin.router)
app.include_router(health.router)
Expand All @@ -35,9 +62,9 @@
async def healthcheck_failed_exception_handler(request: Request, exc: HealthCheckFailedException):
return JSONResponse(status_code=503, content=exc.reason.model_dump())


if __name__ == "__main__":
    # Local-development entry point: reached only via `python main.py`.
    import os

    import uvicorn

    # SERVER_PORT may be absent from the environment; fall back to 8000.
    dev_port = int(os.environ.get("SERVER_PORT", 8000))
    uvicorn.run("medcat_service.main:app", host="0.0.0.0", port=dev_port)
11 changes: 11 additions & 0 deletions medcat-service/medcat_service/test/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,20 @@
import logging
import os

from medcat_service.config import Settings

log = logging.getLogger(__name__)


def get_settings_override_deid():
    """Build the Settings used by the de-identification tests.

    Turns on ``deid_mode`` and ``deid_redact`` and sets both log-level
    fields to 10 (the numeric value of ``logging.DEBUG``).
    """
    overrides = {
        "deid_mode": True,
        "deid_redact": True,
        "APP_LOG_LEVEL": 10,
        "MEDCAT_LOG_LEVEL": 10,
    }
    return Settings(**overrides)  # type: ignore


def get_example_short_document():
"""
Returns an example short document to be processed with possibly minimal set of annotations to be validated
Expand Down
3 changes: 2 additions & 1 deletion medcat-service/medcat_service/test/test_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ class TestAdminApi(unittest.TestCase):

def setUp(self):
setup_medcat_processor()
self.client = TestClient(app)
self._client_ctx = TestClient(app)
self.client = self._client_ctx.__enter__()

def testGetInfo(self):
response = self.client.get(self.ENDPOINT_INFO_ENDPOINT)
Expand Down
29 changes: 17 additions & 12 deletions medcat-service/medcat_service/test/test_deid.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,8 @@
from fastapi.testclient import TestClient

import medcat_service.test.common as common
from medcat_service.config import Settings
from medcat_service.dependencies import get_settings
from medcat_service.main import app


def get_settings_override():
    """Return Settings with de-identification mode and redaction switched on."""
    deid_settings = Settings(deid_mode=True, deid_redact=True)
    return deid_settings
from medcat_service.nlp_processor.medcat_processor import MedCatProcessor


class TestMedcatServiceDeId(unittest.TestCase):
Expand All @@ -25,14 +20,27 @@ def setUpClass(cls):
if "APP_MEDCAT_MODEL_PACK" not in os.environ:
os.environ["APP_MEDCAT_MODEL_PACK"] = "./models/examples/example-deid-model-pack.zip"

app.dependency_overrides[get_settings] = get_settings_override
cls.client = TestClient(app)
test_settings = common.get_settings_override_deid()
app.state.settings = test_settings
app.state.medcat = MedCatProcessor(test_settings)

cls._client_ctx = TestClient(app)
cls.client = cls._client_ctx.__enter__()

@classmethod
def tearDownClass(cls):
    """Close the shared test client and drop any dependency overrides."""
    # Leaving the TestClient context is what lets the app's shutdown run.
    client_ctx = cls._client_ctx
    client_ctx.__exit__(None, None, None)
    app.dependency_overrides.clear()

def test_settings_override_applied(self):
    """The de-id settings placed on ``app.state`` must be the ones in effect."""
    active = app.state.settings
    # Use unittest assertions (as the other tests in this class do) rather
    # than bare ``assert`` statements, which are stripped under ``python -O``.
    self.assertIs(active.deid_mode, True)
    self.assertIs(active.deid_redact, True)

def test_deid_process_api(self):
payload = common.create_payload_content_from_doc_single(
"John had been diagnosed with acute Kidney Failure the week before"
)
app.dependency_overrides[get_settings] = get_settings_override

response = self.client.post(self.ENDPOINT_PROCESS_SINGLE, json=payload)
self.assertEqual(response.status_code, 200)
Expand All @@ -54,13 +62,11 @@ def test_deid_process_api(self):
self.assertEqual(ann["pretty_name"], expected["pretty_name"])
self.assertEqual(ann["source_value"], expected["source_value"])
self.assertEqual(ann["cui"], expected["cui"])
app.dependency_overrides = {}

def test_deid_process_bulk_api(self):
payload = common.create_payload_content_from_doc_bulk([
"John had been diagnosed with acute Kidney Failure the week before"
])
app.dependency_overrides[get_settings] = get_settings_override

response = self.client.post(self.ENDPOINT_PROCESS_BULK, json=payload)
self.assertEqual(response.status_code, 200)
Expand All @@ -87,4 +93,3 @@ def test_deid_process_bulk_api(self):
# self.assertEqual(ann["pretty_name"], expected["pretty_name"])
# self.assertEqual(ann["source_value"], expected["source_value"])
# self.assertEqual(ann["cui"], expected["cui"])
app.dependency_overrides = {}
13 changes: 12 additions & 1 deletion medcat-service/medcat_service/test/test_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
from fastapi.testclient import TestClient

import medcat_service.test.common as common
from medcat_service.config import Settings
from medcat_service.main import app
from medcat_service.nlp_processor.medcat_processor import MedCatProcessor


class TestMedcatService(unittest.TestCase):
Expand All @@ -31,14 +33,23 @@ def setUpClass(cls):
"""
cls._setup_logging(cls)
common.setup_medcat_processor()
cls.client = TestClient(app)
test_settings = Settings()
app.state.settings = test_settings
app.state.medcat = MedCatProcessor(test_settings)
cls._client_ctx = TestClient(app)
cls.client = cls._client_ctx.__enter__()

@staticmethod
def _setup_logging(cls):
    """Configure root logging at INFO level and attach a module logger to ``cls``."""
    logging.basicConfig(
        level=logging.INFO,
        format='[%(asctime)s] [%(levelname)s] %(name)s: %(message)s',
    )
    # Stored on the class object passed in, so tests can reach it as ``cls.log``.
    cls.log = logging.getLogger(__name__)

@classmethod
def tearDownClass(cls):
    """Close the shared test client once every test in the class has run."""
    # Leaving the TestClient context is what lets the app's shutdown run.
    client_ctx = cls._client_ctx
    client_ctx.__exit__(None, None, None)

# unit test helper methods
#
def _testProcessSingleDoc(self, doc):
Expand Down
11 changes: 11 additions & 0 deletions medcat-service/start_service_production.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ if [ -z ${SERVER_WORKER_TIMEOUT+x} ]; then
echo "SERVER_WORKER_TIMEOUT is unset -- setting to default (sec): $SERVER_WORKER_TIMEOUT";
fi

# Number of requests a gunicorn worker serves before it is restarted (passed
# to gunicorn below as --max-requests). Default to 1000 when not provided.
if [ -z "${SERVER_GUNICORN_MAX_REQUESTS+x}" ]; then
  SERVER_GUNICORN_MAX_REQUESTS=1000;
  echo "SERVER_GUNICORN_MAX_REQUESTS is unset -- setting to default (requests): $SERVER_GUNICORN_MAX_REQUESTS";
fi

# Random jitter added to the per-worker request limit (passed to gunicorn
# below as --max-requests-jitter). Default to 50 when not provided.
if [ -z "${SERVER_GUNICORN_MAX_REQUESTS_JITTER+x}" ]; then
  SERVER_GUNICORN_MAX_REQUESTS_JITTER=50;
  echo "SERVER_GUNICORN_MAX_REQUESTS_JITTER is unset -- setting to default (requests): $SERVER_GUNICORN_MAX_REQUESTS_JITTER";
fi

SERVER_ACCESS_LOG_FORMAT="%(t)s [ACCESS] %(h)s \"%(r)s\" %(s)s \"%(f)s\" \"%(a)s\""

Expand All @@ -50,5 +59,7 @@ exec gunicorn \
--error-logfile=- \
--log-level info \
--config /cat/config.py \
--max-requests="$SERVER_GUNICORN_MAX_REQUESTS" \
--max-requests-jitter="$SERVER_GUNICORN_MAX_REQUESTS_JITTER" \
--worker-class uvicorn.workers.UvicornWorker \
medcat_service.main:app
Loading