add changes based on feedback

jsntsay · jsntsay · commit 99da2deed403 · 2025-10-27T11:36:53.000-04:00
Signed-off-by: Jason Tsay <jason.tsay@ibm.com>
diff --git a/plugins/altk_json_processor/README.md b/plugins/altk_json_processor/README.md
@@ -5,15 +5,16 @@
 
 Uses JSON Processor from ALTK to extract data from long JSON responses. See the [ALTK](https://altk.ai/) and the [JSON Processor component in the ALTK repo](https://github.com/AgentToolkit/agent-lifecycle-toolkit/tree/main/altk/post_tool/code_generation) for more details on how the component works.
 
+Note that this plugin will require calling an LLM and will therefore require configuring an LLM provider as described below. This plugin will also incur some cost in terms of time and money to do its LLM calls. This can be adjusted via the length threshold in the configuration, such that the plugin only activates and calls an LLM on JSON responses of a particular length (default: 100,000 characters).
+
 ## Hooks
 - `tool_post_invoke` - Detects long JSON responses and processes as necessary
 
 ## Installation
 
-1. Copy .env.example .env
-2. Enable plugins in `.env`
-3. Enable the "ALTKJsonProcessor" plugin in `plugins/config.yaml`.
-4. Install the optional dependency `altk` (i.e. `pip install mcp-context-forge[altk]`)
+1. Enable the "ALTKJsonProcessor" plugin in `plugins/config.yaml`.
+2. Install the optional dependency `altk` (i.e. `pip install mcp-context-forge[altk]`)
+3. Configure an LLM provider as described below.
 
 ## Configuration
 
@@ -29,7 +30,7 @@ Uses JSON Processor from ALTK to extract data from long JSON responses. See the
     config:
       jsonprocessor_query: ""
       llm_provider: "watsonx" # one of watsonx, ollama, openai, anthropic
-      watsonx:
+      watsonx: # each section of providers is optional
         wx_api_key: "" # optional, can define WX_API_KEY instead
         wx_project_id: "" # optional, can define WX_PROJECT_ID instead
         wx_url: "https://us-south.ml.cloud.ibm.com"
@@ -45,3 +46,22 @@ Uses JSON Processor from ALTK to extract data from long JSON responses. See the
 
 - `length_threshold` is the minimum number of characters before activating this component
 - `jsonprocessor_query` is a natural language statement of what the long response should be processed for. For an example of a long response for a musical artist: "get full metadata for all albums from the artist's discography in json format"
+
+### LLM Provider Configuration
+
+In the configuration, select an LLM Provider via `llm_provider`; the current options are WatsonX, Ollama, OpenAI, or Anthropic.
+Then fill out the corresponding provider section in the plugin config. For many of the api key-related fields, an environment variable
+can also be used instead. If the field is set in both the plugin config and in an environment variable, the plugin config takes priority.
+
+### JSON Processor Query
+
+To guide the JSON Processor, an optional but recommended `jsonprocessor_query` can be provided that is a natural language statement of what the long response should be processed for.
+
+Example queries:
+
+- For an API endpoint such as [this Spotify artist overview](https://rapidapi.com/DataFanatic/api/spotify-scraper/playground/apiendpoint_fd33b4eb-d258-437e-af85-c244904acefc) that returns a large response, if you only want the discography of the artist, use a query such as: "get full metadata for all albums from the artist's discography in json format"
+- For a shopping API endpoint that returns a [response like this](https://raw.githubusercontent.com/AgentToolkit/agent-lifecycle-toolkit/refs/heads/main/examples/codegen_long_response_example.json), if you only want the sizes of the sneakers, use a query such as: "get the sizes for all products"
+
+## Testing
+
+Unit tests: `tests/unit/mcpgateway/plugins/plugins/altk_json_processor/test_json_processor.py`
diff --git a/plugins/altk_json_processor/json_processor.py b/plugins/altk_json_processor/json_processor.py
@@ -71,21 +71,24 @@ async def tool_post_invoke(self, payload: ToolPostInvokePayload, context: Plugin
             if len(self._cfg["watsonx"]["wx_api_key"]) > 0:
                 api_key = self._cfg["watsonx"]["wx_api_key"]
             else:
-                # Note that we assume here this env var exists and should throw an error if not
-                api_key = os.environ["WX_API_KEY"]
+                api_key = os.getenv("WX_API_KEY")
+                if not api_key:
+                    raise ValueError("WatsonX api key not found, provide WX_API_KEY either in the plugin config or as an env var.")
             if len(self._cfg["watsonx"]["wx_project_id"]) > 0:
                 project_id = self._cfg["watsonx"]["wx_project_id"]
             else:
-                # Note that we assume here this env var exists and should throw an error if not
-                project_id = os.environ["WX_PROJECT_ID"]
+                project_id = os.getenv("WX_PROJECT_ID")
+                if not project_id:
+                    raise ValueError("WatsonX project id not found, provide WX_PROJECT_ID either in the plugin config or as an env var.")
             llm_client = watsonx_client(model_id=self._cfg["model_id"], api_key=api_key, project_id=project_id, url=self._cfg["watsonx"]["wx_url"])
         elif provider == "openai":
             openai_client = get_llm("openai.sync")
             if len(self._cfg["openai"]["api_key"]) > 0:
                 api_key = self._cfg["openai"]["api_key"]
             else:
-                # Note that we assume here this env var exists and should throw an error if not
-                api_key = os.environ["OPENAI_API_KEY"]
+                api_key = os.getenv("OPENAI_API_KEY")
+                if not api_key:
+                    raise ValueError("OpenAI api key not found, provide OPENAI_API_KEY either in the plugin config or as an env var.")
             llm_client = openai_client(api_key=api_key, model=self._cfg["model_id"])
         elif provider == "ollama":
             ollama_client = get_llm("litellm.ollama")
@@ -96,9 +99,13 @@ async def tool_post_invoke(self, payload: ToolPostInvokePayload, context: Plugin
             if len(self._cfg["anthropic"]["api_key"]) > 0:
                 api_key = self._cfg["anthropic"]["api_key"]
             else:
-                # Note that we assume here this env var exists and should throw an error if not
-                api_key = os.environ["ANTHROPIC_API_KEY"]
+                api_key = os.getenv("ANTHROPIC_API_KEY")
+                if not api_key:
+                    raise ValueError("Anthropic api key not found, provide ANTHROPIC_API_KEY either in the plugin config or as an env var.")
             llm_client = anthropic_client(model_name=model_path, api_key=api_key)
+        elif provider == "pytestmock":
+            # only meant to be used for unit tests
+            llm_client = None
         else:
             raise ValueError("Unknown provider given for 'llm_provider' in plugin config!")
 
@@ -111,21 +118,28 @@ async def tool_post_invoke(self, payload: ToolPostInvokePayload, context: Plugin
                 content = payload.result["content"][0]
                 if "type" in content and content["type"] == "text":
                     response_str = content["text"]
-                    try:
-                        response_json = json.loads(response_str)
-                    except json.decoder.JSONDecodeError:
-                        # ignore anything that's not json
-                        pass
 
-        if response_json and response_str and len(response_str) > self._cfg["length_threshold"]:
+                    if len(response_str) > self._cfg["length_threshold"]:
+                        try:
+                            response_json = json.loads(response_str)
+                        except json.decoder.JSONDecodeError:
+                            # ignore anything that's not json
+                            pass
+
+        # Should only get here if response is long enough and is valid JSON
+        if response_json:
             logger.info("Long JSON response detected, using ALTK JSON Processor...")
-            codegen = CodeGenerationComponent(config=config)
-            nl_query = self._cfg["jsonprocessor_query"]
-            input_data = CodeGenerationRunInput(messages=[], nl_query=nl_query, tool_response=response_json)
-            output = codegen.process(input_data, AgentPhase.RUNTIME)
-            output = cast(CodeGenerationRunOutput, output)
-            payload.result["content"][0]["text"] = output.result
-            logger.debug(f"ALTK processed response: {output.result}")
+            if provider == "pytestmock":
+                # only meant for unit testing
+                payload.result["content"][0]["text"] = "(filtered response)"
+            else:
+                codegen = CodeGenerationComponent(config=config)
+                nl_query = self._cfg.get("jsonprocessor_query", "")
+                input_data = CodeGenerationRunInput(messages=[], nl_query=nl_query, tool_response=response_json)
+                output = codegen.process(input_data, AgentPhase.RUNTIME)
+                output = cast(CodeGenerationRunOutput, output)
+                payload.result["content"][0]["text"] = output.result
+                logger.debug(f"ALTK processed response: {output.result}")
             return ToolPostInvokeResult(continue_processing=True, modified_payload=payload)
 
         return ToolPostInvokeResult(continue_processing=True)
diff --git a/plugins/altk_json_processor/plugin-manifest.yaml b/plugins/altk_json_processor/plugin-manifest.yaml
@@ -2,5 +2,6 @@ description: "Uses JSON Processor from ALTK to extract data from long JSON respo
 author: "Jason Tsay"
 version: "0.1.0"
 available_hooks:
-  - "tool_post_hook"
+  - "tool_post_invoke"
 default_configs:
+  length_threshold: 100000
diff --git a/plugins/config.yaml b/plugins/config.yaml
@@ -887,7 +887,7 @@ plugins:
     config:
       jsonprocessor_query: ""
       llm_provider: "watsonx" # one of watsonx, ollama, openai, anthropic
-      watsonx:
+      watsonx: # each section of providers is optional
         wx_api_key: "" # optional, can define WX_API_KEY instead
         wx_project_id: "" # optional, can define WX_PROJECT_ID instead
         wx_url: "https://us-south.ml.cloud.ibm.com"
diff --git a/pyproject.toml b/pyproject.toml
@@ -116,7 +116,7 @@ dev = [
     "pydocstyle>=6.3.0",
     "pylint>=3.3.9",
     "pylint-pydantic>=0.3.5",
-    #"pyre-check>=0.9.25", # incompatible with altk, superceded by pyrefly?
+    #"pyre-check>=0.9.25", # unused, conflicts with altk, superseded by pyrefly
     "pyrefly>=0.35.0",
     "pyright>=1.1.406",
     "pyroma>=5.0",
diff --git a/tests/unit/mcpgateway/plugins/plugins/altk_json_processor/test_json_processor.py b/tests/unit/mcpgateway/plugins/plugins/altk_json_processor/test_json_processor.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+"""Location: ./tests/unit/mcpgateway/plugins/plugins/altk_json_processor/test_json_processor.py
+Copyright 2025
+SPDX-License-Identifier: Apache-2.0
+Authors: Jason Tsay
+
+Tests for ALTKJsonProcessor.
+"""
+
+# Standard
+import json
+
+# Third-Party
+import pytest
+
+# First-Party
+from mcpgateway.plugins.framework.models import (
+    GlobalContext,
+    HookType,
+    PluginConfig,
+    PluginContext,
+    ToolPostInvokePayload,
+)
+
+try:
+    # ALTK may not be available due to being an optional dependency, skip if not available
+    # First-Party
+    from plugins.altk_json_processor.json_processor import ALTKJsonProcessor
+except ModuleNotFoundError:
+    pytest.mark.skip(reason="altk not available")
+
+
+@pytest.mark.asyncio
+async def test_threshold():
+    plugin = ALTKJsonProcessor(  # type: ignore
+        PluginConfig(
+            name="jsonprocessor", kind="plugins.altk_json_processor.json_processor.ALTKJsonProcessor", hooks=[HookType.TOOL_POST_INVOKE], config={"llm_provider": "pytestmock", "length_threshold": 50}
+        )
+    )
+    ctx = PluginContext(global_context=GlobalContext(request_id="r1"))
+    # below threshold, so the plugin should not activate
+    too_short = {"a": "1", "b": "2"}
+    too_short_payload = {"content": [{"type": "text", "text": json.dumps(too_short)}]}
+    res = await plugin.tool_post_invoke(ToolPostInvokePayload(name="x1", result=too_short_payload), ctx)
+    assert res.modified_payload is None
+    long_enough = {
+        "a": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
+        "b": "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.",
+        "c": "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.",
+        "d": "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
+    }
+    # above threshold, so the plugin should activate
+    long_enough_payload = {"content": [{"type": "text", "text": json.dumps(long_enough)}]}
+    res = await plugin.tool_post_invoke(ToolPostInvokePayload(name="x2", result=long_enough_payload), ctx)
+    assert res.modified_payload is not None
+    assert res.modified_payload.result["content"][0]["text"] == "(filtered response)"