|
6 | 6 | from pathlib import Path |
7 | 7 | from typing import Any, Dict, List, Optional, cast |
8 | 8 |
|
9 | | -from fastapi import APIRouter, Depends, File, HTTPException, UploadFile |
10 | | -from pydantic import BaseModel |
| 9 | +from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, Body |
| 10 | +from pydantic import BaseModel, Field |
11 | 11 |
|
12 | 12 | from litellm._logging import verbose_proxy_logger |
13 | 13 | from litellm.proxy._types import CommonProxyErrors, LitellmUserRoles, UserAPIKeyAuth |
@@ -679,3 +679,185 @@ async def convert_prompt_file_to_json( |
679 | 679 | except OSError: |
680 | 680 | pass # Directory not empty or other error |
681 | 681 |
|
| 682 | +class PromptCompletionRequest(BaseModel): |
| 683 | + prompt_id: str = Field(..., description="Unique ID of the prompt registered in PromptHub.") |
| 684 | + prompt_version: Optional[str] = Field(None, description="Optional version identifier.") |
| 685 | + prompt_variables: Dict[str, Any] = Field(default_factory=dict, description="Key-value mapping for template variables.") |
| 686 | +    extra_body: Dict[str, Any] = Field(default_factory=dict, description="Optional LiteLLM completion params (e.g. temperature, max_tokens) that override prompt-level config.") |
| 687 | + |
| 688 | +class PromptCompletionResponse(BaseModel): |
| 689 | + prompt_id: str |
| 690 | + prompt_version: Optional[str] |
| 691 | + model: str |
| 692 | + metadata: Dict[str, Any] |
| 693 | + variables: Dict[str, Any] |
| 694 | + completion_text: str |
| 695 | + raw_response: Dict[str, Any] |
| 696 | + |
| 697 | + |
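For reviewers, a quick sketch of the request/response contract these two models define. Everything below is illustrative: the prompt ID, variables, and model name are made up, and `extra_body` is the optional per-request override field described in the handler docstring.

```python
import json

# Illustrative request body for POST /prompts/completions (all values made up).
payload = {
    "prompt_id": "customer-support-triage",   # must be registered with the proxy's prompt hub
    "prompt_version": "v2",                   # optional; omit to use the default version
    "prompt_variables": {"customer_name": "Ada", "ticket_text": "My invoice is wrong."},
    "extra_body": {"temperature": 0.2},       # optional LiteLLM params, highest merge precedence
}
print(json.dumps(payload, indent=2))

# The endpoint answers with a PromptCompletionResponse-shaped JSON object, roughly:
# {
#   "prompt_id": "customer-support-triage",
#   "prompt_version": "v2",
#   "model": "gpt-4o",
#   "metadata": {...},
#   "variables": {...},
#   "completion_text": "...",
#   "raw_response": {...}
# }
```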
| 698 | +@router.post( |
| 699 | + "/prompts/completions", |
| 700 | + tags=["Prompt Completions"], |
| 701 | + dependencies=[Depends(user_api_key_auth)], |
| 702 | +) |
| 703 | +async def generate_completion_from_prompt_id( |
| 704 | + request: PromptCompletionRequest = Body(...), |
| 705 | + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), |
| 706 | +): |
| 707 | + """ |
| 708 | + Generate a model completion using a managed prompt. |
| 709 | +
|
| 710 | + Parameter merge priority: |
| 711 | + 1. Prompt metadata/config (base defaults) |
| 712 | + 2. Prompt-level litellm_params overrides |
| 713 | +    3. User-supplied request.extra_body (highest precedence) |
| 714 | + """ |
| 715 | + |
| 716 | + import litellm |
| 717 | + from litellm.proxy.prompts.prompt_registry import PROMPT_HUB |
| 718 | + from litellm.integrations.custom_prompt_management import CustomPromptManagement |
| 719 | + from litellm.integrations.gitlab import GitLabPromptManager |
| 720 | + from litellm.integrations.dotprompt import DotpromptManager |
| 721 | + from litellm.proxy._types import LitellmUserRoles |
| 722 | + |
| 723 | + prompt_id = request.prompt_id |
| 724 | + variables = request.prompt_variables or {} |
| 725 | + |
| 726 | + # ------------------------------------------------------------ |
| 727 | + # Step 1: Access validation |
| 728 | + # ------------------------------------------------------------ |
| 729 | + prompts: Optional[List[str]] = None |
| 730 | + if user_api_key_dict.metadata is not None: |
| 731 | + prompts = cast(Optional[List[str]], user_api_key_dict.metadata.get("prompts", None)) |
| 732 | +    if prompts is not None: |
| 733 | +        if prompt_id not in prompts: |
| 734 | +            raise HTTPException(status_code=400, detail=f"Prompt {prompt_id} not found") |
| 735 | +    elif user_api_key_dict.user_role not in ( |
| 736 | +        LitellmUserRoles.PROXY_ADMIN, |
| 737 | +        LitellmUserRoles.PROXY_ADMIN.value, |
| 738 | +    ): |
| 739 | +        raise HTTPException( |
| 740 | +            status_code=403, |
| 741 | +            detail=f"You are not authorized to access this prompt. Your role - {user_api_key_dict.user_role}, Your key's prompts - {prompts}", |
| 742 | +        ) |
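The access rule implemented above, restated: a key that carries an explicit `prompts` allow-list in its metadata may only call prompts on that list; a key without an allow-list is limited to proxy admins. A minimal standalone sketch of that rule, using plain values instead of `UserAPIKeyAuth` (the role strings are illustrative stand-ins for `LitellmUserRoles` values):

```python
from typing import List, Optional

def can_call_prompt(prompt_id: str, key_prompts: Optional[List[str]], user_role: str) -> bool:
    """Allow-listed keys are bound to their list; keys without a list are admin-only."""
    if key_prompts is not None:
        return prompt_id in key_prompts
    return user_role == "proxy_admin"

assert can_call_prompt("email-summary", ["email-summary"], "internal_user")
assert not can_call_prompt("other-prompt", ["email-summary"], "internal_user")
assert can_call_prompt("any-prompt", None, "proxy_admin")
assert not can_call_prompt("any-prompt", None, "internal_user")
```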
| 743 | + |
| 744 | + # ------------------------------------------------------------ |
| 745 | + # Step 2: Load prompt and callback |
| 746 | + # ------------------------------------------------------------ |
| 747 | + prompt_spec = PROMPT_HUB.get_prompt_by_id(prompt_id) |
| 748 | + if prompt_spec is None: |
| 749 | + raise HTTPException(status_code=404, detail=f"Prompt {prompt_id} not found") |
| 750 | + |
| 751 | + prompt_callback: Optional[CustomPromptManagement] = PROMPT_HUB.get_prompt_callback_by_id(prompt_id) |
| 752 | + if prompt_callback is None: |
| 753 | + raise HTTPException(status_code=404, detail=f"No callback found for prompt {prompt_id}") |
| 754 | + |
| 755 | + prompt_template: Optional[PromptTemplateBase] = None |
| 756 | + |
| 757 | + if isinstance(prompt_callback, DotpromptManager): |
| 758 | + template = prompt_callback.prompt_manager.get_all_prompts_as_json() |
| 759 | + if template and len(template) == 1: |
| 760 | + tid = list(template.keys())[0] |
| 761 | + prompt_template = PromptTemplateBase( |
| 762 | + litellm_prompt_id=tid, |
| 763 | + content=template[tid]["content"], |
| 764 | + metadata=template[tid]["metadata"], |
| 765 | + ) |
| 766 | + |
| 767 | + elif isinstance(prompt_callback, GitLabPromptManager): |
| 768 | + prompt_json = prompt_spec.model_dump() |
| 769 | + prompt_template = PromptTemplateBase( |
| 770 | + litellm_prompt_id=prompt_json.get("prompt_id", ""), |
| 771 | + content=prompt_json.get("litellm_params", {}).get("model_config", {}).get("content", ""), |
| 772 | + metadata=prompt_json.get("litellm_params", {}).get("model_config", {}).get("metadata", {}), |
| 773 | + ) |
| 774 | + |
| 775 | + if not prompt_template: |
| 776 | + raise HTTPException(status_code=400, detail=f"Could not load prompt template for {prompt_id}") |
| 777 | + |
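For context on the `DotpromptManager` branch: a `.prompt` file pairs YAML frontmatter (which surfaces here as `metadata`, including the `model` the handler requires and the `config.system_prompt` read in Step 4) with a Handlebars-style body (the `content`). A hedged sketch of such a file under the handler's assumptions; the exact fields depend on the prompt files actually checked into the repo:

```
---
model: gpt-4o
config:
  temperature: 0.3
  system_prompt: You are a support assistant.
---
Summarize the ticket from {{customer_name}}:

{{ticket_text}}
```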
| 778 | + # ------------------------------------------------------------ |
| 779 | +    # Step 3: Validate template variables (rendering happens in the prompt callback below) |
| 780 | + # ------------------------------------------------------------ |
| 781 | + try: |
| 782 | +        prompt_template.content.format(**variables)  # fail fast when a {var} placeholder has no value |
| 783 | + except KeyError as e: |
| 784 | + raise HTTPException(status_code=400, detail=f"Missing variable: {str(e)}") |
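One caveat on the fail-fast check above: `str.format` only raises `KeyError` for single-brace `{var}` placeholders; Handlebars-style `{{var}}` placeholders (as used in dotprompt bodies) are treated as escaped braces and slip through, with actual rendering left to the prompt callback. A quick demonstration:

```python
# Single-brace placeholders fail fast when a value is missing.
try:
    "Hello {name}, ticket: {ticket_text}".format(name="Ada")
except KeyError as err:
    print(f"missing variable: {err}")   # missing variable: 'ticket_text'

# Double-brace (Handlebars-style) placeholders are escaped braces to str.format,
# so they pass through untouched and are rendered later by the prompt callback.
print("Hello {{name}}".format())        # prints: Hello {name}
```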
| 785 | + |
| 786 | + metadata = prompt_template.metadata or {} |
| 787 | + model = metadata.get("model") |
| 788 | + if not model: |
| 789 | + raise HTTPException(status_code=400, detail=f"Model not specified in metadata for {prompt_id}") |
| 790 | + |
| 791 | + # ------------------------------------------------------------ |
| 792 | + # Step 4: Build messages using prompt callback |
| 793 | + # ------------------------------------------------------------ |
| 794 | + system_prompt = metadata.get("config", {}).get("system_prompt", "You are a helpful assistant.") |
| 795 | + |
| 796 | + completion_prompt = prompt_callback.get_chat_completion_prompt( |
| 797 | + model=model, |
| 798 | + messages=[{"role": "system", "content": system_prompt}], |
| 799 | + non_default_params=metadata, |
| 800 | + prompt_id=prompt_id, |
| 801 | + prompt_variables=variables, |
| 802 | + dynamic_callback_params={}, |
| 803 | + prompt_label=None, |
| 804 | + prompt_version=request.prompt_version, |
| 805 | + ) |
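Step 4 relies on the `CustomPromptManagement.get_chat_completion_prompt` contract, which Step 6 below assumes returns a `(model, messages, non_default_params)` tuple. A toy stand-in that satisfies that assumed shape, purely to document why the handler can index `completion_prompt[0]` and `completion_prompt[1]`:

```python
from typing import Any, Dict, List, Tuple

def toy_get_chat_completion_prompt(
    model: str,
    messages: List[Dict[str, Any]],
    prompt_variables: Dict[str, Any],
) -> Tuple[str, List[Dict[str, Any]], Dict[str, Any]]:
    """Returns the same (model, messages, params) shape the handler unpacks by index."""
    rendered = messages + [
        {"role": "user", "content": f"Ticket from {prompt_variables.get('customer_name', 'unknown')}"}
    ]
    return model, rendered, {"temperature": 0.3}

model_name, rendered_messages, params = toy_get_chat_completion_prompt(
    "gpt-4o",
    [{"role": "system", "content": "You are a helpful assistant."}],
    {"customer_name": "Ada"},
)
print(model_name, rendered_messages[-1]["content"], params)
```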
| 806 | + |
| 807 | + # ------------------------------------------------------------ |
| 808 | + # Step 5: Merge parameters from multiple sources |
| 809 | + # ------------------------------------------------------------ |
| 810 | + base_params = metadata.get("config", {}) or {} |
| 811 | + prompt_params = ( |
| 812 | + prompt_spec.litellm_params.get("config", {}) |
| 813 | + if hasattr(prompt_spec, "litellm_params") and isinstance(prompt_spec.litellm_params, dict) |
| 814 | + else {} |
| 815 | + ) |
| 816 | +    user_overrides = request.extra_body or {} |
| 817 | + |
| 818 | + # Flatten nested "config" keys that sometimes leak through metadata |
| 819 | + def flatten_config(d: dict) -> dict: |
| 820 | + if "config" in d and isinstance(d["config"], dict): |
| 821 | + flattened = {**d, **d["config"]} |
| 822 | + flattened.pop("config", None) |
| 823 | + return flattened |
| 824 | + return d |
| 825 | + |
| 826 | + base_params = flatten_config(base_params) |
| 827 | + prompt_params = flatten_config(prompt_params) |
| 828 | + user_overrides = flatten_config(user_overrides) |
| 829 | + |
| 830 | + # Merge priority: base < prompt-level < user overrides |
| 831 | + merged_params = {**base_params, **prompt_params, **user_overrides} |
| 832 | + merged_params.setdefault("stream", False) |
| 833 | + merged_params["user"] = user_api_key_dict.user_id |
| 834 | + merged_params.pop("model", None) |
| 835 | + merged_params.pop("messages", None) |
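A toy illustration of the precedence the merge above produces (all values made up): later dicts win key-by-key, and `setdefault` only fills `stream` when no source set it.

```python
base_params = {"temperature": 0.2, "max_tokens": 256}   # prompt metadata config
prompt_params = {"temperature": 0.7}                     # prompt-level litellm_params config
user_overrides = {"max_tokens": 512, "top_p": 0.9}       # request.extra_body

merged = {**base_params, **prompt_params, **user_overrides}
merged.setdefault("stream", False)
print(merged)  # {'temperature': 0.7, 'max_tokens': 512, 'top_p': 0.9, 'stream': False}
```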
| 836 | + # ------------------------------------------------------------ |
| 837 | + # Step 6: Invoke model |
| 838 | + # ------------------------------------------------------------ |
| 839 | + try: |
| 840 | + response = await litellm.acompletion( |
| 841 | + model=completion_prompt[0], |
| 842 | + messages=completion_prompt[1], |
| 843 | + **merged_params, |
| 844 | + ) |
| 845 | + except Exception as e: |
| 846 | + raise HTTPException(status_code=500, detail=f"Error invoking model: {str(e)}") |
| 847 | + |
| 848 | + # ------------------------------------------------------------ |
| 849 | + # Step 7: Extract text & return structured response |
| 850 | + # ------------------------------------------------------------ |
| 851 | + completion_text = ( |
| 852 | +        response.get("choices", [{}])[0].get("message", {}).get("content") or "" |
| 853 | + ) |
| 854 | + |
| 855 | + return PromptCompletionResponse( |
| 856 | + prompt_id=prompt_id, |
| 857 | + prompt_version=request.prompt_version, |
| 858 | + model=model, |
| 859 | + metadata=metadata, |
| 860 | + variables=variables, |
| 861 | + completion_text=completion_text, |
| 862 | + raw_response=response.model_dump() if hasattr(response, "model_dump") else response, |
| 863 | + ) |
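End to end, a client would exercise the new route roughly as below. The proxy URL, API key, and prompt ID are placeholders for whatever the deployment actually uses.

```python
import httpx

PROXY_BASE_URL = "http://localhost:4000"   # placeholder proxy address
API_KEY = "sk-1234"                        # placeholder virtual key

resp = httpx.post(
    f"{PROXY_BASE_URL}/prompts/completions",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={
        "prompt_id": "customer-support-triage",
        "prompt_variables": {"customer_name": "Ada", "ticket_text": "My invoice is wrong."},
        "extra_body": {"temperature": 0.2},
    },
    timeout=60.0,
)
resp.raise_for_status()
print(resp.json()["completion_text"])
```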