|
6 | 6 | from pathlib import Path |
7 | 7 | from typing import Any, Dict, List, Optional, cast |
8 | 8 |
|
9 | | -from fastapi import APIRouter, Depends, File, HTTPException, UploadFile |
10 | | -from pydantic import BaseModel |
| 9 | +from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, Body |
| 10 | +from pydantic import BaseModel, Field |
11 | 11 |
|
12 | 12 | from litellm._logging import verbose_proxy_logger |
13 | 13 | from litellm.proxy._types import CommonProxyErrors, LitellmUserRoles, UserAPIKeyAuth |
@@ -679,3 +679,185 @@ async def convert_prompt_file_to_json( |
679 | 679 | except OSError: |
680 | 680 | pass # Directory not empty or other error |
681 | 681 |
|
| 682 | +class PromptCompletionRequest(BaseModel): |
| 683 | + prompt_id: str = Field(..., description="Unique ID of the prompt registered in PromptHub.") |
| 684 | + prompt_version: Optional[str] = Field(None, description="Optional version identifier.") |
| 685 | + prompt_variables: Dict[str, Any] = Field(default_factory=dict, description="Key-value mapping for template variables.") |
| 686 | +    extra_body: Dict[str, Any] = Field(default_factory=dict, description="Optional LiteLLM completion params (e.g. temperature, max_tokens) that override prompt-level config.") |
| 687 | + |
| 688 | +class PromptCompletionResponse(BaseModel): |
| 689 | + prompt_id: str |
| 690 | + prompt_version: Optional[str] |
| 691 | + model: str |
| 692 | + metadata: Dict[str, Any] |
| 693 | + variables: Dict[str, Any] |
| 694 | + completion_text: str |
| 695 | + raw_response: Dict[str, Any] |
| 696 | + |
| 697 | + |
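For reviewers, a quick sketch of the request/response contract these two models define. Everything below is illustrative: the prompt ID, variables, and model name are made up, and `extra_body` is the optional per-request override field described in the handler docstring.

```python
import json

# Illustrative request body for POST /prompts/completions (all values made up).
payload = {
    "prompt_id": "customer-support-triage",   # must be registered with the proxy's prompt hub
    "prompt_version": "v2",                   # optional; omit to use the default version
    "prompt_variables": {"customer_name": "Ada", "ticket_text": "My invoice is wrong."},
    "extra_body": {"temperature": 0.2},       # optional LiteLLM params, highest merge precedence
}
print(json.dumps(payload, indent=2))

# The endpoint answers with a PromptCompletionResponse-shaped JSON object, roughly:
# {
#   "prompt_id": "customer-support-triage",
#   "prompt_version": "v2",
#   "model": "gpt-4o",
#   "metadata": {...},
#   "variables": {...},
#   "completion_text": "...",
#   "raw_response": {...}
# }
```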
| 698 | +@router.post( |
| 699 | + "/prompts/completions", |
| 700 | + tags=["Prompt Completions"], |
| 701 | + dependencies=[Depends(user_api_key_auth)], |
| 702 | +) |
| 703 | +async def generate_completion_from_prompt_id( |
| 704 | + request: PromptCompletionRequest = Body(...), |
| 705 | + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), |
| 706 | +): |
| 707 | + """ |
| 708 | + Generate a model completion using a managed prompt. |
| 709 | +
|
| 710 | + Parameter merge priority: |
| 711 | + 1. Prompt metadata/config (base defaults) |
| 712 | + 2. Prompt-level litellm_params overrides |
| 713 | +    3. User-supplied request.extra_body (highest precedence) |
| 714 | + """ |
| 715 | + |
| 716 | + import litellm |
| 717 | + from litellm.proxy.prompts.prompt_registry import PROMPT_HUB |
| 718 | + from litellm.integrations.custom_prompt_management import CustomPromptManagement |
| 719 | + from litellm.integrations.gitlab import GitLabPromptManager |
| 720 | + from litellm.integrations.dotprompt import DotpromptManager |
| 721 | + from litellm.proxy._types import LitellmUserRoles |
| 722 | + |
| 723 | + prompt_id = request.prompt_id |
| 724 | + variables = request.prompt_variables or {} |
| 725 | + |
| 726 | + # ------------------------------------------------------------ |
| 727 | + # Step 1: Access validation |
| 728 | + # ------------------------------------------------------------ |
| 729 | + prompts: Optional[List[str]] = None |
| 730 | + if user_api_key_dict.metadata is not None: |
| 731 | + prompts = cast(Optional[List[str]], user_api_key_dict.metadata.get("prompts", None)) |
| 732 | +    if prompts is not None: |
| 733 | +        if prompt_id not in prompts: |
| 734 | +            raise HTTPException(status_code=400, detail=f"Prompt {prompt_id} not found") |
| 735 | +    elif user_api_key_dict.user_role not in ( |
| 736 | +        LitellmUserRoles.PROXY_ADMIN, |
| 737 | +        LitellmUserRoles.PROXY_ADMIN.value, |
| 738 | +    ): |
| 739 | +        raise HTTPException( |
| 740 | +            status_code=403, |
| 741 | +            detail=f"You are not authorized to access this prompt. Your role - {user_api_key_dict.user_role}, Your key's prompts - {prompts}", |
| 742 | +        ) |
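The access rule implemented above, restated: a key that carries an explicit `prompts` allow-list in its metadata may only call prompts on that list; a key without an allow-list is limited to proxy admins. A minimal standalone sketch of that rule, using plain values instead of `UserAPIKeyAuth` (the role strings are illustrative stand-ins for `LitellmUserRoles` values):

```python
from typing import List, Optional

def can_call_prompt(prompt_id: str, key_prompts: Optional[List[str]], user_role: str) -> bool:
    """Allow-listed keys are bound to their list; keys without a list are admin-only."""
    if key_prompts is not None:
        return prompt_id in key_prompts
    return user_role == "proxy_admin"

assert can_call_prompt("email-summary", ["email-summary"], "internal_user")
assert not can_call_prompt("other-prompt", ["email-summary"], "internal_user")
assert can_call_prompt("any-prompt", None, "proxy_admin")
assert not can_call_prompt("any-prompt", None, "internal_user")
```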
| 743 | + |
| 744 | + # ------------------------------------------------------------ |
| 745 | + # Step 2: Load prompt and callback |
| 746 | + # ------------------------------------------------------------ |
| 747 | + prompt_spec = PROMPT_HUB.get_prompt_by_id(prompt_id) |
| 748 | + if prompt_spec is None: |
| 749 | + raise HTTPException(status_code=404, detail=f"Prompt {prompt_id} not found") |
| 750 | + |
| 751 | + prompt_callback: Optional[CustomPromptManagement] = PROMPT_HUB.get_prompt_callback_by_id(prompt_id) |
| 752 | + if prompt_callback is None: |
| 753 | + raise HTTPException(status_code=404, detail=f"No callback found for prompt {prompt_id}") |
| 754 | + |
| 755 | + prompt_template: Optional[PromptTemplateBase] = None |
| 756 | + |
| 757 | + if isinstance(prompt_callback, DotpromptManager): |
| 758 | + template = prompt_callback.prompt_manager.get_all_prompts_as_json() |
| 759 | + if template and len(template) == 1: |
| 760 | + tid = list(template.keys())[0] |
| 761 | + prompt_template = PromptTemplateBase( |
| 762 | + litellm_prompt_id=tid, |
| 763 | + content=template[tid]["content"], |
| 764 | + metadata=template[tid]["metadata"], |
| 765 | + ) |
| 766 | + |
| 767 | + elif isinstance(prompt_callback, GitLabPromptManager): |
| 768 | + prompt_json = prompt_spec.model_dump() |
| 769 | + prompt_template = PromptTemplateBase( |
| 770 | + litellm_prompt_id=prompt_json.get("prompt_id", ""), |
| 771 | + content=prompt_json.get("litellm_params", {}).get("model_config", {}).get("content", ""), |
| 772 | + metadata=prompt_json.get("litellm_params", {}).get("model_config", {}).get("metadata", {}), |
| 773 | + ) |
| 774 | + |
| 775 | + if not prompt_template: |
| 776 | + raise HTTPException(status_code=400, detail=f"Could not load prompt template for {prompt_id}") |
| 777 | + |
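For context on the `DotpromptManager` branch: a `.prompt` file pairs YAML frontmatter (which surfaces here as `metadata`, including the `model` the handler requires and the `config.system_prompt` read in Step 4) with a Handlebars-style body (the `content`). A hedged sketch of such a file under the handler's assumptions; the exact fields depend on the prompt files actually checked into the repo:

```
---
model: gpt-4o
config:
  temperature: 0.3
  system_prompt: You are a support assistant.
---
Summarize the ticket from {{customer_name}}:

{{ticket_text}}
```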
| 778 | + # ------------------------------------------------------------ |
| 779 | +    # Step 3: Validate template variables (rendering happens in the prompt callback below) |
| 780 | + # ------------------------------------------------------------ |
| 781 | + try: |
| 782 | +        prompt_template.content.format(**variables)  # fail fast when a {var} placeholder has no value |
| 783 | + except KeyError as e: |
| 784 | + raise HTTPException(status_code=400, detail=f"Missing variable: {str(e)}") |
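One caveat on the fail-fast check above: `str.format` only raises `KeyError` for single-brace `{var}` placeholders; Handlebars-style `{{var}}` placeholders (as used in dotprompt bodies) are treated as escaped braces and slip through, with actual rendering left to the prompt callback. A quick demonstration:

```python
# Single-brace placeholders fail fast when a value is missing.
try:
    "Hello {name}, ticket: {ticket_text}".format(name="Ada")
except KeyError as err:
    print(f"missing variable: {err}")   # missing variable: 'ticket_text'

# Double-brace (Handlebars-style) placeholders are escaped braces to str.format,
# so they pass through untouched and are rendered later by the prompt callback.
print("Hello {{name}}".format())        # prints: Hello {name}
```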
| 785 | + |
| 786 | + metadata = prompt_template.metadata or {} |
| 787 | + model = metadata.get("model") |
| 788 | + if not model: |
| 789 | + raise HTTPException(status_code=400, detail=f"Model not specified in metadata for {prompt_id}") |
| 790 | + |
| 791 | + # ------------------------------------------------------------ |
| 792 | + # Step 4: Build messages using prompt callback |
| 793 | + # ------------------------------------------------------------ |
| 794 | + system_prompt = metadata.get("config", {}).get("system_prompt", "You are a helpful assistant.") |
| 795 | + |
| 796 | + completion_prompt = prompt_callback.get_chat_completion_prompt( |
| 797 | + model=model, |
| 798 | + messages=[{"role": "system", "content": system_prompt}], |
| 799 | + non_default_params=metadata, |
| 800 | + prompt_id=prompt_id, |
| 801 | + prompt_variables=variables, |
| 802 | + dynamic_callback_params={}, |
| 803 | + prompt_label=None, |
| 804 | + prompt_version=request.prompt_version, |
| 805 | + ) |
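Step 4 relies on the `CustomPromptManagement.get_chat_completion_prompt` contract, which Step 6 below assumes returns a `(model, messages, non_default_params)` tuple. A toy stand-in that satisfies that assumed shape, purely to document why the handler can index `completion_prompt[0]` and `completion_prompt[1]`:

```python
from typing import Any, Dict, List, Tuple

def toy_get_chat_completion_prompt(
    model: str,
    messages: List[Dict[str, Any]],
    prompt_variables: Dict[str, Any],
) -> Tuple[str, List[Dict[str, Any]], Dict[str, Any]]:
    """Returns the same (model, messages, params) shape the handler unpacks by index."""
    rendered = messages + [
        {"role": "user", "content": f"Ticket from {prompt_variables.get('customer_name', 'unknown')}"}
    ]
    return model, rendered, {"temperature": 0.3}

model_name, rendered_messages, params = toy_get_chat_completion_prompt(
    "gpt-4o",
    [{"role": "system", "content": "You are a helpful assistant."}],
    {"customer_name": "Ada"},
)
print(model_name, rendered_messages[-1]["content"], params)
```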
| 806 | + |
| 807 | + # ------------------------------------------------------------ |
| 808 | + # Step 5: Merge parameters from multiple sources |
| 809 | + # ------------------------------------------------------------ |
| 810 | + base_params = metadata.get("config", {}) or {} |
| 811 | + prompt_params = ( |
| 812 | + prompt_spec.litellm_params.get("config", {}) |
| 813 | + if hasattr(prompt_spec, "litellm_params") and isinstance(prompt_spec.litellm_params, dict) |
| 814 | + else {} |
| 815 | + ) |
| 816 | +    user_overrides = request.extra_body or {} |
| 817 | + |
| 818 | + # Flatten nested "config" keys that sometimes leak through metadata |
| 819 | + def flatten_config(d: dict) -> dict: |
| 820 | + if "config" in d and isinstance(d["config"], dict): |
| 821 | + flattened = {**d, **d["config"]} |
| 822 | + flattened.pop("config", None) |
| 823 | + return flattened |
| 824 | + return d |
| 825 | + |
| 826 | + base_params = flatten_config(base_params) |
| 827 | + prompt_params = flatten_config(prompt_params) |
| 828 | + user_overrides = flatten_config(user_overrides) |
| 829 | + |
| 830 | + # Merge priority: base < prompt-level < user overrides |
| 831 | + merged_params = {**base_params, **prompt_params, **user_overrides} |
| 832 | + merged_params.setdefault("stream", False) |
| 833 | + merged_params["user"] = user_api_key_dict.user_id |
| 834 | + merged_params.pop("model", None) |
| 835 | + merged_params.pop("messages", None) |
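A toy illustration of the precedence the merge above produces (all values made up): later dicts win key-by-key, and `setdefault` only fills `stream` when no source set it.

```python
base_params = {"temperature": 0.2, "max_tokens": 256}   # prompt metadata config
prompt_params = {"temperature": 0.7}                     # prompt-level litellm_params config
user_overrides = {"max_tokens": 512, "top_p": 0.9}       # request.extra_body

merged = {**base_params, **prompt_params, **user_overrides}
merged.setdefault("stream", False)
print(merged)  # {'temperature': 0.7, 'max_tokens': 512, 'top_p': 0.9, 'stream': False}
```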
| 836 | + # ------------------------------------------------------------ |
| 837 | + # Step 6: Invoke model |
| 838 | + # ------------------------------------------------------------ |
| 839 | + try: |
| 840 | + response = await litellm.acompletion( |
| 841 | + model=completion_prompt[0], |
| 842 | + messages=completion_prompt[1], |
| 843 | + **merged_params, |
| 844 | + ) |
| 845 | + except Exception as e: |
| 846 | + raise HTTPException(status_code=500, detail=f"Error invoking model: {str(e)}") |
| 847 | + |
| 848 | + # ------------------------------------------------------------ |
| 849 | + # Step 7: Extract text & return structured response |
| 850 | + # ------------------------------------------------------------ |
| 851 | + completion_text = ( |
| 852 | +        response.get("choices", [{}])[0].get("message", {}).get("content") or "" |
| 853 | + ) |
| 854 | + |
| 855 | + return PromptCompletionResponse( |
| 856 | + prompt_id=prompt_id, |
| 857 | + prompt_version=request.prompt_version, |
| 858 | + model=model, |
| 859 | + metadata=metadata, |
| 860 | + variables=variables, |
| 861 | + completion_text=completion_text, |
| 862 | + raw_response=response.model_dump() if hasattr(response, "model_dump") else response, |
| 863 | + ) |
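End to end, a client would exercise the new route roughly as below. The proxy URL, API key, and prompt ID are placeholders for whatever the deployment actually uses.

```python
import httpx

PROXY_BASE_URL = "http://localhost:4000"   # placeholder proxy address
API_KEY = "sk-1234"                        # placeholder virtual key

resp = httpx.post(
    f"{PROXY_BASE_URL}/prompts/completions",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={
        "prompt_id": "customer-support-triage",
        "prompt_variables": {"customer_name": "Ada", "ticket_text": "My invoice is wrong."},
        "extra_body": {"temperature": 0.2},
    },
    timeout=60.0,
)
resp.raise_for_status()
print(resp.json()["completion_text"])
```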