-
Notifications
You must be signed in to change notification settings - Fork 36
feat: support model-service proxy #166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,10 +1,127 @@ | ||
| from typing import Any | ||
|
|
||
| from fastapi import APIRouter, Request | ||
| import httpx | ||
| from fastapi import APIRouter, HTTPException, Request | ||
| from fastapi.responses import JSONResponse, StreamingResponse | ||
|
|
||
| from rock.logger import init_logger | ||
| from rock.sdk.model.server.config import PROXY_TARGET_URL | ||
|
|
||
| logger = init_logger(__name__) | ||
|
|
||
| proxy_router = APIRouter() | ||
|
|
||
|
|
||
async def forward_non_streaming_request(
    body: dict[str, Any],
    headers: dict[str, str],
    target_url: str,
    timeout: float = 120.0,
) -> tuple[Any, int]:
    """Forward a non-streaming request to the target API.

    Args:
        body: JSON-serializable payload sent as the POST body.
        headers: Headers sent with the upstream request.
        target_url: URL of the upstream endpoint.
        timeout: httpx timeout, in seconds, applied to the upstream call.

    Returns:
        A ``(payload, status_code)`` tuple. ``payload`` is the parsed JSON
        body, or the raw response text when the body is not valid JSON.

    Raises:
        HTTPException: 504 when the upstream call times out, 502 on any other
            httpx request error, 500 on any other failure.
    """
    async with httpx.AsyncClient() as client:
        try:
            logger.info(f"Forwarding non-streaming request body: {body}")
            # Only header names are logged, never values, so credentials
            # do not end up in the log.
            header_names = (
                ["Authorization" if name.lower() == "authorization" else name for name in headers]
                if headers
                else "No headers"
            )
            logger.info(f"Forwarding headers: {header_names}")

            response = await client.post(
                target_url,
                json=body,
                headers=headers,
                timeout=timeout,
            )

            logger.info(f"Target API non-streaming response status: {response.status_code}")

            try:
                response_data = response.json()
            except ValueError:
                # Body is not valid JSON (JSONDecodeError and
                # UnicodeDecodeError are both ValueError subclasses):
                # hand the raw text back to the caller instead.
                response_text = response.text
                logger.info(f"Target API non-streaming response text: {response_text}")
                return response_text, response.status_code

            logger.info(f"Target API non-streaming response data: {response_data}")
            return response_data, response.status_code

        except httpx.TimeoutException as exc:
            logger.error("Request to target API timed out")
            raise HTTPException(status_code=504, detail="Request to target API timed out") from exc
        except httpx.RequestError as exc:
            logger.error(f"Error making non-streaming request to target API: {str(exc)}")
            raise HTTPException(status_code=502, detail=f"Error contacting target API: {str(exc)}") from exc
        except Exception as exc:
            logger.error(f"Unknown error making non-streaming request to target API: {str(exc)}")
            raise HTTPException(status_code=500, detail=f"Internal proxy error: {str(exc)}") from exc
|
|
||
|
|
||
async def forward_streaming_request(
    body: dict[str, Any],
    headers: dict[str, str],
    target_url: str,
    timeout: float = 120.0,
) -> StreamingResponse:
    """Forward a streaming request to the target API and relay the body as it arrives.

    The upstream response is opened in streaming mode, so chunks are passed to
    the caller as they are received rather than after the whole body has been
    buffered. The HTTP client and the upstream response stay open until the
    returned response body has been fully iterated (or iteration is aborted),
    at which point both are closed.

    Args:
        body: JSON-serializable payload sent as the POST body.
        headers: Headers sent with the upstream request.
        target_url: URL of the upstream endpoint.
        timeout: httpx timeout, in seconds, configured on the upstream client.

    Returns:
        A ``StreamingResponse`` carrying the upstream status code and content
        type, whose body yields the upstream bytes.

    Raises:
        HTTPException: 504 when opening the upstream call times out, 502 on
            any other httpx request error, 500 on any other failure. Errors
            that happen after streaming has started cannot be mapped to a
            status code and propagate from the body iterator instead.
    """
    # The client must outlive this function: an ``async with`` here would
    # close the connection before the response body is consumed.
    client = httpx.AsyncClient(timeout=timeout)
    try:
        logger.info(f"Forwarding streaming request body: {body}")
        # Only header names are logged, never values, so credentials
        # do not end up in the log.
        header_names = (
            ["Authorization" if name.lower() == "authorization" else name for name in headers]
            if headers
            else "No headers"
        )
        logger.info(f"Forwarding headers: {header_names}")

        upstream_request = client.build_request("POST", target_url, json=body, headers=headers)
        # stream=True returns as soon as the response headers arrive,
        # leaving the body to be read incrementally.
        response = await client.send(upstream_request, stream=True)
    except httpx.TimeoutException as exc:
        await client.aclose()
        logger.error("Request to target API timed out")
        raise HTTPException(status_code=504, detail="Request to target API timed out") from exc
    except httpx.RequestError as exc:
        await client.aclose()
        logger.error(f"Error making streaming request to target API: {str(exc)}")
        raise HTTPException(status_code=502, detail=f"Error contacting target API: {str(exc)}") from exc
    except Exception as exc:
        await client.aclose()
        logger.error(f"Unknown error making streaming request to target API: {str(exc)}")
        raise HTTPException(status_code=500, detail=f"Internal proxy error: {str(exc)}") from exc

    logger.info(f"Target API streaming response status: {response.status_code}")

    async def generate():
        # Relay upstream chunks and release the connection once the
        # consumer is done, whether it finished normally or not.
        try:
            async for chunk in response.aiter_bytes():
                yield chunk
        finally:
            try:
                await response.aclose()
            finally:
                await client.aclose()

    return StreamingResponse(
        generate(),
        status_code=response.status_code,
        media_type=response.headers.get("content-type"),
    )
|
|
||
|
|
||
@proxy_router.post("/v1/chat/completions")
async def chat_completions(body: dict[str, Any], request: Request):
    """Proxy a chat-completions request to ``PROXY_TARGET_URL``.

    Incoming headers are forwarded except those httpx must compute itself for
    the outgoing request. When the body has a truthy ``stream`` field the
    upstream response is relayed as a stream; otherwise the upstream payload
    is returned, with any non-200 upstream status code preserved.

    Args:
        body: Parsed JSON request body.
        request: Incoming request, used for its headers.

    Returns:
        The streaming response, the upstream payload (status 200), or a
        ``JSONResponse`` carrying the upstream payload and status code.
    """
    # These headers describe the incoming request's body and host; httpx
    # sets correct values for the re-serialized outgoing request.
    skipped_headers = {"content-length", "content-type", "host", "transfer-encoding"}
    forwarded_headers = {
        key: value for key, value in request.headers.items() if key.lower() not in skipped_headers
    }

    logger.info(f"Received request at proxy endpoint with body: {body}")

    # Every request goes to the single configured upstream.
    target_url = PROXY_TARGET_URL

    if body.get("stream", False):
        return await forward_streaming_request(body, forwarded_headers, target_url)

    response_data, status_code = await forward_non_streaming_request(body, forwarded_headers, target_url)
    if status_code == 200:
        return response_data
    # Preserve the upstream error status instead of masking it as 200.
    return JSONResponse(content=response_data, status_code=status_code)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这里至少得是个map<model_name, base_url>。一个agent可能得调用多个模型,不同的模型可能是不同的服务商。