34 changes: 33 additions & 1 deletion api/app/core/models/base.py
@@ -67,6 +67,35 @@ def get_model_params(cls, config: RedBearModelConfig) -> Dict[str, Any]:
**config.extra_params
}

if provider == ModelProvider.MINIMAX:
# MiniMax uses the OpenAI-compatible mode; set a default base_url and clamp the temperature
import httpx
if not config.base_url:
config.base_url = "https://api.minimax.io/v1"
timeout_config = httpx.Timeout(
timeout=config.timeout,
connect=60.0,
read=config.timeout,
write=60.0,
pool=10.0,
)
# MiniMax's temperature range is (0.0, 1.0], so clamp the value
extra = dict(config.extra_params)
if "temperature" in extra:
temp = extra["temperature"]
if temp <= 0:
extra["temperature"] = 0.01
elif temp > 1.0:
extra["temperature"] = 1.0
return {
"model": config.model_name,
"base_url": config.base_url,
"api_key": config.api_key,
"timeout": timeout_config,
"max_retries": config.max_retries,
**extra
}

if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK, ModelProvider.OLLAMA]:
# Use an httpx.Timeout object for detailed timeout configuration
# so that connect and read timeouts can be controlled separately
@@ -165,6 +194,9 @@ def get_provider_llm_class(config: RedBearModelConfig, type: ModelType = ModelTy
return OpenAI
elif type == ModelType.CHAT:
return ChatOpenAI
elif provider == ModelProvider.MINIMAX:
# MiniMax uses the OpenAI-compatible API, so always return ChatOpenAI
return ChatOpenAI
elif provider == ModelProvider.DASHSCOPE:
return ChatTongyi
elif provider == ModelProvider.OLLAMA:
@@ -178,7 +210,7 @@ def get_provider_llm_class(config: RedBearModelConfig, type: ModelType = ModelTy
def get_provider_embedding_class(provider: str) -> type[Embeddings]:
"""根据模型提供商获取对应的模型类"""
provider = provider.lower()
if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK]:
if provider in [ModelProvider.OPENAI, ModelProvider.XINFERENCE, ModelProvider.GPUSTACK, ModelProvider.MINIMAX]:
from langchain_openai import OpenAIEmbeddings
return OpenAIEmbeddings
elif provider == ModelProvider.DASHSCOPE:
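Note: as a quick illustration of the new MiniMax branch above, the sketch below reproduces the clamping rule and the params dict it yields. SimpleNamespace stands in for RedBearModelConfig (not shown in this diff), and all field values are placeholders:

from types import SimpleNamespace

# Hypothetical stand-in for RedBearModelConfig (illustration only)
config = SimpleNamespace(
    model_name="MiniMax-M2.7",
    base_url=None,
    api_key="YOUR_MINIMAX_API_KEY",  # placeholder
    max_retries=3,
    extra_params={"temperature": 0.0},
)

if not config.base_url:
    config.base_url = "https://api.minimax.io/v1"

# Clamp temperature into MiniMax's supported (0.0, 1.0] range
extra = dict(config.extra_params)
if "temperature" in extra:
    if extra["temperature"] <= 0:
        extra["temperature"] = 0.01
    elif extra["temperature"] > 1.0:
        extra["temperature"] = 1.0

params = {"model": config.model_name, "base_url": config.base_url,
          "api_key": config.api_key, "max_retries": config.max_retries, **extra}
print(params["temperature"])  # 0.01 — a zero temperature is lifted to the open-interval floor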
1 change: 1 addition & 0 deletions api/app/models/models_model.py
@@ -41,6 +41,7 @@ class ModelProvider(StrEnum):
# ZHIPU = "zhipu"
# MOONSHOT = "moonshot"
# DEEPSEEK = "deepseek"
MINIMAX = "minimax"
OLLAMA = "ollama"
XINFERENCE = "xinference"
GPUSTACK = "gpustack"
107 changes: 91 additions & 16 deletions api/app/services/llm_client.py
@@ -226,55 +226,55 @@ async def chat(self, prompt: str, **kwargs) -> str:

class MockLLMClient(BaseLLMClient):
"""模拟 LLM 客户端(用于测试)"""

def __init__(self):
"""初始化模拟客户端"""
self.call_count = 0

async def chat(self, prompt: str, **kwargs) -> str:
"""发送聊天请求(返回模拟结果)"""
self.call_count += 1

logger.info(f"模拟 LLM 调用 (第 {self.call_count} 次)")

# Simple rule-based matching
prompt_lower = prompt.lower()

if "数学" in prompt_lower or "方程" in prompt_lower or "计算" in prompt_lower:
return json.dumps({
"agent_id": "math-agent",
"confidence": 0.9,
"reason": "消息包含数学相关内容"
}, ensure_ascii=False)

elif "化学" in prompt_lower or "反应" in prompt_lower or "元素" in prompt_lower:
return json.dumps({
"agent_id": "chemistry-agent",
"confidence": 0.85,
"reason": "消息包含化学相关内容"
}, ensure_ascii=False)

elif "物理" in prompt_lower or "力" in prompt_lower or "速度" in prompt_lower:
return json.dumps({
"agent_id": "physics-agent",
"confidence": 0.88,
"reason": "消息包含物理相关内容"
}, ensure_ascii=False)

elif "语文" in prompt_lower or "古诗" in prompt_lower or "作文" in prompt_lower:
return json.dumps({
"agent_id": "chinese-agent",
"confidence": 0.87,
"reason": "消息包含语文相关内容"
}, ensure_ascii=False)

elif "英语" in prompt_lower or "单词" in prompt_lower or "语法" in prompt_lower:
return json.dumps({
"agent_id": "english-agent",
"confidence": 0.86,
"reason": "消息包含英语相关内容"
}, ensure_ascii=False)

else:
return json.dumps({
"agent_id": "math-agent",
@@ -283,6 +283,78 @@ async def chat(self, prompt: str, **kwargs) -> str:
}, ensure_ascii=False)


class MiniMaxClient(BaseLLMClient):
"""MiniMax LLM 客户端(通过 OpenAI 兼容 API)"""

def __init__(
self,
api_key: Optional[str] = None,
model: str = "MiniMax-M2.7",
base_url: str = "https://api.minimax.io/v1"
):
"""初始化 MiniMax 客户端

Args:
api_key: API key
model: Model name (MiniMax-M2.7, MiniMax-M2.7-highspeed)
base_url: API base URL
"""
self.api_key = api_key or os.getenv("MINIMAX_API_KEY")
self.model = model
self.base_url = base_url

if not self.api_key:
raise ValueError("MiniMax API key 未配置,请设置 MINIMAX_API_KEY 环境变量")

try:
from openai import AsyncOpenAI
self.client = AsyncOpenAI(
api_key=self.api_key,
base_url=self.base_url
)
except ImportError:
raise ImportError("请安装 openai 库: pip install openai")

@staticmethod
def _clamp_temperature(temperature: float) -> float:
"""钳制温度值到 MiniMax 支持的范围 (0.0, 1.0]"""
if temperature <= 0:
return 0.01
if temperature > 1.0:
return 1.0
return temperature
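# Illustrative examples: _clamp_temperature(0) -> 0.01,
# _clamp_temperature(1.5) -> 1.0, _clamp_temperature(0.7) -> 0.7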

async def chat(self, prompt: str, **kwargs) -> str:
"""发送聊天请求

Args:
prompt: 提示词
**kwargs: 其他参数(temperature, max_tokens 等)

Returns:
LLM 响应文本
"""
try:
temperature = self._clamp_temperature(kwargs.get("temperature", 0.3))
response = await self.client.chat.completions.create(
model=self.model,
messages=[{"role": "user", "content": prompt}],
temperature=temperature,
max_tokens=kwargs.get("max_tokens", 500)
)

content = response.choices[0].message.content
# Strip any <think> tags that MiniMax M2.7 may emit
if content and "<think>" in content:
import re
content = re.sub(r"<think>.*?</think>\s*", "", content, flags=re.DOTALL)
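# e.g. "<think>reasoning</think>\nAnswer" -> "Answer" (illustrative)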
return content

except Exception as e:
logger.error(f"MiniMax API 调用失败: {str(e)}")
raise


class LLMClientFactory:
"""LLM 客户端工厂"""

@@ -292,9 +364,9 @@ def create(
**kwargs
) -> BaseLLMClient:
"""创建 LLM 客户端

Args:
provider: Provider name (openai, azure, anthropic, local, mock)
provider: Provider name (openai, azure, anthropic, minimax, local, mock)
**kwargs: Client configuration parameters

Returns:
@@ -304,16 +376,19 @@

if provider == "openai":
return OpenAIClient(**kwargs)

elif provider == "azure":
return AzureOpenAIClient(**kwargs)

elif provider == "anthropic":
return AnthropicClient(**kwargs)

elif provider == "minimax":
return MiniMaxClient(**kwargs)

elif provider == "local":
return LocalLLMClient(**kwargs)

elif provider == "mock":
return MockLLMClient()

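Note: a minimal usage sketch for the new client path, assuming MINIMAX_API_KEY is set in the environment and the api package is importable; the model name mirrors the integration tests below and is an assumption, not a required default:

import asyncio
from app.services.llm_client import LLMClientFactory

async def main():
    # The factory routes "minimax" to the MiniMaxClient added above
    client = LLMClientFactory.create("minimax", model="MiniMax-M2.7-highspeed")
    # temperature=0 is clamped to 0.01 by _clamp_temperature
    reply = await client.chat("Say hi.", temperature=0, max_tokens=20)
    print(reply)

asyncio.run(main())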
5 changes: 4 additions & 1 deletion api/env.example
@@ -51,7 +51,10 @@ ELASTICSEARCH_RETRY_ON_TIMEOUT=
ELASTICSEARCH_MAX_RETRIES=

# xinference configuration
XINFERENCE_URL=

# MiniMax configuration
MINIMAX_API_KEY=

# LangSmith configuration
LANGCHAIN_TRACING_V2=
79 changes: 79 additions & 0 deletions api/tests/test_minimax_integration.py
@@ -0,0 +1,79 @@
# -*- coding: UTF-8 -*-
"""Integration tests for MiniMax LLM provider.

These tests verify end-to-end MiniMax integration with actual API calls.
They require a valid MINIMAX_API_KEY environment variable and are skipped
when the key is not available.

Usage:
cd api && MINIMAX_API_KEY=your-key python -m pytest tests/test_minimax_integration.py -v
"""

import os
import sys

import pytest

API_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if API_DIR not in sys.path:
sys.path.insert(0, API_DIR)

MINIMAX_API_KEY = os.getenv("MINIMAX_API_KEY")
SKIP_REASON = "MINIMAX_API_KEY not set; skipping MiniMax integration tests"


@pytest.mark.skipif(not MINIMAX_API_KEY, reason=SKIP_REASON)
class TestMiniMaxClientIntegration:
"""Integration tests for MiniMaxClient in services layer."""

@pytest.mark.asyncio
async def test_minimax_client_chat(self):
"""Test MiniMaxClient.chat() with real API."""
from app.services.llm_client import MiniMaxClient

client = MiniMaxClient(
api_key=MINIMAX_API_KEY,
model="MiniMax-M2.7-highspeed"
)
result = await client.chat(
"Reply with exactly the word 'pong'. Do not include any reasoning.",
temperature=0.1,
max_tokens=50
)
assert result is not None
# Result may be empty if model returns only think tags;
# the important thing is no exception
assert isinstance(result, str)

@pytest.mark.asyncio
async def test_minimax_client_factory(self):
"""Test LLMClientFactory creates working MiniMaxClient."""
from app.services.llm_client import LLMClientFactory

client = LLMClientFactory.create(
"minimax",
api_key=MINIMAX_API_KEY,
model="MiniMax-M2.7-highspeed"
)
result = await client.chat(
"Reply with the number 42. Do not include any reasoning.",
temperature=0.1,
max_tokens=50
)
assert result is not None

@pytest.mark.asyncio
async def test_minimax_client_temperature_edge(self):
"""Test that temperature=0 works (clamped to 0.01)."""
from app.services.llm_client import MiniMaxClient

client = MiniMaxClient(
api_key=MINIMAX_API_KEY,
model="MiniMax-M2.7-highspeed"
)
result = await client.chat(
"Say hi. Do not include any reasoning.",
temperature=0,
max_tokens=50
)
assert result is not None