diff --git a/.env.example b/.env.example index b854b653..79671e72 100644 --- a/.env.example +++ b/.env.example @@ -33,7 +33,9 @@ KIMI_BASE_URL= KIMI_MODELS= MINIMAX_API_KEY= -MINIMAX_BASE_URL= +# MiniMax Anthropic-compatible endpoint for the built-in Anthropic SDK integration +MINIMAX_BASE_URL=https://api.minimaxi.com/anthropic/v1 +# Example: MiniMax-M2.7,MiniMax-M2.7-highspeed,MiniMax-M2.5,MiniMax-M2.1 MINIMAX_MODELS= GLM_API_KEY= @@ -66,6 +68,9 @@ TTS_GLM_BASE_URL= TTS_QWEN_API_KEY= TTS_QWEN_BASE_URL= +TTS_MINIMAX_API_KEY= +# MiniMax TTS endpoint (speech-2.8 / 2.6 / 02 / 01 series) +TTS_MINIMAX_BASE_URL=https://api.minimaxi.com TTS_ELEVENLABS_API_KEY= TTS_ELEVENLABS_BASE_URL= @@ -96,6 +101,10 @@ IMAGE_QWEN_IMAGE_BASE_URL= IMAGE_NANO_BANANA_API_KEY= IMAGE_NANO_BANANA_BASE_URL= +IMAGE_MINIMAX_API_KEY= +# Example models: image-01, image-01-live +IMAGE_MINIMAX_BASE_URL=https://api.minimaxi.com + IMAGE_GROK_API_KEY= IMAGE_GROK_BASE_URL= @@ -113,6 +122,10 @@ VIDEO_VEO_BASE_URL= VIDEO_SORA_API_KEY= VIDEO_SORA_BASE_URL= +VIDEO_MINIMAX_API_KEY= +# Example models: MiniMax-Hailuo-2.3, MiniMax-Hailuo-2.3-Fast, MiniMax-Hailuo-02 +VIDEO_MINIMAX_BASE_URL=https://api.minimaxi.com + VIDEO_GROK_API_KEY= VIDEO_GROK_BASE_URL= @@ -132,6 +145,7 @@ TAVILY_API_KEY= # Optional server-side default model for API routes like /api/generate-classroom # Example: anthropic:claude-3-5-haiku-20241022 or google:gemini-3-flash-preview +# MiniMax example: minimax:MiniMax-M2.7-highspeed DEFAULT_MODEL= # LOG_LEVEL=info diff --git a/README-zh.md b/README-zh.md index 56621607..94b0d683 100644 --- a/README-zh.md +++ b/README-zh.md @@ -114,11 +114,30 @@ providers: apiKey: sk-ant-... ``` -支持的服务商:**OpenAI**、**Anthropic**、**Google Gemini**、**DeepSeek**、**Grok (xAI)** 以及任何兼容 OpenAI API 的服务。 +支持的服务商:**OpenAI**、**Anthropic**、**Google Gemini**、**DeepSeek**、**MiniMax**、**Grok (xAI)** 以及任何兼容 OpenAI API 的服务。 + +MiniMax 快速示例: + +```env +MINIMAX_API_KEY=... +MINIMAX_BASE_URL=https://api.minimaxi.com/anthropic/v1 +DEFAULT_MODEL=minimax:MiniMax-M2.7-highspeed + +TTS_MINIMAX_API_KEY=... +TTS_MINIMAX_BASE_URL=https://api.minimaxi.com + +IMAGE_MINIMAX_API_KEY=... +IMAGE_MINIMAX_BASE_URL=https://api.minimaxi.com + +VIDEO_MINIMAX_API_KEY=... +VIDEO_MINIMAX_BASE_URL=https://api.minimaxi.com +``` > **推荐模型:** **Gemini 3 Flash** — 效果与速度的最佳平衡。追求最高质量可选 **Gemini 3.1 Pro**(速度较慢)。 > > 如果希望 OpenMAIC 服务端默认走 Gemini,还需要额外设置 `DEFAULT_MODEL=google:gemini-3-flash-preview`。 +> +> 如果希望默认走 MiniMax,可设置 `DEFAULT_MODEL=minimax:MiniMax-M2.7-highspeed`。 ### 3. 启动 diff --git a/README.md b/README.md index 1065d3f9..a0ed7047 100644 --- a/README.md +++ b/README.md @@ -114,11 +114,30 @@ providers: apiKey: sk-ant-... ``` -Supported providers: **OpenAI**, **Anthropic**, **Google Gemini**, **DeepSeek**, **Grok (xAI)**, and any OpenAI-compatible API. +Supported providers: **OpenAI**, **Anthropic**, **Google Gemini**, **DeepSeek**, **MiniMax**, **Grok (xAI)**, and any OpenAI-compatible API. + +MiniMax quick examples: + +```env +MINIMAX_API_KEY=... +MINIMAX_BASE_URL=https://api.minimaxi.com/anthropic/v1 +DEFAULT_MODEL=minimax:MiniMax-M2.7-highspeed + +TTS_MINIMAX_API_KEY=... +TTS_MINIMAX_BASE_URL=https://api.minimaxi.com + +IMAGE_MINIMAX_API_KEY=... +IMAGE_MINIMAX_BASE_URL=https://api.minimaxi.com + +VIDEO_MINIMAX_API_KEY=... +VIDEO_MINIMAX_BASE_URL=https://api.minimaxi.com +``` > **Recommended model:** **Gemini 3 Flash** — best balance of quality and speed. For highest quality (at slower speed), try **Gemini 3.1 Pro**. > > If you want OpenMAIC server APIs to use Gemini by default, also set `DEFAULT_MODEL=google:gemini-3-flash-preview`. +> +> If you want to use MiniMax as the default server model, set `DEFAULT_MODEL=minimax:MiniMax-M2.7-highspeed`. ### 3. Run @@ -483,3 +502,4 @@ If you find OpenMAIC useful in your research, please consider citing: ## 📄 License This project is licensed under the [GNU Affero General Public License v3.0](LICENSE). + diff --git a/app/api/generate/tts/route.ts b/app/api/generate/tts/route.ts index 542f105b..a4bf25e1 100644 --- a/app/api/generate/tts/route.ts +++ b/app/api/generate/tts/route.ts @@ -22,15 +22,17 @@ export const maxDuration = 30; export async function POST(req: NextRequest) { try { const body = await req.json(); - const { text, audioId, ttsProviderId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl } = body as { - text: string; - audioId: string; - ttsProviderId: TTSProviderId; - ttsVoice: string; - ttsSpeed?: number; - ttsApiKey?: string; - ttsBaseUrl?: string; - }; + const { text, audioId, ttsProviderId, ttsVoice, ttsSpeed, ttsModel, ttsApiKey, ttsBaseUrl } = + body as { + text: string; + audioId: string; + ttsProviderId: TTSProviderId; + ttsVoice: string; + ttsSpeed?: number; + ttsModel?: string; + ttsApiKey?: string; + ttsBaseUrl?: string; + }; // Validate required fields if (!text || !audioId || !ttsProviderId || !ttsVoice) { @@ -66,12 +68,13 @@ export async function POST(req: NextRequest) { providerId: ttsProviderId, voice: ttsVoice, speed: ttsSpeed ?? 1.0, + model: ttsModel, apiKey, baseUrl, }; log.info( - `Generating TTS: provider=${ttsProviderId}, voice=${ttsVoice}, audioId=${audioId}, textLen=${text.length}`, + `Generating TTS: provider=${ttsProviderId}, model=${ttsModel || 'default'}, voice=${ttsVoice}, audioId=${audioId}, textLen=${text.length}`, ); // Generate audio diff --git a/components/generation/media-popover.tsx b/components/generation/media-popover.tsx index 47e6ef98..5b3736ed 100644 --- a/components/generation/media-popover.tsx +++ b/components/generation/media-popover.tsx @@ -90,6 +90,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin 'qwen-tts': t('settings.providerQwenTTS'), 'doubao-tts': t('settings.providerDoubaoTTS'), 'elevenlabs-tts': t('settings.providerElevenLabsTTS'), + 'minimax-tts': t('settings.providerMiniMaxTTS'), 'browser-native-tts': t('settings.providerBrowserNativeTTS'), }; return names[providerId] || providerId; @@ -274,6 +275,7 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { speed: ttsSpeed, apiKey: providerConfig?.apiKey, baseUrl: providerConfig?.baseUrl, + model: providerConfig?.model, }); } catch (error) { const message = diff --git a/components/settings/audio-settings.tsx b/components/settings/audio-settings.tsx index 4ed045e4..097391f0 100644 --- a/components/settings/audio-settings.tsx +++ b/components/settings/audio-settings.tsx @@ -16,6 +16,7 @@ import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { TTS_PROVIDERS, + MINIMAX_TTS_MODELS, getTTSVoices, ASR_PROVIDERS, getASRSupportedLanguages, @@ -39,6 +40,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin 'qwen-tts': t('settings.providerQwenTTS'), 'doubao-tts': t('settings.providerDoubaoTTS'), 'elevenlabs-tts': t('settings.providerElevenLabsTTS'), + 'minimax-tts': t('settings.providerMiniMaxTTS'), 'browser-native-tts': t('settings.providerBrowserNativeTTS'), }; return names[providerId]; @@ -101,7 +103,7 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { const handleTTSProviderConfigChange = ( providerId: TTSProviderId, - config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean }>, + config: Partial<{ apiKey: string; baseUrl: string; model?: string; enabled: boolean }>, ) => { setTTSProviderConfig(providerId, config); onSave?.(); @@ -452,49 +454,76 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { {(ttsProvider.requiresApiKey || ttsProvidersConfig[ttsProviderId]?.isServerConfigured) && ( -
-
- -
+ <> +
+
+ +
+ + handleTTSProviderConfigChange(ttsProviderId, { + apiKey: e.target.value, + }) + } + className="font-mono text-sm pr-10" + /> + +
+
+ +
+ handleTTSProviderConfigChange(ttsProviderId, { - apiKey: e.target.value, + baseUrl: e.target.value, }) } - className="font-mono text-sm pr-10" + className="text-sm" /> -
-
- - - handleTTSProviderConfigChange(ttsProviderId, { - baseUrl: e.target.value, - }) - } - className="text-sm" - /> -
-
+ {ttsProviderId === 'minimax-tts' && ( +
+ + +
+ )} + )}
diff --git a/components/settings/index.tsx b/components/settings/index.tsx index 4c2397f8..d841b3ea 100644 --- a/components/settings/index.tsx +++ b/components/settings/index.tsx @@ -123,6 +123,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin 'qwen-tts': t('settings.providerQwenTTS'), 'doubao-tts': t('settings.providerDoubaoTTS'), 'elevenlabs-tts': t('settings.providerElevenLabsTTS'), + 'minimax-tts': t('settings.providerMiniMaxTTS'), 'browser-native-tts': t('settings.providerBrowserNativeTTS'), }; return names[providerId]; @@ -142,6 +143,7 @@ const IMAGE_PROVIDER_NAMES: Record = { seedream: 'providerSeedream', 'qwen-image': 'providerQwenImage', 'nano-banana': 'providerNanoBanana', + 'minimax-image': 'providerMiniMaxImage', 'grok-image': 'providerGrokImage', }; @@ -149,6 +151,7 @@ const IMAGE_PROVIDER_ICONS: Record = { seedream: '/logos/doubao.svg', 'qwen-image': '/logos/bailian.svg', 'nano-banana': '/logos/gemini.svg', + 'minimax-image': '/logos/minimax.svg', 'grok-image': '/logos/grok.svg', }; @@ -157,6 +160,7 @@ const VIDEO_PROVIDER_NAMES: Record = { kling: 'providerKling', veo: 'providerVeo', sora: 'providerSora', + 'minimax-video': 'providerMiniMaxVideo', 'grok-video': 'providerGrokVideo', }; @@ -165,6 +169,7 @@ const VIDEO_PROVIDER_ICONS: Record = { kling: '/logos/kling.svg', veo: '/logos/gemini.svg', sora: '/logos/openai.svg', + 'minimax-video': '/logos/minimax.svg', 'grok-video': '/logos/grok.svg', }; diff --git a/lib/ai/providers.ts b/lib/ai/providers.ts index 63d4a41a..c2dec546 100644 --- a/lib/ai/providers.ts +++ b/lib/ai/providers.ts @@ -15,7 +15,7 @@ * - https://api-docs.deepseek.com/quick_start/pricing * - https://platform.moonshot.cn/docs/pricing/chat * - https://platform.minimaxi.com/docs/guides/text-generation - * - https://platform.minimax.io/docs/api-reference/text-anthropic-api + * - https://platform.minimaxi.com/docs/api-reference/text-anthropic-api * - https://docs.bigmodel.cn/cn/guide/start/model-overview * - https://help.aliyun.com/zh/model-studio/models (Qwen/DashScope) * - https://siliconflow.cn/models @@ -669,8 +669,8 @@ export const PROVIDERS: Record = { icon: '/logos/minimax.svg', models: [ { - id: 'MiniMax-M2.5', - name: 'MiniMax M2.5', + id: 'MiniMax-M2', + name: 'MiniMax M2', contextWindow: 204800, outputWindow: 8192, capabilities: { streaming: true, tools: true, vision: false }, @@ -683,15 +683,36 @@ export const PROVIDERS: Record = { capabilities: { streaming: true, tools: true, vision: false }, }, { - id: 'MiniMax-M2.1-lightning', - name: 'MiniMax M2.1 Lightning', + id: 'MiniMax-M2.1-highspeed', + name: 'MiniMax M2.1 Highspeed', contextWindow: 204800, outputWindow: 8192, capabilities: { streaming: true, tools: true, vision: false }, }, { - id: 'MiniMax-M2', - name: 'MiniMax M2', + id: 'MiniMax-M2.5', + name: 'MiniMax M2.5', + contextWindow: 204800, + outputWindow: 8192, + capabilities: { streaming: true, tools: true, vision: false }, + }, + { + id: 'MiniMax-M2.5-highspeed', + name: 'MiniMax M2.5 Highspeed', + contextWindow: 204800, + outputWindow: 8192, + capabilities: { streaming: true, tools: true, vision: false }, + }, + { + id: 'MiniMax-M2.7', + name: 'MiniMax M2.7', + contextWindow: 204800, + outputWindow: 8192, + capabilities: { streaming: true, tools: true, vision: false }, + }, + { + id: 'MiniMax-M2.7-highspeed', + name: 'MiniMax M2.7 Highspeed', contextWindow: 204800, outputWindow: 8192, capabilities: { streaming: true, tools: true, vision: false }, @@ -1030,6 +1051,24 @@ function getCompatThinkingBodyParams( return undefined; } +function normalizeMiniMaxAnthropicBaseUrl( + providerId: ProviderId, + baseUrl?: string, +): string | undefined { + if (providerId !== 'minimax' || !baseUrl) { + return baseUrl; + } + + const trimmed = baseUrl.replace(/\/$/, ''); + if (trimmed.endsWith('/anthropic/v1')) { + return trimmed; + } + if (trimmed.endsWith('/anthropic')) { + return `${trimmed}/v1`; + } + return `${trimmed}/anthropic/v1`; +} + /** * Get a configured language model instance with its info * Accepts individual parameters for flexibility and security @@ -1059,7 +1098,10 @@ export function getModel(config: ModelConfig): ModelWithInfo { // Resolve base URL: explicit > provider default > SDK default const provider = getProviderConfig(config.providerId); - const effectiveBaseUrl = config.baseUrl || provider?.defaultBaseUrl || undefined; + const effectiveBaseUrl = normalizeMiniMaxAnthropicBaseUrl( + config.providerId, + config.baseUrl || provider?.defaultBaseUrl || undefined, + ); let model: LanguageModel; diff --git a/lib/audio/constants.ts b/lib/audio/constants.ts index fc86cab0..27545f64 100644 --- a/lib/audio/constants.ts +++ b/lib/audio/constants.ts @@ -39,6 +39,17 @@ import type { * Central registry for all TTS providers. * Keep in sync with TTSProviderId type definition. */ +export const MINIMAX_TTS_MODELS = [ + { id: 'speech-2.8-turbo', name: 'Speech 2.8 Turbo' }, + { id: 'speech-2.8-hd', name: 'Speech 2.8 HD' }, + { id: 'speech-2.6-turbo', name: 'Speech 2.6 Turbo' }, + { id: 'speech-2.6-hd', name: 'Speech 2.6 HD' }, + { id: 'speech-02-turbo', name: 'Speech 02 Turbo' }, + { id: 'speech-02-hd', name: 'Speech 02 HD' }, + { id: 'speech-01-turbo', name: 'Speech 01 Turbo' }, + { id: 'speech-01-hd', name: 'Speech 01 HD' }, +] as const; + export const TTS_PROVIDERS: Record = { 'openai-tts': { id: 'openai-tts', @@ -606,6 +617,52 @@ export const TTS_PROVIDERS: Record = { supportedFormats: ['mp3', 'wav', 'pcm'], }, + 'minimax-tts': { + id: 'minimax-tts', + name: 'MiniMax TTS', + requiresApiKey: true, + defaultBaseUrl: 'https://api.minimaxi.com', + icon: '/logos/minimax.svg', + voices: [ + { + id: 'Chinese (Mandarin)_Warm_Girl', + name: '普通话 - Warm Girl', + language: 'zh-CN', + gender: 'female', + }, + { + id: 'Chinese (Mandarin)_Radio_Host', + name: '普通话 - Radio Host', + language: 'zh-CN', + gender: 'female', + }, + { + id: 'Chinese (Mandarin)_Gentleman', + name: '普通话 - Gentleman', + language: 'zh-CN', + gender: 'male', + }, + { + id: 'Chinese (Mandarin)_Male_Announcer', + name: '普通话 - Male Announcer', + language: 'zh-CN', + gender: 'male', + }, + { + id: 'English_expressive_narrator', + name: 'English - Expressive Narrator', + language: 'en-US', + gender: 'neutral', + }, + ], + supportedFormats: ['mp3', 'wav', 'flac', 'pcm'], + speedRange: { + min: 0.5, + max: 2.0, + default: 1.0, + }, + }, + 'doubao-tts': { id: 'doubao-tts', name: '豆包 TTS 2.0(火山引擎)', @@ -704,7 +761,6 @@ export const TTS_PROVIDERS: Record = { supportedFormats: ['mp3'], speedRange: { min: 0.5, max: 2.0, default: 1.0 }, }, - 'elevenlabs-tts': { id: 'elevenlabs-tts', name: 'ElevenLabs TTS', @@ -996,6 +1052,7 @@ export const DEFAULT_TTS_VOICES: Record = { 'qwen-tts': 'Cherry', 'doubao-tts': 'zh_female_vv_uranus_bigtts', 'elevenlabs-tts': 'EXAVITQu4vr4xnSDxMaL', + 'minimax-tts': 'Chinese (Mandarin)_Warm_Girl', 'browser-native-tts': 'default', }; diff --git a/lib/audio/tts-providers.ts b/lib/audio/tts-providers.ts index 3131f771..18d81407 100644 --- a/lib/audio/tts-providers.ts +++ b/lib/audio/tts-providers.ts @@ -9,6 +9,7 @@ * - Azure TTS: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/text-to-speech * - GLM TTS: https://docs.bigmodel.cn/cn/guide/models/sound-and-video/glm-tts * - Qwen TTS: https://bailian.console.aliyun.com/ + * - MiniMax TTS: https://platform.minimaxi.com/docs/api-reference/speech-t2a-http * - Doubao TTS: https://www.volcengine.com/docs/6561/1257543 * - ElevenLabs TTS: https://elevenlabs.io/docs/api-reference/text-to-speech/convert * - Browser Native: Web Speech API (client-side only) @@ -149,9 +150,10 @@ export async function generateTTS( case 'qwen-tts': return await generateQwenTTS(config, text); + case 'minimax-tts': + return await generateMiniMaxTTS(config, text); case 'doubao-tts': return await generateDoubaoTTS(config, text); - case 'elevenlabs-tts': return await generateElevenLabsTTS(config, text); @@ -341,6 +343,69 @@ async function generateQwenTTS(config: TTSModelConfig, text: string): Promise { + const baseUrl = (config.baseUrl || TTS_PROVIDERS['minimax-tts'].defaultBaseUrl || '').replace( + /\/$/, + '', + ); + const response = await fetch(`${baseUrl}/v1/t2a_v2`, { + method: 'POST', + headers: { + Authorization: `Bearer ${config.apiKey}`, + 'Content-Type': 'application/json; charset=utf-8', + }, + body: JSON.stringify({ + model: config.model || 'speech-2.8-turbo', + text, + stream: false, + output_format: 'hex', + voice_setting: { + voice_id: config.voice, + speed: config.speed || 1.0, + vol: 1, + pitch: 0, + }, + audio_setting: { + sample_rate: 32000, + bitrate: 128000, + format: config.format || 'mp3', + channel: 1, + }, + language_boost: 'auto', + }), + }); + + if (!response.ok) { + const errorText = await response.text().catch(() => response.statusText); + throw new Error(`MiniMax TTS API error: ${errorText}`); + } + + const data = await response.json(); + const hexAudio = data?.data?.audio; + if (!hexAudio || typeof hexAudio !== 'string') { + throw new Error(`MiniMax TTS error: No audio returned. Response: ${JSON.stringify(data)}`); + } + + const cleanedHex = hexAudio.trim(); + if (cleanedHex.length % 2 !== 0) { + throw new Error('MiniMax TTS error: invalid hex audio payload length'); + } + + const audio = new Uint8Array( + cleanedHex.match(/.{1,2}/g)?.map((byte: string) => parseInt(byte, 16)) || [], + ); + return { + audio, + format: data?.extra_info?.audio_format || config.format || 'mp3', + }; +} + /** * ElevenLabs TTS implementation (direct API call with voice-specific endpoint) */ @@ -412,6 +477,7 @@ export async function getCurrentTTSConfig(): Promise { providerId: ttsProviderId, apiKey: providerConfig?.apiKey, baseUrl: providerConfig?.baseUrl, + model: providerConfig?.model, voice: ttsVoice, speed: ttsSpeed, }; diff --git a/lib/audio/types.ts b/lib/audio/types.ts index 4860a1b4..b4bacf94 100644 --- a/lib/audio/types.ts +++ b/lib/audio/types.ts @@ -85,6 +85,7 @@ export type TTSProviderId = | 'qwen-tts' | 'doubao-tts' | 'elevenlabs-tts' + | 'minimax-tts' | 'browser-native-tts'; // Add new TTS providers below (uncomment and modify): // | 'fish-audio-tts' @@ -128,6 +129,7 @@ export interface TTSModelConfig { providerId: TTSProviderId; apiKey?: string; baseUrl?: string; + model?: string; voice: string; speed?: number; format?: string; diff --git a/lib/audio/use-tts-preview.ts b/lib/audio/use-tts-preview.ts index 86f640fb..e3b36b23 100644 --- a/lib/audio/use-tts-preview.ts +++ b/lib/audio/use-tts-preview.ts @@ -14,6 +14,7 @@ export interface TTSPreviewOptions { speed: number; apiKey?: string; baseUrl?: string; + model?: string; } /** @@ -100,6 +101,7 @@ export function useTTSPreview() { }; if (options.apiKey?.trim()) body.ttsApiKey = options.apiKey; if (options.baseUrl?.trim()) body.ttsBaseUrl = options.baseUrl; + if (options.model?.trim()) body.ttsModel = options.model; const res = await fetch('/api/generate/tts', { method: 'POST', diff --git a/lib/i18n/settings.ts b/lib/i18n/settings.ts index 41f4f036..08a5b44e 100644 --- a/lib/i18n/settings.ts +++ b/lib/i18n/settings.ts @@ -226,6 +226,7 @@ export const settingsZhCN = { providerQwenTTS: 'Qwen TTS(阿里云百炼)', providerDoubaoTTS: '豆包 TTS 2.0(火山引擎)', providerElevenLabsTTS: 'ElevenLabs TTS', + providerMiniMaxTTS: 'MiniMax TTS', providerBrowserNativeTTS: '浏览器原生 TTS', providerOpenAIWhisper: 'OpenAI ASR (gpt-4o-mini-transcribe)', providerBrowserNative: '浏览器原生 ASR', @@ -485,6 +486,7 @@ export const settingsZhCN = { providerSeedream: 'Seedream(字节豆包)', providerQwenImage: 'Qwen Image(阿里通义)', providerNanoBanana: 'Nano Banana(Gemini)', + providerMiniMaxImage: 'MiniMax 图像', providerGrokImage: 'Grok Image(xAI)', testImageGeneration: '测试图像生成', testImageConnectivity: '测试连接', @@ -507,6 +509,7 @@ export const settingsZhCN = { providerKling: '可灵(快手)', providerVeo: 'Veo(Google)', providerSora: 'Sora(OpenAI)', + providerMiniMaxVideo: 'MiniMax 视频', providerGrokVideo: 'Grok Video(xAI)', testVideoGeneration: '测试视频生成', testVideoConnectivity: '测试连接', @@ -819,6 +822,7 @@ export const settingsEnUS = { providerQwenTTS: 'Qwen TTS (Alibaba Cloud Bailian)', providerDoubaoTTS: 'Doubao TTS 2.0 (Volcengine)', providerElevenLabsTTS: 'ElevenLabs TTS', + providerMiniMaxTTS: 'MiniMax TTS', providerBrowserNativeTTS: 'Browser Native TTS', providerOpenAIWhisper: 'OpenAI ASR (gpt-4o-mini-transcribe)', providerBrowserNative: 'Browser Native ASR', @@ -1081,6 +1085,7 @@ export const settingsEnUS = { providerSeedream: 'Seedream (ByteDance)', providerQwenImage: 'Qwen Image (Alibaba)', providerNanoBanana: 'Nano Banana (Gemini)', + providerMiniMaxImage: 'MiniMax Image', providerGrokImage: 'Grok Image (xAI)', testImageGeneration: 'Test Image Generation', testImageConnectivity: 'Test Connection', @@ -1104,6 +1109,7 @@ export const settingsEnUS = { providerKling: 'Kling (Kuaishou)', providerVeo: 'Veo (Google)', providerSora: 'Sora (OpenAI)', + providerMiniMaxVideo: 'MiniMax Video', providerGrokVideo: 'Grok Video (xAI)', testVideoGeneration: 'Test Video Generation', testVideoConnectivity: 'Test Connection', diff --git a/lib/media/adapters/minimax-image-adapter.ts b/lib/media/adapters/minimax-image-adapter.ts new file mode 100644 index 00000000..92a64791 --- /dev/null +++ b/lib/media/adapters/minimax-image-adapter.ts @@ -0,0 +1,115 @@ +/** + * MiniMax Image Generation Adapter + * Supports: text-to-image with aspect ratio control + * API Docs: https://platform.minimaxi.com/docs/api-reference/image-generation-t2i + */ + +import type { + ImageGenerationConfig, + ImageGenerationOptions, + ImageGenerationResult, +} from '../types'; + +const BASE_URL = 'https://api.minimaxi.com'; + +export async function generateWithMiniMaxImage( + config: ImageGenerationConfig, + options: ImageGenerationOptions, +): Promise { + const baseUrl = (config.baseUrl || BASE_URL).replace(/\/$/, ''); + + const model = config.model || 'image-01'; + + const aspectRatio = options.aspectRatio || '1:1'; + + const response = await fetch(`${baseUrl}/v1/image_generation`, { + method: 'POST', + headers: { + Authorization: `Bearer ${config.apiKey}`, + 'Content-Type': 'application/json; charset=utf-8', + }, + body: JSON.stringify({ + model, + prompt: options.prompt, + negative_prompt: options.negativePrompt, + aspect_ratio: aspectRatio, + response_format: 'url', + n: 1, + prompt_optimizer: false, + }), + }); + + if (!response.ok) { + const errText = await response.text().catch(() => response.statusText); + throw new Error(`MiniMax Image API error: ${errText}`); + } + + const data = await response.json(); + + // Check for error response + if (data?.base_resp?.status_code !== 0 && data?.base_resp?.status_code !== undefined) { + const code = data.base_resp.status_code; + const msg = data.base_resp.status_msg || 'unknown error'; + throw new Error(`MiniMax Image API error ${code}: ${msg}`); + } + + const imageUrls = data?.data?.image_urls; + if (!imageUrls || imageUrls.length === 0) { + throw new Error(`MiniMax Image: no image URLs returned. Response: ${JSON.stringify(data)}`); + } + + const imageUrl = imageUrls[0]; + + // Determine dimensions from aspect ratio + let width = options.width || 1024; + let height = options.height || 1024; + if (!options.width && !options.height) { + const [w, h] = aspectRatio.split(':').map(Number); + if (w && h) { + if (w > h) { + width = 1024; + height = Math.round((1024 * h) / w); + } else { + height = 1024; + width = Math.round((1024 * w) / h); + } + } + } + + return { + url: imageUrl, + width, + height, + }; +} + +export async function testMiniMaxImageConnectivity( + config: ImageGenerationConfig, +): Promise<{ success: boolean; message: string }> { + try { + const baseUrl = (config.baseUrl || BASE_URL).replace(/\/$/, ''); + const response = await fetch(`${baseUrl}/v1/image_generation`, { + method: 'POST', + headers: { + Authorization: `Bearer ${config.apiKey}`, + 'Content-Type': 'application/json; charset=utf-8', + }, + body: JSON.stringify({ + model: 'image-01', + prompt: 'test', + aspect_ratio: '1:1', + n: 1, + }), + }); + + if (response.ok) { + return { success: true, message: 'MiniMax Image API connected' }; + } + + const errData = await response.json().catch(() => ({})); + const msg = errData?.base_resp?.status_msg || response.statusText; + return { success: false, message: `API error: ${msg}` }; + } catch (err) { + return { success: false, message: `Connection failed: ${(err as Error).message}` }; + } +} diff --git a/lib/media/adapters/minimax-video-adapter.ts b/lib/media/adapters/minimax-video-adapter.ts new file mode 100644 index 00000000..b9766ccb --- /dev/null +++ b/lib/media/adapters/minimax-video-adapter.ts @@ -0,0 +1,232 @@ +/** + * MiniMax Video Generation Adapter + * Supports: text-to-video with camera control commands + * API: POST /v1/video_generation (submit) + GET /v1/query/video_generation?task_id=xxx (poll) + * Docs: https://platform.minimaxi.com/docs/api-reference/video-generation-t2v + */ + +import type { + VideoGenerationConfig, + VideoGenerationOptions, + VideoGenerationResult, +} from '../types'; + +const BASE_URL = 'https://api.minimaxi.com'; +const POLL_INTERVAL_MS = 5000; +const MAX_POLL_ATTEMPTS = 120; // ~10 minutes max + +interface MiniMaxSubmitResponse { + task_id: string; + base_resp: { + status_code: number; + status_msg: string; + }; +} + +interface MiniMaxQueryResponse { + task_id: string; + status: 'Preparing' | 'Queueing' | 'Processing' | 'Success' | 'Fail'; + file_id?: string; + video_width?: number; + video_height?: number; + base_resp: { + status_code: number; + status_msg: string; + }; +} + +interface MiniMaxFileRetrieveResponse { + file?: { + file_id: string | number; + download_url?: string; + filename?: string; + }; + base_resp?: { + status_code: number; + status_msg: string; + }; +} + +async function submitTask( + config: VideoGenerationConfig, + options: VideoGenerationOptions, +): Promise { + const baseUrl = (config.baseUrl || BASE_URL).replace(/\/$/, ''); + + const model = config.model || 'MiniMax-Hailuo-2.3'; + const duration = options.duration || 6; + // Map OpenMAIC resolution to MiniMax format + const resolutionMap: Record = { + '720p': '720P', + '1080p': '1080P', + }; + const resolution = resolutionMap[options.resolution || ''] || '768P'; + + const response = await fetch(`${baseUrl}/v1/video_generation`, { + method: 'POST', + headers: { + Authorization: `Bearer ${config.apiKey}`, + 'Content-Type': 'application/json; charset=utf-8', + }, + body: JSON.stringify({ + model, + prompt: options.prompt, + duration, + resolution, + prompt_optimizer: false, + }), + }); + + if (!response.ok) { + const errText = await response.text().catch(() => response.statusText); + throw new Error(`MiniMax Video submit error: ${errText}`); + } + + const data: MiniMaxSubmitResponse = await response.json(); + + if (data.base_resp?.status_code !== 0) { + const code = data.base_resp?.status_code; + const msg = data.base_resp?.status_msg || 'unknown error'; + throw new Error(`MiniMax Video API error ${code}: ${msg}`); + } + + if (!data.task_id) { + throw new Error(`MiniMax Video: no task_id returned. Response: ${JSON.stringify(data)}`); + } + + return data.task_id; +} + +async function pollTaskStatus( + config: VideoGenerationConfig, + taskId: string, +): Promise { + const baseUrl = (config.baseUrl || BASE_URL).replace(/\/$/, ''); + const url = `${baseUrl}/v1/query/video_generation?task_id=${encodeURIComponent(taskId)}`; + + const response = await fetch(url, { + method: 'GET', + headers: { + Authorization: `Bearer ${config.apiKey}`, + }, + }); + + if (!response.ok) { + const errText = await response.text().catch(() => response.statusText); + throw new Error(`MiniMax Video poll error: ${errText}`); + } + + return response.json() as Promise; +} + +async function retrieveFileDownloadUrl( + config: VideoGenerationConfig, + fileId: string, +): Promise { + const baseUrl = (config.baseUrl || BASE_URL).replace(/\/$/, ''); + const url = `${baseUrl}/v1/files/retrieve?file_id=${encodeURIComponent(fileId)}`; + + const response = await fetch(url, { + method: 'GET', + headers: { + Authorization: `Bearer ${config.apiKey}`, + }, + }); + + if (!response.ok) { + const errText = await response.text().catch(() => response.statusText); + throw new Error(`MiniMax Video file retrieve error: ${errText}`); + } + + const data: MiniMaxFileRetrieveResponse = await response.json(); + if (data.base_resp?.status_code !== 0) { + const code = data.base_resp?.status_code; + const msg = data.base_resp?.status_msg || 'unknown error'; + throw new Error(`MiniMax Video file retrieve error ${code}: ${msg}`); + } + + const downloadUrl = data.file?.download_url; + if (!downloadUrl) { + throw new Error(`MiniMax Video: no download_url returned. Response: ${JSON.stringify(data)}`); + } + + return downloadUrl; +} + +export async function generateWithMiniMaxVideo( + config: VideoGenerationConfig, + options: VideoGenerationOptions, +): Promise { + // Step 1: Submit task + const taskId = await submitTask(config, options); + + // Step 2: Poll until complete + let lastStatus = ''; + let attempts = 0; + + while (attempts < MAX_POLL_ATTEMPTS) { + await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)); + + const result = await pollTaskStatus(config, taskId); + lastStatus = result.status; + + if (result.status === 'Success') { + if (!result.file_id) { + throw new Error(`MiniMax Video: task succeeded but no file_id returned`); + } + + const videoUrl = await retrieveFileDownloadUrl(config, result.file_id); + + return { + url: videoUrl, + width: result.video_width || 1920, + height: result.video_height || 1080, + duration: options.duration || 6, + }; + } + + if (result.status === 'Fail') { + throw new Error( + `MiniMax Video generation failed: ${result.base_resp?.status_msg || 'unknown'}`, + ); + } + + attempts++; + } + + throw new Error( + `MiniMax Video: timeout after ${MAX_POLL_ATTEMPTS} polls, last status: ${lastStatus}`, + ); +} + +export async function testMiniMaxVideoConnectivity( + config: VideoGenerationConfig, +): Promise<{ success: boolean; message: string }> { + try { + const baseUrl = (config.baseUrl || BASE_URL).replace(/\/$/, ''); + // Submit a minimal task and immediately check if it returns a task_id + const response = await fetch(`${baseUrl}/v1/video_generation`, { + method: 'POST', + headers: { + Authorization: `Bearer ${config.apiKey}`, + 'Content-Type': 'application/json; charset=utf-8', + }, + body: JSON.stringify({ + model: 'MiniMax-Hailuo-2.3', + prompt: 'test connectivity', + duration: 6, + resolution: '768P', + }), + }); + + if (response.ok) { + return { success: true, message: 'MiniMax Video API connected' }; + } + + const errData = await response.json().catch(() => ({})); + const msg = errData?.base_resp?.status_msg || response.statusText; + return { success: false, message: `API error: ${msg}` }; + } catch (err) { + return { success: false, message: `Connection failed: ${(err as Error).message}` }; + } +} diff --git a/lib/media/image-providers.ts b/lib/media/image-providers.ts index daa1f809..d40fa770 100644 --- a/lib/media/image-providers.ts +++ b/lib/media/image-providers.ts @@ -12,6 +12,10 @@ import type { import { generateWithSeedream, testSeedreamConnectivity } from './adapters/seedream-adapter'; import { generateWithQwenImage, testQwenImageConnectivity } from './adapters/qwen-image-adapter'; import { generateWithNanoBanana, testNanoBananaConnectivity } from './adapters/nano-banana-adapter'; +import { + generateWithMiniMaxImage, + testMiniMaxImageConnectivity, +} from './adapters/minimax-image-adapter'; import { generateWithGrokImage, testGrokImageConnectivity } from './adapters/grok-image-adapter'; export const IMAGE_PROVIDERS: Record = { @@ -67,6 +71,17 @@ export const IMAGE_PROVIDERS: Record = { ], supportedAspectRatios: ['16:9', '4:3', '1:1'], }, + 'minimax-image': { + id: 'minimax-image', + name: 'MiniMax Image', + requiresApiKey: true, + defaultBaseUrl: 'https://api.minimaxi.com', + models: [ + { id: 'image-01', name: 'Image 01' }, + { id: 'image-01-live', name: 'Image 01 Live' }, + ], + supportedAspectRatios: ['16:9', '4:3', '1:1', '9:16'], + }, 'grok-image': { id: 'grok-image', name: 'Grok Image (xAI)', @@ -90,6 +105,8 @@ export async function testImageConnectivity( return testQwenImageConnectivity(config); case 'nano-banana': return testNanoBananaConnectivity(config); + case 'minimax-image': + return testMiniMaxImageConnectivity(config); case 'grok-image': return testGrokImageConnectivity(config); default: @@ -111,6 +128,8 @@ export async function generateImage( return generateWithQwenImage(config, options); case 'nano-banana': return generateWithNanoBanana(config, options); + case 'minimax-image': + return generateWithMiniMaxImage(config, options); case 'grok-image': return generateWithGrokImage(config, options); default: diff --git a/lib/media/types.ts b/lib/media/types.ts index 6fecc8cf..13fbdb69 100644 --- a/lib/media/types.ts +++ b/lib/media/types.ts @@ -69,7 +69,12 @@ * Add new image providers here as union members. * Keep in sync with IMAGE_PROVIDERS registry in constants.ts */ -export type ImageProviderId = 'seedream' | 'qwen-image' | 'nano-banana' | 'grok-image'; +export type ImageProviderId = + | 'seedream' + | 'qwen-image' + | 'nano-banana' + | 'minimax-image' + | 'grok-image'; // Add new image providers below (uncomment and modify): // | 'dall-e' // | 'midjourney' @@ -178,7 +183,13 @@ export interface ImageGenerationResult { * Add new video providers here as union members. * Keep in sync with VIDEO_PROVIDERS registry in constants.ts */ -export type VideoProviderId = 'seedance' | 'kling' | 'veo' | 'sora' | 'grok-video'; +export type VideoProviderId = + | 'seedance' + | 'kling' + | 'veo' + | 'sora' + | 'minimax-video' + | 'grok-video'; // Add new video providers below (uncomment and modify): // | 'runway' // | 'pika' diff --git a/lib/media/video-providers.ts b/lib/media/video-providers.ts index bdad4dd0..6c2b5d0e 100644 --- a/lib/media/video-providers.ts +++ b/lib/media/video-providers.ts @@ -12,6 +12,10 @@ import type { import { generateWithSeedance, testSeedanceConnectivity } from './adapters/seedance-adapter'; import { generateWithKling, testKlingConnectivity } from './adapters/kling-adapter'; import { generateWithVeo, testVeoConnectivity } from './adapters/veo-adapter'; +import { + generateWithMiniMaxVideo, + testMiniMaxVideoConnectivity, +} from './adapters/minimax-video-adapter'; import { generateWithGrokVideo, testGrokVideoConnectivity } from './adapters/grok-video-adapter'; export const VIDEO_PROVIDERS: Record = { @@ -75,6 +79,23 @@ export const VIDEO_PROVIDERS: Record = { supportedAspectRatios: ['16:9', '1:1', '9:16'], maxDuration: 20, }, + 'minimax-video': { + id: 'minimax-video', + name: 'MiniMax Video', + requiresApiKey: true, + defaultBaseUrl: 'https://api.minimaxi.com', + models: [ + { id: 'MiniMax-Hailuo-2.3', name: 'Hailuo 2.3' }, + { id: 'MiniMax-Hailuo-2.3-Fast', name: 'Hailuo 2.3 Fast' }, + { id: 'MiniMax-Hailuo-02', name: 'Hailuo 02' }, + { id: 'T2V-01-Director', name: 'T2V-01 Director' }, + { id: 'T2V-01', name: 'T2V-01' }, + ], + supportedAspectRatios: ['16:9', '4:3', '1:1', '9:16'], + supportedDurations: [6, 10], + supportedResolutions: ['720p', '1080p'], + maxDuration: 10, + }, 'grok-video': { id: 'grok-video', name: 'Grok Video (xAI)', @@ -97,6 +118,8 @@ export async function testVideoConnectivity( return testKlingConnectivity(config); case 'veo': return testVeoConnectivity(config); + case 'minimax-video': + return testMiniMaxVideoConnectivity(config); case 'grok-video': return testGrokVideoConnectivity(config); default: @@ -162,6 +185,8 @@ export async function generateVideo( return generateWithKling(config, options); case 'veo': return generateWithVeo(config, options); + case 'minimax-video': + return generateWithMiniMaxVideo(config, options); case 'grok-video': return generateWithGrokVideo(config, options); default: diff --git a/lib/server/provider-config.ts b/lib/server/provider-config.ts index f7d18576..27afa141 100644 --- a/lib/server/provider-config.ts +++ b/lib/server/provider-config.ts @@ -58,6 +58,7 @@ const TTS_ENV_MAP: Record = { TTS_QWEN: 'qwen-tts', TTS_DOUBAO: 'doubao-tts', TTS_ELEVENLABS: 'elevenlabs-tts', + TTS_MINIMAX: 'minimax-tts', }; const ASR_ENV_MAP: Record = { @@ -74,6 +75,7 @@ const IMAGE_ENV_MAP: Record = { IMAGE_SEEDREAM: 'seedream', IMAGE_QWEN_IMAGE: 'qwen-image', IMAGE_NANO_BANANA: 'nano-banana', + IMAGE_MINIMAX: 'minimax-image', IMAGE_GROK: 'grok-image', }; @@ -82,6 +84,7 @@ const VIDEO_ENV_MAP: Record = { VIDEO_KLING: 'kling', VIDEO_VEO: 'veo', VIDEO_SORA: 'sora', + VIDEO_MINIMAX: 'minimax-video', VIDEO_GROK: 'grok-video', }; diff --git a/lib/store/settings.ts b/lib/store/settings.ts index 4788f8d4..7ec272a1 100644 --- a/lib/store/settings.ts +++ b/lib/store/settings.ts @@ -50,6 +50,7 @@ export interface SettingsState { { apiKey: string; baseUrl: string; + model?: string; enabled: boolean; isServerConfigured?: boolean; serverBaseUrl?: string; @@ -178,7 +179,7 @@ export interface SettingsState { setASRLanguage: (language: string) => void; setTTSProviderConfig: ( providerId: TTSProviderId, - config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean }>, + config: Partial<{ apiKey: string; baseUrl: string; model?: string; enabled: boolean }>, ) => void; setASRProviderConfig: ( providerId: ASRProviderId, @@ -269,8 +270,9 @@ const getDefaultAudioConfig = () => ({ 'qwen-tts': { apiKey: '', baseUrl: '', enabled: false }, 'doubao-tts': { apiKey: '', baseUrl: '', enabled: false }, 'elevenlabs-tts': { apiKey: '', baseUrl: '', enabled: false }, + 'minimax-tts': { apiKey: '', baseUrl: '', model: 'speech-2.8-turbo', enabled: false }, 'browser-native-tts': { apiKey: '', baseUrl: '', enabled: true }, - } as Record, + } as Record, asrProvidersConfig: { 'openai-whisper': { apiKey: '', baseUrl: '', enabled: true }, 'browser-native': { apiKey: '', baseUrl: '', enabled: true }, @@ -295,6 +297,7 @@ const getDefaultImageConfig = () => ({ seedream: { apiKey: '', baseUrl: '', enabled: false }, 'qwen-image': { apiKey: '', baseUrl: '', enabled: false }, 'nano-banana': { apiKey: '', baseUrl: '', enabled: false }, + 'minimax-image': { apiKey: '', baseUrl: '', enabled: false }, 'grok-image': { apiKey: '', baseUrl: '', enabled: false }, } as Record, }); @@ -308,6 +311,7 @@ const getDefaultVideoConfig = () => ({ kling: { apiKey: '', baseUrl: '', enabled: false }, veo: { apiKey: '', baseUrl: '', enabled: false }, sora: { apiKey: '', baseUrl: '', enabled: false }, + 'minimax-video': { apiKey: '', baseUrl: '', enabled: false }, 'grok-video': { apiKey: '', baseUrl: '', enabled: false }, } as Record, }); diff --git a/tests/ai/minimax-provider.test.ts b/tests/ai/minimax-provider.test.ts new file mode 100644 index 00000000..89ebc854 --- /dev/null +++ b/tests/ai/minimax-provider.test.ts @@ -0,0 +1,22 @@ +import { describe, expect, it } from 'vitest'; + +import { getProvider } from '@/lib/ai/providers'; + +describe('MiniMax provider defaults', () => { + it('uses the Anthropic-compatible v1 endpoint by default', () => { + expect(getProvider('minimax')?.defaultBaseUrl).toBe('https://api.minimaxi.com/anthropic/v1'); + }); + + it('matches the official Anthropic-compatible MiniMax model list', () => { + const modelIds = getProvider('minimax')?.models.map((model) => model.id) ?? []; + expect(modelIds).toEqual([ + 'MiniMax-M2', + 'MiniMax-M2.1', + 'MiniMax-M2.1-highspeed', + 'MiniMax-M2.5', + 'MiniMax-M2.5-highspeed', + 'MiniMax-M2.7', + 'MiniMax-M2.7-highspeed', + ]); + }); +}); diff --git a/tests/audio/minimax-tts-models.test.ts b/tests/audio/minimax-tts-models.test.ts new file mode 100644 index 00000000..d814e649 --- /dev/null +++ b/tests/audio/minimax-tts-models.test.ts @@ -0,0 +1,12 @@ +import { describe, expect, it } from 'vitest'; + +import { MINIMAX_TTS_MODELS } from '@/lib/audio/constants'; + +describe('MiniMax TTS model list', () => { + it('includes the official speech-01 models', () => { + const modelIds = MINIMAX_TTS_MODELS.map((model) => model.id); + + expect(modelIds).toContain('speech-01-turbo'); + expect(modelIds).toContain('speech-01-hd'); + }); +});