Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion integrations/cerebras/integration.definition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export default new IntegrationDefinition({
title: 'Cerebras',
description:
'Get access to a curated list of Cerebras models for content generation and chat completions within your bot.',
version: '7.0.1',
version: '8.0.0',
readme: 'hub.md',
icon: 'icon.svg',
entities: {
Expand Down
2 changes: 1 addition & 1 deletion integrations/cerebras/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"@botpress/client": "workspace:*",
"@botpress/common": "workspace:*",
"@botpress/sdk": "workspace:*",
"openai": "^4.86.1"
"openai": "^5.12.1"
},
"devDependencies": {
"@botpress/cli": "workspace:*",
Expand Down
37 changes: 32 additions & 5 deletions integrations/cerebras/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { llm } from '@botpress/common'
import { validateGptOssReasoningEffort } from '@botpress/common/src/llm/openai'
import OpenAI from 'openai'
import { DEFAULT_MODEL_ID, ModelId } from './schemas'
import * as bp from '.botpress'
Expand All @@ -9,15 +10,31 @@ const cerebrasClient = new OpenAI({
})

const languageModels: Record<ModelId, llm.ModelDetails> = {
// Reference: https://inference-docs.cerebras.ai/introduction
// Reference:
// https://inference-docs.cerebras.ai/models/overview
// https://www.cerebras.ai/pricing
'gpt-oss-120b': {
name: 'GPT-OSS 120B (Preview)',
description:
'gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.',
tags: ['preview', 'general-purpose', 'reasoning'],
input: {
costPer1MTokens: 0.25,
maxTokens: 131_000,
},
output: {
costPer1MTokens: 0.69,
maxTokens: 16_000,
},
},
'qwen-3-32b': {
name: 'Qwen3 32B',
description:
'Qwen3-32B is a world-class reasoning model with comparable quality to DeepSeek R1 while outperforming GPT-4.1 and Claude 3.7 Sonnet. It excels in code-gen, tool-calling, and advanced reasoning, making it an exceptional model for a wide range of production use cases. NOTE: This model always uses thinking tokens (reasoning) by default, but we have configured it to avoid reasoning (not guaranteed) if the `reasoningEffort` parameter is not set. If the `reasoningEffort` parameter is set, the model will use thinking tokens. The model currently only supports "high" reasoning effort so any other value will be ignored.',
tags: ['general-purpose', 'reasoning'],
input: {
costPer1MTokens: 0.4,
maxTokens: 16_000,
maxTokens: 128_000,
},
output: {
costPer1MTokens: 0.8,
Expand All @@ -31,7 +48,7 @@ const languageModels: Record<ModelId, llm.ModelDetails> = {
tags: ['general-purpose'],
input: {
costPer1MTokens: 0.65,
maxTokens: 16_000,
maxTokens: 32_000,
},
output: {
costPer1MTokens: 0.85,
Expand All @@ -45,7 +62,7 @@ const languageModels: Record<ModelId, llm.ModelDetails> = {
tags: ['low-cost', 'general-purpose'],
input: {
costPer1MTokens: 0.1,
maxTokens: 16_000,
maxTokens: 32_000,
},
output: {
costPer1MTokens: 0.1,
Expand All @@ -59,7 +76,7 @@ const languageModels: Record<ModelId, llm.ModelDetails> = {
'Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8B and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.',
input: {
costPer1MTokens: 0.85,
maxTokens: 16_000,
maxTokens: 128_000,
},
output: {
costPer1MTokens: 1.2,
Expand Down Expand Up @@ -98,6 +115,16 @@ export default new bp.Integration({
}
}

if (input.model?.id === 'gpt-oss-120b') {
request.reasoning_effort = validateGptOssReasoningEffort(input, logger)

// GPT-OSS models don't work well with a stop sequence, so we have to remove it from the request.
delete request.stop

// Reasoning models don't support temperature
delete request.temperature
}

return request
},
}
Expand Down
2 changes: 1 addition & 1 deletion integrations/cerebras/src/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { z } from '@botpress/sdk'
export const DEFAULT_MODEL_ID = 'llama3.1-8b'

export const modelId = z
.enum(['llama3.1-8b', 'llama3.3-70b', 'llama-4-scout-17b-16e-instruct', 'qwen-3-32b'])
.enum(['gpt-oss-120b', 'qwen-3-32b', 'llama-4-scout-17b-16e-instruct', 'llama3.1-8b', 'llama3.3-70b'])
.describe('Model to use for content generation')
.placeholder(DEFAULT_MODEL_ID)

Expand Down
2 changes: 1 addition & 1 deletion integrations/fireworks-ai/integration.definition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export default new IntegrationDefinition({
title: 'Fireworks AI',
description:
'Choose from curated Fireworks AI models for content generation, chat completions, and audio transcription.',
version: '8.0.1',
version: '9.0.0',
readme: 'hub.md',
icon: 'icon.svg',
entities: {
Expand Down
2 changes: 1 addition & 1 deletion integrations/fireworks-ai/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"@botpress/client": "workspace:*",
"@botpress/common": "workspace:*",
"@botpress/sdk": "workspace:*",
"openai": "^4.86.1"
"openai": "^5.12.1"
},
"devDependencies": {
"@botpress/cli": "workspace:*",
Expand Down
40 changes: 40 additions & 0 deletions integrations/fireworks-ai/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { llm, speechToText } from '@botpress/common'
import { validateGptOssReasoningEffort } from '@botpress/common/src/llm/openai'
import OpenAI from 'openai'
import { LanguageModelId, ImageModelId, SpeechToTextModelId } from './schemas'
import * as bp from '.botpress'
Expand All @@ -14,6 +15,34 @@ const DEFAULT_LANGUAGE_MODEL_ID: LanguageModelId = 'accounts/fireworks/models/ll
// https://fireworks.ai/models
// https://fireworks.ai/pricing
const languageModels: Record<LanguageModelId, llm.ModelDetails> = {
'accounts/fireworks/models/gpt-oss-20b': {
name: 'GPT-OSS 20B',
description:
'gpt-oss-20b is a compact, open-weight language model optimized for low latency. It shares the same training foundation and capabilities as the GPT-OSS 120B model, with faster responses and lower cost.',
tags: ['general-purpose', 'reasoning', 'low-cost'],
input: {
costPer1MTokens: 0.07,
maxTokens: 128_000,
},
output: {
costPer1MTokens: 0.3,
maxTokens: 16_000,
},
},
'accounts/fireworks/models/gpt-oss-120b': {
name: 'GPT-OSS 120B',
description:
'gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.',
tags: ['general-purpose', 'reasoning'],
input: {
costPer1MTokens: 0.15,
maxTokens: 128_000,
},
output: {
costPer1MTokens: 0.6,
maxTokens: 16_000,
},
},
'accounts/fireworks/models/deepseek-r1-0528': {
name: 'DeepSeek R1 0528',
description:
Expand Down Expand Up @@ -263,6 +292,17 @@ export default new bp.Integration({
content: systemPrompt.content,
})
}
} else if (
input.model?.id === 'accounts/fireworks/models/gpt-oss-20b' ||
input.model?.id === 'accounts/fireworks/models/gpt-oss-120b'
) {
request.reasoning_effort = validateGptOssReasoningEffort(input, logger)

// GPT-OSS models don't work well with a stop sequence, so we have to remove it from the request.
delete request.stop

// Reasoning models don't support temperature
delete request.temperature
}

return request
Expand Down
2 changes: 2 additions & 0 deletions integrations/fireworks-ai/src/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ export const languageModelId = z
'accounts/fireworks/models/mixtral-8x7b-instruct',
'accounts/fireworks/models/mythomax-l2-13b',
'accounts/fireworks/models/gemma2-9b-it',
'accounts/fireworks/models/gpt-oss-20b',
'accounts/fireworks/models/gpt-oss-120b',
])
.describe('Model to use for content generation')
.placeholder('accounts/fireworks/models/llama-v3p1-70b-instruct')
Expand Down
2 changes: 1 addition & 1 deletion integrations/groq/integration.definition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export default new IntegrationDefinition({
name: 'groq',
title: 'Groq',
description: 'Gain access to Groq models for content generation, chat responses, and audio transcription.',
version: '13.0.1',
version: '14.0.0',
readme: 'hub.md',
icon: 'icon.svg',
entities: {
Expand Down
2 changes: 1 addition & 1 deletion integrations/groq/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"@botpress/client": "workspace:*",
"@botpress/common": "workspace:*",
"@botpress/sdk": "workspace:*",
"openai": "^4.86.1"
"openai": "^5.12.1"
},
"devDependencies": {
"@botpress/cli": "workspace:*",
Expand Down
43 changes: 43 additions & 0 deletions integrations/groq/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { llm, speechToText } from '@botpress/common'
import { validateGptOssReasoningEffort } from '@botpress/common/src/llm/openai'
import OpenAI from 'openai'
import { ModelId, SpeechToTextModelId } from './schemas'
import * as bp from '.botpress'
Expand All @@ -12,6 +13,35 @@ const languageModels: Record<ModelId, llm.ModelDetails> = {
// Reference:
// https://console.groq.com/docs/models
// https://groq.com/pricing/
'openai/gpt-oss-20b': {
name: 'GPT-OSS 20B (Preview)',
description:
'gpt-oss-20b is a compact, open-weight language model optimized for low latency. It shares the same training foundation and capabilities as the GPT-OSS 120B model, with faster responses and lower cost.',
tags: ['preview', 'general-purpose', 'reasoning', 'low-cost'],
input: {
costPer1MTokens: 0.1,
maxTokens: 131_000,
},
output: {
costPer1MTokens: 0.5,
maxTokens: 32_000,
},
},
'openai/gpt-oss-120b': {
name: 'GPT-OSS 120B (Preview)',
description:
'gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.',
tags: ['preview', 'general-purpose', 'reasoning'],

input: {
costPer1MTokens: 0.15,
maxTokens: 131_000,
},
output: {
costPer1MTokens: 0.75,
maxTokens: 32_000,
},
},
'deepseek-r1-distill-llama-70b': {
name: 'DeepSeek R1-Distill Llama 3.3 70B (Preview)',
description:
Expand Down Expand Up @@ -180,6 +210,19 @@ export default new bp.Integration({
provider,
models: languageModels,
defaultModel: 'llama-3.3-70b-versatile',
overrideRequest: (request) => {
if (input.model?.id === 'openai/gpt-oss-20b' || input.model?.id === 'openai/gpt-oss-120b') {
request.reasoning_effort = validateGptOssReasoningEffort(input, logger)

// GPT-OSS models don't work well with a stop sequence, so we have to remove it from the request.
delete request.stop

// Reasoning models don't support temperature
delete request.temperature
}

return request
},
overrideResponse: (response) => {
if (input.model?.id === 'deepseek-r1-distill-llama-70b') {
for (const choice of response.choices) {
Expand Down
2 changes: 2 additions & 0 deletions integrations/groq/src/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import { z } from '@botpress/sdk'

export const modelId = z
.enum([
'openai/gpt-oss-20b',
'openai/gpt-oss-120b',
'deepseek-r1-distill-llama-70b',
'llama-3.3-70b-versatile',
'llama-3.2-1b-preview',
Expand Down
19 changes: 3 additions & 16 deletions integrations/openai/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { InvalidPayloadError } from '@botpress/client'
import { llm, speechToText, textToImage } from '@botpress/common'
import { validateOpenAIReasoningEffort } from '@botpress/common/src/llm/openai'
import crypto from 'crypto'
import { TextToSpeechPricePer1MCharacters } from 'integration.definition'
import OpenAI from 'openai'
Expand Down Expand Up @@ -346,27 +347,13 @@ export default new bp.Integration({
if (input.reasoningEffort === undefined && isGPT5) {
// GPT-5 is a hybrid model but it doesn't support optional reasoning, so if reasoning effort isn't specified we assume the user wants to use the least amount of reasoning possible (to reduce cost/latency).
request.reasoning_effort = 'minimal'
} else if (input.reasoningEffort === 'none') {
const acceptedValues = SupportedReasoningEfforts.map((x) => `"${x}"`)
.map((x, i) => (i === SupportedReasoningEfforts.length - 1 ? `or ${x}` : x))
.join(', ')
throw new InvalidPayloadError(
`Using "none" to disabling reasoning is not supported with OpenAI reasoning models, please use ${acceptedValues} instead or switch to a non-reasoning model`
)
} else if (SupportedReasoningEfforts.includes(input.reasoningEffort as any)) {
request.reasoning_effort = input.reasoningEffort as ChatCompletionReasoningEffort
} else {
request.reasoning_effort = 'medium'
logger
.forBot()
.info(
`Reasoning effort "${input.reasoningEffort}" is not supported by OpenAI, using "${request.reasoning_effort}" effort instead`
)
request.reasoning_effort = validateOpenAIReasoningEffort(input, logger)
}

if (isGPT5) {
// GPT-5 doesn't support stop sequences
request.stop = undefined
delete request.stop
}

// Reasoning models don't allow setting temperature
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
"test": "vitest --run",
"check:bplint": "turbo check:bplint",
"check:dep": "depsynky check --ignore-dev",
"check:sherif": "sherif -i zod -i axios -i query-string -i googleapis -i @linear/sdk",
"check:sherif": "sherif -i zod -i axios -i query-string -i googleapis -i @linear/sdk -i openai",
"check:format": "prettier --check .",
"check:eslint": "eslint ./ --max-warnings=0",
"check:oxlint": "oxlint -c .oxlintrc.json",
"check:lint": "pnpm check:bplint && pnpm check:oxlint && pnpm check:eslint",
"check:type": "turbo check:type",
"fix:dep": "depsynky sync --ignore-dev",
"fix:sherif": "sherif -i zod -i axios -i query-string -i googleapis -i @linear/sdk --fix",
"fix:sherif": "sherif -i zod -i axios -i query-string -i googleapis -i @linear/sdk -i openai --fix",
"fix:format": "prettier --write .",
"fix:oxlint": "oxlint -c .oxlintrc.json --fix --fix-suggestions",
"fix:lint": "eslint --fix ./",
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@botpress/cli",
"version": "4.16.1",
"version": "4.16.2",
"description": "Botpress CLI",
"scripts": {
"build": "pnpm run bundle && pnpm run template:gen",
Expand Down
4 changes: 4 additions & 0 deletions packages/cli/src/command-implementations/profile-commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,13 @@ export class ListProfilesCommand extends GlobalCommand<ListProfilesCommandDefini
export type UseProfileCommandDefinition = typeof commandDefinitions.profiles.subcommands.use
export class UseProfileCommand extends GlobalCommand<UseProfileCommandDefinition> {
public async run(): Promise<void> {
const logSuccess = (profileName: string) => this.logger.success(`Now using profile "${profileName}"`)

if (this.argv.profileToUse) {
const profile = await this.readProfileFromFS(this.argv.profileToUse)
await this.globalCache.set('activeProfile', this.argv.profileToUse)
await _updateGlobalCache({ globalCache: this.globalCache, profileName: this.argv.profileToUse, profile })
logSuccess(this.argv.profileToUse)
return
}
const profiles = await this.readProfilesFromFS()
Expand All @@ -63,6 +66,7 @@ export class UseProfileCommand extends GlobalCommand<UseProfileCommandDefinition
if (!profile) throw new errors.BotpressCLIError('The selected profile could not be read')
await this.globalCache.set('activeProfile', selectedProfile)
await _updateGlobalCache({ globalCache: this.globalCache, profileName: selectedProfile, profile })
logSuccess(selectedProfile)
}
}

Expand Down
Loading
Loading