feat: Log Custom Config File and Add Known Model Limits to Custom Endpoint (danny-avila#1657)

* refactor(custom): add all recognized models to maxTokensMap for custom endpoint

* feat(librechat.yaml): log the custom config file on initial load

* fix(OpenAIClient): pass endpointType/endpoint to `getModelMaxTokens` call
danny-avila committed Jan 27, 2024
1 parent c470147 commit f7f7f92
Showing 3 changed files with 27 additions and 21 deletions.
api/app/clients/OpenAIClient.js (2 additions & 1 deletion)
@@ -131,7 +131,8 @@ class OpenAIClient extends BaseClient {
     const { isChatGptModel } = this;
     this.isUnofficialChatGptModel =
       model.startsWith('text-chat') || model.startsWith('text-davinci-002-render');
-    this.maxContextTokens = getModelMaxTokens(model) ?? 4095; // 1 less than maximum
+    this.maxContextTokens =
+      getModelMaxTokens(model, this.options.endpointType ?? this.options.endpoint) ?? 4095; // 1 less than maximum
 
     if (this.shouldSummarize) {
       this.maxContextTokens = Math.floor(this.maxContextTokens / 2);
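
The second argument matters for custom endpoints: `this.options.endpoint` holds the user-defined endpoint name, while `this.options.endpointType` marks it as a custom endpoint, so the `??` fallback sends the right key into the token map. A minimal sketch of how such an endpoint-aware lookup can behave (the exact-match-then-partial-match logic here is an assumption, not a copy of `api/utils/tokens.js`):

// Sketch of an endpoint-aware max-token lookup (assumed behavior):
// try an exact model key first, then fall back to a partial match.
function getModelMaxTokens(modelName, endpoint = EModelEndpoint.openAI) {
  const tokensMap = maxTokensMap[endpoint];
  if (!tokensMap) {
    return undefined;
  }
  if (tokensMap[modelName] !== undefined) {
    return tokensMap[modelName];
  }
  // e.g. 'gpt-4-0125-preview' can resolve through a partial key like 'gpt-4'.
  const matchedKey = Object.keys(tokensMap).find((key) => modelName.includes(key));
  return matchedKey ? tokensMap[matchedKey] : undefined;
}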
api/server/services/Config/loadCustomConfig.js (2 additions & 1 deletion)
@@ -25,7 +25,8 @@ async function loadCustomConfig() {
     logger.error(`Invalid custom config file at ${configPath}`, result.error);
     return null;
   } else {
-    logger.info('Loaded custom config file');
+    logger.info('Loaded custom config file:');
+    logger.info(JSON.stringify(customConfig, null, 2));
   }
 
   if (customConfig.cache) {
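
For illustration, this is roughly what the new logging produces for a parsed config; the sample values and the `~/config` logger import are hypothetical, not taken from this commit:

const { logger } = require('~/config'); // assumed import path for the app logger

// Hypothetical result of parsing librechat.yaml:
const customConfig = {
  version: '1.0.0',
  cache: true,
  endpoints: {
    custom: [{ name: 'Mistral', baseURL: 'https://api.mistral.ai/v1' }],
  },
};

logger.info('Loaded custom config file:');
logger.info(JSON.stringify(customConfig, null, 2)); // pretty-printed with 2-space indent

Dumping the parsed object at startup makes it easy to verify which endpoints and models were actually read from librechat.yaml.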
api/utils/tokens.js (23 additions & 19 deletions)
@@ -57,28 +57,32 @@ const openAIModels = {
   'mistral-': 31990, // -10 from max
 };
 
+const googleModels = {
+  /* Max I/O is combined so we subtract the amount from max response tokens for actual total */
+  gemini: 32750, // -10 from max
+  'text-bison-32k': 32758, // -10 from max
+  'chat-bison-32k': 32758, // -10 from max
+  'code-bison-32k': 32758, // -10 from max
+  'codechat-bison-32k': 32758,
+  /* Codey, -5 from max: 6144 */
+  'code-': 6139,
+  'codechat-': 6139,
+  /* PaLM2, -5 from max: 8192 */
+  'text-': 8187,
+  'chat-': 8187,
+};
+
+const anthropicModels = {
+  'claude-2.1': 200000,
+  'claude-': 100000,
+};
+
 // Order is important here: by model series and context size (gpt-4 then gpt-3, ascending)
 const maxTokensMap = {
   [EModelEndpoint.openAI]: openAIModels,
-  [EModelEndpoint.custom]: openAIModels,
-  [EModelEndpoint.google]: {
-    /* Max I/O is combined so we subtract the amount from max response tokens for actual total */
-    gemini: 32750, // -10 from max
-    'text-bison-32k': 32758, // -10 from max
-    'chat-bison-32k': 32758, // -10 from max
-    'code-bison-32k': 32758, // -10 from max
-    'codechat-bison-32k': 32758,
-    /* Codey, -5 from max: 6144 */
-    'code-': 6139,
-    'codechat-': 6139,
-    /* PaLM2, -5 from max: 8192 */
-    'text-': 8187,
-    'chat-': 8187,
-  },
-  [EModelEndpoint.anthropic]: {
-    'claude-2.1': 200000,
-    'claude-': 100000,
-  },
+  [EModelEndpoint.custom]: { ...openAIModels, ...googleModels, ...anthropicModels },
+  [EModelEndpoint.google]: googleModels,
+  [EModelEndpoint.anthropic]: anthropicModels,
 };
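
Hoisting the Google and Anthropic maps into named constants lets the custom endpoint merge all three families, so a custom endpoint serving a Claude or Gemini model still resolves a sensible context limit. A quick usage sketch, assuming the lookup behavior sketched earlier (exact key, then partial match):

// Later spreads win on duplicate keys, so googleModels' 'text-' and 'chat-'
// entries shadow any identically named keys merged in before them.
getModelMaxTokens('claude-2.1', EModelEndpoint.custom); // 200000, exact key from anthropicModels
getModelMaxTokens('gemini-pro', EModelEndpoint.custom); // 32750, partial match on 'gemini'
getModelMaxTokens('mistral-medium', EModelEndpoint.custom); // 31990, partial match on 'mistral-'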
