From f5dd05312889ce4b3af504cc2dcb91a51191917b Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Mon, 4 May 2026 16:09:11 -0400 Subject: [PATCH 01/12] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20refactor:=20Restr?= =?UTF-8?q?ict=20User=20Tavily=20Endpoint=20URLs=20(#12946)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/api/src/web/web.spec.ts | 303 ++++++++++++++++++++++++++----- packages/api/src/web/web.ts | 37 +++- 2 files changed, 291 insertions(+), 49 deletions(-) diff --git a/packages/api/src/web/web.spec.ts b/packages/api/src/web/web.spec.ts index 6bd184f58e54..a33a6e1eb11f 100644 --- a/packages/api/src/web/web.spec.ts +++ b/packages/api/src/web/web.spec.ts @@ -253,6 +253,118 @@ describe('web.ts', () => { expect(result.authResult.firecrawlApiUrl).toBeUndefined(); }); + it('should ignore user-provided Tavily custom URLs unless explicitly enabled', async () => { + const originalEnv = process.env; + try { + process.env = { + ...originalEnv, + TAVILY_API_KEY: 'system-tavily-api-key', + TAVILY_SEARCH_URL: 'https://api.tavily.com/search', + TAVILY_EXTRACT_URL: 'https://api.tavily.com/extract', + }; + + const tavilyConfig = { + tavilyApiKey: '${TAVILY_API_KEY}', + tavilySearchUrl: '${TAVILY_SEARCH_URL}', + tavilyExtractUrl: '${TAVILY_EXTRACT_URL}', + searchProvider: 'tavily' as SearchProviders, + scraperProvider: 'tavily' as ScraperProviders, + rerankerType: 'none' as RerankerTypes, + } as TWebSearchConfig; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record = {}; + authFields.forEach((field: string) => { + if (field === 'TAVILY_API_KEY') { + result[field] = 'system-tavily-api-key'; + } else if (field === 'TAVILY_SEARCH_URL') { + result[field] = 'https://attacker.example/search'; + } else if (field === 'TAVILY_EXTRACT_URL') { + result[field] = 'https://attacker.example/extract'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig: tavilyConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authenticated).toBe(true); + expect(result.authResult.searchProvider).toBe('tavily'); + expect(result.authResult.scraperProvider).toBe('tavily'); + expect(result.authResult.tavilyApiKey).toBe('system-tavily-api-key'); + expect(result.authResult.tavilySearchUrl).toBeUndefined(); + expect(result.authResult.tavilyExtractUrl).toBeUndefined(); + expect(result.authTypes).toEqual([ + ['providers', AuthType.SYSTEM_DEFINED], + ['scrapers', AuthType.SYSTEM_DEFINED], + ['rerankers', AuthType.SYSTEM_DEFINED], + ]); + } finally { + process.env = originalEnv; + } + }); + + it('should allow user-provided Tavily custom URLs when explicitly enabled', async () => { + mockIsSSRFTarget.mockReturnValue(false); + mockResolveHostnameSSRF.mockResolvedValue(false); + + const originalEnv = process.env; + try { + process.env = { + ...originalEnv, + TAVILY_API_KEY: 'system-tavily-api-key', + TAVILY_SEARCH_URL: AuthType.USER_PROVIDED, + TAVILY_EXTRACT_URL: AuthType.USER_PROVIDED, + }; + + const tavilyConfig = { + tavilyApiKey: '${TAVILY_API_KEY}', + tavilySearchUrl: '${TAVILY_SEARCH_URL}', + tavilyExtractUrl: '${TAVILY_EXTRACT_URL}', + searchProvider: 'tavily' as SearchProviders, + scraperProvider: 'tavily' as ScraperProviders, + rerankerType: 'none' as RerankerTypes, + } as TWebSearchConfig; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record = {}; + authFields.forEach((field: string) => { + if (field === 'TAVILY_API_KEY') { + 
result[field] = 'system-tavily-api-key'; + } else if (field === 'TAVILY_SEARCH_URL') { + result[field] = 'https://tenant-search.example/search'; + } else if (field === 'TAVILY_EXTRACT_URL') { + result[field] = 'https://tenant-extract.example/extract'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig: tavilyConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authenticated).toBe(true); + expect(result.authResult.tavilySearchUrl).toBe('https://tenant-search.example/search'); + expect(result.authResult.tavilyExtractUrl).toBe('https://tenant-extract.example/extract'); + expect(mockResolveHostnameSSRF).toHaveBeenCalledWith('tenant-search.example'); + expect(mockResolveHostnameSSRF).toHaveBeenCalledWith('tenant-extract.example'); + expect(result.authTypes).toEqual([ + ['providers', AuthType.USER_PROVIDED], + ['scrapers', AuthType.USER_PROVIDED], + ['rerankers', AuthType.SYSTEM_DEFINED], + ]); + } finally { + process.env = originalEnv; + } + }); + it('should preserve safeSearch setting from webSearchConfig', async () => { // Mock successful authentication mockLoadAuthValues.mockImplementation(({ authFields }) => { @@ -732,50 +844,60 @@ describe('web.ts', () => { }); it('should authenticate Tavily as a search provider and pass options through', async () => { - const webSearchConfig: TCustomConfig['webSearch'] = { - tavilyApiKey: '${TAVILY_API_KEY}', - tavilySearchUrl: '${TAVILY_SEARCH_URL}', - firecrawlApiKey: '${FIRECRAWL_API_KEY}', - firecrawlApiUrl: '${FIRECRAWL_API_URL}', - safeSearch: SafeSearchTypes.MODERATE, - searchProvider: 'tavily' as SearchProviders, - scraperProvider: 'firecrawl' as ScraperProviders, - rerankerType: 'none' as RerankerTypes, - tavilySearchOptions: { - searchDepth: 'advanced', - maxResults: 5, - includeRawContent: 'markdown', - }, - }; + const originalEnv = process.env; + try { + process.env = { + ...originalEnv, + TAVILY_SEARCH_URL: 'https://api.tavily.com/search', + }; - mockLoadAuthValues.mockImplementation(({ authFields }) => { - const result: Record = {}; - authFields.forEach((field: string) => { - if (field === 'TAVILY_API_KEY') { - result[field] = 'tavily-api-key'; - } else if (field === 'TAVILY_SEARCH_URL') { - result[field] = 'https://api.tavily.com/search'; - } else if (field === 'FIRECRAWL_API_URL') { - result[field] = 'https://api.firecrawl.dev'; - } else { - result[field] = 'test-api-key'; - } + const webSearchConfig: TCustomConfig['webSearch'] = { + tavilyApiKey: '${TAVILY_API_KEY}', + tavilySearchUrl: '${TAVILY_SEARCH_URL}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + firecrawlApiUrl: '${FIRECRAWL_API_URL}', + safeSearch: SafeSearchTypes.MODERATE, + searchProvider: 'tavily' as SearchProviders, + scraperProvider: 'firecrawl' as ScraperProviders, + rerankerType: 'none' as RerankerTypes, + tavilySearchOptions: { + searchDepth: 'advanced', + maxResults: 5, + includeRawContent: 'markdown', + }, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record = {}; + authFields.forEach((field: string) => { + if (field === 'TAVILY_API_KEY') { + result[field] = 'tavily-api-key'; + } else if (field === 'TAVILY_SEARCH_URL') { + result[field] = 'https://api.tavily.com/search'; + } else if (field === 'FIRECRAWL_API_URL') { + result[field] = 'https://api.firecrawl.dev'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); }); - return Promise.resolve(result); - }); - const result = await loadWebSearchAuth({ - userId, 
- webSearchConfig, - loadAuthValues: mockLoadAuthValues, - }); + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); - expect(result.authenticated).toBe(true); - expect(result.authResult.searchProvider).toBe('tavily'); - expect(result.authResult.tavilyApiKey).toBe('tavily-api-key'); - expect(result.authResult.tavilySearchUrl).toBe('https://api.tavily.com/search'); - expect(result.authResult.tavilySearchOptions).toEqual(webSearchConfig.tavilySearchOptions); - expect(result.authResult.safeSearch).toBeUndefined(); + expect(result.authenticated).toBe(true); + expect(result.authResult.searchProvider).toBe('tavily'); + expect(result.authResult.tavilyApiKey).toBe('tavily-api-key'); + expect(result.authResult.tavilySearchUrl).toBe('https://api.tavily.com/search'); + expect(result.authResult.tavilySearchOptions).toEqual(webSearchConfig.tavilySearchOptions); + expect(result.authResult.safeSearch).toBeUndefined(); + } finally { + process.env = originalEnv; + } }); it('should fail authentication when Tavily search API key is missing', async () => { @@ -1555,7 +1677,7 @@ describe('web.ts', () => { expect(scrapersAuth).toBe(AuthType.USER_PROVIDED); }); - it('should block user-provided tavilySearchUrl targeting localhost', async () => { + it('should ignore user-provided tavilySearchUrl without admin opt-in', async () => { mockIsSSRFTarget.mockImplementation((hostname: string) => hostname === 'localhost'); const webSearchConfig: TCustomConfig['webSearch'] = { @@ -1588,10 +1710,10 @@ describe('web.ts', () => { expect(result.authResult.tavilySearchUrl).toBeUndefined(); expect(result.authResult.searchProvider).toBe('tavily'); expect(result.authenticated).toBe(true); - expect(mockIsSSRFTarget).toHaveBeenCalledWith('localhost'); + expect(mockIsSSRFTarget).not.toHaveBeenCalled(); }); - it('should block user-provided tavilyExtractUrl resolving to private IP', async () => { + it('should ignore user-provided tavilyExtractUrl without admin opt-in', async () => { mockResolveHostnameSSRF.mockImplementation((hostname: string) => Promise.resolve(hostname === 'extract.internal-service.com'), ); @@ -1628,6 +1750,101 @@ describe('web.ts', () => { expect(result.authenticated).toBe(true); const scrapersAuth = result.authTypes.find(([c]) => c === 'scrapers')?.[1]; expect(scrapersAuth).toBe(AuthType.USER_PROVIDED); + expect(mockResolveHostnameSSRF).not.toHaveBeenCalled(); + }); + + it('should block opted-in tavilySearchUrl targeting localhost', async () => { + mockIsSSRFTarget.mockImplementation((hostname: string) => hostname === 'localhost'); + + const originalEnv = process.env; + try { + process.env = { + ...originalEnv, + TAVILY_SEARCH_URL: AuthType.USER_PROVIDED, + }; + + const webSearchConfig: TCustomConfig['webSearch'] = { + tavilyApiKey: '${TAVILY_API_KEY}', + tavilySearchUrl: '${TAVILY_SEARCH_URL}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + safeSearch: SafeSearchTypes.MODERATE, + searchProvider: 'tavily' as SearchProviders, + rerankerType: 'none' as RerankerTypes, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record = {}; + authFields.forEach((field: string) => { + if (field === 'TAVILY_SEARCH_URL') { + result[field] = 'http://localhost:8080/search'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + 
expect(result.authResult.tavilySearchUrl).toBeUndefined(); + expect(result.authResult.searchProvider).toBe('tavily'); + expect(result.authenticated).toBe(true); + expect(mockIsSSRFTarget).toHaveBeenCalledWith('localhost'); + } finally { + process.env = originalEnv; + } + }); + + it('should block opted-in tavilyExtractUrl resolving to a private host', async () => { + mockResolveHostnameSSRF.mockImplementation((hostname: string) => + Promise.resolve(hostname === 'extract.internal-service.com'), + ); + + const originalEnv = process.env; + try { + process.env = { + ...originalEnv, + TAVILY_EXTRACT_URL: AuthType.USER_PROVIDED, + }; + + const webSearchConfig: TCustomConfig['webSearch'] = { + serperApiKey: '${SERPER_API_KEY}', + tavilyApiKey: '${TAVILY_API_KEY}', + tavilyExtractUrl: '${TAVILY_EXTRACT_URL}', + safeSearch: SafeSearchTypes.MODERATE, + scraperProvider: 'tavily' as ScraperProviders, + rerankerType: 'none' as RerankerTypes, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record = {}; + authFields.forEach((field: string) => { + if (field === 'TAVILY_EXTRACT_URL') { + result[field] = 'https://extract.internal-service.com/extract'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authResult.tavilyExtractUrl).toBeUndefined(); + expect(result.authResult.scraperProvider).toBe('tavily'); + expect(result.authenticated).toBe(true); + expect(mockResolveHostnameSSRF).toHaveBeenCalledWith('extract.internal-service.com'); + } finally { + process.env = originalEnv; + } }); it('should block user-provided searxngInstanceUrl targeting metadata endpoint', async () => { diff --git a/packages/api/src/web/web.ts b/packages/api/src/web/web.ts index b73e846a5749..06d65d4d985a 100644 --- a/packages/api/src/web/web.ts +++ b/packages/api/src/web/web.ts @@ -16,17 +16,26 @@ import type { TWebSearchKeys, TWebSearchCategories } from '@librechat/data-schem import { isSSRFTarget, resolveHostnameSSRF } from '../auth'; /** - * URL-type keys in TWebSearchKeys (not API keys or version strings). - * Must stay in sync with URL-typed fields in webSearchAuth (packages/data-schemas). + * User-provided URL keys that may pass through after SSRF preflight. */ -const WEB_SEARCH_URL_KEYS = new Set([ +const USER_PROVIDED_URL_KEYS = new Set([ 'searxngInstanceUrl', 'firecrawlApiUrl', 'jinaApiUrl', +]); + +/** + * URL keys that require explicit admin opt-in before user-provided values may pass through. + */ +const USER_PROVIDED_OPT_IN_URL_KEYS = new Set([ 'tavilySearchUrl', 'tavilyExtractUrl', ]); +function isUserProvidedEnabled(field: string): boolean { + return process.env[field] === AuthType.USER_PROVIDED; +} + /** * Returns true if the URL should be blocked for SSRF risk. * Fail-closed: unparseable URLs and non-HTTP(S) schemes return true. 
@@ -195,15 +204,31 @@ export async function loadWebSearchAuth({ } const isFieldUserProvided = value != null && process.env[field] !== value; - const isUrlKey = originalKey != null && WEB_SEARCH_URL_KEYS.has(originalKey); + const isUserProvidedUrlKey = + originalKey != null && USER_PROVIDED_URL_KEYS.has(originalKey); + const isUserProvidedOptInUrlKey = + originalKey != null && USER_PROVIDED_OPT_IN_URL_KEYS.has(originalKey); + const isUserProvidedUrlEnabled = + isUserProvidedUrlKey || + (isUserProvidedOptInUrlKey && isUserProvidedEnabled(field)); let contributed = false; - if (isUrlKey && isFieldUserProvided && (await isSSRFUrl(value))) { + if (isUserProvidedOptInUrlKey && isFieldUserProvided && !isUserProvidedUrlEnabled) { if (!optionalSet.has(field)) { allFieldsAuthenticated = false; break; } - } else if (originalKey) { + continue; + } + + if (isUserProvidedUrlEnabled && isFieldUserProvided && (await isSSRFUrl(value))) { + if (!optionalSet.has(field)) { + allFieldsAuthenticated = false; + break; + } + continue; + } + if (originalKey) { authResult[originalKey] = value; contributed = true; } From 5683706af5b64d92ea3b1c0f3a2cc0647b26794d Mon Sep 17 00:00:00 2001 From: Artyom Bogachenko <32168471+SpectralOne@users.noreply.github.com> Date: Tue, 5 May 2026 00:06:35 +0300 Subject: [PATCH 02/12] =?UTF-8?q?=F0=9F=94=90=20feat:=20OIDC=20Bearer=20To?= =?UTF-8?q?ken=20Authentication=20for=20Remote=20Agent=20API=20(#12450)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Remote Agent Auth middleware * consider migration and update user * fix eslint errors * add scope validation * fix codex review errors * add filter for use: sig * add jwks-rsa deps * Fix remote agent OIDC auth review findings * Polish remote agent OIDC timeout coverage * Reject remote OIDC tokens without subject * Use tenant context for remote agent auth config * Harden remote agent OIDC scope handling * Polish remote agent OIDC cache and scope tests * Resolve remote agent auth review comments * Reuse OpenID email claim resolver for remote auth * Skip empty OpenID email fallback claims * Use pre-auth tenant context for remote auth config * Downgrade expected OIDC fallback logging * Require secure remote OIDC endpoints * Polish remote agent auth edge cases * Enforce unique balance records * Bind remote OpenID users to issuer * Fix issuer-scoped OpenID indexes * Avoid unique balance index requirement * Fix remote OpenID issuer normalization boundaries * Require issuer-bound OpenID lookups * Enforce tenant API key policy after auth * Fix remote auth tenant policy types * Normalize remote OIDC discovery issuer * Allow normalized remote OIDC issuer validation * Enforce resolved tenant OIDC policy * Polish OpenID issuer and scope validation --------- Co-authored-by: Danny Avila --- api/server/controllers/AuthController.js | 5 +- api/server/controllers/AuthController.spec.js | 23 +- api/server/routes/agents/middleware.js | 41 + api/server/routes/agents/openai.js | 33 +- api/server/routes/agents/responses.js | 33 +- api/strategies/openIdJwtStrategy.js | 18 +- api/strategies/openIdJwtStrategy.spec.js | 22 +- api/strategies/openidStrategy.js | 36 +- api/strategies/openidStrategy.spec.js | 12 +- packages/api/package.json | 1 + packages/api/src/app/service.ts | 20 +- packages/api/src/auth/openid.spec.ts | 374 +++- packages/api/src/auth/openid.ts | 174 +- packages/api/src/middleware/balance.ts | 35 +- packages/api/src/middleware/index.ts | 1 + .../src/middleware/remoteAgentAuth.spec.ts | 1533 
+++++++++++++++++ .../api/src/middleware/remoteAgentAuth.ts | 562 ++++++ .../specs/config-schemas.spec.ts | 98 ++ packages/data-provider/src/config.ts | 56 + .../src/methods/user.methods.spec.ts | 31 + .../src/migrations/tenantIndexes.spec.ts | 12 +- .../src/migrations/tenantIndexes.ts | 1 + packages/data-schemas/src/schema/user.ts | 11 + packages/data-schemas/src/types/user.ts | 1 + 24 files changed, 3005 insertions(+), 128 deletions(-) create mode 100644 api/server/routes/agents/middleware.js create mode 100644 packages/api/src/middleware/remoteAgentAuth.spec.ts create mode 100644 packages/api/src/middleware/remoteAgentAuth.ts diff --git a/api/server/controllers/AuthController.js b/api/server/controllers/AuthController.js index eb44feffa4c0..d61bcc28449d 100644 --- a/api/server/controllers/AuthController.js +++ b/api/server/controllers/AuthController.js @@ -2,7 +2,7 @@ const cookies = require('cookie'); const jwt = require('jsonwebtoken'); const openIdClient = require('openid-client'); const { logger } = require('@librechat/data-schemas'); -const { isEnabled, findOpenIDUser } = require('@librechat/api'); +const { isEnabled, findOpenIDUser, getOpenIdIssuer } = require('@librechat/api'); const { requestPasswordReset, setOpenIDAuthTokens, @@ -85,10 +85,12 @@ const refreshController = async (req, res) => { refreshParams, ); const claims = tokenset.claims(); + const openidIssuer = getOpenIdIssuer(claims, openIdConfig); const { user, error, migration } = await findOpenIDUser({ findUser, email: getOpenIdEmail(claims), openidId: claims.sub, + openidIssuer, idOnTheSource: claims.oid, strategyName: 'refreshController', }); @@ -111,6 +113,7 @@ const refreshController = async (req, res) => { await updateUser(user._id.toString(), { provider: 'openid', openidId: claims.sub, + ...(openidIssuer ? { openidIssuer } : {}), }); logger.info( `[refreshController] Updated user ${user.email} openidId (${reason}): ${user.openidId ?? 
'null'} -> ${claims.sub}`, diff --git a/api/server/controllers/AuthController.spec.js b/api/server/controllers/AuthController.spec.js index 964947def9d1..8b19c28f36f8 100644 --- a/api/server/controllers/AuthController.spec.js +++ b/api/server/controllers/AuthController.spec.js @@ -23,6 +23,7 @@ jest.mock('~/models', () => ({ jest.mock('@librechat/api', () => ({ isEnabled: jest.fn(), findOpenIDUser: jest.fn(), + getOpenIdIssuer: jest.fn(() => 'https://issuer.example.com'), })); const openIdClient = require('openid-client'); @@ -157,6 +158,7 @@ describe('refreshController – OpenID path', () => { }; const baseClaims = { + iss: 'https://issuer.example.com', sub: 'oidc-sub-123', oid: 'oid-456', email: 'user@example.com', @@ -204,7 +206,10 @@ describe('refreshController – OpenID path', () => { expect(getOpenIdEmail).toHaveBeenCalledWith(baseClaims); expect(findOpenIDUser).toHaveBeenCalledWith( - expect.objectContaining({ email: baseClaims.email }), + expect.objectContaining({ + email: baseClaims.email, + openidIssuer: baseClaims.iss, + }), ); expect(res.status).toHaveBeenCalledWith(200); }); @@ -225,7 +230,10 @@ describe('refreshController – OpenID path', () => { expect(getOpenIdEmail).toHaveBeenCalledWith(claimsWithUpn); expect(findOpenIDUser).toHaveBeenCalledWith( - expect.objectContaining({ email: 'user@corp.example.com' }), + expect.objectContaining({ + email: 'user@corp.example.com', + openidIssuer: baseClaims.iss, + }), ); expect(res.status).toHaveBeenCalledWith(200); }); @@ -236,7 +244,10 @@ describe('refreshController – OpenID path', () => { await refreshController(req, res); expect(findOpenIDUser).toHaveBeenCalledWith( - expect.objectContaining({ email: baseClaims.email }), + expect.objectContaining({ + email: baseClaims.email, + openidIssuer: baseClaims.iss, + }), ); }); @@ -267,7 +278,11 @@ describe('refreshController – OpenID path', () => { expect(updateUser).toHaveBeenCalledWith( 'user-db-id', - expect.objectContaining({ provider: 'openid', openidId: baseClaims.sub }), + expect.objectContaining({ + provider: 'openid', + openidId: baseClaims.sub, + openidIssuer: baseClaims.iss, + }), ); expect(res.status).toHaveBeenCalledWith(200); }); diff --git a/api/server/routes/agents/middleware.js b/api/server/routes/agents/middleware.js new file mode 100644 index 000000000000..f71c25c6f8fd --- /dev/null +++ b/api/server/routes/agents/middleware.js @@ -0,0 +1,41 @@ +const { PermissionTypes, Permissions } = require('librechat-data-provider'); +const { + generateCheckAccess, + preAuthTenantMiddleware, + createRequireApiKeyAuth, + createRemoteAgentAuth, + createCheckRemoteAgentAccess, +} = require('@librechat/api'); +const { getEffectivePermissions } = require('~/server/services/PermissionService'); +const { getAppConfig } = require('~/server/services/Config'); +const db = require('~/models'); + +const apiKeyMiddleware = createRequireApiKeyAuth({ + validateAgentApiKey: db.validateAgentApiKey, + findUser: db.findUser, +}); + +const requireRemoteAgentAuth = createRemoteAgentAuth({ + apiKeyMiddleware, + findUser: db.findUser, + updateUser: db.updateUser, + getAppConfig, +}); + +const checkRemoteAgentsFeature = generateCheckAccess({ + permissionType: PermissionTypes.REMOTE_AGENTS, + permissions: [Permissions.USE], + getRoleByName: db.getRoleByName, +}); + +const checkAgentPermission = createCheckRemoteAgentAccess({ + getAgent: db.getAgent, + getEffectivePermissions, +}); + +module.exports = { + checkAgentPermission, + preAuthTenantMiddleware, + requireRemoteAgentAuth, + checkRemoteAgentsFeature, +}; 
diff --git a/api/server/routes/agents/openai.js b/api/server/routes/agents/openai.js index 72e3da6c5a0e..fa7f9b26c811 100644 --- a/api/server/routes/agents/openai.js +++ b/api/server/routes/agents/openai.js @@ -17,40 +17,23 @@ * } */ const express = require('express'); -const { PermissionTypes, Permissions } = require('librechat-data-provider'); -const { - generateCheckAccess, - createRequireApiKeyAuth, - createCheckRemoteAgentAccess, -} = require('@librechat/api'); const { OpenAIChatCompletionController, ListModelsController, GetModelController, } = require('~/server/controllers/agents/openai'); -const { getEffectivePermissions } = require('~/server/services/PermissionService'); const { configMiddleware } = require('~/server/middleware'); -const db = require('~/models'); +const { + checkAgentPermission, + preAuthTenantMiddleware, + requireRemoteAgentAuth, + checkRemoteAgentsFeature, +} = require('./middleware'); const router = express.Router(); -const requireApiKeyAuth = createRequireApiKeyAuth({ - validateAgentApiKey: db.validateAgentApiKey, - findUser: db.findUser, -}); - -const checkRemoteAgentsFeature = generateCheckAccess({ - permissionType: PermissionTypes.REMOTE_AGENTS, - permissions: [Permissions.USE], - getRoleByName: db.getRoleByName, -}); - -const checkAgentPermission = createCheckRemoteAgentAccess({ - getAgent: db.getAgent, - getEffectivePermissions, -}); - -router.use(requireApiKeyAuth); +router.use(preAuthTenantMiddleware); +router.use(requireRemoteAgentAuth); router.use(configMiddleware); router.use(checkRemoteAgentsFeature); diff --git a/api/server/routes/agents/responses.js b/api/server/routes/agents/responses.js index 2c118e059712..401025bfd62a 100644 --- a/api/server/routes/agents/responses.js +++ b/api/server/routes/agents/responses.js @@ -20,40 +20,23 @@ * @see https://openresponses.org/specification */ const express = require('express'); -const { PermissionTypes, Permissions } = require('librechat-data-provider'); -const { - generateCheckAccess, - createRequireApiKeyAuth, - createCheckRemoteAgentAccess, -} = require('@librechat/api'); const { createResponse, getResponse, listModels, } = require('~/server/controllers/agents/responses'); -const { getEffectivePermissions } = require('~/server/services/PermissionService'); const { configMiddleware } = require('~/server/middleware'); -const db = require('~/models'); +const { + checkAgentPermission, + preAuthTenantMiddleware, + requireRemoteAgentAuth, + checkRemoteAgentsFeature, +} = require('./middleware'); const router = express.Router(); -const requireApiKeyAuth = createRequireApiKeyAuth({ - validateAgentApiKey: db.validateAgentApiKey, - findUser: db.findUser, -}); - -const checkRemoteAgentsFeature = generateCheckAccess({ - permissionType: PermissionTypes.REMOTE_AGENTS, - permissions: [Permissions.USE], - getRoleByName: db.getRoleByName, -}); - -const checkAgentPermission = createCheckRemoteAgentAccess({ - getAgent: db.getAgent, - getEffectivePermissions, -}); - -router.use(requireApiKeyAuth); +router.use(preAuthTenantMiddleware); +router.use(requireRemoteAgentAuth); router.use(configMiddleware); router.use(checkRemoteAgentsFeature); diff --git a/api/strategies/openIdJwtStrategy.js b/api/strategies/openIdJwtStrategy.js index 83a40bf9487c..8af11bf99c5e 100644 --- a/api/strategies/openIdJwtStrategy.js +++ b/api/strategies/openIdJwtStrategy.js @@ -3,9 +3,14 @@ const jwksRsa = require('jwks-rsa'); const { logger } = require('@librechat/data-schemas'); const { HttpsProxyAgent } = require('https-proxy-agent'); const { 
SystemRoles } = require('librechat-data-provider'); -const { isEnabled, findOpenIDUser, math } = require('@librechat/api'); const { Strategy: JwtStrategy, ExtractJwt } = require('passport-jwt'); -const { getOpenIdEmail } = require('./openidStrategy'); +const { + isEnabled, + findOpenIDUser, + getOpenIdEmail, + getOpenIdIssuer, + math, +} = require('@librechat/api'); const { updateUser, findUser } = require('~/models'); /** @@ -27,7 +32,9 @@ const { updateUser, findUser } = require('~/models'); */ const openIdJwtLogin = (openIdConfig) => { let jwksRsaOptions = { - cache: isEnabled(process.env.OPENID_JWKS_URL_CACHE_ENABLED) || true, + cache: process.env.OPENID_JWKS_URL_CACHE_ENABLED + ? isEnabled(process.env.OPENID_JWKS_URL_CACHE_ENABLED) + : true, cacheMaxAge: math(process.env.OPENID_JWKS_URL_CACHE_TIME, 60000), jwksUri: openIdConfig.serverMetadata().jwks_uri, }; @@ -51,11 +58,13 @@ const openIdJwtLogin = (openIdConfig) => { try { const authHeader = req.headers.authorization; const rawToken = authHeader?.replace('Bearer ', ''); + const openidIssuer = getOpenIdIssuer(payload, openIdConfig); const { user, error, migration } = await findOpenIDUser({ findUser, email: payload ? getOpenIdEmail(payload) : undefined, openidId: payload?.sub, + openidIssuer, idOnTheSource: payload?.oid, strategyName: 'openIdJwtLogin', }); @@ -72,6 +81,9 @@ const openIdJwtLogin = (openIdConfig) => { if (migration) { updateData.provider = 'openid'; updateData.openidId = payload?.sub; + if (openidIssuer) { + updateData.openidIssuer = openidIssuer; + } } if (!user.role) { user.role = SystemRoles.USER; diff --git a/api/strategies/openIdJwtStrategy.spec.js b/api/strategies/openIdJwtStrategy.spec.js index fd710f1ebd47..e3bc9f6e2865 100644 --- a/api/strategies/openIdJwtStrategy.spec.js +++ b/api/strategies/openIdJwtStrategy.spec.js @@ -23,6 +23,8 @@ jest.mock('@librechat/data-schemas', () => ({ jest.mock('@librechat/api', () => ({ isEnabled: jest.fn(() => false), findOpenIDUser: jest.fn(), + getOpenIdEmail: jest.requireActual('@librechat/api').getOpenIdEmail, + getOpenIdIssuer: jest.fn(() => 'https://issuer.example.com'), math: jest.fn((val, fallback) => fallback), })); jest.mock('~/models', () => ({ @@ -47,7 +49,10 @@ const { findUser, updateUser } = require('~/models'); // Helper: build a mock openIdConfig const mockOpenIdConfig = { - serverMetadata: () => ({ jwks_uri: 'https://example.com/.well-known/jwks.json' }), + serverMetadata: () => ({ + issuer: 'https://issuer.example.com', + jwks_uri: 'https://example.com/.well-known/jwks.json', + }), }; // Helper: invoke the captured verify callback @@ -225,6 +230,7 @@ describe('openIdJwtStrategy – OPENID_EMAIL_CLAIM', () => { _id: 'user-id-1', provider: 'openid', openidId: payload.sub, + openidIssuer: 'https://issuer.example.com', email: payload.email, role: SystemRoles.USER, }; @@ -240,7 +246,9 @@ describe('openIdJwtStrategy – OPENID_EMAIL_CLAIM', () => { expect(findUser).toHaveBeenCalledWith( expect.objectContaining({ - $or: expect.arrayContaining([{ openidId: payload.sub }]), + $or: expect.arrayContaining([ + { openidId: payload.sub, openidIssuer: 'https://issuer.example.com' }, + ]), }), ); }); @@ -254,7 +262,9 @@ describe('openIdJwtStrategy – OPENID_EMAIL_CLAIM', () => { expect(findUser).toHaveBeenCalledTimes(2); expect(findUser.mock.calls[0][0]).toMatchObject({ - $or: expect.arrayContaining([{ openidId: payload.sub }]), + $or: expect.arrayContaining([ + { openidId: payload.sub, openidIssuer: 'https://issuer.example.com' }, + ]), }); 
expect(findUser.mock.calls[1][0]).toEqual({ email: 'test@corp.example.com' }); expect(user).toBe(false); @@ -367,7 +377,11 @@ describe('openIdJwtStrategy – OPENID_EMAIL_CLAIM', () => { expect(user).toBeTruthy(); expect(updateUser).toHaveBeenCalledWith( 'legacy-db-id', - expect.objectContaining({ provider: 'openid', openidId: payloadNoEmail.sub }), + expect.objectContaining({ + provider: 'openid', + openidId: payloadNoEmail.sub, + openidIssuer: 'https://issuer.example.com', + }), ); }); }); diff --git a/api/strategies/openidStrategy.js b/api/strategies/openidStrategy.js index 7314a84e1567..6d08fa1e5979 100644 --- a/api/strategies/openidStrategy.js +++ b/api/strategies/openidStrategy.js @@ -13,6 +13,8 @@ const { logHeaders, safeStringify, findOpenIDUser, + getOpenIdEmail, + getOpenIdIssuer, getBalanceConfig, isEmailDomainAllowed, resolveAppConfigForUser, @@ -268,34 +270,6 @@ function getFullName(userinfo) { return userinfo.username || userinfo.email; } -/** - * Resolves the user identifier from OpenID claims. - * Configurable via OPENID_EMAIL_CLAIM; defaults to: email -> preferred_username -> upn. - * - * @param {Object} userinfo - The user information object from OpenID Connect - * @returns {string|undefined} The resolved identifier string - */ -function getOpenIdEmail(userinfo) { - const claimKey = process.env.OPENID_EMAIL_CLAIM?.trim(); - if (claimKey) { - const value = userinfo[claimKey]; - if (typeof value === 'string' && value) { - return value; - } - if (value !== undefined && value !== null) { - logger.warn( - `[openidStrategy] OPENID_EMAIL_CLAIM="${claimKey}" resolved to a non-string value (type: ${typeof value}). Falling back to: email -> preferred_username -> upn.`, - ); - } else { - logger.warn( - `[openidStrategy] OPENID_EMAIL_CLAIM="${claimKey}" not present in userinfo. Falling back to: email -> preferred_username -> upn.`, - ); - } - } - const fallback = userinfo.email || userinfo.preferred_username || userinfo.upn; - return typeof fallback === 'string' ? fallback : undefined; -} - /** * Converts an input into a string suitable for a username. * If the input is a string, it will be returned as is. 
@@ -470,6 +444,7 @@ async function processOpenIDAuth(tokenset, existingUsersOnly = false) { } const email = getOpenIdEmail(userinfo); + const openidIssuer = getOpenIdIssuer(claims, openidConfig); const baseConfig = await getAppConfig({ baseOnly: true }); if (!isEmailDomainAllowed(email, baseConfig?.registration?.allowedDomains)) { @@ -483,6 +458,7 @@ async function processOpenIDAuth(tokenset, existingUsersOnly = false) { findUser, email: email, openidId: claims.sub || userinfo.sub, + openidIssuer, idOnTheSource: claims.oid || userinfo.oid, strategyName: 'openidStrategy', }); @@ -588,6 +564,7 @@ async function processOpenIDAuth(tokenset, existingUsersOnly = false) { emailVerified: userinfo.email_verified || false, name: fullName, idOnTheSource: userinfo.oid, + openidIssuer, }; const balanceConfig = getBalanceConfig(appConfig); @@ -595,6 +572,9 @@ async function processOpenIDAuth(tokenset, existingUsersOnly = false) { } else { user.provider = 'openid'; user.openidId = userinfo.sub; + if (openidIssuer) { + user.openidIssuer = openidIssuer; + } user.username = username; user.name = fullName; user.idOnTheSource = userinfo.oid; diff --git a/api/strategies/openidStrategy.spec.js b/api/strategies/openidStrategy.spec.js index 6d824176f7ff..2812341e5cbe 100644 --- a/api/strategies/openidStrategy.spec.js +++ b/api/strategies/openidStrategy.spec.js @@ -3,7 +3,7 @@ const fetch = require('node-fetch'); const jwtDecode = require('jsonwebtoken/decode'); const { ErrorTypes } = require('librechat-data-provider'); const { findUser, createUser, updateUser } = require('~/models'); -const { resolveAppConfigForUser } = require('@librechat/api'); +const { getOpenIdIssuer, resolveAppConfigForUser } = require('@librechat/api'); const { getAppConfig } = require('~/server/services/Config'); const { setupOpenId } = require('./openidStrategy'); @@ -27,9 +27,11 @@ jest.mock('@librechat/api', () => ({ isEnabled: jest.fn(() => false), isEmailDomainAllowed: jest.fn(() => true), findOpenIDUser: jest.requireActual('@librechat/api').findOpenIDUser, + getOpenIdEmail: jest.requireActual('@librechat/api').getOpenIdEmail, getBalanceConfig: jest.fn(() => ({ enabled: false, })), + getOpenIdIssuer: jest.fn(() => 'https://fake-issuer.com'), resolveAppConfigForUser: jest.fn(async (_getAppConfig, _user) => ({})), })); jest.mock('~/models', () => ({ @@ -202,10 +204,17 @@ describe('setupOpenId', () => { // Assert expect(user.username).toBe(userinfo.preferred_username); + expect(getOpenIdIssuer).toHaveBeenCalledTimes(1); + expect(getOpenIdIssuer.mock.calls[0]).toHaveLength(2); + expect(getOpenIdIssuer.mock.calls[0][0]).toEqual(userinfo); + expect(getOpenIdIssuer.mock.calls[0][1]).toEqual( + expect.objectContaining({ issuer: 'https://fake-issuer.com' }), + ); expect(createUser).toHaveBeenCalledWith( expect.objectContaining({ provider: 'openid', openidId: userinfo.sub, + openidIssuer: 'https://fake-issuer.com', username: userinfo.preferred_username, email: userinfo.email, name: `${userinfo.given_name} ${userinfo.family_name}`, @@ -326,6 +335,7 @@ describe('setupOpenId', () => { expect.objectContaining({ provider: 'openid', openidId: userinfo.sub, + openidIssuer: 'https://fake-issuer.com', username: userinfo.preferred_username, name: `${userinfo.given_name} ${userinfo.family_name}`, }), diff --git a/packages/api/package.json b/packages/api/package.json index 9ef4178e3b78..788fa791d487 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -111,6 +111,7 @@ "ioredis": "^5.3.2", "js-yaml": "^4.1.1", "jsonwebtoken": "^9.0.0", 
+ "jwks-rsa": "^3.2.0", "keyv": "^5.3.2", "keyv-file": "^5.1.2", "librechat-data-provider": "*", diff --git a/packages/api/src/app/service.ts b/packages/api/src/app/service.ts index 952544f02cb4..613c428fca77 100644 --- a/packages/api/src/app/service.ts +++ b/packages/api/src/app/service.ts @@ -48,6 +48,15 @@ export interface AppConfigServiceDeps { overrideCacheTtl?: number; } +export interface GetAppConfigOptions { + role?: string; + userId?: string; + tenantId?: string; + refresh?: boolean; + /** When true, return only the YAML-derived base config — no DB override queries. */ + baseOnly?: boolean; +} + // ── Helpers ────────────────────────────────────────────────────────── let _strictOverride: boolean | undefined; @@ -140,16 +149,7 @@ export function createAppConfigService(deps: AppConfigServiceDeps) { * `role`, `userId`, and `tenantId` are ignored in this mode. * Use this for startup, auth strategies, and other pre-tenant code paths. */ - async function getAppConfig( - options: { - role?: string; - userId?: string; - tenantId?: string; - refresh?: boolean; - /** When true, return only the YAML-derived base config — no DB override queries. */ - baseOnly?: boolean; - } = {}, - ): Promise { + async function getAppConfig(options: GetAppConfigOptions = {}): Promise { const { role, userId, tenantId, refresh, baseOnly } = options; const baseConfig = await ensureBaseConfig(refresh); diff --git a/packages/api/src/auth/openid.spec.ts b/packages/api/src/auth/openid.spec.ts index 2cf3992cdfa6..deaba8c5c044 100644 --- a/packages/api/src/auth/openid.spec.ts +++ b/packages/api/src/auth/openid.spec.ts @@ -1,8 +1,8 @@ import { Types } from 'mongoose'; -import { ErrorTypes } from 'librechat-data-provider'; import { logger } from '@librechat/data-schemas'; +import { ErrorTypes } from 'librechat-data-provider'; import type { IUser, UserMethods } from '@librechat/data-schemas'; -import { findOpenIDUser } from './openid'; +import { findOpenIDUser, getOpenIdEmail, getOpenIdIssuer, normalizeOpenIdIssuer } from './openid'; function newId() { return new Types.ObjectId(); @@ -16,22 +16,76 @@ jest.mock('@librechat/data-schemas', () => ({ }, })); +describe('normalizeOpenIdIssuer', () => { + it('normalizes blank, trailing-slash, and discovery-document issuers', () => { + expect(normalizeOpenIdIssuer('')).toBeUndefined(); + expect(normalizeOpenIdIssuer(' ')).toBeUndefined(); + expect(normalizeOpenIdIssuer('https://issuer.example.com/')).toBe('https://issuer.example.com'); + expect( + normalizeOpenIdIssuer('https://issuer.example.com/.well-known/openid-configuration/'), + ).toBe('https://issuer.example.com'); + expect( + normalizeOpenIdIssuer('https://issuer.example.com/realm/.well-known/openid-configuration'), + ).toBe('https://issuer.example.com/realm'); + }); +}); + +describe('getOpenIdIssuer', () => { + const originalOpenIdIssuer = process.env.OPENID_ISSUER; + + afterEach(() => { + if (originalOpenIdIssuer == null) { + delete process.env.OPENID_ISSUER; + return; + } + + process.env.OPENID_ISSUER = originalOpenIdIssuer; + }); + + it('prefers token issuer and falls back to metadata and env issuer', () => { + process.env.OPENID_ISSUER = 'https://env.example.com/.well-known/openid-configuration'; + + expect( + getOpenIdIssuer( + { iss: 'https://token.example.com/' }, + { serverMetadata: () => ({ issuer: 'https://metadata.example.com' }) }, + ), + ).toBe('https://token.example.com'); + expect( + getOpenIdIssuer({}, { serverMetadata: () => ({ issuer: 'https://metadata.example.com/' }) }), + 
).toBe('https://metadata.example.com'); + expect(getOpenIdIssuer({})).toBe('https://env.example.com'); + }); +}); + describe('findOpenIDUser', () => { let mockFindUser: jest.MockedFunction; + const originalOpenIdIssuer = process.env.OPENID_ISSUER; + const issuer = 'https://issuer.example.com'; beforeEach(() => { mockFindUser = jest.fn(); + delete process.env.OPENID_ISSUER; jest.clearAllMocks(); (logger.warn as jest.Mock).mockClear(); (logger.info as jest.Mock).mockClear(); }); + afterAll(() => { + if (originalOpenIdIssuer == null) { + delete process.env.OPENID_ISSUER; + return; + } + process.env.OPENID_ISSUER = originalOpenIdIssuer; + }); + describe('Primary condition searches', () => { it('should find user by openidId', async () => { const mockUser: IUser = { _id: newId(), provider: 'openid', openidId: 'openid_123', + openidIssuer: issuer, email: 'user@example.com', username: 'testuser', } as IUser; @@ -40,12 +94,13 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, email: 'user@example.com', }); expect(mockFindUser).toHaveBeenCalledWith({ - $or: [{ openidId: 'openid_123' }], + $or: [{ openidId: 'openid_123', openidIssuer: issuer }], }); expect(result).toEqual({ user: mockUser, @@ -67,12 +122,16 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, idOnTheSource: 'source_123', }); expect(mockFindUser).toHaveBeenCalledWith({ - $or: [{ openidId: 'openid_123' }, { idOnTheSource: 'source_123' }], + $or: [ + { openidId: 'openid_123', openidIssuer: issuer }, + { idOnTheSource: 'source_123', openidIssuer: issuer }, + ], }); expect(result).toEqual({ user: mockUser, @@ -87,6 +146,7 @@ describe('findOpenIDUser', () => { provider: 'openid', openidId: 'openid_123', idOnTheSource: 'source_123', + openidIssuer: issuer, email: 'user@example.com', username: 'testuser', } as IUser; @@ -95,13 +155,46 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, idOnTheSource: 'source_123', email: 'user@example.com', }); expect(mockFindUser).toHaveBeenCalledWith({ - $or: [{ openidId: 'openid_123' }, { idOnTheSource: 'source_123' }], + $or: [ + { openidId: 'openid_123', openidIssuer: issuer }, + { idOnTheSource: 'source_123', openidIssuer: issuer }, + ], + }); + expect(result).toEqual({ + user: mockUser, + error: null, + migration: false, + }); + }); + + it('should bind primary lookup to issuer', async () => { + const mockUser: IUser = { + _id: newId(), + provider: 'openid', + openidId: 'openid_123', + openidIssuer: 'https://issuer.example.com', + email: 'user@example.com', + username: 'testuser', + } as IUser; + + mockFindUser.mockResolvedValueOnce(mockUser); + + const result = await findOpenIDUser({ + openidId: 'openid_123', + openidIssuer: 'https://issuer.example.com/', + findUser: mockFindUser, + email: 'user@example.com', + }); + + expect(mockFindUser).toHaveBeenCalledWith({ + $or: [{ openidId: 'openid_123', openidIssuer: 'https://issuer.example.com' }], }); expect(result).toEqual({ user: mockUser, @@ -109,6 +202,101 @@ describe('findOpenIDUser', () => { migration: false, }); }); + + it('should allow legacy issuer-less lookup only for the configured OpenID login issuer', async () => { + process.env.OPENID_ISSUER = 'https://issuer.example.com/'; + const mockUser: IUser = { + _id: newId(), + provider: 'openid', + openidId: 
'openid_123', + email: 'user@example.com', + username: 'testuser', + } as IUser; + + mockFindUser.mockResolvedValueOnce(mockUser); + + const result = await findOpenIDUser({ + openidId: 'openid_123', + openidIssuer: 'https://issuer.example.com', + findUser: mockFindUser, + }); + + expect(mockFindUser).toHaveBeenCalledWith({ + $or: [ + { openidId: 'openid_123', openidIssuer: 'https://issuer.example.com' }, + { + openidId: 'openid_123', + $or: [ + { openidIssuer: { $exists: false } }, + { openidIssuer: null }, + { openidIssuer: '' }, + ], + }, + ], + }); + expect(result).toEqual({ + user: { ...mockUser, openidIssuer: 'https://issuer.example.com' }, + error: null, + migration: true, + }); + }); + + it('should allow legacy issuer-less lookup when login issuer is a discovery document URL', async () => { + process.env.OPENID_ISSUER = 'https://issuer.example.com/.well-known/openid-configuration'; + const mockUser: IUser = { + _id: newId(), + provider: 'openid', + openidId: 'openid_123', + email: 'user@example.com', + username: 'testuser', + } as IUser; + + mockFindUser.mockResolvedValueOnce(mockUser); + + const result = await findOpenIDUser({ + openidId: 'openid_123', + openidIssuer: 'https://issuer.example.com', + findUser: mockFindUser, + }); + + expect(mockFindUser).toHaveBeenCalledWith({ + $or: [ + { openidId: 'openid_123', openidIssuer: 'https://issuer.example.com' }, + { + openidId: 'openid_123', + $or: [ + { openidIssuer: { $exists: false } }, + { openidIssuer: null }, + { openidIssuer: '' }, + ], + }, + ], + }); + expect(result).toEqual({ + user: { ...mockUser, openidIssuer: 'https://issuer.example.com' }, + error: null, + migration: true, + }); + }); + + it('should skip primary ID lookup when issuer context is missing', async () => { + mockFindUser.mockResolvedValueOnce(null); + + const result = await findOpenIDUser({ + openidId: 'openid_123', + idOnTheSource: 'source_123', + findUser: mockFindUser, + email: 'user@example.com', + }); + + expect(mockFindUser).toHaveBeenCalledTimes(1); + expect(mockFindUser).toHaveBeenCalledWith({ email: 'user@example.com' }); + expect(result).toEqual({ + user: null, + error: null, + migration: false, + }); + }); }); describe('Email-based searches', () => { @@ -117,6 +305,7 @@ describe('findOpenIDUser', () => { _id: newId(), provider: 'openid', openidId: 'openid_123', + openidIssuer: issuer, email: 'user@example.com', username: 'testuser', } as IUser; @@ -125,12 +314,13 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, email: 'user@example.com', }); expect(mockFindUser).toHaveBeenNthCalledWith(1, { - $or: [{ openidId: 'openid_123' }], + $or: [{ openidId: 'openid_123', openidIssuer: issuer }], }); expect(mockFindUser).toHaveBeenNthCalledWith(2, { email: 'user@example.com' }); expect(result).toEqual({ @@ -147,6 +337,7 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, email: 'user@example.com', }); @@ -164,12 +355,13 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, }); expect(mockFindUser).toHaveBeenCalledTimes(1); expect(mockFindUser).toHaveBeenCalledWith({ - $or: [{ openidId: 'openid_123' }], + $or: [{ openidId: 'openid_123', openidIssuer: issuer }], }); expect(result).toEqual({ user: null, @@ -194,6 +386,7 @@ describe('findOpenIDUser', () => { const result = await 
findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, email: 'user@example.com', }); @@ -218,6 +411,7 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, email: 'user@example.com', }); @@ -234,6 +428,7 @@ describe('findOpenIDUser', () => { _id: newId(), provider: 'openid', openidId: 'openid_123', + openidIssuer: issuer, email: 'user@example.com', username: 'testuser', } as IUser; @@ -242,6 +437,7 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, email: 'user@example.com', }); @@ -252,6 +448,58 @@ describe('findOpenIDUser', () => { migration: false, }); }); + + it('should reject email fallback when stored openidIssuer does not match token issuer', async () => { + const mockUser: IUser = { + _id: newId(), + provider: 'openid', + openidId: 'openid_123', + openidIssuer: 'https://issuer-a.example.com', + email: 'user@example.com', + username: 'testuser', + } as IUser; + + mockFindUser.mockResolvedValueOnce(null).mockResolvedValueOnce(mockUser); + + const result = await findOpenIDUser({ + openidId: 'openid_123', + openidIssuer: 'https://issuer-b.example.com', + findUser: mockFindUser, + email: 'user@example.com', + }); + + expect(result).toEqual({ + user: null, + error: ErrorTypes.AUTH_FAILED, + migration: false, + }); + }); + + it('should reject legacy email fallback when token issuer is not the configured OpenID login issuer', async () => { + process.env.OPENID_ISSUER = 'https://issuer-a.example.com'; + const mockUser: IUser = { + _id: newId(), + provider: 'openid', + openidId: 'openid_123', + email: 'user@example.com', + username: 'testuser', + } as IUser; + + mockFindUser.mockResolvedValueOnce(null).mockResolvedValueOnce(mockUser); + + const result = await findOpenIDUser({ + openidId: 'openid_123', + openidIssuer: 'https://issuer-b.example.com', + findUser: mockFindUser, + email: 'user@example.com', + }); + + expect(result).toEqual({ + user: null, + error: ErrorTypes.AUTH_FAILED, + migration: false, + }); + }); }); describe('User migration scenarios', () => { @@ -269,6 +517,7 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, email: 'user@example.com', }); @@ -278,6 +527,35 @@ describe('findOpenIDUser', () => { ...mockUser, provider: 'openid', openidId: 'openid_123', + openidIssuer: issuer, + }, + error: null, + migration: true, + }); + }); + + it('should persist issuer when migrating a user by email', async () => { + const mockUser: IUser = { + _id: newId(), + email: 'user@example.com', + username: 'testuser', + } as IUser; + + mockFindUser.mockResolvedValueOnce(null).mockResolvedValueOnce(mockUser); + + const result = await findOpenIDUser({ + openidId: 'openid_123', + openidIssuer: 'https://issuer.example.com', + findUser: mockFindUser, + email: 'user@example.com', + }); + + expect(result).toEqual({ + user: { + ...mockUser, + provider: 'openid', + openidId: 'openid_123', + openidIssuer: 'https://issuer.example.com', }, error: null, migration: true, @@ -297,6 +575,7 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, email: 'user@example.com', }); @@ -321,6 +600,7 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + 
openidIssuer: issuer, findUser: mockFindUser, email: 'user@example.com', }); @@ -388,12 +668,13 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, idOnTheSource: '', }); expect(mockFindUser).toHaveBeenCalledWith({ - $or: [{ openidId: 'openid_123' }], + $or: [{ openidId: 'openid_123', openidIssuer: issuer }], }); expect(result).toEqual({ user: null, @@ -420,6 +701,7 @@ describe('findOpenIDUser', () => { _id: newId(), provider: 'openid', openidId: 'openid_123', + openidIssuer: issuer, email: 'user@example.com', username: 'testuser', } as IUser; @@ -428,6 +710,7 @@ describe('findOpenIDUser', () => { const result = await findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, email: 'User@Example.COM', }); @@ -446,6 +729,7 @@ describe('findOpenIDUser', () => { await expect( findOpenIDUser({ openidId: 'openid_123', + openidIssuer: issuer, findUser: mockFindUser, }), ).rejects.toThrow('Database error'); @@ -478,3 +762,77 @@ describe('findOpenIDUser', () => { }); }); }); + +describe('getOpenIdEmail', () => { + const originalEmailClaim = process.env.OPENID_EMAIL_CLAIM; + + beforeEach(() => { + jest.clearAllMocks(); + delete process.env.OPENID_EMAIL_CLAIM; + }); + + afterAll(() => { + if (originalEmailClaim == null) { + delete process.env.OPENID_EMAIL_CLAIM; + return; + } + process.env.OPENID_EMAIL_CLAIM = originalEmailClaim; + }); + + it('uses the default claim order', () => { + expect( + getOpenIdEmail({ + email: 'user@example.com', + preferred_username: 'preferred@example.com', + upn: 'upn@example.com', + }), + ).toBe('user@example.com'); + }); + + it('returns undefined when default claims are absent', () => { + expect(getOpenIdEmail({})).toBeUndefined(); + }); + + it('skips empty fallback claims', () => { + expect( + getOpenIdEmail({ + email: '', + preferred_username: 'preferred@example.com', + upn: 'upn@example.com', + }), + ).toBe('preferred@example.com'); + }); + + it('uses OPENID_EMAIL_CLAIM when present', () => { + process.env.OPENID_EMAIL_CLAIM = 'custom_identifier'; + + expect( + getOpenIdEmail({ + email: 'user@example.com', + custom_identifier: 'agent@corp.example.com', + }), + ).toBe('agent@corp.example.com'); + }); + + it('falls back with a warning when OPENID_EMAIL_CLAIM is missing', () => { + process.env.OPENID_EMAIL_CLAIM = 'missing_identifier'; + + expect(getOpenIdEmail({ email: 'user@example.com' }, 'remoteAgentAuth')).toBe( + 'user@example.com', + ); + expect(logger.warn).toHaveBeenCalledWith( + expect.stringContaining('OPENID_EMAIL_CLAIM="missing_identifier" not present in userinfo'), + ); + }); + + it('falls back with a warning when OPENID_EMAIL_CLAIM is not a string', () => { + process.env.OPENID_EMAIL_CLAIM = 'groups'; + + expect(getOpenIdEmail({ email: 'user@example.com', groups: ['a'] })).toBe('user@example.com'); + expect(logger.warn).toHaveBeenCalledWith( + expect.stringContaining( + 'OPENID_EMAIL_CLAIM="groups" resolved to a non-string value (type: object)', + ), + ); + }); +}); diff --git a/packages/api/src/auth/openid.ts b/packages/api/src/auth/openid.ts index 12ff48b2a9be..e647e09f1714 100644 --- a/packages/api/src/auth/openid.ts +++ b/packages/api/src/auth/openid.ts @@ -1,6 +1,144 @@ import { logger } from '@librechat/data-schemas'; import { ErrorTypes } from 'librechat-data-provider'; import type { IUser, UserMethods } from '@librechat/data-schemas'; +import type { FilterQuery } from 'mongoose'; + +export type OpenIdEmailClaims = { + 
email?: unknown; + preferred_username?: unknown; + upn?: unknown; + [claim: string]: unknown; +}; + +export type OpenIdIssuerSource = { + iss?: string; + issuer?: string; + serverMetadata?: () => { issuer?: string } | undefined; +}; + +type OpenIdLookupField = 'openidId' | 'idOnTheSource'; +type OpenIdUserResolution = { user: IUser | null; error: string | null; migration: boolean }; + +const OPENID_DISCOVERY_PATH = '/.well-known/openid-configuration'; + +export function normalizeOpenIdIssuer(issuer: string | undefined): string | undefined { + const normalized = issuer?.trim().replace(/\/+$/, ''); + if (!normalized) return undefined; + if (!normalized.endsWith(OPENID_DISCOVERY_PATH)) return normalized; + return normalized.slice(0, -OPENID_DISCOVERY_PATH.length) || undefined; +} + +function getIssuerFromSource(source: OpenIdIssuerSource | null | undefined): string | undefined { + if (source == null) return undefined; + + const issuer = source.iss || source.serverMetadata?.()?.issuer || source.issuer; + return normalizeOpenIdIssuer(issuer); +} + +function getStringClaim(claims: OpenIdEmailClaims, claim: string): string | undefined { + const value = claims[claim]; + return typeof value === 'string' && value ? value : undefined; +} + +export function getOpenIdIssuer( + ...sources: Array +): string | undefined { + for (const source of sources) { + const issuer = getIssuerFromSource(source); + if (issuer) return issuer; + } + + return normalizeOpenIdIssuer(process.env.OPENID_ISSUER); +} + +function isLegacyOpenIdIssuer(openidIssuer: string | undefined): boolean { + const loginIssuer = normalizeOpenIdIssuer(process.env.OPENID_ISSUER); + return openidIssuer != null && loginIssuer != null && openidIssuer === loginIssuer; +} + +function getIssuerBoundConditions( + field: OpenIdLookupField, + value: string | undefined, + openidIssuer: string | undefined, +): FilterQuery[] { + if (!value || typeof value !== 'string') return []; + if (!openidIssuer) return []; + + const conditions: FilterQuery[] = [{ [field]: value, openidIssuer }]; + + if (isLegacyOpenIdIssuer(openidIssuer)) { + conditions.push({ + [field]: value, + $or: [{ openidIssuer: { $exists: false } }, { openidIssuer: null }, { openidIssuer: '' }], + }); + } + + return conditions; +} + +function isUserIssuerAllowed(user: IUser, openidIssuer: string | undefined): boolean { + if (!openidIssuer) return true; + + const userIssuer = normalizeOpenIdIssuer(user.openidIssuer); + if (userIssuer) return userIssuer === openidIssuer; + + return isLegacyOpenIdIssuer(openidIssuer); +} + +function resolveIssuerBoundUser( + user: IUser | null, + normalizedIssuer: string | undefined, + strategyName: string, + context: string, +): OpenIdUserResolution | null { + if (!user?.openidId) return null; + + if (!isUserIssuerAllowed(user, normalizedIssuer)) { + logger.warn( + `[${strategyName}] Rejected ${context} for ${user.email}: stored openidIssuer does not match token issuer`, + ); + return { user: null, error: ErrorTypes.AUTH_FAILED, migration: false }; + } + + if (normalizedIssuer && !normalizeOpenIdIssuer(user.openidIssuer)) { + user.openidIssuer = normalizedIssuer; + return { user, error: null, migration: true }; + } + + return null; +} + +/** + * Resolves the OpenID user identifier claim, honoring OPENID_EMAIL_CLAIM before + * email/preferred_username/upn fallbacks. 
+ */ +export function getOpenIdEmail( + claims: OpenIdEmailClaims | null | undefined, + strategyName = 'openidStrategy', +): string | undefined { + if (claims == null) return undefined; + + const claimKey = process.env.OPENID_EMAIL_CLAIM?.trim(); + if (claimKey) { + const value = claims[claimKey]; + if (typeof value === 'string' && value) return value; + if (value != null) { + logger.warn( + `[${strategyName}] OPENID_EMAIL_CLAIM="${claimKey}" resolved to a non-string value (type: ${typeof value}). Falling back to: email -> preferred_username -> upn.`, + ); + } else { + logger.warn( + `[${strategyName}] OPENID_EMAIL_CLAIM="${claimKey}" not present in userinfo. Falling back to: email -> preferred_username -> upn.`, + ); + } + } + + return ( + getStringClaim(claims, 'email') ?? + getStringClaim(claims, 'preferred_username') ?? + getStringClaim(claims, 'upn') + ); +} /** * Finds or migrates a user for OpenID authentication @@ -10,29 +148,36 @@ export async function findOpenIDUser({ openidId, findUser, email, + openidIssuer, idOnTheSource, strategyName = 'openid', }: { openidId: string; findUser: UserMethods['findUser']; email?: string; + openidIssuer?: string; idOnTheSource?: string; strategyName?: string; -}): Promise<{ user: IUser | null; error: string | null; migration: boolean }> { - const primaryConditions = []; - - if (openidId && typeof openidId === 'string') { - primaryConditions.push({ openidId }); - } - - if (idOnTheSource && typeof idOnTheSource === 'string') { - primaryConditions.push({ idOnTheSource }); - } +}): Promise { + const normalizedIssuer = normalizeOpenIdIssuer(openidIssuer); + const primaryConditions = [ + ...getIssuerBoundConditions('openidId', openidId, normalizedIssuer), + ...getIssuerBoundConditions('idOnTheSource', idOnTheSource, normalizedIssuer), + ]; let user = null; if (primaryConditions.length > 0) { user = await findUser({ $or: primaryConditions }); } + + const primaryIssuerResolution = resolveIssuerBoundUser( + user, + normalizedIssuer, + strategyName, + 'OpenID lookup', + ); + if (primaryIssuerResolution) return primaryIssuerResolution; + if (!user && email) { user = await findUser({ email }); logger.warn( @@ -54,12 +199,21 @@ export async function findOpenIDUser({ return { user: null, error: ErrorTypes.AUTH_FAILED, migration: false }; } + const emailIssuerResolution = resolveIssuerBoundUser( + user, + normalizedIssuer, + strategyName, + 'email fallback', + ); + if (emailIssuerResolution) return emailIssuerResolution; + if (user && !user.openidId) { logger.info( `[${strategyName}] Preparing user ${user.email} for migration to OpenID with sub: ${openidId}`, ); user.provider = 'openid'; user.openidId = openidId; + if (normalizedIssuer) user.openidIssuer = normalizedIssuer; return { user, error: null, migration: true }; } } diff --git a/packages/api/src/middleware/balance.ts b/packages/api/src/middleware/balance.ts index 19719680ec62..4b746820f43a 100644 --- a/packages/api/src/middleware/balance.ts +++ b/packages/api/src/middleware/balance.ts @@ -21,6 +21,27 @@ export interface BalanceMiddlewareOptions { upsertBalanceFields: (userId: string, fields: IBalanceUpdate) => Promise; } +const balanceUpdateLocks = new Map>(); + +async function runBalanceUpdate(userId: string, task: () => Promise): Promise { + const previous = balanceUpdateLocks.get(userId) ?? 
Promise.resolve(); + const current = previous.catch(() => undefined).then(task); + const tail = current.then( + () => undefined, + () => undefined, + ); + + balanceUpdateLocks.set(userId, tail); + + try { + await current; + } finally { + if (balanceUpdateLocks.get(userId) === tail) { + balanceUpdateLocks.delete(userId); + } + } +} + /** * Build an object containing fields that need updating * @param config - The balance configuration @@ -112,14 +133,16 @@ export function createSetBalanceConfig({ return next(); } const userId = typeof user._id === 'string' ? user._id : user._id.toString(); - const userBalanceRecord = await findBalanceByUser(userId); - const updateFields = buildUpdateFields(balanceConfig, userBalanceRecord, userId); + await runBalanceUpdate(userId, async () => { + const userBalanceRecord = await findBalanceByUser(userId); + const updateFields = buildUpdateFields(balanceConfig, userBalanceRecord, userId); - if (Object.keys(updateFields).length === 0) { - return next(); - } + if (Object.keys(updateFields).length === 0) { + return; + } - await upsertBalanceFields(userId, updateFields); + await upsertBalanceFields(userId, updateFields); + }); next(); } catch (error) { diff --git a/packages/api/src/middleware/index.ts b/packages/api/src/middleware/index.ts index b91fee2999bb..9fccfa6780d1 100644 --- a/packages/api/src/middleware/index.ts +++ b/packages/api/src/middleware/index.ts @@ -9,3 +9,4 @@ export { tenantContextMiddleware } from './tenant'; export { preAuthTenantMiddleware } from './preAuthTenant'; export * from './concurrency'; export * from './checkBalance'; +export * from './remoteAgentAuth'; diff --git a/packages/api/src/middleware/remoteAgentAuth.spec.ts b/packages/api/src/middleware/remoteAgentAuth.spec.ts new file mode 100644 index 000000000000..c0ae2ca090d5 --- /dev/null +++ b/packages/api/src/middleware/remoteAgentAuth.spec.ts @@ -0,0 +1,1533 @@ +import type { AppConfig, IUser, UserMethods } from '@librechat/data-schemas'; +import type { TAgentsEndpoint } from 'librechat-data-provider'; +import type { JwtPayload, VerifyOptions } from 'jsonwebtoken'; +import type { Request, Response } from 'express'; +import type { RequestInit } from 'undici'; + +jest.mock('@librechat/data-schemas', () => { + const actual = jest.requireActual('@librechat/data-schemas'); + return { + ...actual, + logger: { + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + debug: jest.fn(), + }, + }; +}); + +jest.mock('~/utils', () => ({ + isEnabled: jest.fn(() => false), + math: jest.fn(() => 60000), +})); + +const mockGetSigningKey = jest.fn(); +const mockGetSigningKeys = jest.fn(); + +jest.mock('jwks-rsa', () => + jest.fn(() => ({ getSigningKey: mockGetSigningKey, getSigningKeys: mockGetSigningKeys })), +); + +jest.mock('undici', () => ({ + fetch: jest.fn(), + ProxyAgent: jest.fn((proxy: string) => ({ proxy })), +})); + +jest.mock('jsonwebtoken', () => ({ + decode: jest.fn(), + verify: jest.fn(), +})); + +jest.mock('../auth/openid', () => { + const actual = jest.requireActual('../auth/openid') as typeof import('../auth/openid'); + return { ...actual, findOpenIDUser: jest.fn(actual.findOpenIDUser) }; +}); + +import jwt from 'jsonwebtoken'; +import jwksRsa from 'jwks-rsa'; +import { ProxyAgent, fetch as undiciFetch } from 'undici'; +import { logger, tenantStorage } from '@librechat/data-schemas'; +import { clearRemoteAgentAuthCache, createRemoteAgentAuth } from './remoteAgentAuth'; +import { findOpenIDUser, getOpenIdEmail } from '../auth/openid'; +import { math } from '~/utils'; + +const 
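The balance middleware change above serializes updates per user by chaining each task behind the previous one for that userId. Below is a simplified, self-contained model of that chaining; serializePerUser, slowTask, and demo are illustrative names, not the shipped code.

// A simplified model of the per-user chaining used by runBalanceUpdate above.
const locks = new Map<string, Promise<void>>();

async function serializePerUser(userId: string, task: () => Promise<void>): Promise<void> {
  const previous = locks.get(userId) ?? Promise.resolve();
  // Chain after the previous task, ignoring its failure so one error does not poison the queue.
  const current = previous.catch(() => undefined).then(task);
  const tail = current.then(
    () => undefined,
    () => undefined,
  );
  locks.set(userId, tail);
  try {
    await current;
  } finally {
    // Only clear the lock if no newer task has been queued behind this one.
    if (locks.get(userId) === tail) {
      locks.delete(userId);
    }
  }
}

async function demo(): Promise<void> {
  const order: string[] = [];
  const slowTask = async (label: string) => {
    order.push(`${label}:start`);
    await new Promise((resolve) => setTimeout(resolve, 10));
    order.push(`${label}:end`);
  };
  // Two concurrent updates for the same user run strictly one after the other,
  // so each sees a consistent balance record before deciding whether to write.
  await Promise.all([
    serializePerUser('user-1', () => slowTask('a')),
    serializePerUser('user-1', () => slowTask('b')),
  ]);
  console.log(order); // ['a:start', 'a:end', 'b:start', 'b:end'], never interleaved
}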
mockFetch = undiciFetch as jest.Mock; +const mockProxyAgent = ProxyAgent as unknown as jest.Mock; +const mockMath = math as jest.Mock; +const realFindOpenIDUser = + jest.requireActual('../auth/openid').findOpenIDUser; +const mockFindOpenIDUser = findOpenIDUser as jest.MockedFunction; +const FAKE_TOKEN = 'header.payload.signature'; +const BASE_ISSUER = 'https://auth.example.com/realms/test'; +const BASE_JWKS_URI = `${BASE_ISSUER}/protocol/openid-connect/certs`; +const ENV_KEYS = [ + 'OPENID_EMAIL_CLAIM', + 'OPENID_JWKS_URL', + 'OPENID_JWKS_URL_CACHE_ENABLED', + 'OPENID_JWKS_URL_CACHE_TIME', + 'PROXY', +] as const; + +type AgentAuthConfig = NonNullable['auth']>; +type OidcConfig = NonNullable; +type ApiKeyConfig = NonNullable; +type JwtVerifyCallback = (err: Error | null, payload?: JwtPayload) => void; +type FindUserValue = IUser['_id'] | string | null | { $exists: boolean }; +type FindUserCondition = { + _id?: FindUserValue; + email?: FindUserValue; + openidId?: FindUserValue; + openidIssuer?: FindUserValue; + idOnTheSource?: FindUserValue; + $or?: FindUserCondition[]; +}; +type FindUserQuery = FindUserCondition & { $or?: FindUserCondition[] }; + +const mockUser = { _id: 'uid123', id: 'uid123', email: 'agent@test.com' }; +const originalEnv = ENV_KEYS.reduce>( + (env, key) => ({ ...env, [key]: process.env[key] }), + { + OPENID_EMAIL_CLAIM: undefined, + OPENID_JWKS_URL: undefined, + OPENID_JWKS_URL_CACHE_ENABLED: undefined, + OPENID_JWKS_URL_CACHE_TIME: undefined, + PROXY: undefined, + }, +); + +function deleteEnvKeys() { + ENV_KEYS.forEach((key) => { + delete process.env[key]; + }); +} + +function restoreOriginalEnv() { + ENV_KEYS.forEach((key) => { + const value = originalEnv[key]; + if (value == null) { + delete process.env[key]; + return; + } + process.env[key] = value; + }); +} + +function makeRes() { + const json = jest.fn(); + const status = jest.fn().mockReturnValue({ json }); + return { res: { status, json } as unknown as Response, status, json }; +} + +function makeReq(headers: Record = {}): Partial { + return { headers }; +} + +function makeConfig( + oidcOverrides?: Partial, + apiKeyOverrides?: Partial, +): AppConfig { + return { + endpoints: { + agents: { + remoteApi: { + auth: { + oidc: { + enabled: true, + issuer: BASE_ISSUER, + jwksUri: BASE_JWKS_URI, + ...oidcOverrides, + }, + apiKey: { enabled: true, ...apiKeyOverrides }, + }, + }, + }, + }, + } as unknown as AppConfig; +} + +function makeUser(overrides: Partial = {}): IUser { + return { + ...mockUser, + role: 'user', + provider: 'openid', + openidId: 'sub123', + openidIssuer: BASE_ISSUER, + ...overrides, + } as IUser; +} + +function matchesValue(value: FindUserValue | undefined, condition: FindUserValue): boolean { + if (condition && typeof condition === 'object' && '$exists' in condition) { + return condition.$exists ? 
value != null : value == null; + } + return value === condition; +} + +function matchesCondition(user: IUser, condition: FindUserCondition): boolean { + if (condition.$or && !condition.$or.some((nested) => matchesCondition(user, nested))) { + return false; + } + if (condition._id !== undefined && !matchesValue(user._id, condition._id)) return false; + if (condition.email !== undefined && !matchesValue(user.email, condition.email)) return false; + if (condition.openidId !== undefined && !matchesValue(user.openidId, condition.openidId)) { + return false; + } + if ( + condition.openidIssuer !== undefined && + !matchesValue(user.openidIssuer, condition.openidIssuer) + ) { + return false; + } + if ( + condition.idOnTheSource !== undefined && + !matchesValue(user.idOnTheSource, condition.idOnTheSource) + ) { + return false; + } + return true; +} + +function makeFindUser(...users: IUser[]): jest.MockedFunction { + return jest.fn(async (query) => { + const userQuery = query as FindUserQuery; + const conditions = userQuery.$or ?? [userQuery]; + return ( + users.find((user) => conditions.some((condition) => matchesCondition(user, condition))) ?? + null + ); + }) as jest.MockedFunction; +} + +function makeDeps(appConfig: AppConfig = makeConfig()) { + return { + findUser: makeFindUser(makeUser()), + updateUser: jest.fn(), + getAppConfig: jest.fn().mockResolvedValue(appConfig), + apiKeyMiddleware: jest.fn((_req: unknown, _res: unknown, next: () => void) => next()), + }; +} + +function setupOidcMocks(payload: JwtPayload, kid: string | null = 'test-kid') { + (jwt.decode as jest.Mock).mockReturnValue({ header: kid == null ? {} : { kid }, payload }); + mockGetSigningKey.mockResolvedValue({ getPublicKey: () => 'public-key' }); + mockGetSigningKeys.mockResolvedValue([ + { kid: kid ?? 
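The makeFindUser matcher above evaluates the same query shapes that getIssuerBoundConditions produces. For a token whose issuer matches the login issuer (OPENID_ISSUER), the lookup also tolerates legacy records with no stored openidIssuer; the resulting query looks roughly like this (values are made up):

// Query shape for an issuer-bound openidId lookup in the legacy-issuer case (illustrative values).
const query = {
  $or: [
    { openidId: 'shared-sub', openidIssuer: 'https://auth.example.com/realms/test' },
    {
      openidId: 'shared-sub',
      $or: [{ openidIssuer: { $exists: false } }, { openidIssuer: null }, { openidIssuer: '' }],
    },
  ],
};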
'test-kid', getPublicKey: () => 'public-key' }, + ]); + (jwt.verify as jest.Mock).mockImplementation( + (_t: string, _k: string, _o: VerifyOptions, cb: JwtVerifyCallback) => cb(null, payload), + ); +} + +describe('createRemoteAgentAuth', () => { + let mockNext: jest.Mock; + + beforeEach(() => { + jest.clearAllMocks(); + deleteEnvKeys(); + clearRemoteAgentAuthCache(); + mockFetch.mockReset(); + mockMath.mockReturnValue(60000); + mockFindOpenIDUser.mockImplementation(realFindOpenIDUser); + mockNext = jest.fn(); + }); + + afterEach(() => { + deleteEnvKeys(); + clearRemoteAgentAuthCache(); + }); + + afterAll(() => { + restoreOriginalEnv(); + }); + + describe('when OIDC is not enabled', () => { + it('returns 401 when oidc.enabled is false and apiKey is disabled', async () => { + const deps = makeDeps(makeConfig({ enabled: false }, { enabled: false })); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)(makeReq() as Request, res, mockNext); + + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Authentication required' }); + expect(mockNext).not.toHaveBeenCalled(); + expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + }); + + it('falls back to apiKeyMiddleware when oidc.enabled is false', async () => { + const deps = makeDeps(makeConfig({ enabled: false })); + await createRemoteAgentAuth(deps)(makeReq() as Request, makeRes().res, mockNext); + expect(deps.apiKeyMiddleware).toHaveBeenCalled(); + }); + + it('loads base config before authentication when tenant context is absent', async () => { + const deps = makeDeps(makeConfig({ enabled: false })); + + await createRemoteAgentAuth(deps)(makeReq() as Request, makeRes().res, mockNext); + + expect(deps.getAppConfig).toHaveBeenCalledWith({ baseOnly: true }); + expect(deps.apiKeyMiddleware).toHaveBeenCalled(); + }); + + it('rejects API key auth when the resolved user tenant disables API keys', async () => { + const deps = makeDeps(makeConfig({ enabled: false }, { enabled: true })); + deps.getAppConfig.mockImplementation(async (options) => + options?.tenantId === 'tenant-oidc-only' + ? makeConfig({ enabled: true }, { enabled: false }) + : makeConfig({ enabled: false }, { enabled: true }), + ); + deps.apiKeyMiddleware.mockImplementation((req: unknown, _res: unknown, next) => { + (req as Request).user = makeUser({ tenantId: 'tenant-oidc-only' }); + next(); + }); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)(makeReq() as Request, res, mockNext); + + expect(deps.getAppConfig).toHaveBeenNthCalledWith(1, { baseOnly: true }); + expect(deps.getAppConfig).toHaveBeenNthCalledWith(2, { tenantId: 'tenant-oidc-only' }); + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + expect(mockNext).not.toHaveBeenCalled(); + }); + + it('loads tenant config from pre-auth tenant context before authentication', async () => { + const deps = makeDeps(makeConfig({ enabled: false })); + + await tenantStorage.run({ tenantId: 'tenant-preauth' }, async () => { + await createRemoteAgentAuth(deps)(makeReq() as Request, makeRes().res, mockNext); + }); + + expect(deps.getAppConfig).toHaveBeenCalledWith({ tenantId: 'tenant-preauth' }); + expect(deps.apiKeyMiddleware).toHaveBeenCalled(); + }); + + it('enforces tenant auth policy from pre-auth tenant context', async () => { + const deps = makeDeps(makeConfig({ enabled: false })); + deps.getAppConfig.mockImplementation(async (options) => + options?.tenantId === 'tenant-oidc-only' + ? 
makeConfig({ enabled: false }, { enabled: false }) + : makeConfig({ enabled: false }, { enabled: true }), + ); + const { res, status } = makeRes(); + + await tenantStorage.run({ tenantId: 'tenant-oidc-only' }, async () => { + await createRemoteAgentAuth(deps)(makeReq() as Request, res, mockNext); + }); + + expect(deps.getAppConfig).toHaveBeenCalledWith({ tenantId: 'tenant-oidc-only' }); + expect(status).toHaveBeenCalledWith(401); + expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + }); + + it('loads tenant config when an authenticated user is already present', async () => { + const deps = makeDeps(makeConfig({ enabled: false })); + const req = makeReq(); + req.user = makeUser({ tenantId: 'tenant-a' }); + + await createRemoteAgentAuth(deps)(req as Request, makeRes().res, mockNext); + + expect(deps.getAppConfig).toHaveBeenCalledWith({ tenantId: 'tenant-a' }); + expect(deps.apiKeyMiddleware).toHaveBeenCalled(); + }); + + it('falls back to apiKeyMiddleware when remoteApi auth is absent', async () => { + const deps = makeDeps({ endpoints: { agents: {} } } as unknown as AppConfig); + await createRemoteAgentAuth(deps)(makeReq() as Request, makeRes().res, mockNext); + expect(deps.apiKeyMiddleware).toHaveBeenCalled(); + }); + + it('returns 401 when remoteApi auth is absent and apiKey is disabled', async () => { + const config = { + endpoints: { agents: { remoteApi: { auth: { apiKey: { enabled: false } } } } }, + } as unknown as AppConfig; + const deps = makeDeps(config); + const { res, status } = makeRes(); + + await createRemoteAgentAuth(deps)(makeReq() as Request, res, mockNext); + + expect(status).toHaveBeenCalledWith(401); + expect(mockNext).not.toHaveBeenCalled(); + expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + }); + }); + + describe('when OIDC enabled but no Bearer token', () => { + it('falls back to apiKeyMiddleware when apiKey is enabled', async () => { + const deps = makeDeps(makeConfig({}, { enabled: true })); + const { res } = makeRes(); + + await createRemoteAgentAuth(deps)(makeReq() as Request, res, mockNext); + + expect(deps.apiKeyMiddleware).toHaveBeenCalled(); + expect(mockNext).toHaveBeenCalled(); + }); + + it('returns 401 when apiKey is disabled and no token present', async () => { + const deps = makeDeps(makeConfig({}, { enabled: false })); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)(makeReq() as Request, res, mockNext); + + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Bearer token required' }); + expect(mockNext).not.toHaveBeenCalled(); + }); + + it('returns 500 when OIDC is enabled without an issuer', async () => { + const deps = makeDeps(makeConfig({ issuer: undefined }, { enabled: false })); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)(makeReq() as Request, res, mockNext); + + expect(status).toHaveBeenCalledWith(500); + expect(json).toHaveBeenCalledWith({ error: 'Internal server error' }); + expect(mockNext).not.toHaveBeenCalled(); + }); + }); + + describe('when OIDC verification succeeds', () => { + it('sets req.user and calls next()', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com', exp: 9999999999 }); + const deps = makeDeps(); + const req = makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }); + const { res } = makeRes(); + + await createRemoteAgentAuth(deps)(req as Request, res, mockNext); + + expect(req.user).toMatchObject({ id: 'uid123', email: 'agent@test.com' }); + expect(mockNext).toHaveBeenCalledWith(); + 
expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + }); + + it('re-evaluates OIDC auth config after resolving the user tenant', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com', scope: 'remote_agent' }); + const deps = makeDeps(); + deps.findUser = makeFindUser(makeUser({ tenantId: 'tenant-strict' })); + deps.getAppConfig.mockImplementation(async (options) => + options?.tenantId === 'tenant-strict' + ? makeConfig({ audience: 'tenant-audience', scope: 'remote_agent' }, { enabled: false }) + : makeConfig({ audience: undefined, scope: undefined }, { enabled: true }), + ); + const req = makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }); + + await createRemoteAgentAuth(deps)(req as Request, makeRes().res, mockNext); + + expect(deps.getAppConfig).toHaveBeenNthCalledWith(1, { baseOnly: true }); + expect(deps.getAppConfig).toHaveBeenNthCalledWith(2, { tenantId: 'tenant-strict' }); + expect(jwt.verify).toHaveBeenCalledTimes(2); + expect((jwt.verify as jest.Mock).mock.calls[1][2]).toEqual( + expect.objectContaining({ audience: 'tenant-audience' }), + ); + expect(req.user).toMatchObject({ id: 'uid123', tenantId: 'tenant-strict' }); + expect(mockNext).toHaveBeenCalledWith(); + expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + }); + + it('rejects OIDC auth when the resolved user tenant requires a missing scope', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com', scope: 'openid profile' }); + const deps = makeDeps(); + deps.findUser = makeFindUser( + makeUser({ + tenantId: 'tenant-strict', + provider: undefined, + openidId: undefined, + openidIssuer: undefined, + }), + ); + deps.getAppConfig.mockImplementation(async (options) => + options?.tenantId === 'tenant-strict' + ? makeConfig({ scope: 'remote_agent' }, { enabled: false }) + : makeConfig({ scope: undefined }, { enabled: true }), + ); + const req = makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)(req as Request, res, mockNext); + + expect(deps.getAppConfig).toHaveBeenNthCalledWith(1, { baseOnly: true }); + expect(deps.getAppConfig).toHaveBeenNthCalledWith(2, { tenantId: 'tenant-strict' }); + expect(jwt.verify).toHaveBeenCalledTimes(2); + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + expect(req.user).toBeUndefined(); + expect(mockNext).not.toHaveBeenCalled(); + expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + expect(deps.updateUser).not.toHaveBeenCalled(); + }); + + it('rejects OIDC auth when the resolved user tenant disables OIDC', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + const deps = makeDeps(); + deps.findUser = makeFindUser( + makeUser({ + tenantId: 'tenant-api-key-only', + provider: undefined, + openidId: undefined, + openidIssuer: undefined, + }), + ); + deps.getAppConfig.mockImplementation(async (options) => + options?.tenantId === 'tenant-api-key-only' + ? 
makeConfig({ enabled: false }, { enabled: true }) + : makeConfig({}, { enabled: true }), + ); + const req = makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)(req as Request, res, mockNext); + + expect(deps.getAppConfig).toHaveBeenNthCalledWith(1, { baseOnly: true }); + expect(deps.getAppConfig).toHaveBeenNthCalledWith(2, { tenantId: 'tenant-api-key-only' }); + expect(jwt.verify).toHaveBeenCalledTimes(1); + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + expect(req.user).toBeUndefined(); + expect(mockNext).not.toHaveBeenCalled(); + expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + expect(deps.updateUser).not.toHaveBeenCalled(); + }); + + it('allows exact and normalized configured issuers when verifying JWTs', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + const issuer = `${BASE_ISSUER}/`; + const deps = makeDeps(makeConfig({ issuer })); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect((jwt.verify as jest.Mock).mock.calls[0][2]).toEqual( + expect.objectContaining({ issuer: [issuer, BASE_ISSUER] }), + ); + expect(mockNext).toHaveBeenCalledWith(); + }); + + it('accepts case-insensitive Bearer auth scheme', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + const deps = makeDeps(); + const req = makeReq({ authorization: `bearer ${FAKE_TOKEN}` }); + + await createRemoteAgentAuth(deps)(req as Request, makeRes().res, mockNext); + + expect(jwt.verify).toHaveBeenCalledWith( + FAKE_TOKEN, + 'public-key', + expect.any(Object), + expect.any(Function), + ); + expect(req.user).toMatchObject({ id: 'uid123', email: 'agent@test.com' }); + expect(mockNext).toHaveBeenCalledWith(); + }); + + it('trims trailing whitespace from Bearer tokens', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + const deps = makeDeps(); + const req = makeReq({ authorization: `Bearer ${FAKE_TOKEN} ` }); + + await createRemoteAgentAuth(deps)(req as Request, makeRes().res, mockNext); + + expect(jwt.verify).toHaveBeenCalledWith( + FAKE_TOKEN, + 'public-key', + expect.any(Object), + expect.any(Function), + ); + expect(req.user).toMatchObject({ id: 'uid123', email: 'agent@test.com' }); + expect(mockNext).toHaveBeenCalledWith(); + }); + + it('allows RSA, RSA-PSS, and ECDSA signing algorithms', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + const deps = makeDeps(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect((jwt.verify as jest.Mock).mock.calls[0][2]).toEqual( + expect.objectContaining({ + algorithms: expect.arrayContaining([ + 'RS256', + 'RS384', + 'RS512', + 'PS256', + 'PS384', + 'PS512', + 'ES256', + 'ES384', + 'ES512', + ]), + }), + ); + }); + + it('does not allow HMAC signing algorithms', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + const deps = makeDeps(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect((jwt.verify as jest.Mock).mock.calls[0][2]).toEqual( + expect.objectContaining({ + algorithms: expect.not.arrayContaining(['HS256', 'HS384', 'HS512']), + }), + ); + }); + + it('tries signing keys until a token without kid verifies', async () 
=> { + const payload = { sub: 'sub123', email: 'agent@test.com' }; + setupOidcMocks(payload, null); + mockGetSigningKeys.mockResolvedValue([ + { kid: 'first-kid', getPublicKey: () => 'first-public-key' }, + { kid: 'second-kid', getPublicKey: () => 'second-public-key' }, + ]); + (jwt.verify as jest.Mock).mockImplementation( + (_t: string, key: string, _o: VerifyOptions, cb: JwtVerifyCallback) => { + if (key === 'first-public-key') { + cb(new Error('invalid signature')); + return; + } + cb(null, payload); + }, + ); + + const deps = makeDeps(); + const req = makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }); + + await createRemoteAgentAuth(deps)(req as Request, makeRes().res, mockNext); + + expect(mockGetSigningKey).not.toHaveBeenCalled(); + expect(jwt.verify).toHaveBeenCalledTimes(2); + expect((jwt.verify as jest.Mock).mock.calls[0][1]).toBe('first-public-key'); + expect((jwt.verify as jest.Mock).mock.calls[1][1]).toBe('second-public-key'); + expect(req.user).toMatchObject({ id: 'uid123', email: 'agent@test.com' }); + expect(mockNext).toHaveBeenCalledWith(); + }); + + it('attaches federatedTokens with access_token and expires_at', async () => { + const exp = 1234567890; + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com', exp }); + const deps = makeDeps(); + const req = makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }); + + await createRemoteAgentAuth(deps)(req as Request, makeRes().res, mockNext); + + expect((req.user as IUser).federatedTokens).toEqual({ + access_token: FAKE_TOKEN, + expires_at: exp, + }); + }); + + it('omits federatedTokens expires_at when exp is absent', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + const deps = makeDeps(); + const req = makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }); + + await createRemoteAgentAuth(deps)(req as Request, makeRes().res, mockNext); + + expect((req.user as IUser).federatedTokens).toEqual({ + access_token: FAKE_TOKEN, + }); + }); + + it('falls back to apiKeyMiddleware when user is not found and apiKey is enabled', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + + const deps = makeDeps(makeConfig({}, { enabled: true })); + deps.findUser.mockResolvedValue(null); + const { res } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(deps.apiKeyMiddleware).toHaveBeenCalled(); + expect(logger.warn).toHaveBeenCalledWith( + expect.stringContaining('no matching LibreChat user'), + ); + }); + + it('returns 401 when user is not found and apiKey is disabled', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + + const deps = makeDeps(makeConfig({}, { enabled: false })); + deps.findUser.mockResolvedValue(null); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + expect(mockNext).not.toHaveBeenCalled(); + }); + + it('does not resolve a colliding legacy openidId from a different issuer', async () => { + const issuer = 'https://issuer-b.example.com'; + setupOidcMocks({ sub: 'shared-sub', email: 'attacker@example.com' }); + + const deps = makeDeps(makeConfig({ issuer }, { enabled: false })); + deps.findUser = makeFindUser( + makeUser({ + email: 'victim@example.com', + openidId: 'shared-sub', + openidIssuer: undefined, + }), + ); + const 
req = makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)(req as Request, res, mockNext); + + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + expect(req.user).toBeUndefined(); + expect(mockNext).not.toHaveBeenCalled(); + expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + }); + + it('returns 401 without user lookup when sub claim is missing', async () => { + setupOidcMocks({ email: 'agent@test.com' }); + + const deps = makeDeps(makeConfig({}, { enabled: true })); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + expect(findOpenIDUser).not.toHaveBeenCalled(); + expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + expect(mockNext).not.toHaveBeenCalled(); + }); + + it('returns 401 without API key fallback when OpenID user resolution is rejected', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + + const deps = makeDeps(makeConfig({}, { enabled: true })); + deps.findUser + .mockResolvedValueOnce(null) + .mockResolvedValueOnce(makeUser({ provider: 'google', openidId: undefined })); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + expect(mockNext).not.toHaveBeenCalled(); + }); + }); + + describe('when OIDC verification fails', () => { + beforeEach(() => { + (jwt.decode as jest.Mock).mockReturnValue({ header: { kid: 'kid' }, payload: {} }); + mockGetSigningKey.mockRejectedValue(new Error('Signing key not found')); + }); + + it('falls back to apiKeyMiddleware when apiKey is enabled', async () => { + const deps = makeDeps(makeConfig({}, { enabled: true })); + const { res } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(deps.apiKeyMiddleware).toHaveBeenCalled(); + expect(logger.error).not.toHaveBeenCalled(); + expect(logger.debug).toHaveBeenCalledWith( + expect.stringContaining('trying API key auth'), + expect.any(Error), + ); + }); + + it('returns 401 when apiKey is disabled', async () => { + const deps = makeDeps(makeConfig({}, { enabled: false })); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + expect(logger.error).toHaveBeenCalledWith( + expect.stringContaining('OIDC verification failed'), + expect.any(Error), + ); + }); + + it('returns 401 when JWT cannot be decoded', async () => { + (jwt.decode as jest.Mock).mockReturnValue(null); + const deps = makeDeps(makeConfig({}, { enabled: false })); + const { res, status } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: 'Bearer not.a.jwt' }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + }); + + it('returns 401 when verifier rejects a HMAC-signed token', async () => 
{ + const payload = { sub: 'sub123', email: 'agent@test.com' }; + setupOidcMocks(payload); + (jwt.decode as jest.Mock).mockReturnValue({ + header: { alg: 'HS256', kid: 'test-kid' }, + payload, + }); + (jwt.verify as jest.Mock).mockImplementation( + (_t: string, _k: string, _options: VerifyOptions, cb: JwtVerifyCallback) => { + cb(new Error('invalid algorithm')); + }, + ); + + const deps = makeDeps(makeConfig({}, { enabled: false })); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + expect((jwt.verify as jest.Mock).mock.calls[0][2]).toEqual( + expect.objectContaining({ + algorithms: expect.not.arrayContaining(['HS256', 'HS384', 'HS512']), + }), + ); + expect(mockNext).not.toHaveBeenCalled(); + }); + }); + + describe('unexpected errors', () => { + it('returns 500 when getAppConfig throws', async () => { + const deps = { + ...makeDeps(), + getAppConfig: jest.fn().mockRejectedValue(new Error('DB down')), + }; + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)(makeReq() as Request, res, mockNext); + + expect(status).toHaveBeenCalledWith(500); + expect(json).toHaveBeenCalledWith({ error: 'Internal server error' }); + expect(logger.error).toHaveBeenCalledWith( + expect.stringContaining('Unexpected error'), + expect.any(Error), + ); + }); + + it('returns 500 when findOpenIDUser throws', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + mockFindOpenIDUser.mockRejectedValue(new Error('DB error')); + + const deps = makeDeps(makeConfig({}, { enabled: true })); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(500); + expect(json).toHaveBeenCalledWith({ error: 'Internal server error' }); + expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + }); + }); + + describe('JWKS URI resolution', () => { + beforeEach(() => { + setupOidcMocks({ sub: 'sub123', email: 'a@b.com' }); + }); + + it('uses jwksUri from config and skips discovery', async () => { + const deps = makeDeps( + makeConfig({ + jwksUri: 'https://explicit-1.example.com/jwks', + issuer: 'https://issuer-explicit-1.example.com', + }), + ); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(mockFetch).not.toHaveBeenCalled(); + expect(mockNext).toHaveBeenCalled(); + }); + + it('uses OPENID_JWKS_URL env var and skips discovery', async () => { + process.env.OPENID_JWKS_URL = 'https://env.example.com/jwks'; + const deps = makeDeps( + makeConfig({ jwksUri: undefined, issuer: 'https://issuer-env-1.example.com' }), + ); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(mockFetch).not.toHaveBeenCalled(); + expect(mockNext).toHaveBeenCalled(); + }); + + it('rejects insecure OPENID_JWKS_URL values outside localhost', async () => { + process.env.OPENID_JWKS_URL = 'http://env.example.com/jwks'; + const deps = makeDeps( + makeConfig( + { jwksUri: undefined, issuer: 'https://issuer-env-insecure.example.com' }, + { enabled: false }, + ), + ); + const { res, status } = makeRes(); + + await createRemoteAgentAuth(deps)( + 
makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + expect(mockFetch).not.toHaveBeenCalled(); + expect(jwksRsa).not.toHaveBeenCalled(); + expect(mockNext).not.toHaveBeenCalled(); + }); + + it('rejects insecure issuer values outside localhost before JWKS resolution', async () => { + process.env.OPENID_JWKS_URL = 'https://env.example.com/jwks'; + const deps = makeDeps( + makeConfig( + { jwksUri: undefined, issuer: 'http://issuer-env-insecure.example.com' }, + { enabled: false }, + ), + ); + const { res, status } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + expect(mockFetch).not.toHaveBeenCalled(); + expect(jwksRsa).not.toHaveBeenCalled(); + expect(mockNext).not.toHaveBeenCalled(); + }); + + it('allows localhost HTTP OPENID_JWKS_URL values for development', async () => { + process.env.OPENID_JWKS_URL = 'http://localhost:8080/jwks'; + const deps = makeDeps( + makeConfig({ jwksUri: undefined, issuer: 'http://localhost:8080/realms/test' }), + ); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(mockFetch).not.toHaveBeenCalled(); + expect(jwksRsa).toHaveBeenCalledWith( + expect.objectContaining({ jwksUri: 'http://localhost:8080/jwks' }), + ); + expect(mockNext).toHaveBeenCalled(); + }); + + it('fetches discovery document when jwksUri and env var are absent', async () => { + const issuer = 'https://issuer-discovery-1.example.com'; + + mockFetch.mockResolvedValue({ + ok: true, + json: async () => ({ jwks_uri: `${issuer}/protocol/openid-connect/certs` }), + }); + + const deps = makeDeps(makeConfig({ jwksUri: undefined, issuer })); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(mockFetch).toHaveBeenCalledWith( + `${issuer}/.well-known/openid-configuration`, + expect.objectContaining({ signal: expect.any(Object) }), + ); + expect(mockNext).toHaveBeenCalled(); + }); + + it('normalizes discovery document issuer URL for discovery and issuer validation', async () => { + const issuer = 'https://issuer-discovery-url.example.com/.well-known/openid-configuration'; + const normalizedIssuer = 'https://issuer-discovery-url.example.com'; + + mockFetch.mockResolvedValue({ + ok: true, + json: async () => ({ jwks_uri: `${normalizedIssuer}/protocol/openid-connect/certs` }), + }); + + const deps = makeDeps(makeConfig({ jwksUri: undefined, issuer })); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(mockFetch).toHaveBeenCalledWith( + `${normalizedIssuer}/.well-known/openid-configuration`, + expect.objectContaining({ signal: expect.any(Object) }), + ); + expect((jwt.verify as jest.Mock).mock.calls[0][2]).toEqual( + expect.objectContaining({ issuer: [issuer, normalizedIssuer] }), + ); + expect(mockNext).toHaveBeenCalled(); + }); + + it('rejects insecure JWKS URIs returned by discovery', async () => { + const issuer = 'https://issuer-discovery-insecure.example.com'; + + mockFetch.mockResolvedValue({ + ok: true, + json: async () => ({ jwks_uri: 'http://issuer-discovery-insecure.example.com/jwks' }), + }); + + const deps = makeDeps(makeConfig({ jwksUri: undefined, issuer }, { enabled: false })); + const { 
res, status } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + expect(jwksRsa).not.toHaveBeenCalled(); + expect(mockNext).not.toHaveBeenCalled(); + }); + + it('uses a proxy agent for discovery when PROXY is set', async () => { + process.env.PROXY = 'http://proxy.example.com'; + const issuer = 'https://issuer-proxy.example.com'; + + mockFetch.mockResolvedValue({ + ok: true, + json: async () => ({ jwks_uri: `${issuer}/protocol/openid-connect/certs` }), + }); + + const deps = makeDeps(makeConfig({ jwksUri: undefined, issuer })); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(mockProxyAgent).toHaveBeenCalledWith('http://proxy.example.com'); + expect(mockFetch).toHaveBeenCalledWith( + `${issuer}/.well-known/openid-configuration`, + expect.objectContaining({ dispatcher: { proxy: 'http://proxy.example.com' } }), + ); + }); + + it('caches JWKS clients by resolved URI', async () => { + process.env.OPENID_JWKS_URL = 'https://env-one.example.com/jwks'; + const deps = makeDeps( + makeConfig({ jwksUri: undefined, issuer: 'https://issuer-env-cache.example.com' }), + ); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + process.env.OPENID_JWKS_URL = 'https://env-two.example.com/jwks'; + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(jwksRsa).toHaveBeenCalledWith( + expect.objectContaining({ jwksUri: 'https://env-one.example.com/jwks' }), + ); + expect(jwksRsa).toHaveBeenCalledWith( + expect.objectContaining({ jwksUri: 'https://env-two.example.com/jwks' }), + ); + }); + + it('honors disabled JWKS caching', async () => { + process.env.OPENID_JWKS_URL_CACHE_ENABLED = 'false'; + const deps = makeDeps( + makeConfig({ + jwksUri: 'https://cache-disabled.example.com/jwks', + issuer: 'https://issuer-cache-disabled.example.com', + }), + ); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(jwksRsa).toHaveBeenCalledTimes(2); + }); + + it('evicts the oldest JWKS client entry when the cache exceeds its limit', async () => { + const runRequest = async (index: number) => { + const deps = makeDeps( + makeConfig({ + jwksUri: `https://cache-${index}.example.com/jwks`, + issuer: `https://issuer-cache-${index}.example.com`, + }), + ); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + }; + + for (let i = 0; i < 101; i++) { + await runRequest(i); + } + await runRequest(0); + + expect(jwksRsa).toHaveBeenCalledTimes(102); + expect(jwksRsa).toHaveBeenLastCalledWith( + expect.objectContaining({ jwksUri: 'https://cache-0.example.com/jwks' }), + ); + }); + + it('prunes expired JWKS client entries before evicting valid entries', async () => { + const nowSpy = jest.spyOn(Date, 'now').mockReturnValue(0); + const runRequest = async (key: string) => { + const deps = makeDeps( + makeConfig({ + jwksUri: `https://cache-prune-${key}.example.com/jwks`, + issuer: 
`https://issuer-cache-prune-${key}.example.com`, + }), + ); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + }; + + try { + mockMath.mockReturnValue(120000); + await runRequest('keeper'); + + mockMath.mockReturnValue(1000); + for (let i = 0; i < 99; i++) { + await runRequest(`expired-${i}`); + } + + nowSpy.mockReturnValue(2000); + mockMath.mockReturnValue(60000); + await runRequest('new'); + + expect(jwksRsa).toHaveBeenCalledTimes(101); + + await runRequest('keeper'); + + expect(jwksRsa).toHaveBeenCalledTimes(101); + } finally { + nowSpy.mockRestore(); + } + }); + + it('aborts discovery when the timeout expires', async () => { + jest.useFakeTimers(); + + try { + mockFetch.mockImplementation( + (_url: string, init?: RequestInit) => + new Promise((_resolve, reject) => { + init?.signal?.addEventListener('abort', () => reject(new Error('aborted'))); + }), + ); + + const deps = makeDeps( + makeConfig( + { jwksUri: undefined, issuer: 'https://issuer-discovery-timeout.example.com' }, + { enabled: false }, + ), + ); + const { res, status } = makeRes(); + const request = createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + await Promise.resolve(); + jest.advanceTimersByTime(10000); + await request; + + expect(status).toHaveBeenCalledWith(401); + expect(mockNext).not.toHaveBeenCalled(); + } finally { + jest.useRealTimers(); + } + }); + + it('returns 401 when discovery returns non-ok response', async () => { + mockFetch.mockResolvedValue({ ok: false, status: 404, statusText: 'Not Found' }); + + const deps = makeDeps( + makeConfig( + { jwksUri: undefined, issuer: 'https://issuer-discovery-fail-1.example.com' }, + { enabled: false }, + ), + ); + const { res, status } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + }); + + it('returns 401 when discovery response is missing jwks_uri field', async () => { + mockFetch.mockResolvedValue({ ok: true, json: async () => ({}) }); + + const deps = makeDeps( + makeConfig( + { jwksUri: undefined, issuer: 'https://issuer-missing-jwks-1.example.com' }, + { enabled: false }, + ), + ); + const { res, status } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + }); + }); + + describe('email claim resolution', () => { + async function captureEmailArg(claims: JwtPayload): Promise { + setupOidcMocks(claims); + + const deps = makeDeps(); + deps.findUser + .mockResolvedValueOnce(null) + .mockResolvedValueOnce(makeUser({ email: getOpenIdEmail(claims), openidId: claims.sub })); + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + return (deps.findUser.mock.calls[1]?.[0] as { email?: string } | undefined)?.email; + } + + it('uses email claim', async () => { + expect(await captureEmailArg({ sub: 's1', email: 'user@example.com' })).toBe( + 'user@example.com', + ); + }); + + it('falls back to preferred_username when email is absent', async () => { + expect(await captureEmailArg({ sub: 's2', preferred_username: 'agent-user' })).toBe( + 'agent-user', + ); + }); + + it('falls back to preferred_username when email is empty', async () => { + expect( + await 
captureEmailArg({ + sub: 's2-empty', + email: '', + preferred_username: 'agent-user', + }), + ).toBe('agent-user'); + }); + + it('falls back to upn when email and preferred_username are absent', async () => { + expect(await captureEmailArg({ sub: 's3', upn: 'upn@corp.com' })).toBe('upn@corp.com'); + }); + + it('uses OPENID_EMAIL_CLAIM when configured', async () => { + process.env.OPENID_EMAIL_CLAIM = 'custom_identifier'; + + expect( + await captureEmailArg({ + sub: 's4', + email: 'user@example.com', + custom_identifier: 'agent@corp.example.com', + }), + ).toBe('agent@corp.example.com'); + }); + }); + + describe('update user and migration scenarios', () => { + it('persists openidId binding when migration is needed', async () => { + const mockUpdateUser = jest.fn().mockResolvedValue(undefined); + setupOidcMocks({ sub: 'sub-new', email: 'existing@test.com' }); + + const deps = { ...makeDeps(), updateUser: mockUpdateUser }; + deps.findUser.mockResolvedValueOnce(null).mockResolvedValueOnce( + makeUser({ + email: 'existing@test.com', + openidId: undefined, + provider: undefined, + role: 'user', + }), + ); + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(mockUpdateUser).toHaveBeenCalledWith( + mockUser.id, + expect.objectContaining({ + provider: 'openid', + openidId: 'sub-new', + openidIssuer: BASE_ISSUER, + }), + ); + expect(mockNext).toHaveBeenCalled(); + }); + + it('returns 500 when migration update fails', async () => { + const mockUpdateUser = jest.fn().mockRejectedValue(new Error('DB write failed')); + setupOidcMocks({ sub: 'sub-new', email: 'existing@test.com' }); + + const deps = { ...makeDeps(makeConfig({}, { enabled: true })), updateUser: mockUpdateUser }; + deps.findUser.mockResolvedValueOnce(null).mockResolvedValueOnce( + makeUser({ + email: 'existing@test.com', + openidId: undefined, + provider: undefined, + role: 'user', + }), + ); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(500); + expect(json).toHaveBeenCalledWith({ error: 'Internal server error' }); + expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + expect(mockNext).not.toHaveBeenCalled(); + }); + + it('does not call updateUser when migration is false and role exists', async () => { + const mockUpdateUser = jest.fn(); + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + + const deps = { ...makeDeps(), updateUser: mockUpdateUser }; + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(mockUpdateUser).not.toHaveBeenCalled(); + }); + }); + + describe('scope validation', () => { + it('returns 401 when required scope is missing from token', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com', scope: 'openid profile' }); + + const deps = makeDeps(makeConfig({ scope: 'remote_agent' }, { enabled: false })); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + }); + + it('does not fall back to apiKeyMiddleware when a verified token is missing scope', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com', 
scope: 'openid profile' }); + + const deps = makeDeps(makeConfig({ scope: 'remote_agent' }, { enabled: true })); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + expect(deps.apiKeyMiddleware).not.toHaveBeenCalled(); + expect(mockNext).not.toHaveBeenCalled(); + }); + + it('passes when required scope is present in token', async () => { + setupOidcMocks({ + sub: 'sub123', + email: 'agent@test.com', + scope: 'openid profile remote_agent', + }); + + const deps = makeDeps(makeConfig({ scope: 'remote_agent' })); + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(mockNext).toHaveBeenCalled(); + }); + + it('passes when scp is an array containing the required scope', async () => { + setupOidcMocks({ + sub: 'sub123', + email: 'agent@test.com', + scp: ['openid', 'remote_agent'], + }); + + const deps = makeDeps(makeConfig({ scope: 'remote_agent' })); + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(mockNext).toHaveBeenCalled(); + }); + + it('passes when all configured scopes are present', async () => { + setupOidcMocks({ + sub: 'sub123', + email: 'agent@test.com', + scp: ['openid', 'remote_agent', 'admin'], + }); + + const deps = makeDeps(makeConfig({ scope: 'remote_agent admin' })); + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(mockNext).toHaveBeenCalled(); + }); + + it('returns 401 when any configured scope is missing', async () => { + setupOidcMocks({ + sub: 'sub123', + email: 'agent@test.com', + scp: ['openid', 'remote_agent'], + }); + + const deps = makeDeps(makeConfig({ scope: 'remote_agent admin' }, { enabled: false })); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + expect(mockNext).not.toHaveBeenCalled(); + }); + + it('treats comma-separated configured scopes as one invalid scope token', async () => { + setupOidcMocks({ + sub: 'sub123', + email: 'agent@test.com', + scope: 'remote_agent admin', + }); + + const deps = makeDeps(makeConfig({ scope: 'remote_agent,admin' }, { enabled: false })); + const { res, status, json } = makeRes(); + + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + res, + mockNext, + ); + + expect(status).toHaveBeenCalledWith(401); + expect(json).toHaveBeenCalledWith({ error: 'Unauthorized' }); + expect(mockNext).not.toHaveBeenCalled(); + }); + + it('passes when scope is not configured (backward compat)', async () => { + setupOidcMocks({ sub: 'sub123', email: 'agent@test.com' }); + + const deps = makeDeps(makeConfig({ scope: undefined })); + await createRemoteAgentAuth(deps)( + makeReq({ authorization: `Bearer ${FAKE_TOKEN}` }) as Request, + makeRes().res, + mockNext, + ); + + expect(mockNext).toHaveBeenCalled(); + }); + }); +}); diff --git a/packages/api/src/middleware/remoteAgentAuth.ts b/packages/api/src/middleware/remoteAgentAuth.ts new file mode 100644 index 
000000000000..ab06583c4149 --- /dev/null +++ b/packages/api/src/middleware/remoteAgentAuth.ts @@ -0,0 +1,562 @@ +import jwt from 'jsonwebtoken'; +import jwksRsa from 'jwks-rsa'; +import { HttpsProxyAgent } from 'https-proxy-agent'; +import { ProxyAgent, fetch as undiciFetch } from 'undici'; +import { getTenantId, logger } from '@librechat/data-schemas'; +import { SystemRoles, isRemoteOidcUrlAllowed } from 'librechat-data-provider'; +import type { RequestHandler, Request, Response, NextFunction } from 'express'; +import type { AppConfig, IUser, UserMethods } from '@librechat/data-schemas'; +import type { Algorithm, JwtPayload, VerifyOptions } from 'jsonwebtoken'; +import type { TAgentsEndpoint } from 'librechat-data-provider'; +import type { RequestInit } from 'undici'; +import type { GetAppConfigOptions } from '../app/service'; +import { findOpenIDUser, getOpenIdEmail, normalizeOpenIdIssuer } from '../auth/openid'; +import { isEnabled, math } from '~/utils'; + +export interface RemoteAgentAuthDeps { + apiKeyMiddleware: RequestHandler; + findUser: UserMethods['findUser']; + updateUser: UserMethods['updateUser']; + getAppConfig: (options?: GetAppConfigOptions) => Promise; +} + +type OidcConfig = NonNullable< + NonNullable['auth']>['oidc'] +>; + +type AgentAuthConfig = NonNullable['auth']>; +type EnabledOidcConfig = OidcConfig & { issuer: string }; +type JwksCacheOptions = { + enabled: boolean; + maxAge: number; +}; +type CacheEntry = { + expiresAt: number; + promise: Promise; +}; +type ScopeClaim = string | string[] | undefined; +type UserResolution = + | { status: 'resolved'; user: IUser; updateData: Partial } + | { status: 'missing' } + | { status: 'rejected'; error: string }; + +const OIDC_DISCOVERY_TIMEOUT_MS = 10000; +const MAX_JWKS_CACHE_ENTRIES = 100; +const JWT_ALGORITHMS: Algorithm[] = [ + 'RS256', + 'RS384', + 'RS512', + 'PS256', + 'PS384', + 'PS512', + 'ES256', + 'ES384', + 'ES512', +]; +const jwksUriCache = new Map>(); +const jwksClientCache = new Map>(); + +export function clearRemoteAgentAuthCache(): void { + jwksUriCache.clear(); + jwksClientCache.clear(); +} + +function pruneExpiredEntries(cache: Map>): void { + const now = Date.now(); + for (const [key, entry] of cache) { + if (entry.expiresAt <= now) cache.delete(key); + } +} + +function setCacheEntry( + cache: Map>, + key: string, + entry: CacheEntry, +): void { + pruneExpiredEntries(cache); + + while (cache.size >= MAX_JWKS_CACHE_ENTRIES) { + const oldestKey = cache.keys().next().value; + if (oldestKey == null) break; + cache.delete(oldestKey); + } + + cache.set(key, entry); +} + +function extractBearer(authHeader: string | undefined): string | null { + const match = authHeader?.match(/^Bearer\s+(\S+)\s*$/i); + return match?.[1] ?? null; +} + +function splitScopes(scopes: string): string[] { + return scopes.trim().split(/\s+/).filter(Boolean); +} + +function getTokenScopes(scopeClaim: ScopeClaim): string[] { + if (Array.isArray(scopeClaim)) return scopeClaim.flatMap(splitScopes); + return scopeClaim ? splitScopes(scopeClaim) : []; +} + +function hasRequiredScopes(requiredScope: string | undefined, payload: JwtPayload): boolean { + if (!requiredScope) return true; + + const requiredScopes = splitScopes(requiredScope); + if (requiredScopes.length === 0) return true; + + const rawScope = (payload['scp'] ?? 
payload['scope']) as ScopeClaim; + const tokenScopes = getTokenScopes(rawScope); + return requiredScopes.every((scope) => tokenScopes.includes(scope)); +} + +function getJwksCacheOptions(): JwksCacheOptions { + return { + enabled: process.env.OPENID_JWKS_URL_CACHE_ENABLED + ? isEnabled(process.env.OPENID_JWKS_URL_CACHE_ENABLED) + : true, + maxAge: Math.max(math(process.env.OPENID_JWKS_URL_CACHE_TIME, 60000), 0), + }; +} + +function buildDiscoveryOptions(controller: AbortController): RequestInit { + const options: RequestInit = { signal: controller.signal }; + + if (process.env.PROXY) { + options.dispatcher = new ProxyAgent(process.env.PROXY); + } + + return options; +} + +function ensureRemoteOidcUrlAllowed(value: string, label: string): string { + if (isRemoteOidcUrlAllowed(value)) return value; + throw new Error(`${label} must use https:// unless targeting localhost`); +} + +async function discoverJwksUri(issuer: string): Promise { + const normalizedIssuer = normalizeOpenIdIssuer(ensureRemoteOidcUrlAllowed(issuer, 'OIDC issuer')); + if (!normalizedIssuer) throw new Error('OIDC issuer is required'); + + const discoveryUrl = `${normalizedIssuer}/.well-known/openid-configuration`; + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), OIDC_DISCOVERY_TIMEOUT_MS); + + try { + const res = await undiciFetch(discoveryUrl, buildDiscoveryOptions(controller)); + if (!res.ok) throw new Error(`OIDC discovery failed: ${res.status} ${res.statusText}`); + + const meta = (await res.json()) as { jwks_uri?: string }; + if (!meta.jwks_uri) throw new Error('OIDC discovery response missing jwks_uri'); + + return ensureRemoteOidcUrlAllowed(meta.jwks_uri, 'OIDC JWKS URI'); + } finally { + clearTimeout(timeout); + } +} + +async function resolveJwksUri( + oidcConfig: EnabledOidcConfig, + cacheOptions: JwksCacheOptions, +): Promise { + if (oidcConfig.jwksUri) return ensureRemoteOidcUrlAllowed(oidcConfig.jwksUri, 'OIDC JWKS URI'); + if (process.env.OPENID_JWKS_URL) { + return ensureRemoteOidcUrlAllowed(process.env.OPENID_JWKS_URL, 'OIDC JWKS URI'); + } + + if (!cacheOptions.enabled) return discoverJwksUri(oidcConfig.issuer); + + const cacheKey = oidcConfig.issuer; + const cached = jwksUriCache.get(cacheKey); + if (cached != null && cached.expiresAt > Date.now()) return cached.promise; + if (cached != null) jwksUriCache.delete(cacheKey); + + const promise = discoverJwksUri(oidcConfig.issuer).catch((err) => { + jwksUriCache.delete(cacheKey); + throw err; + }); + + setCacheEntry(jwksUriCache, cacheKey, { + promise, + expiresAt: Date.now() + cacheOptions.maxAge, + }); + return promise; +} + +function buildJwksClient(uri: string, cacheOptions: JwksCacheOptions): jwksRsa.JwksClient { + const options: jwksRsa.Options = { + cache: cacheOptions.enabled, + cacheMaxAge: cacheOptions.maxAge, + jwksUri: uri, + }; + + if (process.env.PROXY) { + options.requestAgent = new HttpsProxyAgent(process.env.PROXY); + } + + return jwksRsa(options); +} + +async function getJwksClient(oidcConfig: EnabledOidcConfig): Promise { + const cacheOptions = getJwksCacheOptions(); + const uri = await resolveJwksUri(oidcConfig, cacheOptions); + + if (!cacheOptions.enabled) return buildJwksClient(uri, cacheOptions); + + const cacheKey = uri; + const cached = jwksClientCache.get(cacheKey); + if (cached != null && cached.expiresAt > Date.now()) return cached.promise; + if (cached != null) jwksClientCache.delete(cacheKey); + + let client: jwksRsa.JwksClient; + try { + client = buildJwksClient(uri, cacheOptions); 
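The scope helpers above compare the configured scope string against the token's scope or scp claim, treating whitespace as the only separator. These functions are module-private, so the calls below are purely to show the comparison semantics; the claim shapes are invented.

hasRequiredScopes('remote_agent', { scope: 'openid profile remote_agent' }); // true
hasRequiredScopes('remote_agent admin', { scp: ['openid', 'remote_agent'] }); // false: 'admin' missing
hasRequiredScopes('remote_agent,admin', { scope: 'remote_agent admin' }); // false: comma is not a separator
hasRequiredScopes(undefined, { scope: 'openid' }); // true: no scope configured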
+ } catch (err) { + jwksClientCache.delete(cacheKey); + throw err; + } + + const promise = Promise.resolve(client); + + setCacheEntry(jwksClientCache, cacheKey, { + promise, + expiresAt: Date.now() + cacheOptions.maxAge, + }); + return promise; +} + +function getVerifyOptions(oidcConfig: EnabledOidcConfig): VerifyOptions { + const normalizedIssuer = normalizeOpenIdIssuer(oidcConfig.issuer); + const issuer = + normalizedIssuer && normalizedIssuer !== oidcConfig.issuer + ? [oidcConfig.issuer, normalizedIssuer] + : oidcConfig.issuer; + + return { + algorithms: JWT_ALGORITHMS, + issuer, + ...(oidcConfig.audience ? { audience: oidcConfig.audience } : {}), + }; +} + +function getConfigOptions(req: Request): GetAppConfigOptions { + const user = req.user as { tenantId?: string } | undefined; + const tenantId = user?.tenantId ?? getTenantId(); + + if (tenantId) return { tenantId }; + return { baseOnly: true }; +} + +function getUserConfigOptions(user: IUser): GetAppConfigOptions { + if (user.tenantId) return { tenantId: user.tenantId }; + return { baseOnly: true }; +} + +function isResolvedUserConfigScope(initialOptions: GetAppConfigOptions, user: IUser): boolean { + const userOptions = getUserConfigOptions(user); + return ( + initialOptions.tenantId === userOptions.tenantId && + initialOptions.baseOnly === userOptions.baseOnly + ); +} + +function getRemoteAuthConfig(config: AppConfig): AgentAuthConfig | undefined { + return config.endpoints?.agents?.remoteApi?.auth; +} + +function getEnabledOidcConfig( + authConfig: AgentAuthConfig | undefined, +): EnabledOidcConfig | undefined { + if (authConfig?.oidc?.enabled !== true) return undefined; + if (!authConfig.oidc.issuer) throw new Error('OIDC issuer is required when OIDC auth is enabled'); + return { ...authConfig.oidc, issuer: authConfig.oidc.issuer }; +} + +function isApiKeyEnabled(config: AppConfig): boolean { + return getRemoteAuthConfig(config)?.apiKey?.enabled !== false; +} + +async function enforceApiKeyTenantPolicy( + req: Request, + res: Response, + next: NextFunction, + getAppConfig: RemoteAgentAuthDeps['getAppConfig'], +): Promise { + const config = await getAppConfig(getConfigOptions(req)); + + if (!isApiKeyEnabled(config)) { + logger.warn('[remoteAgentAuth] API key rejected by resolved tenant auth policy'); + res.status(401).json({ error: 'Unauthorized' }); + return; + } + + next(); +} + +async function runApiKeyAuth( + req: Request, + res: Response, + next: NextFunction, + apiKeyMiddleware: RequestHandler, + getAppConfig: RemoteAgentAuthDeps['getAppConfig'], +): Promise { + let postAuth: Promise | undefined; + + const wrappedNext: NextFunction = (err?: unknown) => { + if (err != null) { + next(err); + return; + } + + postAuth = enforceApiKeyTenantPolicy(req, res, next, getAppConfig); + }; + + await Promise.resolve(apiKeyMiddleware(req, res, wrappedNext)); + if (postAuth) await postAuth; +} + +async function enforceOidcTenantPolicy( + token: string, + user: IUser, + initialOptions: GetAppConfigOptions, + getAppConfig: RemoteAgentAuthDeps['getAppConfig'], +): Promise { + if (isResolvedUserConfigScope(initialOptions, user)) return true; + + const config = await getAppConfig(getUserConfigOptions(user)); + const oidcConfig = getEnabledOidcConfig(getRemoteAuthConfig(config)); + if (!oidcConfig) { + logger.warn('[remoteAgentAuth] OIDC rejected by resolved tenant auth policy'); + return false; + } + + try { + const payload = await verifyOidcBearer(token, oidcConfig); + if (hasRequiredScopes(oidcConfig.scope, payload)) return true; + 
logger.warn( + `[remoteAgentAuth] Token missing resolved tenant required scope: ${oidcConfig.scope}`, + ); + } catch (err) { + logger.warn('[remoteAgentAuth] OIDC token rejected by resolved tenant auth policy:', err); + } + + return false; +} + +function verifyJwt( + token: string, + signingKey: jwksRsa.SigningKey, + oidcConfig: EnabledOidcConfig, +): Promise { + return new Promise((resolve, reject) => { + jwt.verify(token, signingKey.getPublicKey(), getVerifyOptions(oidcConfig), (err, payload) => { + if (err != null || payload == null) return reject(err ?? new Error('Empty payload')); + if (typeof payload === 'string') return reject(new Error('Invalid JWT payload')); + resolve(payload); + }); + }); +} + +async function verifyWithSigningKeys( + token: string, + signingKeys: jwksRsa.SigningKey[], + oidcConfig: EnabledOidcConfig, +): Promise { + let lastError: Error | null = null; + + for (const signingKey of signingKeys) { + try { + return await verifyJwt(token, signingKey, oidcConfig); + } catch (err) { + lastError = err instanceof Error ? err : new Error(String(err)); + } + } + + throw lastError ?? new Error('No signing keys in JWKS'); +} + +async function verifyOidcBearer(token: string, oidcConfig: EnabledOidcConfig): Promise { + ensureRemoteOidcUrlAllowed(oidcConfig.issuer, 'OIDC issuer'); + + const decoded = jwt.decode(token, { complete: true }); + if (decoded == null || typeof decoded === 'string') throw new Error('Invalid JWT: cannot decode'); + + const kid = typeof decoded.header?.kid === 'string' ? decoded.header.kid : undefined; + const client = await getJwksClient(oidcConfig); + + if (kid != null) { + const signingKey = await client.getSigningKey(kid); + return verifyJwt(token, signingKey, oidcConfig); + } + + return verifyWithSigningKeys(token, await client.getSigningKeys(), oidcConfig); +} + +async function resolveUser( + token: string, + payload: JwtPayload, + oidcConfig: EnabledOidcConfig, + findUser: UserMethods['findUser'], +): Promise { + if (typeof payload.sub !== 'string' || payload.sub.trim() === '') { + return { status: 'rejected', error: 'missing_sub_claim' }; + } + + const { user, error, migration } = await findOpenIDUser({ + findUser, + email: getOpenIdEmail(payload, 'remoteAgentAuth'), + openidId: payload.sub, + openidIssuer: oidcConfig.issuer, + idOnTheSource: payload['oid'] as string | undefined, + strategyName: 'remoteAgentAuth', + }); + + if (error != null) return { status: 'rejected', error }; + if (user == null) return { status: 'missing' }; + + user.id = String(user._id); + + const updateData: Partial = {}; + + if (migration) { + updateData.provider = 'openid'; + updateData.openidId = payload.sub; + updateData.openidIssuer = normalizeOpenIdIssuer(oidcConfig.issuer); + } + + if (!user.role) { + user.role = SystemRoles.USER; + updateData.role = SystemRoles.USER; + } + + user.federatedTokens = { + access_token: token, + ...(payload.exp != null ? { expires_at: payload.exp } : {}), + }; + return { status: 'resolved', user, updateData }; +} + +/** + * Factory for Remote Agent API auth middleware. + * + * Validates Bearer tokens against configured OIDC issuer via JWKS, + * falling back to API key auth when enabled. Stateless — no session dependency. 
+ * + * ```yaml + * endpoints: + * agents: + * remoteApi: + * auth: + * apiKey: + * enabled: false + * oidc: + * enabled: true + * issuer: + * jwksUri: + * audience: + * scope: + * ``` + */ +export function createRemoteAgentAuth({ + apiKeyMiddleware, + findUser, + updateUser, + getAppConfig, +}: RemoteAgentAuthDeps): RequestHandler { + return async (req: Request, res: Response, next: NextFunction) => { + try { + const initialConfigOptions = getConfigOptions(req); + const config = await getAppConfig(initialConfigOptions); + const authConfig = getRemoteAuthConfig(config); + const apiKeyEnabled = isApiKeyEnabled(config); + + if (authConfig?.oidc?.enabled !== true) { + if (apiKeyEnabled) { + await runApiKeyAuth(req, res, next, apiKeyMiddleware, getAppConfig); + return; + } + res.status(401).json({ error: 'Authentication required' }); + return; + } + + if (!authConfig.oidc.issuer) { + logger.error('[remoteAgentAuth] OIDC issuer is required when OIDC auth is enabled'); + res.status(500).json({ error: 'Internal server error' }); + return; + } + + const oidcConfig = getEnabledOidcConfig(authConfig); + if (!oidcConfig) throw new Error('OIDC issuer is required when OIDC auth is enabled'); + + const token = extractBearer(req.headers.authorization); + if (token == null) { + if (apiKeyEnabled) { + await runApiKeyAuth(req, res, next, apiKeyMiddleware, getAppConfig); + return; + } + res.status(401).json({ error: 'Bearer token required' }); + return; + } + + let payload: JwtPayload; + + try { + payload = await verifyOidcBearer(token, oidcConfig); + if (!hasRequiredScopes(oidcConfig.scope, payload)) { + logger.warn(`[remoteAgentAuth] Token missing required scope: ${oidcConfig.scope}`); + res.status(401).json({ error: 'Unauthorized' }); + return; + } + } catch (oidcErr) { + if (apiKeyEnabled) { + logger.debug('[remoteAgentAuth] OIDC verification failed; trying API key auth:', oidcErr); + await runApiKeyAuth(req, res, next, apiKeyMiddleware, getAppConfig); + return; + } + logger.error('[remoteAgentAuth] OIDC verification failed:', oidcErr); + res.status(401).json({ error: 'Unauthorized' }); + return; + } + + const userResolution = await resolveUser(token, payload, oidcConfig, findUser); + + if (userResolution.status === 'rejected') { + logger.warn(`[remoteAgentAuth] OpenID user rejected: ${userResolution.error}`); + res.status(401).json({ error: 'Unauthorized' }); + return; + } + + if (userResolution.status === 'missing') { + logger.warn('[remoteAgentAuth] OIDC token valid but no matching LibreChat user'); + if (apiKeyEnabled) { + await runApiKeyAuth(req, res, next, apiKeyMiddleware, getAppConfig); + return; + } + res.status(401).json({ error: 'Unauthorized' }); + return; + } + + if ( + !(await enforceOidcTenantPolicy( + token, + userResolution.user, + initialConfigOptions, + getAppConfig, + )) + ) { + res.status(401).json({ error: 'Unauthorized' }); + return; + } + + if (Object.keys(userResolution.updateData).length > 0) { + await updateUser(userResolution.user.id, userResolution.updateData); + } + + req.user = userResolution.user; + return next(); + } catch (err) { + logger.error('[remoteAgentAuth] Unexpected error', err); + res.status(500).json({ error: 'Internal server error' }); + return; + } + }; +} diff --git a/packages/data-provider/specs/config-schemas.spec.ts b/packages/data-provider/specs/config-schemas.spec.ts index d0b5dbb591f0..94f3cc8edb69 100644 --- a/packages/data-provider/specs/config-schemas.spec.ts +++ b/packages/data-provider/specs/config-schemas.spec.ts @@ -352,6 +352,104 @@ 
describe('agentsEndpointSchema', () => { expect(result.data).not.toHaveProperty('baseURL'); } }); + + it('allows explicitly disabled remote OIDC auth without issuer', () => { + const result = agentsEndpointSchema.safeParse({ + remoteApi: { + auth: { + oidc: { + enabled: false, + }, + }, + }, + }); + + expect(result.success).toBe(true); + }); + + it('requires a valid issuer when remote OIDC auth is enabled', () => { + const missingIssuer = agentsEndpointSchema.safeParse({ + remoteApi: { + auth: { + oidc: { + enabled: true, + }, + }, + }, + }); + const invalidIssuer = agentsEndpointSchema.safeParse({ + remoteApi: { + auth: { + oidc: { + enabled: true, + issuer: 'my-realm', + }, + }, + }, + }); + + expect(missingIssuer.success).toBe(false); + expect(invalidIssuer.success).toBe(false); + }); + + it('requires HTTPS remote OIDC issuer and JWKS URLs outside localhost', () => { + const insecureIssuer = agentsEndpointSchema.safeParse({ + remoteApi: { + auth: { + oidc: { + enabled: true, + issuer: 'http://auth.example.com', + }, + }, + }, + }); + const insecureJwksUri = agentsEndpointSchema.safeParse({ + remoteApi: { + auth: { + oidc: { + enabled: true, + issuer: 'https://auth.example.com', + jwksUri: 'http://auth.example.com/jwks', + }, + }, + }, + }); + + expect(insecureIssuer.success).toBe(false); + expect(insecureJwksUri.success).toBe(false); + }); + + it('allows localhost HTTP remote OIDC URLs for development', () => { + const result = agentsEndpointSchema.safeParse({ + remoteApi: { + auth: { + oidc: { + enabled: true, + issuer: 'http://localhost:8080/realms/test', + jwksUri: 'http://127.0.0.1:8080/realms/test/protocol/openid-connect/certs', + }, + }, + }, + }); + + expect(result.success).toBe(true); + }); + + it('requires space-separated remote OIDC scopes', () => { + const result = agentsEndpointSchema.safeParse({ + remoteApi: { + auth: { + oidc: { + enabled: true, + issuer: 'https://auth.example.com', + scope: 'remote_agent,admin', + }, + }, + }, + }); + + expect(result.success).toBe(false); + }); }); describe('azureEndpointSchema', () => { diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 72228d547f51..ca6ec7cdaeb5 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -420,6 +420,61 @@ export const defaultAgentCapabilities = [ AgentCapabilities.ocr, ]; +const LOCAL_REMOTE_OIDC_HOSTS = new Set(['localhost', '127.0.0.1', '[::1]']); + +export function isRemoteOidcUrlAllowed(value: string): boolean { + try { + const url = new URL(value); + if (url.protocol === 'https:') return true; + if (url.protocol !== 'http:') return false; + + const hostname = url.hostname.toLowerCase(); + return LOCAL_REMOTE_OIDC_HOSTS.has(hostname) || hostname.endsWith('.localhost'); + } catch { + return false; + } +} + +const remoteApiOidcUrlSchema = z + .string() + .url() + .refine(isRemoteOidcUrlAllowed, { message: 'must use https:// unless targeting localhost' }); + +const remoteApiOidcScopeSchema = z.string().refine((scope) => !scope.includes(','), { + message: 'scopes must be space-separated', +}); + +const remoteApiOidcSchema = z + .object({ + enabled: z.boolean().default(false), + issuer: remoteApiOidcUrlSchema.optional(), + audience: z.string().optional(), + jwksUri: remoteApiOidcUrlSchema.optional(), + scope: remoteApiOidcScopeSchema.optional(), + }) + .superRefine((oidc, ctx) => { + if (oidc.enabled === true && !oidc.issuer) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ['issuer'], + message: 'issuer is required 
when OIDC auth is enabled', + }); + } + }); + +const remoteApiAuthSchema = z.object({ + apiKey: z + .object({ + enabled: z.boolean().default(true), + }) + .optional(), + oidc: remoteApiOidcSchema.optional(), +}); + +const remoteApiSchema = z.object({ + auth: remoteApiAuthSchema.optional(), +}); + export const agentsEndpointSchema = baseEndpointSchema .omit({ baseURL: true }) .merge( @@ -436,6 +491,7 @@ export const agentsEndpointSchema = baseEndpointSchema .array(z.nativeEnum(AgentCapabilities)) .optional() .default(defaultAgentCapabilities), + remoteApi: remoteApiSchema.optional(), }), ) .default({ diff --git a/packages/data-schemas/src/methods/user.methods.spec.ts b/packages/data-schemas/src/methods/user.methods.spec.ts index 90f9100d1dbc..091cdf4f763e 100644 --- a/packages/data-schemas/src/methods/user.methods.spec.ts +++ b/packages/data-schemas/src/methods/user.methods.spec.ts @@ -37,6 +37,37 @@ beforeEach(async () => { await mongoose.connection.dropDatabase(); }); +describe('User schema indexes', () => { + test('should allow the same OpenID subject from different issuers', async () => { + await User.syncIndexes(); + + await User.create({ + email: 'issuer-a@example.com', + provider: 'openid', + openidId: 'shared-sub', + openidIssuer: 'https://issuer-a.example.com', + }); + + await expect( + User.create({ + email: 'issuer-b@example.com', + provider: 'openid', + openidId: 'shared-sub', + openidIssuer: 'https://issuer-b.example.com', + }), + ).resolves.toBeTruthy(); + + await expect( + User.create({ + email: 'issuer-a-duplicate@example.com', + provider: 'openid', + openidId: 'shared-sub', + openidIssuer: 'https://issuer-a.example.com', + }), + ).rejects.toThrow(/duplicate key/); + }); +}); + describe('User Methods - Database Tests', () => { describe('findUser', () => { test('should find user by exact email', async () => { diff --git a/packages/data-schemas/src/migrations/tenantIndexes.spec.ts b/packages/data-schemas/src/migrations/tenantIndexes.spec.ts index 6a0987d757b3..a2fb2fef0185 100644 --- a/packages/data-schemas/src/migrations/tenantIndexes.spec.ts +++ b/packages/data-schemas/src/migrations/tenantIndexes.spec.ts @@ -22,7 +22,7 @@ afterAll(async () => { }); describe('dropSupersededTenantIndexes', () => { - describe('with pre-existing single-field unique indexes (simulates upgrade)', () => { + describe('with pre-existing superseded unique indexes (simulates upgrade)', () => { beforeAll(async () => { const db = mongoose.connection.db!; @@ -35,6 +35,10 @@ describe('dropSupersededTenantIndexes', () => { { unique: true, sparse: true, name: 'facebookId_1' }, ); await users.createIndex({ openidId: 1 }, { unique: true, sparse: true, name: 'openidId_1' }); + await users.createIndex( + { openidId: 1, tenantId: 1 }, + { unique: true, name: 'openidId_1_tenantId_1' }, + ); await users.createIndex({ samlId: 1 }, { unique: true, sparse: true, name: 'samlId_1' }); await users.createIndex({ ldapId: 1 }, { unique: true, sparse: true, name: 'ldapId_1' }); await users.createIndex({ githubId: 1 }, { unique: true, sparse: true, name: 'githubId_1' }); @@ -139,6 +143,7 @@ describe('dropSupersededTenantIndexes', () => { expect(indexNames).not.toContain('email_1'); expect(indexNames).not.toContain('googleId_1'); expect(indexNames).not.toContain('openidId_1'); + expect(indexNames).not.toContain('openidId_1_tenantId_1'); expect(indexNames).toContain('_id_'); }); @@ -290,11 +295,12 @@ describe('dropSupersededTenantIndexes', () => { } }); - it('users collection lists all 9 OAuth ID indexes plus email', () => { - 
expect(SUPERSEDED_INDEXES.users).toHaveLength(9); + it('users collection lists all superseded OAuth and email indexes', () => { + expect(SUPERSEDED_INDEXES.users).toHaveLength(10); expect(SUPERSEDED_INDEXES.users).toContain('email_1'); expect(SUPERSEDED_INDEXES.users).toContain('googleId_1'); expect(SUPERSEDED_INDEXES.users).toContain('openidId_1'); + expect(SUPERSEDED_INDEXES.users).toContain('openidId_1_tenantId_1'); }); }); }); diff --git a/packages/data-schemas/src/migrations/tenantIndexes.ts b/packages/data-schemas/src/migrations/tenantIndexes.ts index 6536423ad2f7..e511bb17364b 100644 --- a/packages/data-schemas/src/migrations/tenantIndexes.ts +++ b/packages/data-schemas/src/migrations/tenantIndexes.ts @@ -17,6 +17,7 @@ const SUPERSEDED_INDEXES: Record = { 'googleId_1', 'facebookId_1', 'openidId_1', + 'openidId_1_tenantId_1', 'samlId_1', 'ldapId_1', 'githubId_1', diff --git a/packages/data-schemas/src/schema/user.ts b/packages/data-schemas/src/schema/user.ts index 78ea91ca8395..3e2756c3d402 100644 --- a/packages/data-schemas/src/schema/user.ts +++ b/packages/data-schemas/src/schema/user.ts @@ -74,6 +74,9 @@ const userSchema = new Schema( openidId: { type: String, }, + openidIssuer: { + type: String, + }, samlId: { type: String, }, @@ -178,6 +181,14 @@ const oAuthIdFields = [ ] as const; for (const field of oAuthIdFields) { + if (field === 'openidId') { + userSchema.index( + { openidId: 1, openidIssuer: 1, tenantId: 1 }, + { unique: true, partialFilterExpression: { openidId: { $exists: true } } }, + ); + continue; + } + userSchema.index( { [field]: 1, tenantId: 1 }, { unique: true, partialFilterExpression: { [field]: { $exists: true } } }, diff --git a/packages/data-schemas/src/types/user.ts b/packages/data-schemas/src/types/user.ts index 0bbf704014ff..d4ab3356f914 100644 --- a/packages/data-schemas/src/types/user.ts +++ b/packages/data-schemas/src/types/user.ts @@ -21,6 +21,7 @@ export interface IUser extends Document { discordId?: string; appleId?: string; plugins?: string[]; + openidIssuer?: string; twoFactorEnabled?: boolean; totpSecret?: string; backupCodes?: Array<{ From f20419d0b7b1a4031a52d529830812a8a69d5c5e Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Mon, 4 May 2026 23:06:10 -0400 Subject: [PATCH 03/12] =?UTF-8?q?=F0=9F=93=84=20feat:=20Rich=20File=20Arti?= =?UTF-8?q?fact=20Previews=20for=20DOCX,=20CSV,=20XLSX,=20PPTX=20(#12934)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📄 feat: Rich File Artifact Previews for DOCX, CSV, XLSX, PPTX Render office files emitted by tools as interactive previews in the artifact panel instead of raw extracted text. The backend produces a sanitized HTML document via mammoth (DOCX), SheetJS (CSV/XLSX/XLS/ODS), or yauzl-based slide extraction (PPTX) and ships it through the existing SSE attachment payload; the client routes it through the Sandpack `static` template's `index.html` slot — no new browser deps, no client-side blob fetch, no React renderer components. * 🔐 fix: Restrict data: URLs to in office HTML sanitizer Codex review on #12934 caught that `data:` lived in the global `allowedSchemes`, which meant a smuggled `` would survive sanitization. The Sandpack iframe sandbox does not gate `target="_blank"` navigations, so a click would open attacker-controlled HTML in a new tab. Scope `data:` to `` only via `allowedSchemesByTag` (mammoth inlines DOCX images as base64 `data:image/...` URIs — that path still works). 
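For reference, a minimal sketch of the scoping described above, assuming sanitize-html v2; the allowlists in the actual patch are broader than shown, and `sanitizeOfficeHtmlSketch` is an illustrative name rather than the function in the diff:

```ts
import sanitizeHtml from 'sanitize-html';

// Hedged sketch: `data:` is absent from the global scheme list, so a smuggled
// <a href="data:text/html,..."> loses its href, while <img src="data:image/..."> survives.
export function sanitizeOfficeHtmlSketch(dirty: string): string {
  return sanitizeHtml(dirty, {
    allowedTags: sanitizeHtml.defaults.allowedTags.concat(['img']),
    allowedAttributes: {
      a: ['href', 'target', 'rel'],
      img: ['src', 'alt'],
    },
    // Global schemes: no `data:` here.
    allowedSchemes: ['http', 'https', 'mailto'],
    // `data:` is scoped to <img> only — the path mammoth uses for inlined DOCX images.
    allowedSchemesByTag: {
      img: ['http', 'https', 'data'],
    },
  });
}
```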
Add a regression suite (`sanitizeOfficeHtml security`) with 8 cases covering: ` would have been injected as executable markup — direct XSS. The contract for office types is now HTML-or-null with no text fallback. Failed render returns null, the client's empty-text gate keeps the artifact off the panel, and the file falls back to the regular download UI (matching what PPTX already did). PDF and ODT still go through `extractDocument` because the client routes them to PLAIN_TEXT (which the markdown viewer escapes) or no artifact at all, so plain text is safe there. Test reshuffle: - `document` describe block now uses ODT/PDF for the legacy parseDocument-path tests (DOCX/XLSX/XLS/ODS bypass that path). - New "does NOT call parseDocument for office HTML types" test locks in the SEC contract for all four office HTML buckets. - "falls back to ..." tests rewritten as "returns null when ..." with explicit `parseDocumentCalls.length === 0` assertions to prove no text leaks back to the client. - New XSS regression test for the XLSX failure path. - Mock parseDocument failure-name match relaxed to `includes()` so ODT-named tests can use the same trigger. * 🧽 chore: Address follow-up review findings on PR #12934 Wraps up the 10-finding follow-up review. Two MAJOR + four MINOR + two NIT addressed; one NIT skipped after verifying it was a misread of the package.json structure. MAJOR - #1: Rewrite `renderOfficeHtml` JSDoc to document the HTML-or-null contract explicitly. The pre-fix doc described a text-fallback path that was the original XSS vector (commit b06f08a). A future maintainer trusting the stale doc could reintroduce the fallback. - #2: Replace byte-truncation of office HTML with a small "preview too large" banner document. Cutting at a UTF-8 boundary lands mid-tag (`
con\n…[truncated]`) and ships malformed markup to the iframe — unpredictable rendering, occasional broken layouts on DOCX with embedded images / wide spreadsheets. MINOR - #4: Wrap `readSlidesFromZip`'s `zipfile.close()` in try/catch so a close-time exception (mid-flight stream) doesn't replace the original error. Mirrors the defensive pattern in zipSafety.ts. - #5: Refactor PPTX extraction to use `yauzl.fromBuffer` directly, eliminating the temp-file write/unlink the safety pre-flight already proved unnecessary. Removes 4 unused imports (os, path, fs/promises, randomUUID). - #6: Extract `isPreviewOnlyArtifact(type)` to `client/src/utils/ artifacts.ts` so the membership check is unit-testable without mounting the full Artifacts component (Recoil + Sandpack + media query). 15 new test cases covering positive types, negative types, null/undefined, and unknown strings. NIT - #3: Remove dead `stripColorStyles` / `COLOR_PROPERTY_PATTERN` — unused (sanitizer's `allowedStyles` config handles color implicitly). - #7: Remove dead `!_lc_csv_label` worksheet property write. - #9: Remove no-op `exclusiveFilter: () => false` sanitize-html config. - #10: Type-narrow `PREVIEW_ONLY_ARTIFACT_TYPES` to `ReadonlySet` so the membership table is compile-time checked against the enum. SKIPPED - #8: Reviewer flagged `sanitize-html` as duplicated in devDeps and dependencies. The package has no `dependencies` section — only `devDependencies` and `peerDependencies`. Existing convention (mammoth, xlsx, yauzl, pdfjs-dist) is to appear in BOTH. Removing the devDep entry would break local test runs. Tests: packages/api 4406/4406, client artifacts 128/128. * 🪞 chore: Fix isPreviewOnlyArtifact test description parameter order Follow-up review nit on PR #12934. Jest's `it.each` substitutes `%s` positionally, and the table rows were `[type, expected]` while the description template read `'returns %s for type %s'` — outputting "returns application/vnd.librechat.docx-preview for type true" instead of the intended "type ... returns true". Reorder the template to match the column order. Test runner output now reads naturally: "type application/vnd.librechat.docx-preview returns true". Pure cosmetic — runtime behavior unchanged. * ✨ feat: Improve DOCX rendering and surface filename in panel header Two UX improvements based on hands-on use of the office preview pipeline. DOCX rendering — mammoth strips the navy banners, cell shading, and column layouts that direct-formatted docs apply (python-docx-style output is a common case). The flat `

X

` and bare `` (mammoth never emits ``), adds zebra striping, and treats the python-docx `

X

` section-heading idiom as a pseudo-h2 with a thin accent left border so document structure survives the round trip. Headings get a left accent or underline so they read as headings instead of just bold paragraphs. - Sanitizer's `allowedAttributes` opens `class` on the heading and block tags the styleMap and CSS heuristics rely on. `', + textFormat: 'text', + }), + ).toBe(TOOL_ARTIFACT_TYPES.PLAIN_TEXT); + }); + + it('downgrades office types to PLAIN_TEXT when textFormat is null (DB legacy)', () => { + /* Mongoose returns `null` for fields the document was saved without + * — covers attachments persisted before the textFormat field existed. */ + expect( + detectArtifactTypeFromFile({ + filename: 'workbook.xlsx', + type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + text: '
` it emits looks washed out next to the source. Three targeted compensations: - Style map promotes `Title`, `Subtitle`, `Heading 1` thru `Heading 6`, and `Quote` paragraphs to their semantic HTML equivalents (mammoth's default only handles Heading 1-6, missing Title/Subtitle/Quote). - Extra CSS scoped to `.lc-docx` gives the first table row sticky- looking header styling regardless of `
<table><tr><td>1</td></tr></table>
', + textFormat: null, + }), + ).toBe(TOOL_ARTIFACT_TYPES.PLAIN_TEXT); + }); + + /* Regression: an office attachment with no `text` AND no `textFormat` + * must NOT downgrade to PLAIN_TEXT (which has the lenient empty-text + * gate and would render as a half-empty panel card). The historical + * contract for office types with missing extraction is "fall through + * to the legacy download UI"; the security gate's empty-text exception + * preserves that. CI regression on PR #12934 — `LogContent.test.tsx` + * "falls back to the legacy download branch for an office file with + * no extracted text" was the canary. */ + it.each([ + ['report.docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'], + ['data.csv', 'text/csv'], + ['workbook.xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'], + ['slides.pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'], + ])( + '%s with no text and no textFormat returns null (preserves legacy download path)', + (filename, type) => { + expect(detectArtifactTypeFromFile({ filename, type, text: '' })).toBeNull(); + expect(detectArtifactTypeFromFile({ filename, type, text: undefined })).toBeNull(); + }, + ); + it('falls back to MIME when the extension is unknown', () => { expect(detectArtifactTypeFromFile({ filename: 'noext', type: 'text/html', text: 'x' })).toBe( TOOL_ARTIFACT_TYPES.HTML, @@ -114,11 +276,13 @@ describe('detectArtifactTypeFromFile', () => { }); it('returns null for unsupported types', () => { + /* PDFs have no rich preview path on the client (the artifact panel + * doesn't host a PDF viewer); they fall back to the download UI. */ expect( - detectArtifactTypeFromFile({ filename: 'output.csv', type: 'text/csv', text: 'a,b' }), + detectArtifactTypeFromFile({ filename: 'doc.pdf', type: 'application/pdf', text: 'x' }), ).toBeNull(); expect( - detectArtifactTypeFromFile({ filename: 'doc.pdf', type: 'application/pdf', text: 'x' }), + detectArtifactTypeFromFile({ filename: 'photo.jpg', type: 'image/jpeg', text: 'binary' }), ).toBeNull(); }); @@ -177,12 +341,11 @@ describe('detectArtifactTypeFromFile', () => { ); }); - it('does NOT route data formats to CODE (CSV / JSON / YAML / TOML / XML)', () => { + it('does NOT route data formats to CODE (JSON / YAML / TOML / XML)', () => { /* These get dedicated viewers in follow-ups; for now they fall - * through to inline rendering (return null). */ - expect( - detectArtifactTypeFromFile({ filename: 'data.csv', type: 'text/csv', text: 'a,b' }), - ).toBeNull(); + * through to inline rendering (return null). CSV is the exception: + * it routes to the SPREADSHEET preview bucket — covered separately + * in the "classifies %s by extension" suite above. */ expect( detectArtifactTypeFromFile({ filename: 'data.json', type: 'application/json', text: '{}' }), ).toBeNull(); @@ -397,7 +560,10 @@ describe('fileToArtifact', () => { }); it('returns null for unsupported types so callers can fall through', () => { - expect(fileToArtifact({ ...baseFile, filename: 'data.csv', type: 'text/csv' })).toBeNull(); + expect(fileToArtifact({ ...baseFile, filename: 'photo.jpg', type: 'image/jpeg' })).toBeNull(); + expect( + fileToArtifact({ ...baseFile, filename: 'doc.pdf', type: 'application/pdf' }), + ).toBeNull(); }); /* End-to-end test for the CODE bucket. 
The classification path is @@ -502,13 +668,18 @@ describe('fileToArtifact', () => { }); it('uses the caller-provided placeholder when a deferred-extraction file has no text', () => { - // Backend extractor returns null for pptx (deferred). Client sees - // `text === null` and substitutes the localized placeholder. + /* Plain-text and markdown remain on the lenient empty-text gate so the + * artifact card can render a "preparing preview…" placeholder while + * extraction is in flight. (Office preview buckets — DOCX, SPREADSHEET, + * PRESENTATION — use the strict gate instead: their renderers need + * server-rendered HTML, so the artifact stays unregistered until the + * `text` field arrives. See the strict-gate test in the + * `detectArtifactTypeFromFile` suite.) */ const artifact = fileToArtifact( { ...baseFile, - filename: 'slides.pptx', - type: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + filename: 'notes.txt', + type: 'text/plain', text: null as unknown as string, }, { placeholder: '_Coming soon_' }, @@ -521,13 +692,115 @@ describe('fileToArtifact', () => { it('falls back to empty content when no placeholder is supplied and text is missing', () => { const artifact = fileToArtifact({ ...baseFile, - filename: 'slides.pptx', - type: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + filename: 'notes.txt', + type: 'text/plain', text: undefined, }); + expect(artifact).not.toBeNull(); expect(artifact!.content).toBe(''); }); + it('returns null for office preview buckets without text (strict gate)', () => { + /* `textFormat: 'html'` is required for routing to land on the office + * bucket in the first place; without it the security gate downgrades + * to PLAIN_TEXT (which has the lenient empty-text gate). The strict + * empty-text gate then fires and the artifact stays unregistered. */ + expect( + fileToArtifact({ + ...baseFile, + filename: 'slides.pptx', + type: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + text: undefined, + textFormat: 'html', + }), + ).toBeNull(); + expect( + fileToArtifact({ + ...baseFile, + filename: 'report.docx', + type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + text: '', + textFormat: 'html', + }), + ).toBeNull(); + expect( + fileToArtifact({ + ...baseFile, + filename: 'data.csv', + type: 'text/csv', + text: undefined, + textFormat: 'html', + }), + ).toBeNull(); + }); + + it('builds a SPREADSHEET artifact for csv/xlsx with backend-rendered HTML in text', () => { + const csv = fileToArtifact({ + ...baseFile, + filename: 'data.csv', + type: 'text/csv', + text: '
<table><tr><td>1</td></tr></table>
', + textFormat: 'html', + }); + expect(csv).not.toBeNull(); + expect(csv!.type).toBe(TOOL_ARTIFACT_TYPES.SPREADSHEET); + expect(csv!.title).toBe('data.csv'); + expect(csv!.content).toContain(''); + + const xlsx = fileToArtifact({ + ...baseFile, + filename: 'workbook.xlsx', + type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + text: 'sheet', + textFormat: 'html', + }); + expect(xlsx).not.toBeNull(); + expect(xlsx!.type).toBe(TOOL_ARTIFACT_TYPES.SPREADSHEET); + }); + + it('builds a DOCX artifact for .docx with backend-rendered HTML in text', () => { + const artifact = fileToArtifact({ + ...baseFile, + filename: 'report.docx', + type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + text: '

<p>hello</p>

', + textFormat: 'html', + }); + expect(artifact).not.toBeNull(); + expect(artifact!.type).toBe(TOOL_ARTIFACT_TYPES.DOCX); + }); + + it('builds a PRESENTATION artifact for .pptx with backend-rendered HTML in text', () => { + const artifact = fileToArtifact({ + ...baseFile, + filename: 'deck.pptx', + type: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + text: '
<ol><li>Slide 1</li></ol>
', + textFormat: 'html', + }); + expect(artifact).not.toBeNull(); + expect(artifact!.type).toBe(TOOL_ARTIFACT_TYPES.PRESENTATION); + }); + + /* Codex P1 SECURITY companion: legacy/RAG path where `textFormat` is + * missing — the security gate downgrades to PLAIN_TEXT, which has the + * lenient empty-text gate. The text round-trips into the markdown + * viewer as escaped content rather than being injected as HTML. */ + it('downgrades to a PLAIN_TEXT artifact when an office file has text but no textFormat flag', () => { + const artifact = fileToArtifact({ + ...baseFile, + filename: 'report.docx', + type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + /* This is what mammoth.extractRawText returns for a RAG upload — + * not sanitized HTML, just the document's flowed text. The + * security gate ensures it's never injected as HTML. */ + text: 'Document body text from extractRawText. ', + }); + expect(artifact).not.toBeNull(); + expect(artifact!.type).toBe(TOOL_ARTIFACT_TYPES.PLAIN_TEXT); + expect(artifact!.content).toContain('` would then execute inside + * the Sandpack iframe. The fix returns null instead, and the + * client's empty-text gate keeps the artifact off the panel. */ + mockOfficeHtml.mockResolvedValueOnce(null); + const text = await extractCodeArtifactText( + Buffer.from('PKfake-xlsx'), + 'attack.xlsx', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'document', + ); + expect(text).toBeNull(); + expect(parseDocumentCalls.length).toBe(0); + }); + + it('substitutes a "preview too large" banner when HTML exceeds the cache cap', async () => { + /* Regression for review finding #2 on PR #12934. The earlier + * implementation byte-truncated the producer's HTML at the + * UTF-8 boundary, which would land mid-tag and ship malformed + * markup like `
con\n…[truncated]` to the iframe. + * The new behavior swaps the entire payload for a small valid + * HTML banner — under the cap by construction. */ + const huge = 'X'.repeat(MAX_TEXT_CACHE_BYTES + 5_000); + mockOfficeHtml.mockResolvedValueOnce(huge); + const text = await extractCodeArtifactText( + Buffer.from('PK'), + 'big.docx', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'document', + ); + expect(text).not.toBeNull(); + // Always under the cap. + expect(Buffer.byteLength(text!, 'utf-8')).toBeLessThanOrEqual(MAX_TEXT_CACHE_BYTES); + // Valid HTML doc, not byte-truncated markup. + expect(text!).toMatch(/^/); + expect(text!).toContain(''); + // No truncation marker (which would only appear on the legacy path). + expect(text!).not.toContain('…[truncated]'); + // User-facing banner content. + expect(text!).toContain('Preview exceeds the size limit'); + }); + + it('passes through HTML output unchanged when within the cache cap', async () => { + const small = '

<p>hello</p>

'; + mockOfficeHtml.mockResolvedValueOnce(small); + const text = await extractCodeArtifactText( + Buffer.from('PK'), + 'small.docx', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'document', + ); + expect(text).toBe(small); + }); + + it('falls back to parseDocument for pdf (HTML producer returns null for unsupported types)', async () => { + // Default mock returns null — the producer's own dispatcher would do the + // same for PDF since pdf has no HTML rendering. Whether we call it or + // skip it is an implementation detail; what matters is that PDF still + // routes to parseDocument and yields the docx-mock text. + const text = await extractCodeArtifactText( + Buffer.from('%PDF-1.4'), + 'doc.pdf', + 'application/pdf', + 'document', + ); + expect(text).toBe(docxText); + expect(parseDocumentCalls[0]?.originalname).toBe('doc.pdf'); + }); + + it('does not call the office HTML producer for plain .txt utf8-text', async () => { + const text = await extractCodeArtifactText( + Buffer.from('hello world\n', 'utf-8'), + 'note.txt', + 'text/plain', + 'utf8-text', + ); + expect(mockOfficeHtml).not.toHaveBeenCalled(); + expect(text).toBe('hello world\n'); + }); + + it('routes extensionless CSV-by-MIME (utf8-text category) through the office HTML path', async () => { + /* Regression for the Codex review on PR #12934. A tool emitting + * `data` with `text/csv` classifies as `utf8-text` (csv has no + * extension here, MIME is text/* which the classifier treats as + * utf8-text). The previous gate skipped the office-render branch + * because the extension wasn't in OFFICE_HTML_EXTENSIONS — so the + * raw CSV text shipped to the client, which routes by MIME to the + * SPREADSHEET bucket and expects a full HTML document. The fix + * shares the dispatcher's MIME-aware predicate so the gate fires + * here too. */ + mockOfficeHtml.mockResolvedValueOnce('
<table><tr><td>1</td></tr></table>
'); + const text = await extractCodeArtifactText( + Buffer.from('a,b\n1,2', 'utf-8'), + 'data', + 'text/csv', + 'utf8-text', + ); + expect(mockOfficeHtml).toHaveBeenCalledWith(expect.any(Buffer), 'data', 'text/csv'); + expect(text).toContain(''); + }); + + it.each([ + ['workbook', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'], + ['workbook', 'application/vnd.ms-excel'], + ['workbook', 'application/vnd.oasis.opendocument.spreadsheet'], + ['report', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'], + ['deck', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'], + ])('routes extensionless office files by MIME alone (%s, %s)', async (name, mime) => { + mockOfficeHtml.mockResolvedValueOnce('x'); + const category = mime.includes('presentation') + ? 'pptx' + : mime.startsWith('text/') + ? 'utf8-text' + : 'document'; + const text = await extractCodeArtifactText(Buffer.from('PK'), name, mime, category); + expect(mockOfficeHtml).toHaveBeenCalledWith(expect.any(Buffer), name, mime); + expect(text).toContain(''); + }); + + it('returns null (and falls back to download UI) when the producer rejects a zip bomb', async () => { + /* Defense-in-depth check for SEC review on PR #12934. When + * `bufferToOfficeHtml` throws `ZipBombError` (because a zip-bomb + * DOCX/XLSX/PPTX got through the compressed-size gate), the outer + * extractor must swallow it and return null — that signals to the + * code-output controller to register the file as a regular + * download instead of a panel artifact. Crucially, it must NOT + * fall back to `extractDocument` text either: the client would + * inject that text into `index.html` and a literal `after

'); + expect(out).not.toMatch(/ { + const out = await sanitizeOfficeHtml(''); + expect(out).not.toMatch(/onerror=/i); + expect(out).toContain('https://x.test/a.png'); + }); + + test('strips javascript: URLs from anchors', async () => { + const out = await sanitizeOfficeHtml('click'); + expect(out).not.toMatch(/javascript:/i); + // The text survives even when the href is dropped. + expect(out).toContain('click'); + }); + + test('rejects data: URLs in (only may use data:)', async () => { + /* The Sandpack iframe sandbox does NOT gate `target="_blank"` + * navigations. A surviving `` would + * open attacker-controlled HTML in a new tab on click. The + * sanitizer scopes `data:` to only — global schemes are + * http/https/mailto. Regression guard for the Codex review on + * PR #12934. */ + const out = await sanitizeOfficeHtml( + 'click', + ); + expect(out).not.toContain('data:'); + expect(out).not.toContain('script'); + expect(out).toContain('click'); + }); + + test('preserves data: URLs in (mammoth inlines DOCX images as base64)', async () => { + const tinyPng = + 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='; + const out = await sanitizeOfficeHtml(`dot`); + expect(out).toContain(tinyPng); + expect(out).toContain('alt="dot"'); + }); + + test('preserves http(s) and mailto links on anchors', async () => { + const out = await sanitizeOfficeHtml( + 'site mail', + ); + expect(out).toContain('https://example.com'); + expect(out).toContain('mailto:a@b.test'); + }); + + test('forces target=_blank rel=noopener on surviving anchors', async () => { + const out = await sanitizeOfficeHtml('link'); + expect(out).toContain('target="_blank"'); + expect(out).toContain('rel="noopener noreferrer"'); + }); + + test('strips '); + expect(out).not.toMatch(/ { + if (sanitizeHtmlModule) { + return sanitizeHtmlModule; + } + const mod = await import('sanitize-html'); + sanitizeHtmlModule = (mod.default ?? mod) as typeof import('sanitize-html'); + return sanitizeHtmlModule; +} + +/** + * Sanitize HTML produced by mammoth / SheetJS / our own pptx renderer for + * embedding in the Sandpack `static` iframe. Allows the structural and + * formatting tags those producers emit, plus inline `data:` images (mammoth + * inlines DOCX images as base64). Strips `">` smuggled through DOCX/PPTX would + * survive sanitization and open attacker-controlled HTML in a new + * tab when clicked — the Sandpack iframe sandbox doesn't gate + * `target="_blank"` navigations. */ + allowedSchemes: ['http', 'https', 'mailto'], + allowedSchemesByTag: { + img: ['http', 'https', 'data'], + }, + transformTags: { + a: (tagName, attribs) => ({ + tagName, + attribs: { ...attribs, rel: 'noopener noreferrer', target: '_blank' }, + }), + }, + /* sanitize-html runs `allowedAttributes` BEFORE per-attribute filtering, so + * `style` only survives where we explicitly allow it (td/th/col/colgroup). + * For those we still want to drop color declarations — defense against the + * mammoth/SheetJS hardcoded color-on-white issue. 
*/ + allowedStyles: { + '*': { + 'text-align': [/^left$|^right$|^center$|^justify$/], + 'font-weight': [/^[1-9]00$|^bold$|^normal$/], + 'font-style': [/^italic$|^normal$/], + width: [/^\d+(?:\.\d+)?(?:px|%|em|rem)?$/], + height: [/^\d+(?:\.\d+)?(?:px|%|em|rem)?$/], + 'min-width': [/^\d+(?:\.\d+)?(?:px|%|em|rem)?$/], + 'vertical-align': [/^top$|^middle$|^bottom$|^baseline$/], + }, + }, + }); +} + +/** + * Wrap a sanitized HTML body in a complete document with the styles we want + * inside the Sandpack iframe. The CSS palette uses `prefers-color-scheme` so + * the iframe inherits dark/light from its parent (Sandpack iframes inherit + * the prefers-color-scheme media query from the host document). + */ +function wrapAsDocument(bodyHtml: string, extraHeadHtml = ''): string { + return ` + + + + +Preview + + + +${bodyHtml} + +`; +} + +/* ============================================================================= + * DOCX → HTML + * ============================================================================= */ + +/** + * Style-map directives that broaden mammoth's default heading detection. + * Mammoth's stock map only promotes paragraphs whose ms-word style name + * matches `Heading 1`/`Heading 2`/etc. — useless for code-generated + * docs (e.g. python-docx output) that apply direct character formatting + * instead of named styles. The `Title`/`Subtitle` mappings catch the + * built-in title styles used by Word's Insert > Cover Page workflow; + * explicit `Heading 1` thru `Heading 6` mappings retain mammoth's + * defaults; and `:fresh` tells mammoth not to merge consecutive + * matching paragraphs into a single heading element. + */ +const DOCX_STYLE_MAP = [ + "p[style-name='Title'] => h1.lc-docx-title:fresh", + "p[style-name='Subtitle'] => h2.lc-docx-subtitle:fresh", + "p[style-name='Heading 1'] => h1:fresh", + "p[style-name='Heading 2'] => h2:fresh", + "p[style-name='Heading 3'] => h3:fresh", + "p[style-name='Heading 4'] => h4:fresh", + "p[style-name='Heading 5'] => h5:fresh", + "p[style-name='Heading 6'] => h6:fresh", + "p[style-name='Quote'] => blockquote:fresh", +]; + +/** + * CSS layered on top of `wrapAsDocument`'s base styles to give mammoth's + * flat output more visual structure. Mammoth strips the navy banners, + * cell shading, and column layouts that direct-formatted docs apply, so + * the source loses most of its presentation. We compensate with three + * targeted heuristics: + * + * 1. The first row of any `
` gets sticky-header styling + * regardless of `` (mammoth never emits ``). + * 2. Tables get alternating row stripes so dense data blocks stay + * scannable even without the source's hand-tuned shading. + * 3. A bold-only-child paragraph (`

<p><strong>X</strong></p>

`) is the + * python-docx idiom for a "section heading"; styled as a + * pseudo-h2 with a thin accent border so the document's structure + * survives the round-trip. + */ +const DOCX_EXTRA_CSS = ` +.lc-docx h1 { font-size: 1.5rem; font-weight: 700; margin: 1em 0 0.5em; padding-bottom: 0.3em; border-bottom: 2px solid var(--border); } +.lc-docx h2 { font-size: 1.2rem; font-weight: 600; margin: 1em 0 0.4em; padding-left: 0.6em; border-left: 3px solid var(--link); } +.lc-docx h3 { font-size: 1.05rem; font-weight: 600; margin: 0.8em 0 0.3em; color: var(--link); } +.lc-docx-title { text-align: center; border-bottom: none; } +.lc-docx-subtitle { text-align: center; border-left: none; padding-left: 0; color: var(--muted); font-style: italic; font-weight: 400; } +.lc-docx p { margin: 0.5em 0; } +.lc-docx p:has(> strong:only-child) { margin: 1em 0 0.4em; padding-left: 0.6em; border-left: 3px solid var(--link); font-size: 1.05rem; } +.lc-docx p:has(> strong:only-child) strong { font-weight: 600; } +.lc-docx table { width: 100%; max-width: 100%; } +.lc-docx td, .lc-docx th { white-space: normal; } +.lc-docx table tr:first-child td { background: var(--header-bg); font-weight: 600; } +.lc-docx table tr:nth-child(even):not(:first-child) td { background: var(--row-alt); } +.lc-docx ul, .lc-docx ol { margin: 0.5em 0; padding-left: 1.6em; } +.lc-docx li { margin: 0.15em 0; } +.lc-docx blockquote { border-left: 3px solid var(--border); color: var(--muted); margin: 0.8em 0; padding: 0.2em 0 0.2em 0.8em; } +`.trim(); + +/* ============================================================================= + * DOCX CDN renderer (high-fidelity path) + * + * Embeds the binary as base64 inside a self-contained HTML document and + * relies on `docx-preview` loaded from a pinned CDN URL with SRI + * integrity to render it inside the Sandpack iframe. The iframe is a + * real browser DOM — `docx-preview`'s "browser-first" design is a + * feature here, not a limitation: we get cell shading, run-level + * colors/fonts, headers/footers, columns, and inline images at no + * server-side parsing cost. + * + * Trade-offs vs the mammoth path: + * + Far higher visual fidelity (4/5 vs 2/5). + * + No server-side jsdom; no extra Node deps; iframe sandbox isolates + * any parser bug from the API process. + * − base64 inflates the binary by ~33%, so files above + * `MAX_DOCX_CDN_BINARY_BYTES` fall back to the mammoth path so the + * wrapped HTML doesn't blow the `MAX_TEXT_CACHE_BYTES` (512KB) cap + * on `attachment.text`. Telemetry should track how often we hit the + * fallback — if it's frequent, the next move is to lift the cap + * for office types specifically rather than embed via signed URL. + * + * Library + version pinning (jsdelivr SRI hashes computed at the + * version listed; refresh by `openssl dgst -sha384 -binary FILE | + * openssl base64 -A` on the file at the URL): + * docx-preview 0.3.7 — Apache-2.0, ~75KB UMD + * jszip 3.10.1 — MIT, ~98KB (peer dep of docx-preview) + * Both are pinned to specific minor versions; SRI guarantees the byte + * content can't change underneath us even if the version were ever + * republished. 
+ */ +const DOCX_PREVIEW_CDN = { + jszip: { + src: 'https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js', + integrity: 'sha384-+mbV2IY1Zk/X1p/nWllGySJSUN8uMs+gUAN10Or95UBH0fpj6GfKgPmgC5EXieXG', + }, + docxPreview: { + src: 'https://cdn.jsdelivr.net/npm/docx-preview@0.3.7/dist/docx-preview.min.js', + integrity: 'sha384-Fw+ZM2MtvxCe867uRzZY5GtGP+gs0NLvrlJS768RZWuKhOHMN4Fln3i3gMt1NSyQ', + }, +} as const; + +/** + * Maximum DOCX binary size (in bytes) we'll embed via the CDN-rendered + * path. Empirical headroom: with ~33% base64 inflation and ~5KB of + * wrapper boilerplate, 350KB of binary fits well under the 512KB + * `MAX_TEXT_CACHE_BYTES` cap on `attachment.text` with margin to spare. + */ +const MAX_DOCX_CDN_BINARY_BYTES = 350 * 1024; + +/** + * Mirror of `MAX_TEXT_CACHE_BYTES` from `~/files/code/extract` — the + * 512 KB ceiling that `attachment.text` is truncated to before hitting + * the SSE wire and the database. We mirror (rather than import) to + * avoid the cycle: `extract.ts` already imports `bufferToOfficeHtml` + * from this module. The dispatcher uses this to drop CDN-with-fallback + * docs that would exceed the cap and fall back to mammoth-only. + * + * If the upstream constant ever changes, update this value too. The + * `cap-mirrors-extract` test in `html.spec.ts` pins the relationship. + */ +const OFFICE_HTML_OUTPUT_CAP = 512 * 1024; + +/** + * Build the CDN-rendered HTML document for a DOCX. The base64 payload + * lives inside a `` substring inside the binary. + * + * The CSP locks the page down to the pinned CDN host: scripts only + * from jsdelivr (with SRI), no outbound `fetch`/`XHR`, no eval, images + * only `self`/`data:`/`blob:` (docx-preview uses `URL.createObjectURL` + * for inline images), styles inline (`docx-preview` injects per-doc + * styles into `` at render time). + */ +function buildDocxCdnDocument(base64: string, mammothFallbackHtml: string): string { + /* `connect-src` allows fetches to: + * - `'self'`: the sandpack-static-server origin the iframe runs in + * (covers any same-origin sourcemap fetches the bundler embedded) + * - `https://cdn.jsdelivr.net`: where the renderer script came from + * (DevTools fetches `.min.js.map` from the same host as the script + * to map minified line numbers; with `'none'` the console fills + * with CSP violations every time DevTools is open) + * + * Exfiltration risk is minimal: the iframe is cross-origin to the + * LibreChat host so an attacker can't read application data from it, + * and the only meaningful target ('self' or jsdelivr) isn't useful + * for exfiltrating slide content to a host the attacker controls. */ + const csp = [ + "default-src 'none'", + "script-src https://cdn.jsdelivr.net 'unsafe-inline'", + "style-src 'unsafe-inline'", + "img-src 'self' data: blob:", + 'font-src data:', + "connect-src 'self' https://cdn.jsdelivr.net", + "base-uri 'none'", + "form-action 'none'", + ].join('; '); + /* Body styling for the embedded mammoth fallback. The CDN-rendered + * path normally hides this content, but on air-gapped networks where + * `cdn.jsdelivr.net` is blocked the fallback handler reveals it so + * the user gets a readable preview instead of the legacy "Preview + * unavailable" message — Codex P2 review on PR #12934. We inline the + * shared `DOCX_EXTRA_CSS` rules here (rather than `` to a + * cross-origin sheet) because the CSP locks `style-src` to inline + * only and the wrapped mammoth output uses the same `.lc-docx` + * classes. */ + return ` + + + + + +Preview + + + + + +
Loading preview…
+ + + + +`; +} + +/** + * Run mammoth + sanitization to produce the inner DOCX body HTML + * (the `
` contents). Shared between the standalone mammoth + * path and the CDN path's fallback embedding so both render through + * the exact same pipeline — no diverging sanitization rules. Codex P2 + * review on PR #12934. + */ +async function renderMammothBody(buffer: Buffer): Promise { + const { convertToHtml } = await import('mammoth'); + const result = await convertToHtml({ buffer }, { styleMap: DOCX_STYLE_MAP }); + return sanitizeOfficeHtml(result.value); +} + +async function wordDocToHtmlViaCdn(buffer: Buffer, mammothFallbackBody: string): Promise { + return buildDocxCdnDocument(buffer.toString('base64'), mammothFallbackBody); +} + +async function wordDocToHtmlViaMammoth(buffer: Buffer): Promise { + const sanitized = await renderMammothBody(buffer); + return wrapAsDocument(`
<div class="lc-docx">${sanitized}</div>
`, DOCX_EXTRA_CSS); +} + +/** + * Whether the CDN-rendered DOCX path is enabled for this process. + * Operators on air-gapped or filtered corporate networks (where + * `cdn.jsdelivr.net` is blocked) should set + * `OFFICE_PREVIEW_DISABLE_CDN=true` so DOCX previews fall back to the + * server-side mammoth renderer instead of degrading to "Preview + * unavailable" in the iframe — Codex P2 review on PR #12934. + * + * Read at call time (rather than module load) so jest tests can flip + * the env in a `beforeEach` without `jest.resetModules()`. The cost is + * a single property access per render. Truthy values: `true`, `1`, + * `yes` (case-insensitive). Anything else (including unset) means + * "use the CDN path when the size dispatcher picks it." + */ +function isOfficePreviewCdnDisabled(): boolean { + const v = process.env.OFFICE_PREVIEW_DISABLE_CDN; + if (v == null) { + return false; + } + return /^(1|true|yes)$/i.test(v.trim()); +} + +/** + * Convert a `.docx` buffer to a sandboxed HTML document. Two render + * paths, chosen by file size: + * + * 1. **CDN-rendered (default for files ≤ 350 KB binary)**: embeds the + * binary as base64 and lets `docx-preview` render it inside the + * Sandpack iframe. High visual fidelity — preserves cell shading, + * run-level colors/fonts, headers/footers, columns, and images. + * The mammoth-rendered HTML is *also* embedded as a hidden + * `
` block; the iframe's bootstrap script + * reveals it whenever `docx-preview` fails to load (corporate + * firewall blocking jsdelivr, offline desktop, etc.) so air- + * gapped deployments still get a readable preview instead of a + * "Preview unavailable" message — Codex P2 review on PR #12934. + * 2. **Mammoth-only (fallback for larger files, files where the + * combined CDN-doc-with-fallback would blow the cache cap, OR + * when the CDN path is explicitly disabled via + * `OFFICE_PREVIEW_DISABLE_CDN=true`)**: server-side semantic HTML + * conversion. Lower fidelity (flat paragraphs, no shading) but + * produces compact output that fits the `MAX_TEXT_CACHE_BYTES` + * (512 KB) cap on `attachment.text` even for large documents, + * and works without external network. + * + * Both paths pre-flight through `assertSafeZipSize` so a zip-bomb DOCX + * is rejected before either renderer touches it — mammoth's internal + * extraction has no decompressed-size cap and would happily inflate a + * sub-1MB compressed bomb to 200+ MB of XML. See SEC review on PR + * #12934 for the original DoS finding. + */ +export async function wordDocToHtml(buffer: Buffer): Promise { + await assertSafeZipSize(buffer, { name: 'docx' }); + /* Opt-in LibreOffice path: highest fidelity for any DOCX feature + * mammoth/docx-preview can't reproduce (complex tables, drawing + * objects, charts, embedded objects). Returns null when disabled, + * binary missing, conversion failed, or output exceeds the cache + * cap — falls through to the existing pipeline in any of those + * cases so a misconfiguration doesn't break previews. */ + const lo = await tryLibreOfficePreview(buffer, 'docx', OFFICE_HTML_OUTPUT_CAP); + if (lo) { + return lo; + } + if (isOfficePreviewCdnDisabled() || buffer.length > MAX_DOCX_CDN_BINARY_BYTES) { + return wordDocToHtmlViaMammoth(buffer); + } + /* Render mammoth first so its sanitized output can be embedded as + * the iframe's air-gapped fallback. If the combined size would + * exceed the 512 KB cache cap, drop to mammoth-only — the user + * loses high-fidelity rendering but still sees the document. The + * size budget applies after mammoth runs because we can't know its + * output size from the binary size alone. */ + const mammothBody = await renderMammothBody(buffer); + const cdnDoc = await wordDocToHtmlViaCdn(buffer, mammothBody); + if (Buffer.byteLength(cdnDoc, 'utf-8') > OFFICE_HTML_OUTPUT_CAP) { + return wrapAsDocument(`
${mammothBody}
`, DOCX_EXTRA_CSS); + } + return cdnDoc; +} + +/* ============================================================================= + * XLSX / XLS / ODS → HTML (multi-sheet with pure-CSS tab strip) + * ============================================================================= */ + +/** A workbook sheet rendered to its `
` HTML and metadata. */ +interface RenderedSheet { + name: string; + html: string; + truncated: boolean; + totalRows: number; +} + +async function renderWorkbookSheets( + workbook: import('xlsx').WorkBook, + XLSX: typeof import('xlsx'), +): Promise { + const sheets: RenderedSheet[] = []; + for (const sheetName of workbook.SheetNames) { + const ws = workbook.Sheets[sheetName]; + const ref = ws['!ref']; + let totalRows = 0; + let truncated = false; + if (ref) { + const range = XLSX.utils.decode_range(ref); + totalRows = range.e.r - range.s.r + 1; + if (totalRows > SPREADSHEET_MAX_ROWS_PER_SHEET) { + const cappedEnd = range.s.r + SPREADSHEET_MAX_ROWS_PER_SHEET - 1; + ws['!ref'] = XLSX.utils.encode_range({ + s: range.s, + e: { r: cappedEnd, c: range.e.c }, + }); + truncated = true; + } + } + const html = XLSX.utils.sheet_to_html(ws, { editable: false, header: '', footer: '' }); + sheets.push({ name: sheetName, html, truncated, totalRows }); + } + return sheets; +} + +/** + * Build a self-contained HTML document with a pure-CSS tab strip for + * multi-sheet workbooks. Sheet switching uses radio inputs + `:checked ~` + * sibling selectors so no JavaScript is needed inside the iframe. + */ +function renderSpreadsheetHtml(sheets: RenderedSheet[]): string { + if (sheets.length === 0) { + return wrapAsDocument('
<p>This workbook contains no sheets.</p>
'); + } + + const tabStripCss = ` +.lc-sheet-tabs { display: flex; flex-wrap: wrap; gap: 2px; padding: 0 0 8px; border-bottom: 1px solid var(--border); margin-bottom: 12px; } +.lc-sheet-tab-radio { position: absolute; opacity: 0; pointer-events: none; } +.lc-sheet-tab-label { + cursor: pointer; + padding: 4px 12px; + border-radius: 6px 6px 0 0; + font-size: 0.8rem; + color: var(--muted); + border: 1px solid transparent; + border-bottom: none; + user-select: none; +} +.lc-sheet-tab-label:hover { color: var(--fg); background: var(--row-alt); } +.lc-sheet-panel { display: none; } +${sheets + .map( + (_, i) => + `#lc-sheet-tab-${i}:checked ~ .lc-sheet-tabs label[for="lc-sheet-tab-${i}"] { color: var(--fg); background: var(--tab-active-bg); border-color: var(--border); font-weight: 600; } +#lc-sheet-tab-${i}:checked ~ #lc-sheet-panel-${i} { display: block; }`, + ) + .join('\n')} +`.trim(); + + const radios = sheets + .map( + (_, i) => + ``, + ) + .join('\n'); + + const tabs = + sheets.length > 1 + ? `` + : ''; + + const panels = sheets + .map((s, i) => { + const banner = s.truncated + ? `
<div>Showing first ${SPREADSHEET_MAX_ROWS_PER_SHEET.toLocaleString()} of ${s.totalRows.toLocaleString()} rows. Download the file to see the rest.</div>`
+        : '';
+      return `<div class="lc-sheet-panel" id="lc-sheet-panel-${i}">${banner}${s.html}</div>`;
+    })
+    .join('\n');
+
+  return wrapAsDocument(`${radios}\n${tabs}\n${panels}`, tabStripCss);
+}
+
+function escapeHtml(input: string): string {
+  return input
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#39;');
+}
+
+/**
+ * Convert a workbook buffer (`.xlsx`, `.xls`, `.ods`) to a sandboxed HTML
+ * document. Each sheet is rendered as its own `<table>
` and the document + * carries a pure-CSS tab strip for sheet switching. + * + * Pre-flights ZIP-backed formats (`.xlsx`/`.ods`) through + * `assertSafeZipSize` to reject zip bombs before SheetJS reaches them. + * `.xls` is a binary CFB format, not a ZIP — it doesn't have the + * decompression-amplification attack surface, so the safety check is + * skipped for it (yauzl would reject it as malformed anyway). + */ +export async function excelSheetToHtml(buffer: Buffer): Promise { + /* Cheap magic-byte check so we only run the ZIP validator on actual + * ZIP-backed inputs. `.xls` (BIFF/CFB) starts with `D0 CF 11 E0`; ZIPs + * start with `PK\x03\x04`. Skipping the validator on a non-ZIP input + * also avoids confusing yauzl errors leaking out as ZipBombError. */ + if (buffer.length >= 4 && buffer[0] === 0x50 && buffer[1] === 0x4b) { + await assertSafeZipSize(buffer, { name: 'spreadsheet' }); + } + const XLSX = await import('xlsx'); + const workbook = XLSX.read(buffer, { type: 'buffer' }); + const sheets = await renderWorkbookSheets(workbook, XLSX); + /* The per-sheet HTML from `sheet_to_html` is generally well-formed but we + * still sanitize it (defense in depth). The chrome (tab strip, banners) is + * our own code and doesn't need sanitization. */ + const sanitizedSheets = await Promise.all( + sheets.map(async (s) => ({ ...s, html: await sanitizeOfficeHtml(s.html) })), + ); + return renderSpreadsheetHtml(sanitizedSheets); +} + +/* ============================================================================= + * CSV → HTML + * ============================================================================= */ + +/** Convert a CSV buffer to a sandboxed HTML document with a single table. */ +export async function csvToHtml(buffer: Buffer): Promise { + const XLSX = await import('xlsx'); + const text = buffer.toString('utf-8'); + /* `XLSX.read` with `type: 'string'` accepts CSV as well as XML/JSON + * formats; the default sheet name for CSV is `Sheet1` which we relabel + * below for a friendlier tab. */ + const workbook = XLSX.read(text, { type: 'string', raw: true }); + const sheets = await renderWorkbookSheets(workbook, XLSX); + // Single sheet for CSV — relabel to "CSV" for clarity, no tab strip emitted. + const sanitized = await Promise.all( + sheets.map(async (s) => ({ ...s, name: 'CSV', html: await sanitizeOfficeHtml(s.html) })), + ); + return renderSpreadsheetHtml(sanitized); +} + +/* ============================================================================= + * PPTX → slide-list HTML + * ============================================================================= */ + +interface PptxSlide { + number: number; + title: string; + body: string[]; +} + +/** + * Stream `ppt/slides/slide*.xml` out of a PPTX buffer using yauzl. Mirrors + * the anti-zip-bomb pattern used by `extractOdtContentXml` in `crud.ts` — + * counts real decompressed bytes mid-inflate so a falsified central-directory + * `uncompressedSize` cannot bypass the cap. Returns slides in slide-number + * order; ignores everything else in the archive. + * + * Uses `yauzl.fromBuffer` (no disk I/O) — the safety pre-flight in + * `assertSafeZipSize` already proved the buffer is well-formed enough + * to walk, and fromBuffer keeps the hot path memory-only. 
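+ *
+ * Illustrative result shape (hypothetical two-slide deck; field names
+ * match the function's return type):
+ *
+ *   const raw = await extractPptxSlideXml(buffer);
+ *   // → [{ number: 1, xml: '<p:sld>…</p:sld>' }, { number: 2, xml: '…' }]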
+ */
+function extractPptxSlideXml(buffer: Buffer): Promise<Array<{ number: number; xml: string }>> {
+  return new Promise((resolve, reject) => {
+    yauzl.fromBuffer(buffer, { lazyEntries: true }, (err, zipfile) => {
+      if (err) {
+        return reject(err);
+      }
+      if (!zipfile) {
+        return reject(new Error('Failed to open PPTX buffer'));
+      }
+
+      let settled = false;
+      const slides: Array<{ number: number; xml: string }> = [];
+      const finish = (error: Error | null) => {
+        if (settled) {
+          return;
+        }
+        settled = true;
+        try {
+          zipfile.close();
+        } catch {
+          /* zipfile.close() is best-effort — yauzl will throw if a
+           * stream is mid-flight. We've already settled the outer
+           * promise, so swallow this so the original error (if any)
+           * isn't replaced by a close-time exception. Mirrors
+           * `assertSafeZipSize`'s defensive pattern in zipSafety.ts. */
+        }
+        if (error) {
+          reject(error);
+        } else {
+          slides.sort((a, b) => a.number - b.number);
+          resolve(slides);
+        }
+      };
+
+      zipfile.readEntry();
+      zipfile.on('entry', (entry: yauzl.Entry) => {
+        const slideMatch = entry.fileName.match(/^ppt\/slides\/slide(\d+)\.xml$/);
+        if (!slideMatch) {
+          zipfile.readEntry();
+          return;
+        }
+        if (slides.length >= PPTX_MAX_SLIDES) {
+          // Cap reached — drain remaining entries silently.
+          zipfile.readEntry();
+          return;
+        }
+        const slideNumber = Number.parseInt(slideMatch[1], 10);
+        zipfile.openReadStream(entry, (streamErr, readStream) => {
+          if (streamErr || !readStream) {
+            return finish(streamErr ?? new Error('Failed to open slide stream'));
+          }
+
+          let totalBytes = 0;
+          const chunks: Buffer[] = [];
+
+          readStream.on('data', (chunk: Buffer) => {
+            totalBytes += chunk.byteLength;
+            if (totalBytes > PPTX_MAX_ENTRY_SIZE) {
+              readStream.destroy(
+                new Error(
+                  `PPTX slide${slideNumber}.xml exceeds the ${PPTX_MAX_ENTRY_SIZE / megabyte}MB decompressed limit`,
+                ),
+              );
+              return;
+            }
+            chunks.push(chunk);
+          });
+
+          readStream.on('end', () => {
+            slides.push({ number: slideNumber, xml: Buffer.concat(chunks).toString('utf-8') });
+            zipfile.readEntry();
+          });
+          readStream.on('error', (readErr: Error) => finish(readErr));
+        });
+      });
+
+      zipfile.on('end', () => finish(null));
+      zipfile.on('error', (zipErr: Error) => finish(zipErr));
+    });
+  });
+}
+
+/**
+ * Pull the visible text out of a slide XML. PPTX text lives in `<a:t>`
+ * elements, grouped by `<a:p>` (paragraph). We keep paragraph boundaries so
+ * the rendered slide preserves bullet/line structure.
+ *
+ * Pure regex (no DOMParser available in Node by default; the markup is
+ * tightly defined by the OOXML spec, so regex is robust enough for the text-
+ * only preview).
+ */
+function extractSlideText(xml: string): { title: string; body: string[] } {
+  const paragraphs: string[] = [];
+  const paragraphMatches = xml.matchAll(/<a:p[^>]*>([\s\S]*?)<\/a:p>/g);
+  for (const match of paragraphMatches) {
+    const innerXml = match[1];
+    const runs: string[] = [];
+    const runMatches = innerXml.matchAll(/<a:t[^>]*>([\s\S]*?)<\/a:t>/g);
+    for (const run of runMatches) {
+      runs.push(decodeXmlEntities(run[1]));
+    }
+    const text = runs.join('').trim();
+    if (text.length > 0) {
+      paragraphs.push(text);
+    }
+  }
+  const title = paragraphs[0] ?? '';
+  const body = paragraphs.slice(1);
+  return { title, body };
+}
+
+const XML_ENTITIES: Record<string, string> = {
+  '&lt;': '<',
+  '&gt;': '>',
+  '&amp;': '&',
+  '&quot;': '"',
+  '&apos;': "'",
+};
+function decodeXmlEntities(input: string): string {
+  return input.replace(/&(?:lt|gt|amp|quot|apos);/g, (m) => XML_ENTITIES[m] ?? m);
+}
+
+/* CSS for the slide-list view. 
Extracted to a constant so it can also + * be inlined into the CDN doc when the slide-list is embedded as a + * fallback there — Codex feedback / manual e2e on PR #12934 ("pptx- + * preview created an empty wrapper for the pptxgenjs deck"). */ +const PPTX_SLIDE_LIST_CSS = ` +.lc-pptx-list { display: flex; flex-direction: column; gap: 16px; padding: 0; margin: 0; list-style: none; } +.lc-pptx-slide { + border: 1px solid var(--border); + border-radius: 8px; + padding: 16px 20px; + background: var(--bg); + position: relative; +} +.lc-pptx-slide-number { + display: inline-block; + font-size: 0.7rem; + text-transform: uppercase; + letter-spacing: 0.06em; + color: var(--muted); + margin-bottom: 6px; +} +.lc-pptx-slide-title { font-size: 1.15rem; font-weight: 600; margin: 0 0 8px; } +.lc-pptx-slide-body { margin: 0; padding-left: 1.25em; } +.lc-pptx-slide-body li { margin: 0.2em 0; } +.lc-pptx-slide-empty { color: var(--muted); font-style: italic; margin: 0; } +`.trim(); + +/** + * Build the slide-list body HTML (just the `
<ol>` element, no document
+ * wrapper). Used both for standalone slide-list rendering and for
+ * embedding inside the CDN doc as an air-gap / parser-failure
+ * fallback — same pattern as the mammoth fallback inside the DOCX CDN
+ * doc. Returns an empty-state banner for zero-slide decks.
+ */
+function renderPptxSlidesBody(slides: PptxSlide[]): string {
+  if (slides.length === 0) {
+    return '<p class="lc-pptx-slide-empty">This presentation contains no readable slides.</p>';
+  }
+  const items = slides
+    .map((slide) => {
+      const titleHtml = slide.title
+        ? `<h2 class="lc-pptx-slide-title">${escapeHtml(slide.title)}</h2>`
+        : '';
+      const bodyHtml =
+        slide.body.length > 0
+          ? `<ul class="lc-pptx-slide-body">${slide.body.map((line) => `<li>${escapeHtml(line)}</li>`).join('')}</ul>`
+          : !slide.title
+            ? '<p class="lc-pptx-slide-empty">(empty slide)</p>'
+            : '';
+      return `<li class="lc-pptx-slide">
+  <span class="lc-pptx-slide-number">Slide ${slide.number}</span>
+  ${titleHtml}
+  ${bodyHtml}
+</li>`;
+    })
+    .join('\n');
+  return `<ol class="lc-pptx-list">${items}</ol>
    `; +} + +function renderPptxSlidesHtml(slides: PptxSlide[]): string { + return wrapAsDocument(renderPptxSlidesBody(slides), PPTX_SLIDE_LIST_CSS); +} + +/** + * Convert a `.pptx` buffer to a slide-list HTML document — text-only + * preview rendering each slide as a card (slide number, title, body + * bullets). Honest about NOT preserving complex visual layouts, + * charts, theming, or embedded media. Used as the safe fallback path + * when the CDN renderer is disabled, the binary exceeds the embed + * size cap, or the high-fidelity path can't be reached. + * + * Pre-flights through `assertSafeZipSize` so a zip-bomb PPTX can't blow + * up the slide-XML extraction pass. + */ +export async function pptxToSlideListHtml(buffer: Buffer): Promise { + await assertSafeZipSize(buffer, { name: 'pptx' }); + const rawSlides = await extractPptxSlideXml(buffer); + const slides: PptxSlide[] = rawSlides.map(({ number, xml }) => { + const { title, body } = extractSlideText(xml); + return { number, title, body }; + }); + return renderPptxSlidesHtml(slides); +} + +/* ============================================================================= + * PPTX CDN renderer (high-fidelity path) + * + * Mirrors the DOCX CDN architecture: embeds the binary as base64 and + * lets `pptx-preview` (loaded from a pinned jsdelivr URL with SRI) do + * the rendering inside the Sandpack iframe. The library is ISC-licensed + * (npm package permits commercial use) and ships a UMD bundle that + * exposes a global `pptxPreview.init(container, options)` → + * `previewer.preview(arrayBuffer)` API per the package README. + * + * Trade-offs vs the slide-list path: + * + Far higher visual fidelity — preserves slide layouts, theme + * colors, fonts, basic shape positioning, embedded images. + * + No server-side rendering cost; iframe sandbox isolates parser + * bugs from the API process. + * − UMD bundle is ~1.36 MB (contains echarts, tslib, lodash, uuid, + * jszip). Loaded once per iframe lifetime; browser-cached after. + * − Renderer hasn't been broadly browser-tested by us across the + * full spectrum of decks. See PR #12934 commit notes — fallback + * paths cover the cases where we hit a class of files that don't + * render: client-side "Preview unavailable" UX (CDN unreachable + * or `typeof pptxPreview === 'undefined'`) and the + * `OFFICE_PREVIEW_DISABLE_CDN` env-var hatch (forces the slide- + * list server-side). + */ +const PPTX_PREVIEW_CDN = { + pptxPreview: { + src: 'https://cdn.jsdelivr.net/npm/pptx-preview@1.0.7/dist/pptx-preview.umd.js', + integrity: 'sha384-CwntHHT2FbwZXuCmbf6K93YaEB9xRVVLaqFJ7pdMykeQABb/3MA0sbt2lGbgi1Mr', + }, +} as const; + +/** + * Same 350 KB binary cap as DOCX — keeps the base64-inflated wrapped + * HTML under `MAX_TEXT_CACHE_BYTES` (512 KB) on `attachment.text`. + * PPTX files often exceed this once embedded media is involved; the + * dispatcher's slide-list fallback handles the larger cases. + */ +const MAX_PPTX_CDN_BINARY_BYTES = 350 * 1024; + +/** + * Build the CDN-rendered HTML document for a PPTX. Same wrapper shape + * and CSP as the DOCX equivalent — see `buildDocxCdnDocument` for the + * security-relevant rationale on script-src/connect-src/base-uri/etc. + * + * Slides need a fixed-size container for `pptx-preview` to render into + * — it computes per-slide layouts against the configured width/height + * rather than against the iframe's variable size. We use 960×540 + * (16:9 standard) and let CSS scale the rendered output to fit the + * iframe via `transform: scale(...)`. 
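+ *
+ * A sketch of that fit-to-width scaling (element names assumed here;
+ * the real fit logic is inlined in the returned document's bootstrap
+ * script):
+ *
+ *   const scale = Math.min(1, container.clientWidth / 960);
+ *   slidesWrapper.style.transform = `scale(${scale})`;
+ *   slidesWrapper.style.transformOrigin = 'top left';
+ *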
The slides scroll vertically + * once the renderer paints them. + */ +function buildPptxCdnDocument(base64: string, slideListFallbackBody: string): string { + /* PPTX-specific CSP relaxations vs DOCX: + * - `worker-src blob:` — pptx-preview's bundled echarts dep spins up + * Web Workers via blob: URLs for chart rendering. Without this, + * workers default to `default-src 'none'`, get blocked, and + * echarts throws an unhandled-rejection deep inside its async + * pipeline that we can't catch from the bootstrap script. The + * visible symptom is a black iframe with the renderer half- + * started. Allowing blob:-only workers is the minimum surface to + * unblock rendering without permitting arbitrary worker URLs. */ + /* `connect-src` allows fetches to: + * - `'self'`: the sandpack-static-server origin the iframe runs in + * (covers any same-origin fetches pptx-preview or its bundled + * echarts make at render time, plus same-origin sourcemap loads) + * - `https://cdn.jsdelivr.net`: where the renderer script came from + * (DevTools fetches `.min.js.map` from there to map minified + * line numbers; with `'none'` the console fills with CSP + * violations every time DevTools is open) + * + * Exfiltration risk is minimal: the iframe is cross-origin to the + * LibreChat host so an attacker can't read application data, and + * the only meaningful targets ('self' or jsdelivr) aren't useful + * for exfiltrating slide content to a host the attacker controls. */ + const csp = [ + "default-src 'none'", + "script-src https://cdn.jsdelivr.net 'unsafe-inline'", + "style-src 'unsafe-inline'", + 'worker-src blob:', + "img-src 'self' data: blob:", + 'font-src data:', + "connect-src 'self' https://cdn.jsdelivr.net", + "base-uri 'none'", + "form-action 'none'", + ].join('; '); + return ` + + + + + +Preview + + + + +
    Loading preview…
    + + + + +`; +} + +/** + * Render the slide-list body (just the `
      ` HTML) for a buffer. + * Used by `pptxToHtml` to embed as a fallback inside the CDN doc and + * by `pptxToSlideListHtmlInternal` for the standalone path. */ +async function renderPptxSlidesBodyForBuffer(buffer: Buffer): Promise { + const rawSlides = await extractPptxSlideXml(buffer); + const slides: PptxSlide[] = rawSlides.map(({ number, xml }) => { + const { title, body } = extractSlideText(xml); + return { number, title, body }; + }); + return renderPptxSlidesBody(slides); +} + +async function pptxToHtmlViaCdn(buffer: Buffer, slideListFallbackBody: string): Promise { + return buildPptxCdnDocument(buffer.toString('base64'), slideListFallbackBody); +} + +/** + * Convert a `.pptx` buffer to a sandboxed HTML document. Two render + * paths, chosen by file size and the `OFFICE_PREVIEW_DISABLE_CDN` + * escape hatch: + * + * 1. **CDN-rendered (default for files ≤ 350 KB binary)**: embeds + * the binary as base64 and lets `pptx-preview` render it inside + * the Sandpack iframe. High visual fidelity. + * 2. **Slide-list (fallback)**: server-side text-only extraction + * (each slide → card with title + bullets). Lower fidelity but + * compact, network-independent, and proven against arbitrary + * decks. Triggered when the binary exceeds the embed cap, when + * `OFFICE_PREVIEW_DISABLE_CDN=true` is set (air-gapped / + * filtered networks), or when the CDN script fails to load + * (handled iframe-side). + * + * Pre-flights through `assertSafeZipSize` so a zip-bomb PPTX is + * rejected before either renderer touches it. + */ +export async function pptxToHtml(buffer: Buffer): Promise { + await assertSafeZipSize(buffer, { name: 'pptx' }); + /* Opt-in LibreOffice path: PDF rendering of slides preserves layout, + * fonts, charts, and embedded objects far better than the slide-list + * fallback or even pptx-preview's canvas approach. Falls through on + * any error per the contract in `tryLibreOfficePreview`. */ + const lo = await tryLibreOfficePreview(buffer, 'pptx', OFFICE_HTML_OUTPUT_CAP); + if (lo) { + return lo; + } + if (isOfficePreviewCdnDisabled() || buffer.length > MAX_PPTX_CDN_BINARY_BYTES) { + return pptxToSlideListHtmlInternal(buffer); + } + /* Render the slide-list first so we can embed it inside the CDN doc + * as a runtime fallback. pptx-preview is unreliable on PPTXs not + * generated by Microsoft PowerPoint (e.g. pptxgenjs decks) — it + * silently produces empty wrappers. The iframe bootstrap detects + * the empty-render case and reveals this slide-list fallback so the + * user always gets readable content. Manual e2e on PR #12934. */ + const slideListBody = await renderPptxSlidesBodyForBuffer(buffer); + const cdnDoc = await pptxToHtmlViaCdn(buffer, slideListBody); + /* Combined size budget: if base64 binary + slide-list fallback + + * wrapper would exceed the cache cap, drop CDN entirely and ship + * the slide-list standalone. Same pattern as the DOCX dispatcher's + * size budget. */ + if (Buffer.byteLength(cdnDoc, 'utf-8') > OFFICE_HTML_OUTPUT_CAP) { + return pptxToSlideListHtmlInternal(buffer); + } + return cdnDoc; +} + +/** + * Slide-list path that skips its own `assertSafeZipSize` call — used + * by `pptxToHtml` after the dispatcher has already validated the + * buffer. `pptxToSlideListHtml` (public, validating) stays the entry + * point for tests and direct callers. 
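+ *
+ * Intended call pattern (sketch):
+ *
+ *   await pptxToSlideListHtml(buffer);          // public entry point: validates the zip first
+ *   await pptxToSlideListHtmlInternal(buffer);  // pptxToHtml only: buffer already validated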
+ */ +async function pptxToSlideListHtmlInternal(buffer: Buffer): Promise { + const rawSlides = await extractPptxSlideXml(buffer); + const slides: PptxSlide[] = rawSlides.map(({ number, xml }) => { + const { title, body } = extractSlideText(xml); + return { number, title, body }; + }); + return renderPptxSlidesHtml(slides); +} + +/** + * Test-only re-exports. Each path has distinct visual-fidelity and + * size characteristics that warrant direct testing rather than + * round-tripping through the size-based dispatcher with synthetically + * padded fixtures. Not part of the public API — callers in production + * code should always go through `wordDocToHtml` / `pptxToHtml`. + */ +export const _internal = { + wordDocToHtmlViaCdn, + wordDocToHtmlViaMammoth, + MAX_DOCX_CDN_BINARY_BYTES, + OFFICE_HTML_OUTPUT_CAP, + pptxToHtmlViaCdn, + MAX_PPTX_CDN_BINARY_BYTES, +}; + +/* ============================================================================= + * Dispatcher + * ============================================================================= */ + +const DOCX_MIME = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'; +const PPTX_MIME = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'; +const ODS_MIME = 'application/vnd.oasis.opendocument.spreadsheet'; +const CSV_MIME_PATTERN = /^(text\/csv|application\/csv|text\/comma-separated-values)$/i; + +function extensionOf(name: string): string { + const dot = name.lastIndexOf('.'); + if (dot < 0 || dot === name.length - 1) { + return ''; + } + return name.slice(dot + 1).toLowerCase(); +} + +/** + * Strip MIME parameters (`; charset=utf-8`, `; boundary=...`) and + * lowercase the result so exact-match MIME comparisons survive servers + * and tool sandboxes that decorate Content-Type headers. Without this, + * an extensionless `text/csv; charset=utf-8` would slip past the bucket + * predicate (returning null) while the client's MIME router — which + * already strips parameters via `baseMime` — would route it to + * SPREADSHEET expecting an HTML body that the backend never produced. + * Mirrors `client/src/utils/artifacts.ts:baseMime`. + */ +function baseMime(mime: string): string { + if (!mime) { + return ''; + } + const semi = mime.indexOf(';'); + return (semi < 0 ? mime : mime.slice(0, semi)).trim().toLowerCase(); +} + +/** + * The four buckets the dispatcher can route to. Used by both + * `bufferToOfficeHtml` (which actually renders) and `officeHtmlBucket` + * (which the upstream gate in `extract.ts` calls to decide whether to + * invoke the dispatcher at all). Sharing one source of truth prevents the + * gate from going out of sync with the dispatcher — a previous version + * had a narrower extension-only gate that missed extensionless office + * files identified solely by MIME (e.g. a tool emitting `data` with + * `text/csv`), which then routed to the SPREADSHEET bucket on the + * client expecting full HTML and got raw text instead. + */ +export type OfficeHtmlBucket = 'docx' | 'spreadsheet' | 'csv' | 'pptx'; + +const OFFICE_EXTENSIONS: Record = { + docx: 'docx', + csv: 'csv', + xlsx: 'spreadsheet', + xls: 'spreadsheet', + ods: 'spreadsheet', + pptx: 'pptx', +}; + +/** + * Classify a file by extension OR MIME into the bucket the office HTML + * dispatcher would route it to, or `null` if no producer applies. + * + * **Extension always wins** over MIME for any known office extension. 
+ * Without this precedence, a `deck.pptx` whose `Content-Type` was + * mislabeled `text/csv` (a known footgun when tool sandboxes ship + * generic MIMEs) would be routed to `csvToHtml` and try to parse a + * binary PPTX as CSV — yielding garbage at best, a parse exception at + * worst. + * + * **MIME fallback fires ONLY when the extension is empty.** This + * mirrors the client's `detectArtifactTypeFromFile` precedence: the + * client routes any file with a known non-office extension (`.txt`, + * `.md`, `.json`, etc.) to its non-office bucket REGARDLESS of MIME. + * If the server produced office HTML for `.txt + DOCX MIME`, the + * client would route by extension to PLAIN_TEXT, escape the HTML + * through the markdown viewer, and the user would see raw `...` + * markup instead of the rendered preview. Codex P2 review on PR + * #12934 — symmetric server/client extension precedence is the only + * way to avoid this kind of mismatch without a shared routing table. + * + * Trade-off: a true DOCX renamed to `myfile.bin` + DOCX MIME no longer + * routes through office HTML on the server (it falls to the existing + * extract path → `textFormat: 'text'` → client renders raw bytes via + * PLAIN_TEXT). Acceptable: extension-vs-MIME mismatch is user error, + * and the previous behavior would have surfaced as a security-gate + * fall-through to PLAIN_TEXT on the client anyway (no preview either + * way; the new behavior just avoids producing HTML the client can't + * use). + */ +export function officeHtmlBucket(name: string, mimeType: string): OfficeHtmlBucket | null { + const ext = extensionOf(name); + const byExtension = OFFICE_EXTENSIONS[ext]; + if (byExtension) { + return byExtension; + } + if (ext !== '') { + /* Non-empty non-office extension wins — see contract above. */ + return null; + } + /* MIME fallback for genuinely extensionless files (tool-emitted + * blobs with generic names, etc.). Strip parameters first — + * Content-Type headers routinely carry `; charset=utf-8` or + * `; boundary=...` decorations that would otherwise fail exact- + * match comparisons. */ + const normalized = baseMime(mimeType); + if (normalized === DOCX_MIME) { + return 'docx'; + } + if (CSV_MIME_PATTERN.test(normalized)) { + return 'csv'; + } + if (excelMimeTypes.test(normalized) || normalized === ODS_MIME) { + return 'spreadsheet'; + } + if (normalized === PPTX_MIME) { + return 'pptx'; + } + return null; +} + +/** + * Route an office-format buffer to the matching HTML producer. Returns `null` + * if the file isn't a recognized office type — caller should fall through to + * its existing text/binary handling. 
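+ *
+ * Hypothetical call site (field names assumed for illustration):
+ *
+ *   const html = await bufferToOfficeHtml(buffer, file.filename, file.type);
+ *   if (html == null) {
+ *     // not an office format; fall through to the existing text/binary handling
+ *   }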
+ */ +export async function bufferToOfficeHtml( + buffer: Buffer, + name: string, + mimeType: string, +): Promise { + const bucket = officeHtmlBucket(name, mimeType); + switch (bucket) { + case 'docx': + return wordDocToHtml(buffer); + case 'csv': + return csvToHtml(buffer); + case 'spreadsheet': + return excelSheetToHtml(buffer); + case 'pptx': + return pptxToHtml(buffer); + default: + return null; + } +} diff --git a/packages/api/src/files/documents/libreoffice.spec.ts b/packages/api/src/files/documents/libreoffice.spec.ts new file mode 100644 index 000000000000..449ade21bc11 --- /dev/null +++ b/packages/api/src/files/documents/libreoffice.spec.ts @@ -0,0 +1,385 @@ +import { spawnSync } from 'child_process'; +import * as fs from 'fs'; +import * as path from 'path'; +import { + _resetLibreOfficeProbeCache, + buildPdfEmbedDocument, + convertOfficeToPdf, + isLibreOfficeEnabled, + isLibreOfficeEnabledFor, + LibreOfficeConversionError, + LibreOfficeUnavailableError, + LIBREOFFICE_TIMEOUT_MS, + MAX_LIBREOFFICE_PDF_BYTES, + probeLibreOfficeBinary, + tryLibreOfficePreview, +} from './libreoffice'; + +/* Detect whether the host has a LibreOffice binary so the integration + * tests below can conditionally engage. Most CI runners don't have + * LibreOffice installed (it's a 500 MB dependency); we still exercise + * the env-gating, the wrapper builder, and the failure-fallthrough + * contract in those environments. */ +function hasLibreOfficeOnPath(): boolean { + for (const candidate of ['soffice', 'libreoffice']) { + const result = spawnSync(candidate, ['--version'], { stdio: 'ignore' }); + if (result.status === 0) { + return true; + } + } + return false; +} +const LIBREOFFICE_INSTALLED = hasLibreOfficeOnPath(); +const itIfLibreOffice = LIBREOFFICE_INSTALLED ? it : it.skip; + +/* Fixtures sit next to the source files (matches `html.spec.ts` resolution). */ +const FIXTURES_DIR = __dirname; +function readFixture(name: string): Buffer { + return fs.readFileSync(path.join(FIXTURES_DIR, name)); +} + +describe('libreoffice (env gating + wrapper)', () => { + const ORIGINAL_FLAG = process.env.OFFICE_PREVIEW_LIBREOFFICE; + + afterEach(() => { + _resetLibreOfficeProbeCache(); + if (ORIGINAL_FLAG === undefined) { + delete process.env.OFFICE_PREVIEW_LIBREOFFICE; + } else { + process.env.OFFICE_PREVIEW_LIBREOFFICE = ORIGINAL_FLAG; + } + }); + + describe('isLibreOfficeEnabled (any-format check)', () => { + it.each([['true'], ['1'], ['yes'], ['TRUE'], ['Yes'], [' true ']])( + 'returns true for %j (truthy = all formats enabled)', + (value) => { + process.env.OFFICE_PREVIEW_LIBREOFFICE = value; + expect(isLibreOfficeEnabled()).toBe(true); + }, + ); + + it.each([['false'], ['0'], ['no'], ['']])('returns false for %j', (value) => { + process.env.OFFICE_PREVIEW_LIBREOFFICE = value; + expect(isLibreOfficeEnabled()).toBe(false); + }); + + it('returns false when the env var is unset', () => { + delete process.env.OFFICE_PREVIEW_LIBREOFFICE; + expect(isLibreOfficeEnabled()).toBe(false); + }); + + it('returns true for a comma-separated format list (any format counts)', () => { + process.env.OFFICE_PREVIEW_LIBREOFFICE = 'pptx'; + expect(isLibreOfficeEnabled()).toBe(true); + process.env.OFFICE_PREVIEW_LIBREOFFICE = 'pptx,docx'; + expect(isLibreOfficeEnabled()).toBe(true); + }); + }); + + describe('isLibreOfficeEnabledFor (per-format opt-in)', () => { + /* Per-format gating lets operators trade off cold-start latency + * against fidelity per format. 
DOCX renders ~instantly via + * docx-preview, but PPTX needs LibreOffice for any reasonable + * fidelity (pptx-preview chokes on pptxgenjs decks). Operators + * should be able to enable LibreOffice for PPTX only. */ + it.each([ + ['true', 'pptx', true], + ['true', 'docx', true], + ['true', 'xlsx', true], + ['1', 'pptx', true], + ['yes', 'docx', true], + ['false', 'pptx', false], + ['', 'pptx', false], + ['pptx', 'pptx', true], + ['pptx', 'docx', false], + ['docx', 'pptx', false], + ['docx', 'docx', true], + ['pptx,docx', 'pptx', true], + ['pptx,docx', 'docx', true], + ['pptx,docx', 'xlsx', false], + [' pptx , docx ', 'pptx', true], // whitespace-tolerant + ['PPTX', 'pptx', true], // case-insensitive + ['pptx, ,docx', 'docx', true], // empty list entries dropped + ['pptx, ,docx', 'pptx', true], + ['pptx, ,docx', 'xlsx', false], + ])('env=%j format=%j → %s', (envValue, format, expected) => { + process.env.OFFICE_PREVIEW_LIBREOFFICE = envValue; + expect(isLibreOfficeEnabledFor(format)).toBe(expected); + }); + + it('returns false for any format when the env var is unset', () => { + delete process.env.OFFICE_PREVIEW_LIBREOFFICE; + expect(isLibreOfficeEnabledFor('pptx')).toBe(false); + expect(isLibreOfficeEnabledFor('docx')).toBe(false); + expect(isLibreOfficeEnabledFor('xlsx')).toBe(false); + }); + + it('matches format names case-insensitively', () => { + process.env.OFFICE_PREVIEW_LIBREOFFICE = 'pptx'; + expect(isLibreOfficeEnabledFor('PPTX')).toBe(true); + expect(isLibreOfficeEnabledFor('Pptx')).toBe(true); + }); + }); + + describe('buildPdfEmbedDocument', () => { + /* The wrapper structure is the security-relevant surface. These + * tests lock down the CSP, the iframe shape, and the fallback + * contract — independent of whether LibreOffice is actually + * installed. Same posture as the docx-preview / pptx-preview CDN + * wrappers. */ + const FAKE_PDF_B64 = 'JVBERi0xLjQK'; // "%PDF-1.4\n" base64 + + it('emits a complete sandboxed HTML document that renders via pdf.js to canvas', () => { + const html = buildPdfEmbedDocument(FAKE_PDF_B64); + expect(html).toMatch(/^/); + expect(html).toContain('Preview'); + /* PDF bytes embedded as a base64 data block — pdf.js decodes + * them at runtime and renders to canvas. We do NOT use any + * `