Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"@librechat/api": "*",
"@librechat/data-schemas": "*",
"@microsoft/microsoft-graph-client": "^3.0.7",
"@modelcontextprotocol/sdk": "^1.27.1",
"@modelcontextprotocol/sdk": "^1.29.0",
"@node-saml/passport-saml": "^5.1.0",
"@smithy/node-http-handler": "^4.4.5",
"ai-tokenizer": "^1.0.6",
Expand Down
34 changes: 34 additions & 0 deletions api/server/controllers/agents/v1.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ const {
collectEdgeAgentIds,
mergeAgentOcrConversion,
MAX_AVATAR_REFRESH_AGENTS,
collectToolResourceFileIds,
convertOcrToContextInPlace,
stripFileIdsFromToolResources,
} = require('@librechat/api');
const {
Time,
Expand Down Expand Up @@ -387,6 +389,38 @@ const updateAgentHandler = async (req, res) => {
updateData.tools = ocrConversion.tools;
}

/*
* Strip orphaned file_id stubs from the incoming payload (see issue #12776).
* Scoped to updates that actually touch tool_resources: if the save does not
* modify that field, the delete-time cleanup in processDeleteRequest and the
* one-off migration already cover pre-existing corruption, so there's no
* reason to pay an extra DB round-trip here. Wrapped in try/catch so a
* transient failure in this integrity check never turns a good save into 500.
*/
if (updateData.tool_resources) {
try {
const referencedFileIds = collectToolResourceFileIds(updateData.tool_resources);
if (referencedFileIds.length > 0) {
const existingFiles = await db.getFiles({ file_id: { $in: referencedFileIds } }, null, {
file_id: 1,
});
const existingIds = new Set((existingFiles ?? []).map((f) => f.file_id));
const orphans = referencedFileIds.filter((id) => !existingIds.has(id));
if (orphans.length > 0) {
logger.warn(
`[/Agents/:id] Pruning ${orphans.length} orphaned file reference(s) from agent ${id}`,
);
stripFileIdsFromToolResources(updateData.tool_resources, orphans);
}
}
} catch (orphanCheckError) {
logger.warn(
'[/Agents/:id] Orphan file check failed, skipping cleanup for this request',
orphanCheckError,
);
}
}

if (updateData.tools) {
const existingToolSet = new Set(existingAgent.tools ?? []);
const newMCPTools = updateData.tools.filter(
Expand Down
109 changes: 108 additions & 1 deletion api/server/controllers/agents/v1.spec.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
const mongoose = require('mongoose');
const { nanoid } = require('nanoid');
const { v4: uuidv4 } = require('uuid');
const { agentSchema } = require('@librechat/data-schemas');
const { agentSchema, fileSchema } = require('@librechat/data-schemas');
const { FileSources, PermissionBits } = require('librechat-data-provider');
const { MongoMemoryServer } = require('mongodb-memory-server');

Expand Down Expand Up @@ -99,6 +99,9 @@ describe('Agent Controllers - Mass Assignment Protection', () => {
const mongoUri = mongoServer.getUri();
await mongoose.connect(mongoUri);
Agent = mongoose.models.Agent || mongoose.model('Agent', agentSchema);
// Register File so orphan-pruning tests (and the tool_resources validation
// test, which now needs real File docs for its ids) have a working model.
mongoose.models.File || mongoose.model('File', fileSchema);
}, 20000);

afterAll(async () => {
Expand Down Expand Up @@ -542,6 +545,23 @@ describe('Agent Controllers - Mass Assignment Protection', () => {
});

test('should validate tool_resources in updates', async () => {
// Back these ids with real File docs so the orphan-pruning added for
// issue #12776 does not strip them — this test is about OCR conversion
// and schema filtering, not file existence.
const File = mongoose.models.File;
for (const id of ['ocr1', 'ocr2', 'img1']) {
await File.create({
file_id: id,
user: existingAgentAuthorId,
filename: `${id}.txt`,
filepath: `/tmp/${id}`,
object: 'file',
type: 'text/plain',
bytes: 1,
source: FileSources.local,
});
}

mockReq.user.id = existingAgentAuthorId.toString();
mockReq.params.id = existingAgentId;
mockReq.body = {
Expand Down Expand Up @@ -729,6 +749,93 @@ describe('Agent Controllers - Mass Assignment Protection', () => {
}),
);
});

describe('orphan file_id pruning (issue #12776)', () => {
  // Lazy accessor: the File model is registered in the outer beforeAll, after
  // this module body runs, so we must not capture mongoose.models.File eagerly.
  const File = () => mongoose.models.File;

  // Seed a minimal-but-valid File document so the handler's existence check
  // (db.getFiles) can find it; fields beyond file_id/user are schema filler.
  const createFileDoc = async (file_id, userId) =>
    File().create({
      file_id,
      user: userId,
      filename: `${file_id}.txt`,
      filepath: `/tmp/${file_id}`,
      object: 'file',
      type: 'text/plain',
      bytes: 1,
      source: FileSources.local,
    });

  // Each test controls exactly which File docs exist; wipe between tests.
  beforeEach(async () => {
    await File().deleteMany({});
  });

  test('strips orphan file_ids from incoming tool_resources before persisting', async () => {
    // One id backed by a real File doc, one dangling — only the keeper survives.
    const keeper = `file_${uuidv4()}`;
    const orphan = `file_${uuidv4()}`;
    await createFileDoc(keeper, existingAgentAuthorId);

    mockReq.user.id = existingAgentAuthorId.toString();
    mockReq.params.id = existingAgentId;
    mockReq.body = {
      tool_resources: {
        file_search: { file_ids: [keeper, orphan] },
      },
    };

    await updateAgentHandler(mockReq, mockRes);

    // Assert against the persisted document, not the response payload.
    const agentInDb = await Agent.findOne({ id: existingAgentId }).lean();
    expect(agentInDb.tool_resources.file_search.file_ids).toEqual([keeper]);
  });

  test('leaves tool_resources alone when the update omits it', async () => {
    // Pre-corrupt the stored agent with a dangling reference directly in Mongo.
    const orphan = `file_${uuidv4()}`;
    await Agent.updateOne(
      { id: existingAgentId },
      { $set: { tool_resources: { file_search: { file_ids: [orphan] } } } },
    );

    mockReq.user.id = existingAgentAuthorId.toString();
    mockReq.params.id = existingAgentId;
    mockReq.body = { name: 'Unrelated Rename' };

    await updateAgentHandler(mockReq, mockRes);

    const agentInDb = await Agent.findOne({ id: existingAgentId }).lean();
    expect(agentInDb.name).toBe('Unrelated Rename');
    // Save-time pruning is intentionally scoped to tool_resources updates.
    // The delete-time fix and migration script cover the untouched case.
    expect(agentInDb.tool_resources.file_search.file_ids).toEqual([orphan]);
  });

  test('swallows errors from the file-existence check and still completes the save', async () => {
    // Force db.getFiles to reject so the handler's try/catch around the
    // orphan check is exercised; restored in finally so later tests are safe.
    const db = require('~/models');
    const originalGetFiles = db.getFiles;
    db.getFiles = jest.fn().mockRejectedValue(new Error('transient DB error'));

    const orphan = `file_${uuidv4()}`;
    mockReq.user.id = existingAgentAuthorId.toString();
    mockReq.params.id = existingAgentId;
    mockReq.body = {
      name: 'Save Succeeds',
      tool_resources: { file_search: { file_ids: [orphan] } },
    };

    try {
      await updateAgentHandler(mockReq, mockRes);

      // The save must not be converted into a 500 by the failed check.
      expect(mockRes.status).not.toHaveBeenCalledWith(500);
      expect(mockRes.json).toHaveBeenCalled();
      const agentInDb = await Agent.findOne({ id: existingAgentId }).lean();
      expect(agentInDb.name).toBe('Save Succeeds');
      // Cleanup skipped on error, so the id remains — the delete-time path
      // or the next successful save will reconcile it.
      expect(agentInDb.tool_resources.file_search.file_ids).toEqual([orphan]);
    } finally {
      db.getFiles = originalGetFiles;
    }
  });
});
});

describe('Mass Assignment Attack Scenarios', () => {
Expand Down
203 changes: 203 additions & 0 deletions api/server/services/Files/process.integration.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
/**
* Integration test for the delete-time path of issue #12776.
*
* Covers the full flow through `processDeleteRequest`:
* 1. Real Agent + File docs in an in-memory Mongo.
* 2. Invoke the delete service.
* 3. Assert both the File record is gone and every agent's
* tool_resources.*.file_ids no longer references the deleted id.
*
* Uses FileSources.text so the strategy layer (disk / S3 / OpenAI) is a
* no-op — we don't need real filesystem access to exercise the agent
* reference cleanup, which is what issue #12776 is about.
*/

const mongoose = require('mongoose');
const { MongoMemoryServer } = require('mongodb-memory-server');
const { agentSchema, fileSchema, createMethods } = require('@librechat/data-schemas');
const { FileSources } = require('librechat-data-provider');

// NOTE: jest.mock calls are hoisted above the requires by the jest
// transformer; factories therefore must not reference outer-scope variables.

// Keep the real data-schemas exports but silence the logger so test output
// stays clean.
jest.mock('@librechat/data-schemas', () => {
  const actual = jest.requireActual('@librechat/data-schemas');
  return {
    ...actual,
    logger: { warn: jest.fn(), debug: jest.fn(), error: jest.fn(), info: jest.fn() },
  };
});

// Minimal stub: process.js only reads EnvVar.CODE_API_KEY from this package.
jest.mock('@librechat/agents', () => ({
  EnvVar: { CODE_API_KEY: 'CODE_API_KEY' },
}));

// Stub the text/audio helpers so requiring ./process does not pull in the
// real parsing pipeline.
jest.mock('@librechat/api', () => ({
  sanitizeFilename: jest.fn((n) => n),
  parseText: jest.fn().mockResolvedValue({ text: '', bytes: 0 }),
  processAudioFile: jest.fn(),
}));

// Assistants-v2 resource bookkeeping is out of scope for this test.
jest.mock('~/server/controllers/assistants/v2', () => ({
  addResourceFileId: jest.fn(),
  deleteResourceFileId: jest.fn(),
}));

jest.mock('~/server/controllers/assistants/helpers', () => ({
  getOpenAIClient: jest.fn(),
}));

jest.mock('~/server/services/Tools/credentials', () => ({
  loadAuthValues: jest.fn(),
}));

// No-op strategy layer: we only exercise the DB-side reference cleanup, so
// disk/S3/OpenAI deletion is stubbed to resolve successfully.
jest.mock('~/server/services/Files/strategies', () => ({
  getStrategyFunctions: jest.fn(() => ({ deleteFile: jest.fn().mockResolvedValue(undefined) })),
}));

jest.mock('~/server/services/Files/Audio/STTService', () => ({
  STTService: { getInstance: jest.fn() },
}));

jest.mock('~/server/services/Config', () => ({
  checkCapability: jest.fn().mockResolvedValue(true),
}));

jest.mock('~/cache', () => ({
  getLogStores: jest.fn(() => ({ get: jest.fn(), set: jest.fn(), delete: jest.fn() })),
}));

// Replace the mocked `~/models` from the sibling process.spec.js with real,
// mongoose-backed methods. All our in-memory models share this module.
jest.mock('~/models', () => {
  const mongoose = require('mongoose');
  const { createMethods } = require('@librechat/data-schemas');
  return createMethods(mongoose, {
    removeAllPermissions: jest.fn().mockResolvedValue(undefined),
  });
});

// Register the `~/` path alias before requiring the module under test.
require('module-alias/register');
const { processDeleteRequest } = require('./process');

describe('processDeleteRequest — agent reference cleanup (issue #12776)', () => {
  let mongoServer;
  let Agent;
  let File;

  beforeAll(async () => {
    // Spin up an in-memory Mongo; generous timeout covers first-run binary
    // download on CI.
    mongoServer = await MongoMemoryServer.create();
    await mongoose.connect(mongoServer.getUri());

    // createMethods (via ~/models) registers the File model as a side-effect,
    // but we also need the Agent model registered before any queries run.
    Agent = mongoose.models.Agent || mongoose.model('Agent', agentSchema);
    File = mongoose.models.File || mongoose.model('File', fileSchema);
    // Touch createMethods once so the migration/setup side-effects run.
    createMethods(mongoose, { removeAllPermissions: jest.fn() });
  }, 30000);

  afterAll(async () => {
    await mongoose.disconnect();
    await mongoServer.stop();
  });

  // Fresh collections per test: each case fully controls its fixtures.
  beforeEach(async () => {
    await Agent.deleteMany({});
    await File.deleteMany({});
  });

  // Persist a minimal File doc; FileSources.text keeps the strategy layer a
  // no-op (see header comment).
  const seedFile = async (file_id, userId) =>
    File.create({
      file_id,
      user: userId,
      filename: `${file_id}.txt`,
      filepath: `/tmp/${file_id}`,
      object: 'file',
      type: 'text/plain',
      bytes: 1,
      source: FileSources.text,
    });

  // Persist an Agent with the given tool_resources; random suffix keeps ids
  // unique across tests.
  const seedAgent = async (authorId, tool_resources) =>
    Agent.create({
      id: `agent_${Math.random().toString(36).slice(2, 10)}`,
      name: 'Integration Test Agent',
      provider: 'test',
      model: 'test-model',
      author: authorId,
      tool_resources,
    });

  // Build the minimal Express-style req that processDeleteRequest reads:
  // user id, files payload, and a file-strategy config.
  const buildReq = (fileDocs, extraBody = {}) => ({
    user: { id: fileDocs[0].user.toString() },
    body: { files: fileDocs, ...extraBody },
    config: { fileStrategy: 'local', fileConfig: {}, endpoints: {} },
  });

  test('strips deleted file_ids from every agent that referenced them', async () => {
    const userId = new mongoose.Types.ObjectId();
    const keeperId = `file_keeper_${Math.random().toString(36).slice(2, 10)}`;
    const deletedId = `file_deleted_${Math.random().toString(36).slice(2, 10)}`;

    const deletedFile = await seedFile(deletedId, userId);
    await seedFile(keeperId, userId);

    // Two agents both reference the file that's about to be deleted, plus the
    // keeper. A third, unrelated agent has a different file_id and must not be
    // touched by the cleanup.
    const agentA = await seedAgent(userId, {
      file_search: { file_ids: [deletedId, keeperId] },
    });
    const agentB = await seedAgent(userId, {
      execute_code: { file_ids: [deletedId] },
    });
    const untouchedAgent = await seedAgent(userId, {
      context: { file_ids: [keeperId] },
    });

    await processDeleteRequest({ req: buildReq([deletedFile.toObject()]), files: [deletedFile] });

    // File record gone, keeper untouched.
    expect(await File.findOne({ file_id: deletedId })).toBeNull();
    expect(await File.findOne({ file_id: keeperId })).not.toBeNull();

    const updatedA = await Agent.findOne({ id: agentA.id }).lean();
    const updatedB = await Agent.findOne({ id: agentB.id }).lean();
    const updatedUntouched = await Agent.findOne({ id: untouchedAgent.id }).lean();

    // Deleted id pruned everywhere; unrelated references preserved.
    expect(updatedA.tool_resources.file_search.file_ids).toEqual([keeperId]);
    expect(updatedB.tool_resources.execute_code.file_ids).toEqual([]);
    expect(updatedUntouched.tool_resources.context.file_ids).toEqual([keeperId]);
  });

  test('is a no-op when no agent references the deleted file', async () => {
    const userId = new mongoose.Types.ObjectId();
    const loneId = `file_lone_${Math.random().toString(36).slice(2, 10)}`;
    const loneFile = await seedFile(loneId, userId);
    const unrelatedAgent = await seedAgent(userId, {
      file_search: { file_ids: ['other_id'] },
    });

    await processDeleteRequest({ req: buildReq([loneFile.toObject()]), files: [loneFile] });

    expect(await File.findOne({ file_id: loneId })).toBeNull();
    const after = await Agent.findOne({ id: unrelatedAgent.id }).lean();
    expect(after.tool_resources.file_search.file_ids).toEqual(['other_id']);
  });

  test('still deletes the file when the agent cleanup step throws', async () => {
    const userId = new mongoose.Types.ObjectId();
    const targetId = `file_target_${Math.random().toString(36).slice(2, 10)}`;
    const targetFile = await seedFile(targetId, userId);

    // Simulate the cleanup helper failing; the file deletion itself must not
    // be rolled back or blocked. Restored in finally for test isolation.
    const db = require('~/models');
    const original = db.removeAgentResourceFilesFromAllAgents;
    db.removeAgentResourceFilesFromAllAgents = jest
      .fn()
      .mockRejectedValue(new Error('simulated cleanup failure'));

    try {
      await processDeleteRequest({ req: buildReq([targetFile.toObject()]), files: [targetFile] });
      expect(await File.findOne({ file_id: targetId })).toBeNull();
    } finally {
      db.removeAgentResourceFilesFromAllAgents = original;
    }
  });
});
Loading
Loading