Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions src/tools/BashExecutor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import { HttpsProxyAgent } from 'https-proxy-agent';
import { tool, DynamicStructuredTool } from '@langchain/core/tools';
import type * as t from '@/types';
import {
BASH_SHELL_GUIDANCE,
CODE_ARTIFACT_PATH_GUIDANCE,
appendCodeSessionFileSummary,
emptyOutputMessage,
buildCodeApiHttpErrorMessage,
getCodeBaseURL,
Expand All @@ -23,8 +26,9 @@ export const BashExecutionToolSchema = {
type: 'string',
description: `The bash command or script to execute.
- The environment is stateless; variables and state don't persist between executions.
- Generated files from previous executions are automatically available in "/mnt/data/".
- Files from previous executions are automatically available and can be modified in place.
- Prior /mnt/data files are available and can be modified in place.
- ${CODE_ARTIFACT_PATH_GUIDANCE}
- ${BASH_SHELL_GUIDANCE}
- Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
- Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
- IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
Expand All @@ -46,6 +50,8 @@ Runs bash commands and returns stdout/stderr output from a stateless execution e
Usage:
- No network access available.
- Generated files are automatically delivered; **DO NOT** provide download links.
- ${CODE_ARTIFACT_PATH_GUIDANCE}
- ${BASH_SHELL_GUIDANCE}
- NEVER use this tool to execute malicious commands.
`.trim();

Expand Down Expand Up @@ -166,11 +172,6 @@ function createBashExecutionTool(
}

const result: t.ExecuteResult = await response.json();
/* See `CodeExecutor.ts` — file listings were removed from the
* LLM-facing tool result. Bash especially benefits: models
* naturally `ls /mnt/data/` to discover what's available
* rather than relying on a prescriptive summary that
* misleads as often as it helps. */
let formattedOutput = '';
if (result.stdout) {
formattedOutput += `stdout:\n${result.stdout}\n`;
Expand All @@ -181,7 +182,7 @@ function createBashExecutionTool(

const hasFiles = result.files != null && result.files.length > 0;
return [
formattedOutput.trim(),
appendCodeSessionFileSummary(formattedOutput, result.files),
(hasFiles
? { session_id: result.session_id, files: result.files }
: {
Expand Down
12 changes: 9 additions & 3 deletions src/tools/BashProgrammaticToolCalling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ import {
executeTools,
formatCompletedResponse,
} from './ProgrammaticToolCalling';
import { getCodeBaseURL } from './CodeExecutor';
import {
BASH_SHELL_GUIDANCE,
CODE_ARTIFACT_PATH_GUIDANCE,
getCodeBaseURL,
} from './CodeExecutor';
import {
clampCodeApiRunTimeoutMs,
createCodeApiRunTimeoutSchema,
Expand Down Expand Up @@ -62,11 +66,13 @@ You MUST complete your entire workflow in ONE code block.
DO NOT split work across multiple calls expecting to reuse variables.`;

const CORE_RULES = `Rules:
- EVERYTHING in one call—no state persists between executions
- One call: state does not persist
- Tools are pre-defined as bash functions—DO NOT redefine them
- Each tool function accepts a JSON string argument
- Tool stdout is JSON, not raw text; use jq -r . for strings, jq -r .field for objects
- Only echo/printf output returns to the model
- Generated files are automatically available in /mnt/data/ for subsequent executions
- ${CODE_ARTIFACT_PATH_GUIDANCE}
- ${BASH_SHELL_GUIDANCE}
- timeout caps one sandbox run/replay iteration, not the total multi-round-trip workflow`;

const ADDITIONAL_RULES =
Expand Down
28 changes: 17 additions & 11 deletions src/tools/CodeExecutor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,14 @@ import { HttpsProxyAgent } from 'https-proxy-agent';
import { tool, DynamicStructuredTool } from '@langchain/core/tools';
import { getEnvironmentVariable } from '@langchain/core/utils/env';
import type * as t from '@/types';
import { appendCodeSessionFileSummary } from '@/tools/CodeSessionFileSummary';
import { EnvVar, Constants } from '@/common';

export {
appendCodeSessionFileSummary,
stripCodeSessionFileSummary,
} from '@/tools/CodeSessionFileSummary';

config();

export const getCodeBaseURL = (): string =>
Expand All @@ -15,6 +21,12 @@ export const getCodeBaseURL = (): string =>
export const emptyOutputMessage =
'stdout: Empty. Ensure you\'re writing output explicitly.\n';

/**
 * One-line prompt guidance about where code-tool artifacts should be written.
 * It is interpolated verbatim into tool descriptions and schema text, so any
 * wording change here changes what the model is told.
 */
export const CODE_ARTIFACT_PATH_GUIDANCE =
'Persist artifacts in `/mnt/data` with standard extensions (.json/.txt/.csv/.tsv/.log/.parquet/.png/.jpg/.pdf/.xlsx); `/tmp` and odd extensions are same-call scratch.';

/**
 * One-line prompt guidance for bash-based tools: how to write multi-line
 * files and how to invoke Python from a shell. Interpolated verbatim into the
 * bash tool descriptions; tests assert on substrings of this text.
 */
export const BASH_SHELL_GUIDANCE =
'Bash: multi-line files use heredoc/printf; run Python via python3 -c/heredoc, not bare Python.';

const SUPPORTED_LANGUAGES = [
'py',
'js',
Expand Down Expand Up @@ -44,8 +56,8 @@ export const CodeExecutionToolSchema = {
type: 'string',
description: `The complete, self-contained code to execute, without any truncation or minimization.
- The environment is stateless; variables and imports don't persist between executions.
- Generated files from previous executions are automatically available in "/mnt/data/".
- Files from previous executions are automatically available and can be modified in place.
- Prior /mnt/data files are available and can be modified in place.
- ${CODE_ARTIFACT_PATH_GUIDANCE}
- Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
- Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
- IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
Expand Down Expand Up @@ -104,6 +116,7 @@ Runs code and returns stdout/stderr output from a stateless execution environmen
Usage:
- No network access available.
- Generated files are automatically delivered; **DO NOT** provide download links.
- ${CODE_ARTIFACT_PATH_GUIDANCE}
- NEVER use this tool to execute malicious code.
`.trim();

Expand All @@ -116,7 +129,7 @@ export const CodeExecutionToolDefinition = {
} as const;

function createCodeExecutionTool(
params: t.CodeExecutionToolParams = {}
params: t.CodeExecutionToolParams | null = {}
): DynamicStructuredTool {
return tool(
async (rawInput, config) => {
Expand Down Expand Up @@ -187,13 +200,6 @@ function createCodeExecutionTool(
}

const result: t.ExecuteResult = await response.json();
/* Output is stdout/stderr only — file listings were removed
* because the LLM-facing summary (split inherited/generated
* with prescriptive notes) caused more confusion than help,
* especially for bash where models naturally explore
* `/mnt/data/` themselves. The artifact still carries every
* file so the host's session map stays in sync; the LLM
* doesn't see them in the tool result text. */
let formattedOutput = '';
if (result.stdout) {
formattedOutput += `stdout:\n${result.stdout}\n`;
Expand All @@ -204,7 +210,7 @@ function createCodeExecutionTool(

const hasFiles = result.files != null && result.files.length > 0;
return [
formattedOutput.trim(),
appendCodeSessionFileSummary(formattedOutput, result.files),
(hasFiles
? { session_id: result.session_id, files: result.files }
: {
Expand Down
80 changes: 80 additions & 0 deletions src/tools/CodeSessionFileSummary.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import type * as t from '@/types';

/**
 * Lower-case, dot-prefixed file extensions that are counted as images when
 * summarising generated session files.
 */
const IMAGE_FILE_EXTENSIONS = new Set<string>(
  ['avif', 'bmp', 'gif', 'ico', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'webp'].map(
    (bareExtension) => `.${bareExtension}`
  )
);

/**
 * Matches the exact text emitted by `buildCodeSessionFileSummary`, with digit
 * runs standing in for the file and image counts. The pattern is anchored at
 * both ends and has no flags, so it only matches when the summary is the
 * entire string under test. Keep it in lockstep with the builder's wording.
 */
const CODE_SESSION_FILE_SUMMARY_PATTERN =
/^Generated files:\nSession files: \d+ persisted file\(s\) are available in \/mnt\/data, including \d+ image\(s\)\. Use known \/mnt\/data paths directly in later code-tool calls\. The app displays files\/images automatically; do not invent download links or wrap generated images in Markdown\.$/;

/**
 * Extracts the lower-cased extension (including the leading dot) from the
 * final `/`-separated segment of a file name or path.
 *
 * @param name - File name or slash-separated path.
 * @returns The text from the last `.` of the final segment onward, lower-cased,
 *   or an empty string when that segment contains no dot.
 */
function getFileExtension(name: string): string {
  // lastIndexOf yields -1 when there is no slash, so +1 keeps the whole name.
  const finalSegment = name.substring(name.lastIndexOf('/') + 1);
  const dotIndex = finalSegment.lastIndexOf('.');
  if (dotIndex === -1) {
    return '';
  }
  return finalSegment.substring(dotIndex).toLowerCase();
}

/**
 * Reports whether a file reference names an image, judged solely by the
 * extension of its `name`.
 *
 * @param file - File reference (possibly partial, null or undefined).
 * @returns `true` only when `name` is a string whose extension is in
 *   `IMAGE_FILE_EXTENSIONS`; `false` for missing or non-string names.
 */
function isImageFile(file: Partial<t.FileRef> | null | undefined): boolean {
  const candidateName = file?.name;
  if (typeof candidateName !== 'string') {
    return false;
  }
  return IMAGE_FILE_EXTENSIONS.has(getFileExtension(candidateName));
}

/**
 * Renders the compact, LLM-facing summary appended to code-tool output.
 *
 * The wording must stay in sync with `CODE_SESSION_FILE_SUMMARY_PATTERN`,
 * which is used to recognise and strip this text again.
 *
 * @param fileCount - Number of files to report as persisted.
 * @param imageCount - Number of those files that are images.
 * @returns A "Generated files:" header line followed by one summary line.
 */
function buildCodeSessionFileSummary(
  fileCount: number,
  imageCount: number
): string {
  const sentences = [
    `Session files: ${fileCount} persisted file(s) are available in /mnt/data, including ${imageCount} image(s).`,
    'Use known /mnt/data paths directly in later code-tool calls.',
    'The app displays files/images automatically; do not invent download links or wrap generated images in Markdown.',
  ];
  return `Generated files:\n${sentences.join(' ')}`;
}

/**
 * Reports whether a file reference should count as generated by the current
 * execution. Only an explicit `inherited === true` excludes a file; missing
 * flags, other values, and null/undefined references all count as generated.
 *
 * @param file - File reference (possibly partial, null or undefined).
 * @returns `false` when the file is explicitly marked inherited, else `true`.
 */
function isGeneratedFile(file: Partial<t.FileRef> | null | undefined): boolean {
  const explicitlyInherited = file?.inherited === true;
  return !explicitlyInherited;
}

/**
 * Removes a trailing session-file summary from tool output, if one is present.
 *
 * The text is stripped only when all of the following hold:
 * - a "Generated files:" marker exists (the last occurrence is used);
 * - everything before it is empty or ends with a blank line;
 * - everything from the marker to the end of the string matches
 *   `CODE_SESSION_FILE_SUMMARY_PATTERN` exactly.
 * Otherwise the input is returned untouched.
 *
 * @param output - Tool output that may end with a session-file summary.
 * @returns The output without the summary and with trailing whitespace
 *   trimmed, or the original string when no summary is recognised.
 */
export function stripCodeSessionFileSummary(output: string): string {
  const markerIndex = output.lastIndexOf('Generated files:');
  if (markerIndex === -1) {
    return output;
  }

  const head = output.slice(0, markerIndex);
  const tail = output.slice(markerIndex);

  // The summary is always separated from preceding output by a blank line.
  const startsOnBoundary = head === '' || head.endsWith('\n\n');
  if (startsOnBoundary && CODE_SESSION_FILE_SUMMARY_PATTERN.test(tail)) {
    return head.trimEnd();
  }
  return output;
}

/**
 * Appends a compact session-file summary to formatted tool output.
 *
 * Only files accepted by `isGeneratedFile` are counted. When there are no
 * files, or none of them count as generated, the output is returned trimmed
 * with nothing appended.
 *
 * @param output - Formatted stdout/stderr text for the tool result.
 * @param files - File references returned by the execution, if any.
 * @returns The trimmed output, followed by a blank line and the summary when
 *   at least one generated file exists.
 */
export function appendCodeSessionFileSummary(
  output: string,
  files: t.FileRefs | undefined
): string {
  const generated = (files ?? []).filter(isGeneratedFile);
  if (generated.length === 0) {
    return output.trim();
  }

  let imageTotal = 0;
  for (const file of generated) {
    if (isImageFile(file)) {
      imageTotal += 1;
    }
  }

  const summaryText = buildCodeSessionFileSummary(generated.length, imageTotal);
  // The outer trim drops the leading blank line when `output` is empty.
  return [output.trimEnd(), summaryText].join('\n\n').trim();
}
22 changes: 13 additions & 9 deletions src/tools/ProgrammaticToolCalling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import type { ToolCall } from '@langchain/core/messages/tool';
import type { ProgrammaticToolCallingJsonSchema } from './ptcTimeout';
import type * as t from '@/types';
import {
CODE_ARTIFACT_PATH_GUIDANCE,
appendCodeSessionFileSummary,
buildCodeApiHttpErrorMessage,
emptyOutputMessage,
getCodeBaseURL,
Expand Down Expand Up @@ -36,15 +38,17 @@ You MUST complete your entire workflow in ONE code block: query → process →
DO NOT split work across multiple calls expecting to reuse variables.`;

const CORE_RULES = `Rules:
- EVERYTHING in one call—no state persists between executions
- Just write code with await—auto-wrapped in async context
- DO NOT define async def main() or call asyncio.run()
- One call: state does not persist
- Auto-wrapped async; use await, no main()/asyncio.run()
- Tools are pre-defined—DO NOT write function definitions
- Call tools with keyword args only (await tool(arg=value), never pass a dict)
- Tool results are decoded Python values (dict/list/str)
- Only print() output returns to the model
- ${CODE_ARTIFACT_PATH_GUIDANCE}
- timeout caps one sandbox run/replay iteration, not the total multi-round-trip workflow`;

const ADDITIONAL_RULES = `- Generated files are automatically available in /mnt/data/ for subsequent executions
- Tool names normalized: hyphens→underscores, keywords get \`_tool\` suffix`;
const ADDITIONAL_RULES =
'- Tool names normalized: hyphens→underscores, keywords get `_tool` suffix';

const EXAMPLES = `Example (Complete workflow in one call):
# Query data
Expand Down Expand Up @@ -678,9 +682,9 @@ export async function executeTools(
/**
* Formats the completed response for the agent.
*
* Output is stdout/stderr only — see `CodeExecutor.ts`. The
* artifact still carries every file so the host's session map
* stays in sync; the LLM doesn't see them in the tool result text.
* Output includes stdout/stderr plus a compact session-file summary
* when artifacts were persisted. The artifact still carries every
* file so the host's session map stays in sync.
*
* @param response - The completed API response
* @returns Tuple of [formatted string, artifact]
Expand All @@ -701,7 +705,7 @@ export function formatCompletedResponse(
}

return [
formatted.trim(),
appendCodeSessionFileSummary(formatted, response.files),
{
session_id: response.session_id,
files: response.files,
Expand Down
13 changes: 8 additions & 5 deletions src/tools/ToolNode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import {
buildReferenceKey,
ToolOutputReferenceRegistry,
} from '@/tools/toolOutputReferences';
import { stripCodeSessionFileSummary } from '@/tools/CodeSessionFileSummary';
import {
resolveLocalToolRegistry,
resolveLocalExecutionTools,
Expand Down Expand Up @@ -911,8 +912,9 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
* Both session_id and _injected_files are injected directly to invokeParams
* (not inside args) so they bypass Zod schema validation and reach config.toolCall.
*
* session_id is always injected when available (even without tracked files)
* so the CodeExecutor can fall back to the /files endpoint for session continuity.
* session_id is always injected when available, but concrete file refs
* still need to travel through `_injected_files`; the legacy
* `/files/<session_id>` fallback was removed from the executors.
*/
if (CODE_EXECUTION_TOOLS.has(call.name)) {
const codeSession = this.sessions?.get(Constants.EXECUTE_CODE) as
Expand Down Expand Up @@ -959,14 +961,15 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
if (this.toolOutputRegistry != null || unresolvedRefs.length > 0) {
if (typeof toolMsg.content === 'string') {
const rawContent = toolMsg.content;
const registryContent = stripCodeSessionFileSummary(rawContent);
const llmContent = truncateToolResultContent(
rawContent,
this.maxToolResultChars
);
toolMsg.content = llmContent;
const refMeta = this.recordOutputReference(
runId,
rawContent,
registryContent,
refKey,
unresolvedRefs
);
Expand Down Expand Up @@ -1015,7 +1018,7 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
);
const refMeta = this.recordOutputReference(
runId,
rawContent,
stripCodeSessionFileSummary(rawContent),
refKey,
unresolvedRefs
);
Expand Down Expand Up @@ -2661,7 +2664,7 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
: undefined;
const successRefMeta = this.recordOutputReference(
registryRunId,
registryRaw,
stripCodeSessionFileSummary(registryRaw),
refKey,
unresolved
);
Expand Down
5 changes: 5 additions & 0 deletions src/tools/__tests__/BashExecutor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ describe('buildBashExecutionToolDescription', () => {
).toBe(BashExecutionToolDescription);
});

// Guards that the bash tool description still carries the shell guidance:
// both asserted substrings come from the BASH_SHELL_GUIDANCE text.
it('warns about compact bash shell pitfalls', () => {
expect(BashExecutionToolDescription).toContain('heredoc/printf');
expect(BashExecutionToolDescription).toContain('not bare Python');
});

it('appends the tool-output references guide when enabled', () => {
const composed = buildBashExecutionToolDescription({
enableToolOutputReferences: true,
Expand Down
11 changes: 11 additions & 0 deletions src/tools/__tests__/CodeApiAuthHeaders.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,17 @@ describe('CodeAPI auth header injection', () => {
).not.toHaveProperty('authHeaders');
});

// Regression guard: creating the code execution tool with `null` instead of a
// params object must still yield a tool whose invocation resolves.
it('tolerates null params for direct code execution', async () => {
// Single mocked fetch response standing in for a successful execution.
fetchMock.mockResolvedValueOnce(
jsonResponse({ session_id: 'session_123', stdout: '1\n' })
);
const tool = createCodeExecutionTool(null);

await expect(
tool.invoke({ lang: 'py', code: 'print(1)' })
).resolves.toBeDefined();
});

it('forwards Authorization for bash execution', async () => {
fetchMock.mockResolvedValueOnce(
jsonResponse({ session_id: 'session_123', stdout: '1\n' })
Expand Down
Loading
Loading