danny-avila · danny-avila · May 21, 2026 · May 21, 2026
diff --git a/src/tools/BashExecutor.ts b/src/tools/BashExecutor.ts
@@ -4,6 +4,9 @@ import { HttpsProxyAgent } from 'https-proxy-agent';
 import { tool, DynamicStructuredTool } from '@langchain/core/tools';
 import type * as t from '@/types';
 import {
+  BASH_SHELL_GUIDANCE,
+  CODE_ARTIFACT_PATH_GUIDANCE,
+  appendCodeSessionFileSummary,
   emptyOutputMessage,
   buildCodeApiHttpErrorMessage,
   getCodeBaseURL,
@@ -23,8 +26,9 @@ export const BashExecutionToolSchema = {
       type: 'string',
       description: `The bash command or script to execute.
 - The environment is stateless; variables and state don't persist between executions.
-- Generated files from previous executions are automatically available in "/mnt/data/".
-- Files from previous executions are automatically available and can be modified in place.
+- Prior /mnt/data files are available and can be modified in place.
+- ${CODE_ARTIFACT_PATH_GUIDANCE}
+- ${BASH_SHELL_GUIDANCE}
 - Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
 - Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
 - IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
@@ -46,6 +50,8 @@ Runs bash commands and returns stdout/stderr output from a stateless execution e
 Usage:
 - No network access available.
 - Generated files are automatically delivered; **DO NOT** provide download links.
+- ${CODE_ARTIFACT_PATH_GUIDANCE}
+- ${BASH_SHELL_GUIDANCE}
 - NEVER use this tool to execute malicious commands.
 `.trim();
 
@@ -166,11 +172,6 @@ function createBashExecutionTool(
         }
 
         const result: t.ExecuteResult = await response.json();
-        /* See `CodeExecutor.ts` — file listings were removed from the
-         * LLM-facing tool result. Bash especially benefits: models
-         * naturally `ls /mnt/data/` to discover what's available
-         * rather than relying on a prescriptive summary that
-         * misleads as often as it helps. */
         let formattedOutput = '';
         if (result.stdout) {
           formattedOutput += `stdout:\n${result.stdout}\n`;
@@ -181,7 +182,7 @@ function createBashExecutionTool(
 
         const hasFiles = result.files != null && result.files.length > 0;
         return [
-          formattedOutput.trim(),
+          appendCodeSessionFileSummary(formattedOutput, result.files),
           (hasFiles
             ? { session_id: result.session_id, files: result.files }
             : {

diff --git a/src/tools/BashProgrammaticToolCalling.ts b/src/tools/BashProgrammaticToolCalling.ts
@@ -8,7 +8,11 @@ import {
   executeTools,
   formatCompletedResponse,
 } from './ProgrammaticToolCalling';
-import { getCodeBaseURL } from './CodeExecutor';
+import {
+  BASH_SHELL_GUIDANCE,
+  CODE_ARTIFACT_PATH_GUIDANCE,
+  getCodeBaseURL,
+} from './CodeExecutor';
 import {
   clampCodeApiRunTimeoutMs,
   createCodeApiRunTimeoutSchema,
@@ -62,11 +66,13 @@ You MUST complete your entire workflow in ONE code block.
 DO NOT split work across multiple calls expecting to reuse variables.`;
 
 const CORE_RULES = `Rules:
-- EVERYTHING in one call—no state persists between executions
+- One call: state does not persist
 - Tools are pre-defined as bash functions—DO NOT redefine them
 - Each tool function accepts a JSON string argument
+- Tool stdout is JSON, not raw text; use jq -r . for strings, jq -r .field for objects
 - Only echo/printf output returns to the model
-- Generated files are automatically available in /mnt/data/ for subsequent executions
+- ${CODE_ARTIFACT_PATH_GUIDANCE}
+- ${BASH_SHELL_GUIDANCE}
 - timeout caps one sandbox run/replay iteration, not the total multi-round-trip workflow`;
 
 const ADDITIONAL_RULES =

diff --git a/src/tools/CodeExecutor.ts b/src/tools/CodeExecutor.ts
@@ -4,8 +4,14 @@ import { HttpsProxyAgent } from 'https-proxy-agent';
 import { tool, DynamicStructuredTool } from '@langchain/core/tools';
 import { getEnvironmentVariable } from '@langchain/core/utils/env';
 import type * as t from '@/types';
+import { appendCodeSessionFileSummary } from '@/tools/CodeSessionFileSummary';
 import { EnvVar, Constants } from '@/common';
 
+export {
+  appendCodeSessionFileSummary,
+  stripCodeSessionFileSummary,
+} from '@/tools/CodeSessionFileSummary';
+
 config();
 
 export const getCodeBaseURL = (): string =>
@@ -15,6 +21,12 @@ export const getCodeBaseURL = (): string =>
 export const emptyOutputMessage =
   'stdout: Empty. Ensure you\'re writing output explicitly.\n';
 
+export const CODE_ARTIFACT_PATH_GUIDANCE = // single-line prompt rule, interpolated into LLM-facing tool descriptions and rule lists
+  'Persist artifacts in `/mnt/data` with standard extensions (.json/.txt/.csv/.tsv/.log/.parquet/.png/.jpg/.pdf/.xlsx); `/tmp` and odd extensions are same-call scratch.';
+
+export const BASH_SHELL_GUIDANCE = // single-line prompt rule for bash-based tools; tests assert on its 'heredoc/printf' and 'not bare Python' phrases
+  'Bash: multi-line files use heredoc/printf; run Python via python3 -c/heredoc, not bare Python.';
+
 const SUPPORTED_LANGUAGES = [
   'py',
   'js',
@@ -44,8 +56,8 @@ export const CodeExecutionToolSchema = {
       type: 'string',
       description: `The complete, self-contained code to execute, without any truncation or minimization.
 - The environment is stateless; variables and imports don't persist between executions.
-- Generated files from previous executions are automatically available in "/mnt/data/".
-- Files from previous executions are automatically available and can be modified in place.
+- Prior /mnt/data files are available and can be modified in place.
+- ${CODE_ARTIFACT_PATH_GUIDANCE}
 - Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
 - Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
 - IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
@@ -104,6 +116,7 @@ Runs code and returns stdout/stderr output from a stateless execution environmen
 Usage:
 - No network access available.
 - Generated files are automatically delivered; **DO NOT** provide download links.
+- ${CODE_ARTIFACT_PATH_GUIDANCE}
 - NEVER use this tool to execute malicious code.
 `.trim();
 
@@ -116,7 +129,7 @@ export const CodeExecutionToolDefinition = {
 } as const;
 
 function createCodeExecutionTool(
-  params: t.CodeExecutionToolParams = {}
+  params: t.CodeExecutionToolParams | null = {}
 ): DynamicStructuredTool {
   return tool(
     async (rawInput, config) => {
@@ -187,13 +200,6 @@ function createCodeExecutionTool(
         }
 
         const result: t.ExecuteResult = await response.json();
-        /* Output is stdout/stderr only — file listings were removed
-         * because the LLM-facing summary (split inherited/generated
-         * with prescriptive notes) caused more confusion than help,
-         * especially for bash where models naturally explore
-         * `/mnt/data/` themselves. The artifact still carries every
-         * file so the host's session map stays in sync; the LLM
-         * doesn't see them in the tool result text. */
         let formattedOutput = '';
         if (result.stdout) {
           formattedOutput += `stdout:\n${result.stdout}\n`;
@@ -204,7 +210,7 @@ function createCodeExecutionTool(
 
         const hasFiles = result.files != null && result.files.length > 0;
         return [
-          formattedOutput.trim(),
+          appendCodeSessionFileSummary(formattedOutput, result.files),
           (hasFiles
             ? { session_id: result.session_id, files: result.files }
             : {

diff --git a/src/tools/CodeSessionFileSummary.ts b/src/tools/CodeSessionFileSummary.ts
@@ -0,0 +1,80 @@
+import type * as t from '@/types';
+
+const IMAGE_FILE_EXTENSIONS = new Set([ // lowercase, dot-prefixed; looked up with the value returned by getFileExtension()
+  '.avif',
+  '.bmp',
+  '.gif',
+  '.ico',
+  '.jpeg',
+  '.jpg',
+  '.png',
+  '.tif',
+  '.tiff',
+  '.webp',
+]);
+
+const CODE_SESSION_FILE_SUMMARY_PATTERN = // fully anchored match for the text built by buildCodeSessionFileSummary(); the two must be edited together
+  /^Generated files:\nSession files: \d+ persisted file\(s\) are available in \/mnt\/data, including \d+ image\(s\)\. Use known \/mnt\/data paths directly in later code-tool calls\. The app displays files\/images automatically; do not invent download links or wrap generated images in Markdown\.$/;
+
+function getFileExtension(name: string): string { // returns the lowercased ".ext" of the last '/'-separated segment, or '' when that segment has no dot
+  const lastSlash = name.lastIndexOf('/');
+  const basename = lastSlash >= 0 ? name.slice(lastSlash + 1) : name; // drop any directory prefix so dots in folder names are ignored
+  const lastDot = basename.lastIndexOf('.');
+  return lastDot >= 0 ? basename.slice(lastDot).toLowerCase() : ''; // note: a leading-dot name such as ".png" yields ".png"
+}
+
+function isImageFile(file: Partial<t.FileRef> | null | undefined): boolean { // true only when file.name is a string whose extension is in IMAGE_FILE_EXTENSIONS
+  const name = file?.name;
+  return (
+    typeof name === 'string' && // null/undefined refs and refs without a string name are never images
+    IMAGE_FILE_EXTENSIONS.has(getFileExtension(name))
+  );
+}
+
+function buildCodeSessionFileSummary( // renders the two-line summary text; wording is matched verbatim by CODE_SESSION_FILE_SUMMARY_PATTERN
+  fileCount: number, // interpolated as the persisted-file count
+  imageCount: number // interpolated as the image count
+): string {
+  return (
+    'Generated files:\n' + // header line; stripCodeSessionFileSummary() searches for this marker
+    `Session files: ${fileCount} persisted file(s) are available in /mnt/data, including ${imageCount} image(s). ` +
+    'Use known /mnt/data paths directly in later code-tool calls. ' +
+    'The app displays files/images automatically; do not invent download links or wrap generated images in Markdown.'
+  );
+}
+
+function isGeneratedFile(file: Partial<t.FileRef> | null | undefined): boolean { // generated = not explicitly flagged inherited; null/undefined refs also return true
+  return file?.inherited !== true;
+}
+
+export function stripCodeSessionFileSummary(output: string): string { // removes a trailing session-file summary when present; otherwise returns output unchanged
+  const summaryStart = output.lastIndexOf('Generated files:'); // candidate start: last occurrence of the summary header
+  if (summaryStart < 0) return output; // no header at all, nothing to strip
+  const beforeSummary = output.slice(0, summaryStart);
+  if (beforeSummary !== '' && !beforeSummary.endsWith('\n\n')) return output; // header must open the string or follow a blank line
+  const maybeSummary = output.slice(summaryStart);
+  if (!CODE_SESSION_FILE_SUMMARY_PATTERN.test(maybeSummary)) return output; // tail must be exactly the summary text, nothing after it
+  return beforeSummary.trimEnd(); // also drops the blank-line separator before the summary
+}
+
+export function appendCodeSessionFileSummary( // returns trimmed output, followed by a file summary when at least one non-inherited file exists
+  output: string, // formatted stdout/stderr text
+  files: t.FileRefs | undefined // file refs from the execution result; may be absent
+): string {
+  if (files == null || files.length === 0) { // no files reported: plain trimmed output
+    return output.trim();
+  }
+
+  const generatedFiles = files.filter(isGeneratedFile);
+  if (generatedFiles.length === 0) { // every ref was flagged inherited: no summary
+    return output.trim();
+  }
+
+  const imageCount = generatedFiles.filter(isImageFile).length; // images are counted among generated files only
+  const summary = buildCodeSessionFileSummary(
+    generatedFiles.length,
+    imageCount
+  );
+
+  return `${output.trimEnd()}\n\n${summary}`.trim(); // blank line separates output from summary; outer trim() removes it when output is empty
+}
diff --git a/src/tools/ProgrammaticToolCalling.ts b/src/tools/ProgrammaticToolCalling.ts
@@ -7,6 +7,8 @@ import type { ToolCall } from '@langchain/core/messages/tool';
 import type { ProgrammaticToolCallingJsonSchema } from './ptcTimeout';
 import type * as t from '@/types';
 import {
+  CODE_ARTIFACT_PATH_GUIDANCE,
+  appendCodeSessionFileSummary,
   buildCodeApiHttpErrorMessage,
   emptyOutputMessage,
   getCodeBaseURL,
@@ -36,15 +38,17 @@ You MUST complete your entire workflow in ONE code block: query → process →
 DO NOT split work across multiple calls expecting to reuse variables.`;
 
 const CORE_RULES = `Rules:
-- EVERYTHING in one call—no state persists between executions
-- Just write code with await—auto-wrapped in async context
-- DO NOT define async def main() or call asyncio.run()
+- One call: state does not persist
+- Auto-wrapped async; use await, no main()/asyncio.run()
 - Tools are pre-defined—DO NOT write function definitions
+- Call tools with keyword args only (await tool(arg=value), never pass a dict)
+- Tool results are decoded Python values (dict/list/str)
 - Only print() output returns to the model
+- ${CODE_ARTIFACT_PATH_GUIDANCE}
 - timeout caps one sandbox run/replay iteration, not the total multi-round-trip workflow`;
 
-const ADDITIONAL_RULES = `- Generated files are automatically available in /mnt/data/ for subsequent executions
-- Tool names normalized: hyphens→underscores, keywords get \`_tool\` suffix`;
+const ADDITIONAL_RULES =
+  '- Tool names normalized: hyphens→underscores, keywords get `_tool` suffix';
 
 const EXAMPLES = `Example (Complete workflow in one call):
   # Query data
@@ -678,9 +682,9 @@ export async function executeTools(
 /**
  * Formats the completed response for the agent.
  *
- * Output is stdout/stderr only — see `CodeExecutor.ts`. The
- * artifact still carries every file so the host's session map
- * stays in sync; the LLM doesn't see them in the tool result text.
+ * Output includes stdout/stderr plus a compact session-file summary
+ * when artifacts were persisted. The artifact still carries every
+ * file so the host's session map stays in sync.
  *
  * @param response - The completed API response
  * @returns Tuple of [formatted string, artifact]
@@ -701,7 +705,7 @@ export function formatCompletedResponse(
   }
 
   return [
-    formatted.trim(),
+    appendCodeSessionFileSummary(formatted, response.files),
     {
       session_id: response.session_id,
       files: response.files,

diff --git a/src/tools/ToolNode.ts b/src/tools/ToolNode.ts
@@ -46,6 +46,7 @@ import {
   buildReferenceKey,
   ToolOutputReferenceRegistry,
 } from '@/tools/toolOutputReferences';
+import { stripCodeSessionFileSummary } from '@/tools/CodeSessionFileSummary';
 import {
   resolveLocalToolRegistry,
   resolveLocalExecutionTools,
@@ -911,8 +912,9 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
        * Both session_id and _injected_files are injected directly to invokeParams
        * (not inside args) so they bypass Zod schema validation and reach config.toolCall.
        *
-       * session_id is always injected when available (even without tracked files)
-       * so the CodeExecutor can fall back to the /files endpoint for session continuity.
+       * session_id is always injected when available, but concrete file refs
+       * still need to travel through `_injected_files`; the legacy
+       * `/files/<session_id>` fallback was removed from the executors.
        */
       if (CODE_EXECUTION_TOOLS.has(call.name)) {
         const codeSession = this.sessions?.get(Constants.EXECUTE_CODE) as
@@ -959,14 +961,15 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
         if (this.toolOutputRegistry != null || unresolvedRefs.length > 0) {
           if (typeof toolMsg.content === 'string') {
             const rawContent = toolMsg.content;
+            const registryContent = stripCodeSessionFileSummary(rawContent);
             const llmContent = truncateToolResultContent(
               rawContent,
               this.maxToolResultChars
             );
             toolMsg.content = llmContent;
             const refMeta = this.recordOutputReference(
               runId,
-              rawContent,
+              registryContent,
               refKey,
               unresolvedRefs
             );
@@ -1015,7 +1018,7 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
       );
       const refMeta = this.recordOutputReference(
         runId,
-        rawContent,
+        stripCodeSessionFileSummary(rawContent),
         refKey,
         unresolvedRefs
       );
@@ -2661,7 +2664,7 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
               : undefined;
           const successRefMeta = this.recordOutputReference(
             registryRunId,
-            registryRaw,
+            stripCodeSessionFileSummary(registryRaw),
             refKey,
             unresolved
           );

diff --git a/src/tools/__tests__/BashExecutor.test.ts b/src/tools/__tests__/BashExecutor.test.ts
@@ -18,6 +18,11 @@ describe('buildBashExecutionToolDescription', () => {
     ).toBe(BashExecutionToolDescription);
   });
 
+  it('warns about compact bash shell pitfalls', () => { // checks that the bash shell guidance text is part of the tool description
+    expect(BashExecutionToolDescription).toContain('heredoc/printf');
+    expect(BashExecutionToolDescription).toContain('not bare Python');
+  });
+
   it('appends the tool-output references guide when enabled', () => {
     const composed = buildBashExecutionToolDescription({
       enableToolOutputReferences: true,

diff --git a/src/tools/__tests__/CodeApiAuthHeaders.test.ts b/src/tools/__tests__/CodeApiAuthHeaders.test.ts
@@ -165,6 +165,17 @@ describe('CodeAPI auth header injection', () => {
     ).not.toHaveProperty('authHeaders');
   });
 
+  it('tolerates null params for direct code execution', async () => { // regression guard for passing null instead of an options object
+    fetchMock.mockResolvedValueOnce(
+      jsonResponse({ session_id: 'session_123', stdout: '1\n' })
+    );
+    const tool = createCodeExecutionTool(null); // explicit null: a default parameter value only applies to undefined
+
+    await expect(
+      tool.invoke({ lang: 'py', code: 'print(1)' })
+    ).resolves.toBeDefined(); // only asserts that invoke resolves; the returned content is not inspected
+  });
+
   it('forwards Authorization for bash execution', async () => {
     fetchMock.mockResolvedValueOnce(
       jsonResponse({ session_id: 'session_123', stdout: '1\n' })