Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/tools/BashExecutor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import type * as t from '@/types';
import {
BASH_SHELL_GUIDANCE,
CODE_ARTIFACT_PATH_GUIDANCE,
appendTmpScratchReminder,
appendCodeSessionFileSummary,
emptyOutputMessage,
buildCodeApiHttpErrorMessage,
Expand Down Expand Up @@ -111,7 +112,7 @@ export const BashExecutionToolDefinition = {
} as const;

function createBashExecutionTool(
params: t.BashExecutionToolParams = {}
params: t.BashExecutionToolParams | null = {}
): DynamicStructuredTool {
return tool(
async (rawInput, config) => {
Expand Down Expand Up @@ -180,9 +181,13 @@ function createBashExecutionTool(
}
if (result.stderr) formattedOutput += `stderr:\n${result.stderr}\n`;

const outputWithReminder = appendTmpScratchReminder(
formattedOutput,
command
);
const hasFiles = result.files != null && result.files.length > 0;
return [
appendCodeSessionFileSummary(formattedOutput, result.files),
appendCodeSessionFileSummary(outputWithReminder, result.files),
(hasFiles
? { session_id: result.session_id, files: result.files }
: {
Expand Down
12 changes: 6 additions & 6 deletions src/tools/BashProgrammaticToolCalling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ const CORE_RULES = `Rules:
- One call: state does not persist
- Tools are pre-defined as bash functions—DO NOT redefine them
- Each tool function accepts a JSON string argument
- Tool stdout is JSON, not raw text; use jq -r . for strings, jq -r .field for objects
- When parsing saved tool stdout with jq, use jq -r 'fromjson? // . | ...' so object and stringified-JSON results both work
- Only echo/printf output returns to the model
- ${CODE_ARTIFACT_PATH_GUIDANCE}
- ${BASH_SHELL_GUIDANCE}
Expand All @@ -84,11 +84,11 @@ const EXAMPLES = `Example (Complete workflow in one call):
echo "$data" | jq '.[] | .name'

Example (Parallel calls):
web_search '{"query": "SF weather"}' > /tmp/sf.txt &
web_search '{"query": "NY weather"}' > /tmp/ny.txt &
web_search '{"query": "SF weather"}' > /mnt/data/sf.json &
web_search '{"query": "NY weather"}' > /mnt/data/ny.json &
wait
echo "SF: $(cat /tmp/sf.txt)"
echo "NY: $(cat /tmp/ny.txt)"`;
echo "SF: $(jq -r . /mnt/data/sf.json)"
echo "NY: $(jq -r . /mnt/data/ny.json)"`;

const CODE_PARAM_DESCRIPTION = `Bash code that calls tools programmatically. Tools are available as bash functions.

Expand Down Expand Up @@ -375,7 +375,7 @@ export function createBashProgrammaticToolCallingTool(
// ====================================================================

if (response.status === 'completed') {
return formatCompletedResponse(response);
return formatCompletedResponse(response, code);
}

if (response.status === 'error') {
Expand Down
20 changes: 18 additions & 2 deletions src/tools/CodeExecutor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,23 @@ export const emptyOutputMessage =
'stdout: Empty. Ensure you\'re writing output explicitly.\n';

export const CODE_ARTIFACT_PATH_GUIDANCE =
'Persist artifacts in `/mnt/data` with standard extensions (.json/.txt/.csv/.tsv/.log/.parquet/.png/.jpg/.pdf/.xlsx); `/tmp` and odd extensions are same-call scratch.';
'Persist handoff artifacts in `/mnt/data` with standard extensions (.json/.txt/.csv/.tsv/.log/.parquet/.png/.jpg/.pdf/.xlsx); `/tmp` and odd extensions are same-call scratch only, not later-call storage.';

/**
 * One-line guidance on common bash pitfalls: write multi-line files with a
 * heredoc or printf, and launch Python through `python3 -c` or a heredoc
 * instead of pasting bare Python into the shell.
 */
export const BASH_SHELL_GUIDANCE =
'Bash: multi-line files use heredoc/printf; run Python via python3 -c/heredoc, not bare Python.';

/**
 * Matches a reference to the absolute `/tmp` directory inside submitted code.
 *
 * The character before `/tmp` must not be a word character (that would be a
 * nested path such as `/var/tmp` or a division like `x/tmp`), nor one of
 * `.`, `~`, `}`, `)`, `]`, which indicate a relative or expanded prefix
 * (`./tmp`, `~/tmp`, `${DIR}/tmp`, `$(pwd)/tmp`) or an arithmetic expression
 * (`(a + b)/tmp`, `xs[0]/tmp`). The character after `/tmp` must not extend the
 * name, so `/tmpfoo` and `/tmp-cache` are ignored.
 *
 * The pattern deliberately has no `g` flag so `.test()` stays stateless.
 */
const TMP_PATH_PATTERN = /(?:^|[^A-Za-z0-9_.~})\]])\/tmp(?![A-Za-z0-9_-])/;

/** Reminder line appended to tool output when the executed code touched `/tmp`. */
export const TMP_SCRATCH_OUTPUT_REMINDER =
  'Note: /tmp files are same-call scratch only and were not persisted; use /mnt/data for files needed later.';

/**
 * Appends {@link TMP_SCRATCH_OUTPUT_REMINDER} to `output` when `code` refers to
 * the absolute `/tmp` directory.
 *
 * @param output - Formatted tool output (stdout/stderr sections).
 * @param code - Source code or shell command that produced the output.
 * @returns `output` unchanged when `/tmp` is not referenced; otherwise the
 *   output with trailing whitespace trimmed, followed by the reminder on its
 *   own line and a trailing newline. Empty output yields just the reminder
 *   line, without a leading blank line.
 */
export function appendTmpScratchReminder(output: string, code: string): string {
  if (!TMP_PATH_PATTERN.test(code)) {
    return output;
  }
  const body = output.trimEnd();
  if (body === '') {
    // Avoid emitting a stray leading newline when there is nothing to append to.
    return `${TMP_SCRATCH_OUTPUT_REMINDER}\n`;
  }
  return `${body}\n${TMP_SCRATCH_OUTPUT_REMINDER}\n`;
}

const SUPPORTED_LANGUAGES = [
'py',
'js',
Expand Down Expand Up @@ -208,9 +220,13 @@ function createCodeExecutionTool(
}
if (result.stderr) formattedOutput += `stderr:\n${result.stderr}\n`;

const outputWithReminder = appendTmpScratchReminder(
formattedOutput,
code
);
const hasFiles = result.files != null && result.files.length > 0;
return [
appendCodeSessionFileSummary(formattedOutput, result.files),
appendCodeSessionFileSummary(outputWithReminder, result.files),
(hasFiles
? { session_id: result.session_id, files: result.files }
: {
Expand Down
10 changes: 7 additions & 3 deletions src/tools/ProgrammaticToolCalling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
buildCodeApiHttpErrorMessage,
emptyOutputMessage,
getCodeBaseURL,
appendTmpScratchReminder,
resolveCodeApiAuthHeaders,
} from './CodeExecutor';
import {
Expand Down Expand Up @@ -690,7 +691,8 @@ export async function executeTools(
* @returns Tuple of [formatted string, artifact]
*/
export function formatCompletedResponse(
response: t.ProgrammaticExecutionResponse
response: t.ProgrammaticExecutionResponse,
sourceCode = ''
): [string, t.ProgrammaticExecutionArtifact] {
let formatted = '';

Expand All @@ -704,8 +706,10 @@ export function formatCompletedResponse(
formatted += `stderr:\n${response.stderr}\n`;
}

const outputWithReminder = appendTmpScratchReminder(formatted, sourceCode);

return [
appendCodeSessionFileSummary(formatted, response.files),
appendCodeSessionFileSummary(outputWithReminder, response.files),
{
session_id: response.session_id,
files: response.files,
Expand Down Expand Up @@ -863,7 +867,7 @@ export function createProgrammaticToolCallingTool(
// ====================================================================

if (response.status === 'completed') {
return formatCompletedResponse(response);
return formatCompletedResponse(response, code);
}

if (response.status === 'error') {
Expand Down
1 change: 1 addition & 0 deletions src/tools/__tests__/BashExecutor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ describe('buildBashExecutionToolDescription', () => {
it('warns about compact bash shell pitfalls', () => {
  // Each phrase must appear verbatim in the generated tool description.
  const requiredPhrases = [
    'heredoc/printf',
    'not bare Python',
    'not later-call storage',
  ];
  for (const phrase of requiredPhrases) {
    expect(BashExecutionToolDescription).toContain(phrase);
  }
});

it('appends the tool-output references guide when enabled', () => {
Expand Down
27 changes: 24 additions & 3 deletions src/tools/__tests__/ProgrammaticToolCalling.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ describe('ProgrammaticToolCalling', () => {
const schema = createBashProgrammaticToolCallingSchema();
const description = schema.properties.code.description;

expect(description).toContain('Tool stdout is JSON');
expect(description).toContain('not raw text');
expect(description).toContain('use jq -r . for strings');
expect(description).toContain('parsing saved tool stdout with jq');
expect(description).toContain('jq -r \'fromjson? // . | ...\'');
expect(description).toContain('stringified-JSON results');
expect(description).toContain('/mnt/data/sf.json');
expect(description).toContain('not later-call storage');
});
});

Expand Down Expand Up @@ -678,6 +680,25 @@ for member in team:
expect(output).toContain('stderr:\nWarning: deprecated function');
});

it('adds a /tmp scratch reminder when source code used /tmp', () => {
  // Source that redirects tool output into /tmp, which should trigger the note.
  const sourceUsingTmp = 'tool "{}" > /tmp/result.json';
  const completed: t.ProgrammaticExecutionResponse = {
    status: 'completed',
    stdout: 'done\n',
    stderr: '',
    files: [],
    session_id: 'sess_abc123',
  };

  const formattedText = formatCompletedResponse(completed, sourceUsingTmp)[0];

  // The stdout section is kept and the reminder text is appended.
  const expectedFragments = [
    'stdout:\ndone',
    '/tmp files are same-call scratch only',
    'use /mnt/data for files needed later',
  ];
  for (const fragment of expectedFragments) {
    expect(formattedText).toContain(fragment);
  }
});

it('preserves files on the artifact and summarizes them without listing paths', () => {
const response: t.ProgrammaticExecutionResponse = {
status: 'completed',
Expand Down
Loading