Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/tools/BashExecutor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import type * as t from '@/types';
import {
BASH_SHELL_GUIDANCE,
CODE_ARTIFACT_PATH_GUIDANCE,
appendFailedExecutionFileReminder,
appendTmpScratchReminder,
appendCodeSessionFileSummary,
emptyOutputMessage,
Expand Down Expand Up @@ -195,8 +196,12 @@ function createBashExecutionTool(
}) satisfies t.CodeExecutionArtifact,
];
} catch (error) {
const messageWithReminder = appendFailedExecutionFileReminder(
(error as Error | undefined)?.message ?? '',
command
);
throw new Error(
`Execution error:\n\n${(error as Error | undefined)?.message}`
`Execution error:\n\n${messageWithReminder}`
);
}
},
Expand Down
14 changes: 10 additions & 4 deletions src/tools/BashProgrammaticToolCalling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
import {
BASH_SHELL_GUIDANCE,
CODE_ARTIFACT_PATH_GUIDANCE,
appendFailedExecutionFileReminder,
getCodeBaseURL,
} from './CodeExecutor';
import {
Expand Down Expand Up @@ -69,7 +70,8 @@ const CORE_RULES = `Rules:
- One call: state does not persist
- Tools are pre-defined as bash functions—DO NOT redefine them
- Each tool function accepts a JSON string argument
- When parsing saved tool stdout with jq, use jq -r 'fromjson? // . | ...' so object and stringified-JSON results both work
- Save tool output with raw=$(tool '{}'); printf '%s\n' "$raw" > /mnt/data/file.json; direct tool > file may be empty
- jq: use fromjson? // . on saved tool stdout and again on JSON-string fields; check types since arrays may contain strings
- Only echo/printf output returns to the model
- ${CODE_ARTIFACT_PATH_GUIDANCE}
- ${BASH_SHELL_GUIDANCE}
Expand All @@ -84,8 +86,8 @@ const EXAMPLES = `Example (Complete workflow in one call):
echo "$data" | jq '.[] | .name'

Example (Parallel calls):
web_search '{"query": "SF weather"}' > /mnt/data/sf.json &
web_search '{"query": "NY weather"}' > /mnt/data/ny.json &
{ sf=$(web_search '{"query": "SF weather"}'); printf '%s\n' "$sf" > /mnt/data/sf.json; } &
{ ny=$(web_search '{"query": "NY weather"}'); printf '%s\n' "$ny" > /mnt/data/ny.json; } &
wait
echo "SF: $(jq -r . /mnt/data/sf.json)"
echo "NY: $(jq -r . /mnt/data/ny.json)"`;
Expand Down Expand Up @@ -389,8 +391,12 @@ export function createBashProgrammaticToolCallingTool(

throw new Error(`Unexpected response status: ${response.status}`);
} catch (error) {
const messageWithReminder = appendFailedExecutionFileReminder(
(error as Error).message,
code
);
throw new Error(
`Bash programmatic execution failed: ${(error as Error).message}`
`Bash programmatic execution failed: ${messageWithReminder}`
);
}
},
Expand Down
25 changes: 23 additions & 2 deletions src/tools/CodeExecutor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,40 @@ export const emptyOutputMessage =
'stdout: Empty. Ensure you\'re writing output explicitly.\n';

export const CODE_ARTIFACT_PATH_GUIDANCE =
'Persist handoff artifacts in `/mnt/data` with standard extensions (.json/.txt/.csv/.tsv/.log/.parquet/.png/.jpg/.pdf/.xlsx); `/tmp` and odd extensions are same-call scratch only, not later-call storage.';
'Persist handoff artifacts in `/mnt/data` with standard extensions (.json/.txt/.csv/.tsv/.log/.parquet/.png/.jpg/.pdf/.xlsx); failed executions do not register new files; `/tmp` and odd extensions are same-call scratch only, not later-call storage.';

/**
 * One-line bash usage guidance meant to be embedded in tool-facing text:
 * write multi-line files with heredoc/printf, and run Python through
 * `python3 -c` or a heredoc rather than as bare Python.
 */
export const BASH_SHELL_GUIDANCE =
'Bash: multi-line files use heredoc/printf; run Python via python3 -c/heredoc, not bare Python.';

// Matches a reference to the `/tmp` directory inside a code string. `/tmp` must
// appear at the very start of the string or right after a character that is not
// an ASCII letter, digit, or underscore, and must be followed by `/` or a word
// boundary. So `/tmp`, `/tmp/x` and `> /tmp/x` match, while `/tmpfile` and
// `/var/tmp` do not. No `g` flag, so `.test()` keeps no state between calls.
const TMP_PATH_PATTERN = /(^|[^A-Za-z0-9_])\/tmp(?:\/|\b)/;
// Same matching rules as TMP_PATH_PATTERN, applied to the `/mnt/data` directory.
const MNT_DATA_PATH_PATTERN = /(^|[^A-Za-z0-9_])\/mnt\/data(?:\/|\b)/;

/**
 * Note appended to execution output by `appendTmpScratchReminder` when the
 * executed code references `/tmp`.
 */
export const TMP_SCRATCH_OUTPUT_REMINDER =
'Note: /tmp files are same-call scratch only and were not persisted; use /mnt/data for files needed later.';

/**
 * Note appended to a failed execution's message by
 * `appendFailedExecutionFileReminder` when the executed code references
 * `/mnt/data`.
 */
export const FAILED_EXECUTION_FILE_REMINDER =
'Note: any files written during this failed call were not registered for later calls; fix the error and rerun before relying on them.';

/**
 * Adds the `/tmp` scratch-space reminder to execution output when the executed
 * code mentions `/tmp`.
 *
 * @param output - Text produced by the execution.
 * @param code - The code or command that was executed.
 * @returns `output` untouched when `code` has no `/tmp` reference; otherwise
 *   `output` with trailing whitespace trimmed, followed by the reminder on its
 *   own line and a final newline.
 */
export function appendTmpScratchReminder(output: string, code: string): string {
  const referencesTmp = TMP_PATH_PATTERN.test(code);
  return referencesTmp
    ? `${output.trimEnd()}\n${TMP_SCRATCH_OUTPUT_REMINDER}\n`
    : output;
}

/**
 * Adds the "files were not registered" reminder to a failed execution's
 * message when the executed code mentions `/mnt/data`.
 *
 * The message is returned without the reminder when `code` has no `/mnt/data`
 * reference, or when the reminder text is already present (so wrapping the
 * same message twice does not duplicate it).
 *
 * @param output - Error text of the failed execution. Declared as a string,
 *   but callers forward `(error as Error).message`, which is `undefined` at
 *   runtime if a non-Error value was thrown. Such values are stringified
 *   rather than allowed to crash the error path.
 * @param code - The code or command that was executed.
 * @returns The message as-is, or with trailing whitespace trimmed and the
 *   reminder appended on its own line followed by a newline.
 */
export function appendFailedExecutionFileReminder(
  output: string,
  code: string
): string {
  // Runtime guard: a non-string here would make `.includes` throw a TypeError
  // from inside the caller's catch block and hide the real failure.
  // `String(...)` yields the same text a template literal would have produced.
  const message = typeof output === 'string' ? output : String(output);
  const mentionsMntData = MNT_DATA_PATH_PATTERN.test(code);
  if (!mentionsMntData || message.includes(FAILED_EXECUTION_FILE_REMINDER)) {
    return message;
  }
  return `${message.trimEnd()}\n${FAILED_EXECUTION_FILE_REMINDER}\n`;
}

const SUPPORTED_LANGUAGES = [
'py',
'js',
Expand Down Expand Up @@ -234,8 +251,12 @@ function createCodeExecutionTool(
}) satisfies t.CodeExecutionArtifact,
];
} catch (error) {
const messageWithReminder = appendFailedExecutionFileReminder(
(error as Error | undefined)?.message ?? '',
code
);
throw new Error(
`Execution error:\n\n${(error as Error | undefined)?.message}`
`Execution error:\n\n${messageWithReminder}`
);
}
},
Expand Down
7 changes: 6 additions & 1 deletion src/tools/ProgrammaticToolCalling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import type * as t from '@/types';
import {
CODE_ARTIFACT_PATH_GUIDANCE,
appendCodeSessionFileSummary,
appendFailedExecutionFileReminder,
buildCodeApiHttpErrorMessage,
emptyOutputMessage,
getCodeBaseURL,
Expand Down Expand Up @@ -881,8 +882,12 @@ export function createProgrammaticToolCallingTool(

throw new Error(`Unexpected response status: ${response.status}`);
} catch (error) {
const messageWithReminder = appendFailedExecutionFileReminder(
(error as Error).message,
code
);
throw new Error(
`Programmatic execution failed: ${(error as Error).message}`
`Programmatic execution failed: ${messageWithReminder}`
);
}
},
Expand Down
3 changes: 3 additions & 0 deletions src/tools/__tests__/BashExecutor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ describe('buildBashExecutionToolDescription', () => {
// Verifies that the bash execution tool description carries the shell
// guidance and the artifact-path guidance fragments.
it('warns about compact bash shell pitfalls', () => {
// Fragments of the bash shell guidance text.
expect(BashExecutionToolDescription).toContain('heredoc/printf');
expect(BashExecutionToolDescription).toContain('not bare Python');
// Fragments of the artifact-path guidance text, including the clause about
// failed executions.
expect(BashExecutionToolDescription).toContain(
'failed executions do not register new files'
);
expect(BashExecutionToolDescription).toContain('not later-call storage');
});

Expand Down
32 changes: 32 additions & 0 deletions src/tools/__tests__/CodeApiAuthHeaders.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,38 @@ describe('CodeAPI auth header injection', () => {
);
});

// Verifies that a failed bash programmatic execution rejects with an error
// whose message includes the "files were not registered" reminder.
it('reminds that failed bash programmatic executions do not register new files', async () => {
// Queue a single mocked fetch result carrying an error-status payload.
// NOTE(review): assumes jsonResponse wraps the object as the HTTP response body — confirm.
fetchMock.mockResolvedValueOnce(
jsonResponse({
status: 'error',
error: 'jq failed',
stderr: 'jq: Cannot index string with string "name"',
})
);
const tool = createBashProgrammaticToolCallingTool();

await expect(
tool.invoke(
{
// The submitted script references /mnt/data, which is the condition under
// which the failed-execution reminder is appended to the error message.
code: [
'lookup_user "{}" > /mnt/data/user.json',
'jq -r \'.result.name\' /mnt/data/user.json',
].join('\n'),
},
{
toolCall: {
name: 'bash_programmatic_code_execution',
args: {},
toolMap: toolMap(),
toolDefs,
},
}
)
).rejects.toThrow(
// Substring of the reminder text expected in the rejected error's message.
'files written during this failed call were not registered for later calls'
);
});

it('fetches session files with the CodeAPI resource scope and auth headers', async () => {
fetchMock.mockResolvedValueOnce(
jsonResponse([
Expand Down
11 changes: 8 additions & 3 deletions src/tools/__tests__/ProgrammaticToolCalling.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,15 @@ describe('ProgrammaticToolCalling', () => {
const schema = createBashProgrammaticToolCallingSchema();
const description = schema.properties.code.description;

expect(description).toContain('parsing saved tool stdout with jq');
expect(description).toContain('jq -r \'fromjson? // . | ...\'');
expect(description).toContain('stringified-JSON results');
expect(description).toContain('jq: use fromjson? // .');
expect(description).toContain('again on JSON-string fields');
expect(description).toContain('arrays may contain strings');
expect(description).toContain('raw=$(tool');
expect(description).toContain('direct tool > file may be empty');
expect(description).toContain('/mnt/data/sf.json');
expect(description).toContain(
'failed executions do not register new files'
);
expect(description).toContain('not later-call storage');
});
});
Expand Down
Loading