Skip to content

Commit ecc838b

Browse files
fix: enable parallel tool calls by default (#2111)
1 parent 4609cff commit ecc838b

6 files changed

Lines changed: 50 additions & 7 deletions

File tree

benchmarks/README.md

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,22 @@ A flexible evaluation framework for running automated tests and benchmarks again
44

55
## Quick Start
66

7+
### Setup
8+
9+
Before running evaluations, create a `forgee` symlink to the debug binary:
10+
11+
```bash
12+
# Create symlink in your PATH (e.g., ~/bin or /usr/local/bin)
13+
ln -sf /path/to/code-forge/target/debug/forge /usr/local/bin/forgee
14+
15+
# Or if ~/bin is in your PATH
16+
ln -sf $(pwd)/target/debug/forge ~/bin/forgee
17+
```
18+
19+
**Why is this needed?** Tasks execute in temporary directories, so relative paths like `../../target/debug/forge` won't resolve. Putting a `forgee` symlink on your `PATH` gives every task a stable command name that works from any directory.
20+
21+
### Running Evaluations
22+
723
```bash
824
# Run an evaluation
925
npm run eval ./evals/create_skill/task.yml
@@ -58,12 +74,12 @@ before_run:
5874

5975
# Required: Command(s) to execute for each test case
6076
# Single command
61-
run: ../../target/debug/forge -p '{{prompt}}'
77+
run: forgee -p '{{prompt}}'
6278

6379
# Or multiple commands (executed sequentially)
6480
run:
6581
- echo "Step 1: {{task}}"
66-
- ../../target/debug/forge -p '{{prompt}}'
82+
- forgee -p '{{prompt}}'
6783
- echo "Step 2: Complete"
6884

6985
# Execution configuration
Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
1-
before_run:
2-
- cargo build
3-
run: FORGE_DEBUG_REQUESTS='{{tmp_dir}}/context.json' ../../target/debug/forge -p '{{skill}}'
1+
# No before_run needed - the forge binary must already be built and available on PATH as `forgee`
2+
run: FORGE_DEBUG_REQUESTS='{{dir}}/context.json' forgee -p '{{skill}}'
43
parallelism: 10
54
timeout: 60
65
early_exit: true
76
validations:
87
- name: "Uses create-skill tool"
98
type: shell
10-
command: "jq -e '[.messages[]?.tool_calls[]? | select(.function.name == \"skill\") | .function.arguments | fromjson | .name == \"create-skill\"] | any' {{tmp_dir}}/context.json"
9+
command: "jq -e '[.messages[]?.tool_calls[]? | select(.function.name == \"skill\") | .function.arguments | fromjson | .name == \"create-skill\"] | any' {{dir}}/context.json"
1110
sources:
1211
- csv: create_skill_tasks.csv
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
debug/
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
task
2+
"Read the contents of AGENTS.md, README.md, and Cargo.toml files"
3+
"Show me AGENTS.md, forge.schema.json, and rust-toolchain.toml files"
4+
"I need to see Cargo.toml, LICENSE, and README.md files"
5+
"Display the contents of forge.schema.json, AGENTS.md, and Cargo.toml simultaneously"
6+
"Read LICENSE, README.md, and rust-toolchain.toml in parallel"
7+
"Show me the files: AGENTS.md, Cargo.toml, forge.schema.json, and LICENSE"
8+
"I want to see README.md, rust-toolchain.toml, and forge.schema.json at once"
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# No before_run needed - the forge binary must already be built and available on PATH as `forgee`
2+
run:
3+
- # Clone into `tmp/task` dir
4+
- git clone --depth=1 --branch main https://github.com/antinomyhq/forge .
5+
- FORGE_OVERRIDE_PROVIDER=open_router FORGE_OVERRIDE_MODEL={{model}} FORGE_DEBUG_REQUESTS='{{dir}}/context.json' forgee -p '{{task}}'
6+
parallelism: 5
7+
timeout: 120
8+
early_exit: true
9+
validations:
10+
- name: "Uses parallel tool calls"
11+
type: shell
12+
command: |
13+
# Check if there's at least one message with 2 or more tool calls executed in parallel
14+
jq -e '[.messages[]?.tool_calls? | select(. != null and length >= 2)] | length > 0' {{dir}}/context.json
15+
sources:
16+
- value:
17+
- model: "openai/gpt-5.2"
18+
- csv: parallel_tool_calls_tasks.csv

crates/forge_app/src/dto/openai/request.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,8 @@ impl From<Context> for Request {
365365
models: Default::default(),
366366
route: Default::default(),
367367
provider: Default::default(),
368-
parallel_tool_calls: Some(false),
368+
parallel_tool_calls: Some(true), /* Default to true, transformers will adjust based
369+
* on model capabilities */
369370
stream_options: Some(StreamOptions { include_usage: Some(true) }),
370371
session_id: context.conversation_id.map(|id| id.to_string()),
371372
reasoning: context.reasoning,

0 commit comments

Comments
 (0)