-
Notifications
You must be signed in to change notification settings - Fork 394
Expand file tree
/
Copy paththinking_budget.yaml
More file actions
79 lines (66 loc) · 2.8 KB
/
Copy paththinking_budget.yaml
File metadata and controls
79 lines (66 loc) · 2.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Run the demo command with:
# docker agent run thinking_budget.yaml -c demo
# Agent definitions. This example defines a single agent, `root`.
agents:
  root:
    # Name of an entry under `models`; switch it to compare thinking settings.
    model: gpt-5-mini-min  # <- try with gpt-5-mini-high
    # model: claude-4-5-sonnet-min  # <- try with claude-4-5-sonnet-high or claude-opus-4-6-adaptive
    # model: gemini-2-5-flash-dynamic-thinking  # <- try with -no-thinking, -low or -high variants
    description: a helpful assistant that thinks
    instruction: you are a helpful assistant who can also use tools, but only if you need to
    commands:
      # Prompt used by the `demo` command (selected with `-c demo`).
      demo: "hey i need python code for a mandelbrot fractal"
    toolsets:
      - type: shell
# Named model configurations, referenced by key from an agent's `model` field.
# Each entry pairs a provider/model with a `thinking_budget`, given either as
# an effort keyword or as a token count depending on the provider.
models:
  # --- OpenAI: effort keywords ---
  gpt-5-mini-min:
    provider: openai
    model: gpt-5-mini
    thinking_budget: minimal  # <- openai supports "minimal", "low", "medium", "high", "xhigh"
  gpt-5-mini-high:
    provider: openai
    model: gpt-5-mini
    thinking_budget: high

  # --- Anthropic: token budgets (older models) or adaptive effort ---
  claude-4-5-sonnet-min:
    provider: anthropic
    model: claude-sonnet-4-5-20250929
    thinking_budget: 1024  # <- explicit token budget (1024-32768) for older models
  claude-4-5-sonnet-high:
    provider: anthropic
    model: claude-sonnet-4-5-20250929
    thinking_budget: 32768  # <- explicit token budget (32768 is the Anthropic suggested maximum)
    provider_opts:
      # <- enables interleaved thinking, aka tool calling during model reasoning
      interleaved_thinking: true
  claude-opus-4-6-adaptive:
    provider: anthropic
    model: claude-opus-4-6
    # <- lets the model decide when and how much to think (recommended for 4.6)
    thinking_budget: adaptive
  claude-opus-4-6-low:
    provider: anthropic
    model: claude-opus-4-6
    # <- adaptive thinking with low effort: "low", "medium", "high", "xhigh", "max"
    #    (same as adaptive/<effort>)
    thinking_budget: low
  claude-opus-4-7-summarized:
    provider: anthropic
    # <- Opus 4.7 hides thinking by default; use the same flag with any recent Claude model
    model: claude-opus-4-7
    thinking_budget: adaptive
    provider_opts:
      # <- "summarized", "display", or "omitted" (Opus 4.7 defaults to omitted)
      thinking_display: summarized

  # --- Google: numeric budgets, with -1 and 0 as special values ---
  gemini-2-5-flash-dynamic-thinking:
    provider: google
    model: gemini-2.5-flash
    thinking_budget: -1  # <- google only, dynamic thinking
  gemini-2-5-flash-no-thinking:
    provider: google
    model: gemini-2.5-flash
    thinking_budget: 0  # <- google only, no thinking
  gemini-2-5-flash-low:
    provider: google
    model: gemini-2.5-flash
    thinking_budget: 1024
  gemini-2-5-flash-high:
    provider: google
    model: gemini-2.5-flash
    # <- google's maximum thinking budget for all models except Gemini 2.5 Pro (max 32768)
    thinking_budget: 24576

  # --- Amazon Bedrock: Anthropic models served through Bedrock ---
  bedrock-opus-4-8-adaptive:
    provider: amazon-bedrock
    model: global.anthropic.claude-opus-4-8
    # <- Opus 4.6+ reject token budgets on Bedrock too; token values are
    #    auto-converted to adaptive
    thinking_budget: adaptive/high