Skip to content

Commit 342276b

Browse files
authored
Merge pull request #2935 from rumpl/mcp-embedded-resource-media
feat(mcp): forward embedded resources to model providers
2 parents bbe7282 + 93b8b04 commit 342276b

20 files changed

Lines changed: 764 additions & 157 deletions

File tree

cmd/wasm/runtime_wasm.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -638,11 +638,15 @@ func (rt *wasmRuntime) processToolCalls(ctx context.Context, calls []tools.ToolC
638638
ToolError: err != nil,
639639
})
640640

641-
resultMessages = append(resultMessages, chat.Message{
641+
msg := chat.Message{
642642
Role: chat.MessageRoleTool,
643643
ToolCallID: tc.ID,
644644
Content: output,
645-
})
645+
}
646+
if toolResult != nil && (len(toolResult.Images) > 0 || len(toolResult.Documents) > 0) {
647+
msg.MultiContent = chat.BuildToolResultMultiContent(output, toolResult.Images, toolResult.Documents)
648+
}
649+
resultMessages = append(resultMessages, msg)
646650
}
647651

648652
return resultMessages, handoffAgent, nil

pkg/chat/tool_result.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package chat
2+
3+
import (
4+
"cmp"
5+
"encoding/base64"
6+
"log/slog"
7+
8+
"github.com/docker/docker-agent/pkg/tools"
9+
)
10+
11+
// BuildToolResultMultiContent attaches inline media and documents to a tool
12+
// response as a MultiContent payload that providers can convert to native
13+
// multimodal tool-result content where supported.
14+
func BuildToolResultMultiContent(text string, images []tools.MediaContent, documents []tools.DocumentContent) []MessagePart {
15+
parts := make([]MessagePart, 0, 1+len(documents)+len(images))
16+
parts = append(parts, MessagePart{Type: MessagePartTypeText, Text: text})
17+
for _, doc := range documents {
18+
part, ok := ToolDocumentPart(doc)
19+
if ok {
20+
parts = append(parts, part)
21+
}
22+
}
23+
for _, img := range images {
24+
parts = append(parts, MessagePart{
25+
Type: MessagePartTypeImageURL,
26+
ImageURL: &MessageImageURL{
27+
URL: "data:" + img.MimeType + ";base64," + img.Data,
28+
Detail: ImageURLDetailAuto,
29+
},
30+
})
31+
}
32+
return parts
33+
}
34+
35+
// ToolDocumentPart converts a tool-returned document payload into a chat
36+
// document part. It returns false when the payload is empty or malformed.
37+
func ToolDocumentPart(content tools.DocumentContent) (MessagePart, bool) {
38+
doc := Document{
39+
Name: cmp.Or(content.Name, "document"),
40+
MimeType: content.MimeType,
41+
}
42+
43+
switch {
44+
case content.Text != "":
45+
doc.Size = int64(len(content.Text))
46+
doc.Source = DocumentSource{InlineText: content.Text}
47+
if doc.MimeType == "" {
48+
doc.MimeType = "text/plain"
49+
}
50+
case content.Data != "":
51+
data, err := base64.StdEncoding.DecodeString(content.Data)
52+
if err != nil {
53+
slog.Warn("Dropping tool document with invalid base64 payload", "name", content.Name, "mime", content.MimeType, "error", err)
54+
return MessagePart{}, false
55+
}
56+
doc.Size = int64(len(data))
57+
doc.Source = DocumentSource{InlineData: data}
58+
if doc.MimeType == "" {
59+
doc.MimeType = DetectMimeTypeByContent(data)
60+
}
61+
default:
62+
return MessagePart{}, false
63+
}
64+
65+
return MessagePart{Type: MessagePartTypeDocument, Document: &doc}, true
66+
}

pkg/model/provider/anthropic/attachments.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ import (
2525
// - unsupported / no content → nil (logged as warning)
2626
func convertDocument(ctx context.Context, doc chat.Document, id modelsdev.ID, store *modelsdev.Store) ([]anthropic.ContentBlockParamUnion, error) {
2727
mc := modelinfo.LoadCaps(store, id)
28+
if !mc.Supports(doc.MimeType) && modelinfo.IsClaude(ctx, store, id) {
29+
mc = modelinfo.CapsWith(true, true)
30+
}
2831
return convertDocumentWithCaps(ctx, doc, mc)
2932
}
3033

pkg/model/provider/anthropic/beta_converter.go

Lines changed: 86 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -103,14 +103,20 @@ func (c *Client) convertBetaMessages(ctx context.Context, messages []chat.Messag
103103
// Collect consecutive tool messages and merge them into a single user message
104104
// This is required by Anthropic API: all tool_result blocks for tool_use blocks
105105
// from the same assistant message must be in the same user message
106-
toolResultBlocks := []anthropic.BetaContentBlockParamUnion{
107-
convertBetaToolResultBlock(msg),
106+
firstBlock, err := c.convertBetaToolResultBlock(ctx, msg)
107+
if err != nil {
108+
return nil, err
108109
}
110+
toolResultBlocks := []anthropic.BetaContentBlockParamUnion{firstBlock}
109111

110112
// Look ahead for consecutive tool messages and merge them
111113
j := i + 1
112114
for j < len(messages) && messages[j].Role == chat.MessageRoleTool {
113-
toolResultBlocks = append(toolResultBlocks, convertBetaToolResultBlock(&messages[j]))
115+
block, err := c.convertBetaToolResultBlock(ctx, &messages[j])
116+
if err != nil {
117+
return nil, err
118+
}
119+
toolResultBlocks = append(toolResultBlocks, block)
114120
j++
115121
}
116122

@@ -135,62 +141,111 @@ func (c *Client) convertBetaMessages(ctx context.Context, messages []chat.Messag
135141
// convertBetaUserMultiContent converts user message multi-content parts to Beta API content blocks.
136142
// It handles text, images (base64 and URL), and file uploads via the Files API.
137143
// convertBetaToolResultBlock converts a tool message to a Beta API tool_result block,
138-
// including any image content from MultiContent.
139-
func convertBetaToolResultBlock(msg *chat.Message) anthropic.BetaContentBlockParamUnion {
140-
if !hasImageMultiContent(msg.MultiContent) {
141-
// tool_result must be present for every preceding tool_use; we cannot skip
142-
// it. Normalize whitespace-only content to empty string rather than skipping.
143-
content := msg.Content
144-
if strings.TrimSpace(content) == "" {
145-
content = ""
146-
}
144+
// including inline image and document content from MultiContent.
145+
func (c *Client) convertBetaToolResultBlock(ctx context.Context, msg *chat.Message) (anthropic.BetaContentBlockParamUnion, error) {
146+
if !hasRichToolResultMultiContent(msg.MultiContent) {
147147
return anthropic.BetaContentBlockParamUnion{
148148
OfToolResult: &anthropic.BetaToolResultBlockParam{
149149
ToolUseID: msg.ToolCallID,
150+
IsError: anthropic.Bool(msg.IsError),
150151
Content: []anthropic.BetaToolResultBlockParamContentUnion{
151-
{OfText: &anthropic.BetaTextBlockParam{Text: content}},
152+
{OfText: &anthropic.BetaTextBlockParam{Text: toolResultText(msg.Content)}},
152153
},
153154
},
154-
}
155+
}, nil
155156
}
156157

157158
var content []anthropic.BetaToolResultBlockParamContentUnion
158159
for _, part := range msg.MultiContent {
159160
switch part.Type {
160161
case chat.MessagePartTypeText:
162+
if strings.TrimSpace(part.Text) == "" {
163+
continue
164+
}
161165
content = append(content, anthropic.BetaToolResultBlockParamContentUnion{
162166
OfText: &anthropic.BetaTextBlockParam{Text: part.Text},
163167
})
164168
case chat.MessagePartTypeImageURL:
165-
// Note: superseded by MessagePartTypeDocument.
166-
if part.ImageURL == nil {
169+
content = append(content, betaToolResultImageContent(part)...)
170+
case chat.MessagePartTypeDocument:
171+
if part.Document == nil {
167172
continue
168173
}
169-
if strings.HasPrefix(part.ImageURL.URL, "data:") {
170-
urlParts := strings.SplitN(part.ImageURL.URL, ",", 2)
171-
if len(urlParts) == 2 {
172-
mediaType := extractMediaType(urlParts[0])
173-
content = append(content, anthropic.BetaToolResultBlockParamContentUnion{
174-
OfImage: &anthropic.BetaImageBlockParam{
175-
Source: anthropic.BetaImageBlockParamSourceUnion{
176-
OfBase64: &anthropic.BetaBase64ImageSourceParam{
177-
Data: urlParts[1],
178-
MediaType: anthropic.BetaBase64ImageSourceMediaType(mediaType),
179-
},
180-
},
181-
},
182-
})
183-
}
174+
docBlocks, err := c.convertDoc(ctx, *part.Document)
175+
if err != nil {
176+
return anthropic.BetaContentBlockParamUnion{}, fmt.Errorf("failed to convert tool result document %q: %w", part.Document.Name, err)
184177
}
178+
content = append(content, stdBlocksToBetaToolResultContent(docBlocks)...)
185179
}
186180
}
181+
if len(content) == 0 {
182+
content = append(content, anthropic.BetaToolResultBlockParamContentUnion{
183+
OfText: &anthropic.BetaTextBlockParam{Text: toolResultText(msg.Content)},
184+
})
185+
}
187186

188187
return anthropic.BetaContentBlockParamUnion{
189188
OfToolResult: &anthropic.BetaToolResultBlockParam{
190189
ToolUseID: msg.ToolCallID,
191190
Content: content,
191+
IsError: anthropic.Bool(msg.IsError),
192+
},
193+
}, nil
194+
}
195+
196+
func betaToolResultImageContent(part chat.MessagePart) []anthropic.BetaToolResultBlockParamContentUnion {
197+
if part.ImageURL == nil || !strings.HasPrefix(part.ImageURL.URL, "data:") {
198+
return nil
199+
}
200+
urlParts := strings.SplitN(part.ImageURL.URL, ",", 2)
201+
if len(urlParts) != 2 {
202+
return nil
203+
}
204+
mediaType := extractMediaType(urlParts[0])
205+
return []anthropic.BetaToolResultBlockParamContentUnion{{
206+
OfImage: &anthropic.BetaImageBlockParam{
207+
Source: anthropic.BetaImageBlockParamSourceUnion{
208+
OfBase64: &anthropic.BetaBase64ImageSourceParam{
209+
Data: urlParts[1],
210+
MediaType: anthropic.BetaBase64ImageSourceMediaType(mediaType),
211+
},
212+
},
192213
},
214+
}}
215+
}
216+
217+
func stdBlocksToBetaToolResultContent(blocks []anthropic.ContentBlockParamUnion) []anthropic.BetaToolResultBlockParamContentUnion {
218+
out := make([]anthropic.BetaToolResultBlockParamContentUnion, 0, len(blocks))
219+
for _, b := range blocks {
220+
switch {
221+
case b.OfText != nil:
222+
out = append(out, anthropic.BetaToolResultBlockParamContentUnion{
223+
OfText: &anthropic.BetaTextBlockParam{Text: b.OfText.Text},
224+
})
225+
case b.OfImage != nil && b.OfImage.Source.OfBase64 != nil:
226+
out = append(out, anthropic.BetaToolResultBlockParamContentUnion{
227+
OfImage: &anthropic.BetaImageBlockParam{
228+
Source: anthropic.BetaImageBlockParamSourceUnion{
229+
OfBase64: &anthropic.BetaBase64ImageSourceParam{
230+
Data: b.OfImage.Source.OfBase64.Data,
231+
MediaType: anthropic.BetaBase64ImageSourceMediaType(b.OfImage.Source.OfBase64.MediaType),
232+
},
233+
},
234+
},
235+
})
236+
case b.OfDocument != nil && b.OfDocument.Source.OfBase64 != nil:
237+
out = append(out, anthropic.BetaToolResultBlockParamContentUnion{
238+
OfDocument: &anthropic.BetaRequestDocumentBlockParam{
239+
Source: anthropic.BetaRequestDocumentBlockSourceUnionParam{
240+
OfBase64: &anthropic.BetaBase64PDFSourceParam{
241+
Data: b.OfDocument.Source.OfBase64.Data,
242+
},
243+
},
244+
},
245+
})
246+
}
193247
}
248+
return out
194249
}
195250

196251
func (c *Client) convertBetaUserMultiContent(ctx context.Context, parts []chat.MessagePart) ([]anthropic.BetaContentBlockParamUnion, error) {

0 commit comments

Comments
 (0)