Skip to content

Commit edef1e5

Browse files
authored
Merge pull request #2738 from simonferquel-clanker/fix/modelcaps-qualified-model-id
fix: pass fully-qualified provider/model ID to modelcaps.Load
2 parents 675057f + a490b33 commit edef1e5

22 files changed

Lines changed: 297 additions & 89 deletions

pkg/attachment/modelcaps/modelcaps_test.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,51 @@ func buildStore(providers map[string]modelsdev.Provider) *modelsdev.Store {
1313
return modelsdev.NewDatabaseStore(db)
1414
}
1515

16+
// TestLoadFromStore_QualifiedIDRequired is the regression test for the bug
17+
// fixed by pass-fully-qualified-provider-model-ID: modelcaps.Load (and
18+
// LoadFromStore) requires a "provider/model" key to find a model in the
19+
// models.dev database. A bare model name without the provider prefix must
20+
// NOT resolve to vision capabilities — it falls back to text-only.
21+
//
22+
// Before the fix, callers passed c.ModelConfig.Model (e.g. "claude-sonnet-4-6")
23+
// instead of c.ModelConfig.Provider+"/"+c.ModelConfig.Model; the lookup always
24+
// missed and all image / PDF attachments were silently dropped.
25+
func TestLoadFromStore_QualifiedIDRequired(t *testing.T) {
26+
store := buildStore(map[string]modelsdev.Provider{
27+
"anthropic": {
28+
Models: map[string]modelsdev.Model{
29+
"claude-sonnet-4-6": {
30+
Name: "Claude Sonnet 4.6",
31+
Modalities: modelsdev.Modalities{
32+
Input: []string{"text", "image", "pdf"},
33+
Output: []string{"text"},
34+
},
35+
},
36+
},
37+
},
38+
})
39+
40+
// Bare model name (the original bug): must fall back to conservative text-only caps.
41+
bareID := "claude-sonnet-4-6"
42+
mcBare := modelcaps.LoadFromStore(store, bareID)
43+
if mcBare.Supports("image/jpeg") {
44+
t.Errorf("bare model name %q must NOT resolve to vision caps: image/jpeg should be dropped", bareID)
45+
}
46+
if mcBare.Supports("application/pdf") {
47+
t.Errorf("bare model name %q must NOT resolve to vision caps: application/pdf should be dropped", bareID)
48+
}
49+
50+
// Fully-qualified ID (the fix): must resolve to vision+pdf caps.
51+
qualifiedID := "anthropic/claude-sonnet-4-6"
52+
mcQualified := modelcaps.LoadFromStore(store, qualifiedID)
53+
if !mcQualified.Supports("image/jpeg") {
54+
t.Errorf("qualified ID %q must resolve to vision caps: image/jpeg should be passed through", qualifiedID)
55+
}
56+
if !mcQualified.Supports("application/pdf") {
57+
t.Errorf("qualified ID %q must resolve to vision caps: application/pdf should be passed through", qualifiedID)
58+
}
59+
}
60+
1661
func TestLoadFromStore_VisionModel(t *testing.T) {
1762
store := buildStore(map[string]modelsdev.Provider{
1863
"anthropic": {

pkg/model/provider/anthropic/attachments.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,19 @@ import (
1212
"github.com/docker/docker-agent/pkg/attachment"
1313
"github.com/docker/docker-agent/pkg/attachment/modelcaps"
1414
"github.com/docker/docker-agent/pkg/chat"
15+
"github.com/docker/docker-agent/pkg/modelsdev"
1516
)
1617

17-
// convertDocument converts a chat.Document to standard Anthropic SDK content blocks
18-
// (not the Beta API).
18+
// convertDocumentFromStore converts a chat.Document to standard Anthropic SDK content blocks
19+
// using an explicit modelsdev.Store for capability lookup.
1920
//
2021
// Routing:
2122
// - image/* with InlineData → ImageBlockParam (base64 source)
2223
// - application/pdf with InlineData → DocumentBlockParam (base64)
2324
// - text with InlineText → TextBlockParam with TXTEnvelope
2425
// - unsupported / no content → nil (logged as warning)
25-
func convertDocument(ctx context.Context, doc chat.Document, modelID string) ([]anthropic.ContentBlockParamUnion, error) {
26-
mc, _ := modelcaps.Load(modelID)
26+
func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]anthropic.ContentBlockParamUnion, error) {
27+
mc := modelcaps.LoadFromStore(store, modelID)
2728
return convertDocumentWithCaps(ctx, doc, mc)
2829
}
2930

pkg/model/provider/anthropic/attachments_test.go

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ import (
99

1010
"github.com/docker/docker-agent/pkg/attachment/modelcaps"
1111
"github.com/docker/docker-agent/pkg/chat"
12+
"github.com/docker/docker-agent/pkg/config/latest"
13+
"github.com/docker/docker-agent/pkg/model/provider/base"
14+
"github.com/docker/docker-agent/pkg/modelsdev"
1215
)
1316

1417
// minJPEG is a minimal JPEG magic-byte header for use in tests.
@@ -51,14 +54,66 @@ func TestConvertDocumentAnthropic_StrategyB64_PDF(t *testing.T) {
5154
assert.Nil(t, blocks[0].OfText, "expected no text block for PDF")
5255
}
5356

57+
// TestConvertDocumentAnthropic_QualifiedIDRequired is the regression test for
58+
// the bug where convertUserMultiContent passed only c.ModelConfig.Model (bare
59+
// model name) to convertDocument instead of c.ModelConfig.Provider+"/"+c.ModelConfig.Model.
60+
// When the bare name was used, modelcaps.Load always missed the model and all
61+
// image/PDF attachments were silently dropped.
62+
//
63+
// The test constructs a Client with Provider="anthropic" and Model="claude-sonnet-4-6",
64+
// injects a fake modelsdev.Store, and calls convertUserMultiContent directly.
65+
// The image block must be present in the output — which only happens if the
66+
// fully-qualified "anthropic/claude-sonnet-4-6" was used for the caps lookup.
67+
func TestConvertDocumentAnthropic_QualifiedIDRequired(t *testing.T) {
68+
store := modelsdev.NewDatabaseStore(&modelsdev.Database{
69+
Providers: map[string]modelsdev.Provider{
70+
"anthropic": {
71+
Models: map[string]modelsdev.Model{
72+
"claude-sonnet-4-6": {
73+
Modalities: modelsdev.Modalities{
74+
Input: []string{"text", "image", "pdf"},
75+
},
76+
},
77+
},
78+
},
79+
},
80+
})
81+
82+
c := &Client{
83+
Config: base.Config{
84+
ModelConfig: latest.ModelConfig{
85+
Provider: "anthropic",
86+
Model: "claude-sonnet-4-6",
87+
},
88+
},
89+
modelsStore: store,
90+
}
91+
92+
parts := []chat.MessagePart{
93+
{
94+
Type: chat.MessagePartTypeDocument,
95+
Document: &chat.Document{
96+
Name: "photo.jpg",
97+
MimeType: "image/jpeg",
98+
Source: chat.DocumentSource{InlineData: minJPEG},
99+
},
100+
},
101+
}
102+
103+
blocks, err := c.convertUserMultiContent(t.Context(), parts)
104+
require.NoError(t, err)
105+
require.Len(t, blocks, 1, "image must not be dropped when provider+model ID is used for caps lookup")
106+
assert.NotNil(t, blocks[0].OfImage, "expected native image block")
107+
}
108+
54109
func TestConvertDocumentAnthropic_StrategyTXT(t *testing.T) {
55110
doc := chat.Document{
56111
Name: "spec.md",
57112
MimeType: "text/markdown",
58113
Source: chat.DocumentSource{InlineText: "## Specification"},
59114
}
60115

61-
blocks, err := convertDocument(t.Context(), doc, "")
116+
blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{})
62117
require.NoError(t, err)
63118
require.Len(t, blocks, 1)
64119
require.NotNil(t, blocks[0].OfText)
@@ -74,7 +129,7 @@ func TestConvertDocumentAnthropic_StrategyTXT_Envelope(t *testing.T) {
74129
Source: chat.DocumentSource{InlineText: "some notes"},
75130
}
76131

77-
blocks, err := convertDocument(t.Context(), doc, "")
132+
blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{})
78133
require.NoError(t, err)
79134
require.Len(t, blocks, 1)
80135
require.NotNil(t, blocks[0].OfText)
@@ -89,7 +144,7 @@ func TestConvertDocumentAnthropic_Drop_NoContent(t *testing.T) {
89144
Source: chat.DocumentSource{},
90145
}
91146

92-
blocks, err := convertDocument(t.Context(), doc, "")
147+
blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{})
93148
require.NoError(t, err)
94149
assert.Nil(t, blocks, "should be dropped when no inline content")
95150
}

pkg/model/provider/anthropic/beta_converter.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ func (c *Client) convertBetaUserMultiContent(ctx context.Context, parts []chat.M
277277

278278
case chat.MessagePartTypeDocument:
279279
if part.Document != nil {
280-
stdBlocks, err := convertDocument(ctx, *part.Document, c.ModelConfig.Model)
280+
stdBlocks, err := c.convertDoc(ctx, *part.Document)
281281
if err != nil {
282282
return nil, fmt.Errorf("failed to convert document attachment %q: %w", part.Document.Name, err)
283283
}

pkg/model/provider/anthropic/client.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"github.com/docker/docker-agent/pkg/model/provider/base"
2424
"github.com/docker/docker-agent/pkg/model/provider/options"
2525
"github.com/docker/docker-agent/pkg/model/provider/providerutil"
26+
"github.com/docker/docker-agent/pkg/modelsdev"
2627
"github.com/docker/docker-agent/pkg/tools"
2728
)
2829

@@ -33,6 +34,7 @@ type Client struct {
3334

3435
clientFn func(context.Context) (anthropic.Client, error)
3536
fileManager *FileManager
37+
modelsStore *modelsdev.Store // initialised in NewClient; overrideable in tests
3638
}
3739

3840
// NewClient creates a new Anthropic client from the provided configuration
@@ -67,6 +69,13 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro
6769
},
6870
}
6971

72+
store, err := modelsdev.NewStore()
73+
if err != nil {
74+
slog.WarnContext(ctx, "anthropic: failed to load models.dev store, attachments will use conservative caps", "error", err)
75+
store = modelsdev.NewDatabaseStore(&modelsdev.Database{}) // empty: conservative text-only
76+
}
77+
anthropicClient.modelsStore = store
78+
7079
if gateway := globalOptions.Gateway(); gateway == "" {
7180
authOpts, err := buildDirectAuthOptions(ctx, cfg, env)
7281
if err != nil {
@@ -313,6 +322,12 @@ func (c *Client) CreateChatCompletionStream(
313322
return ad, nil
314323
}
315324

325+
// convertDoc converts a document attachment using the client's model ID
326+
// and the store initialized at construction time.
327+
func (c *Client) convertDoc(ctx context.Context, doc chat.Document) ([]anthropic.ContentBlockParamUnion, error) {
328+
return convertDocumentFromStore(ctx, doc, c.ID(), c.modelsStore)
329+
}
330+
316331
func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) ([]anthropic.MessageParam, error) {
317332
var anthropicMessages []anthropic.MessageParam
318333
// Track whether the last appended assistant message included tool_use blocks
@@ -553,7 +568,7 @@ func (c *Client) convertUserMultiContent(ctx context.Context, parts []chat.Messa
553568

554569
case chat.MessagePartTypeDocument:
555570
if part.Document != nil {
556-
docBlocks, err := convertDocument(ctx, *part.Document, c.ModelConfig.Model)
571+
docBlocks, err := c.convertDoc(ctx, *part.Document)
557572
if err != nil {
558573
return nil, fmt.Errorf("failed to convert document attachment %q: %w", part.Document.Name, err)
559574
}

pkg/model/provider/bedrock/attachments.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/docker/docker-agent/pkg/attachment"
1414
"github.com/docker/docker-agent/pkg/attachment/modelcaps"
1515
"github.com/docker/docker-agent/pkg/chat"
16+
"github.com/docker/docker-agent/pkg/modelsdev"
1617
)
1718

1819
// imageFormatFromMIME maps a MIME type to a Bedrock ImageFormat.
@@ -32,15 +33,16 @@ func imageFormatFromMIME(mimeType string) (types.ImageFormat, bool) {
3233
}
3334
}
3435

35-
// convertDocument converts a chat.Document to zero or more Bedrock ContentBlocks.
36+
// convertDocumentFromStore converts a chat.Document to zero or more Bedrock ContentBlocks
37+
// using the provided modelsdev.Store for capability lookup.
3638
//
3739
// Routing:
3840
// - image/* with InlineData → ContentBlockMemberImage
3941
// - application/pdf with InlineData → ContentBlockMemberDocument (PDF)
4042
// - text/* with InlineText → ContentBlockMemberText with TXTEnvelope
4143
// - unsupported / no content → nil (logged as warning)
42-
func convertDocument(ctx context.Context, doc chat.Document, modelID string) ([]types.ContentBlock, error) {
43-
mc, _ := modelcaps.Load(modelID)
44+
func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]types.ContentBlock, error) {
45+
mc := modelcaps.LoadFromStore(store, modelID)
4446
return convertDocumentWithCaps(ctx, doc, mc)
4547
}
4648

pkg/model/provider/bedrock/attachments_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func TestConvertDocumentBedrock_StrategyTXT(t *testing.T) {
7979
Source: chat.DocumentSource{InlineText: "## Notes"},
8080
}
8181

82-
blocks, err := convertDocument(t.Context(), doc, "")
82+
blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{})
8383
require.NoError(t, err)
8484
require.Len(t, blocks, 1)
8585
textBlock, ok := blocks[0].(*types.ContentBlockMemberText)
@@ -96,7 +96,7 @@ func TestConvertDocumentBedrock_StrategyTXT_Envelope(t *testing.T) {
9696
Source: chat.DocumentSource{InlineText: "a,b"},
9797
}
9898

99-
blocks, err := convertDocument(t.Context(), doc, "")
99+
blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{})
100100
require.NoError(t, err)
101101
require.Len(t, blocks, 1)
102102
textBlock, ok := blocks[0].(*types.ContentBlockMemberText)
@@ -111,7 +111,7 @@ func TestConvertDocumentBedrock_Drop_NoContent(t *testing.T) {
111111
Source: chat.DocumentSource{},
112112
}
113113

114-
blocks, err := convertDocument(t.Context(), doc, "")
114+
blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{})
115115
require.NoError(t, err)
116116
assert.Nil(t, blocks, "should be nil when no inline content")
117117
}

pkg/model/provider/bedrock/client.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ type Client struct {
3030
base.Config
3131

3232
bedrockClient *bedrockruntime.Client
33-
cachingSupported bool // Cached at init time for efficiency
33+
cachingSupported bool // Cached at init time for efficiency
34+
modelsStore *modelsdev.Store // initialised in NewClient
3435
}
3536

3637
// bearerTokenTransport adds Authorization header with bearer token to requests
@@ -116,6 +117,12 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro
116117
// Uses models.dev cache pricing as proxy for capability detection.
117118
cachingSupported := detectCachingSupport(ctx, cfg.Model)
118119

120+
attachStore, err := modelsdev.NewStore()
121+
if err != nil {
122+
slog.WarnContext(ctx, "bedrock: failed to load models.dev store, attachments will use conservative caps", "error", err)
123+
attachStore = modelsdev.NewDatabaseStore(&modelsdev.Database{})
124+
}
125+
119126
slog.DebugContext(ctx, "Bedrock client created successfully",
120127
"model", cfg.Model,
121128
"region", awsCfg.Region,
@@ -129,6 +136,7 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro
129136
},
130137
bedrockClient: bedrockClient,
131138
cachingSupported: cachingSupported,
139+
modelsStore: attachStore,
132140
}, nil
133141
}
134142

@@ -236,7 +244,7 @@ func (c *Client) buildConverseStreamInput(ctx context.Context, messages []chat.M
236244
enableCaching := c.promptCachingEnabled()
237245

238246
// Convert and set messages (excluding system)
239-
input.Messages, input.System = convertMessages(ctx, messages, c.ModelConfig.Model, enableCaching)
247+
input.Messages, input.System = convertMessages(ctx, messages, c.ID(), c.modelsStore, enableCaching)
240248

241249
// Compute thinking fields first — its presence drives the inference config.
242250
additionalFields := c.buildAdditionalModelRequestFields()

0 commit comments

Comments
 (0)