diff --git a/agentscope-core/src/main/java/io/agentscope/core/formatter/AbstractBaseFormatter.java b/agentscope-core/src/main/java/io/agentscope/core/formatter/AbstractBaseFormatter.java index 751d31243..16bab108e 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/formatter/AbstractBaseFormatter.java +++ b/agentscope-core/src/main/java/io/agentscope/core/formatter/AbstractBaseFormatter.java @@ -18,6 +18,7 @@ import io.agentscope.core.message.AudioBlock; import io.agentscope.core.message.Base64Source; import io.agentscope.core.message.ContentBlock; +import io.agentscope.core.message.DataBlock; import io.agentscope.core.message.HintBlock; import io.agentscope.core.message.ImageBlock; import io.agentscope.core.message.MessageMetadataKeys; @@ -134,7 +135,8 @@ protected boolean hasMediaContent(Msg msg) { for (ContentBlock block : msg.getContent()) { if (block instanceof ImageBlock || block instanceof AudioBlock - || block instanceof VideoBlock) { + || block instanceof VideoBlock + || block instanceof DataBlock) { return true; } } @@ -218,6 +220,9 @@ protected String convertToolResultToString(List output) { } else if (block instanceof VideoBlock vb) { String reference = convertMediaBlockToTextReference(vb, "video"); textualOutput.add(reference); + } else if (block instanceof DataBlock db) { + String reference = convertMediaBlockToTextReference(db, "data"); + textualOutput.add(reference); } // Other block types (e.g., ThinkingBlock) are ignored } @@ -272,6 +277,8 @@ private Source getSourceFromBlock(ContentBlock block) { return ab.getSource(); } else if (block instanceof VideoBlock vb) { return vb.getSource(); + } else if (block instanceof DataBlock db) { + return db.getSource(); } throw new IllegalArgumentException("Unsupported block type: " + block.getClass()); } diff --git a/agentscope-core/src/main/java/io/agentscope/core/formatter/dashscope/DashScopeMediaConverter.java 
b/agentscope-core/src/main/java/io/agentscope/core/formatter/dashscope/DashScopeMediaConverter.java index 77fdabf11..204f259b6 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/formatter/dashscope/DashScopeMediaConverter.java +++ b/agentscope-core/src/main/java/io/agentscope/core/formatter/dashscope/DashScopeMediaConverter.java @@ -19,6 +19,7 @@ import io.agentscope.core.formatter.dashscope.dto.DashScopeContentPart; import io.agentscope.core.message.AudioBlock; import io.agentscope.core.message.Base64Source; +import io.agentscope.core.message.DataBlock; import io.agentscope.core.message.ImageBlock; import io.agentscope.core.message.Source; import io.agentscope.core.message.URLSource; @@ -192,4 +193,72 @@ public DashScopeContentPart convertAudioBlockToContentPart(AudioBlock audioBlock String audioUrl = convertAudioBlockToUrl(audioBlock); return DashScopeContentPart.audio(audioUrl); } + + /** + * Convert DataBlock to DashScopeContentPart by resolving the MIME type and routing + * to the appropriate image / audio / video slot. + * + *

<p>MIME type resolution order:
+ * <ol>
+ *   <li>{@code Base64Source.mediaType} — always explicit</li>
+ *   <li>{@code URLSource.mimeType} — caller-supplied hint for extension-less URLs</li>
+ *   <li>{@code MediaUtils.determineMediaType(url)} — extension-based inference</li>
+ * </ol>
+ * + * @param dataBlock The data block to convert + * @return DashScopeContentPart for the resolved media type + * @throws Exception If conversion fails or MIME type cannot be resolved + */ + public DashScopeContentPart convertDataBlockToContentPart(DataBlock dataBlock) + throws Exception { + Source source = dataBlock.getSource(); + String mimeType = resolveMimeType(source); + + if (mimeType.startsWith("image/")) { + String url = sourceToUrl(source); + return DashScopeContentPart.builder().image(url).build(); + } else if (mimeType.startsWith("audio/")) { + String url = sourceToUrl(source); + return DashScopeContentPart.audio(url); + } else if (mimeType.startsWith("video/")) { + String url = sourceToUrl(source); + return DashScopeContentPart.builder().video(url).build(); + } else { + throw new IllegalArgumentException( + "Cannot route DataBlock: unrecognised MIME type '" + mimeType + "'"); + } + } + + // resolve MIME type from any Source subtype + private String resolveMimeType(Source source) { + if (source instanceof Base64Source b64) { + return b64.getMediaType(); + } + if (source instanceof URLSource urlSource) { + String hint = urlSource.getMimeType(); + if (hint != null && !hint.isBlank()) { + return hint; + } + String inferred = MediaUtils.determineMediaType(urlSource.getUrl()); + if (!"application/octet-stream".equals(inferred)) { + return inferred; + } + throw new IllegalArgumentException( + "Cannot determine MIME type for URL '" + + urlSource.getUrl() + + "'; set URLSource.mimeType explicitly"); + } + throw new IllegalArgumentException("Unsupported source type: " + source.getClass()); + } + + // convert any Source to a URL/data-URL string + private String sourceToUrl(Source source) throws Exception { + if (source instanceof URLSource urlSource) { + return MediaUtils.urlToProtocolUrl(urlSource.getUrl()); + } + if (source instanceof Base64Source b64) { + return String.format("data:%s;base64,%s", b64.getMediaType(), b64.getData()); + } + throw new 
IllegalArgumentException("Unsupported source type: " + source.getClass()); + } } diff --git a/agentscope-core/src/main/java/io/agentscope/core/formatter/dashscope/DashScopeMessageConverter.java b/agentscope-core/src/main/java/io/agentscope/core/formatter/dashscope/DashScopeMessageConverter.java index 2c5ee017c..590f9e200 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/formatter/dashscope/DashScopeMessageConverter.java +++ b/agentscope-core/src/main/java/io/agentscope/core/formatter/dashscope/DashScopeMessageConverter.java @@ -19,6 +19,7 @@ import io.agentscope.core.formatter.dashscope.dto.DashScopeMessage; import io.agentscope.core.message.AudioBlock; import io.agentscope.core.message.ContentBlock; +import io.agentscope.core.message.DataBlock; import io.agentscope.core.message.HintBlock; import io.agentscope.core.message.ImageBlock; import io.agentscope.core.message.MessageMetadataKeys; @@ -127,6 +128,15 @@ private DashScopeMessage convertToMultimodalContent(Msg msg) { DashScopeContentPart.text( "[Audio - processing failed: " + e.getMessage() + "]")); } + } else if (block instanceof DataBlock dataBlock) { + try { + contents.add(mediaConverter.convertDataBlockToContentPart(dataBlock)); + } catch (Exception e) { + log.warn("Failed to process DataBlock: {}", e.getMessage()); + contents.add( + DashScopeContentPart.text( + "[Media - processing failed: " + e.getMessage() + "]")); + } } else if (block instanceof HintBlock hb) { contents.add(DashScopeContentPart.text(hb.getHint())); } else if (block instanceof ThinkingBlock) { @@ -286,7 +296,8 @@ private boolean hasMediaContent(List blocks) { for (ContentBlock block : blocks) { if (block instanceof ImageBlock || block instanceof AudioBlock - || block instanceof VideoBlock) { + || block instanceof VideoBlock + || block instanceof DataBlock) { return true; } } @@ -331,6 +342,15 @@ private List convertContentBlocks(List block DashScopeContentPart.text( "[Video - processing failed: " + e.getMessage() + "]")); } + 
} else if (block instanceof DataBlock db) { + try { + content.add(mediaConverter.convertDataBlockToContentPart(db)); + } catch (Exception e) { + log.warn("Failed to process DataBlock in tool result: {}", e.getMessage()); + content.add( + DashScopeContentPart.text( + "[Media - processing failed: " + e.getMessage() + "]")); + } } } return content; diff --git a/agentscope-core/src/main/java/io/agentscope/core/message/URLSource.java b/agentscope-core/src/main/java/io/agentscope/core/message/URLSource.java index 35d25e2b2..c7e4fe32b 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/message/URLSource.java +++ b/agentscope-core/src/main/java/io/agentscope/core/message/URLSource.java @@ -16,6 +16,7 @@ package io.agentscope.core.message; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import java.util.Objects; @@ -35,20 +36,40 @@ * *

<p>Using URL sources is more efficient for large media files and allows * the system to stream content rather than loading everything into memory. + * + *

When the URL has no file extension (e.g. CDN signed URLs), set {@code mimeType} + * explicitly so converters can route the content to the correct media slot without + * relying on extension-based inference. */ +@JsonInclude(JsonInclude.Include.NON_NULL) public class URLSource extends Source { private final String url; + @JsonProperty("mime_type") + private final String mimeType; + /** * Creates a new URL source for JSON deserialization. * * @param url The URL pointing to the media content + * @param mimeType Optional MIME type hint (e.g. "image/jpeg"); may be null * @throws NullPointerException if url is null */ @JsonCreator - public URLSource(@JsonProperty("url") String url) { + public URLSource(@JsonProperty("url") String url, @JsonProperty("mime_type") String mimeType) { this.url = Objects.requireNonNull(url, "url cannot be null"); + this.mimeType = mimeType; + } + + /** + * Creates a new URL source without a MIME type hint. + * + * @param url The URL pointing to the media content + * @throws NullPointerException if url is null + */ + public URLSource(String url) { + this(url, null); } /** @@ -60,6 +81,19 @@ public String getUrl() { return url; } + /** + * Gets the optional MIME type hint for this URL source. + * + *

When present, converters use this value instead of inferring the type + * from the URL's file extension. Useful for extension-less URLs such as + * CDN signed links or API-generated media endpoints. + * + * @return The MIME type (e.g. "image/jpeg"), or null if not set + */ + public String getMimeType() { + return mimeType; + } + /** * Creates a new builder for constructing URLSource instances. * @@ -76,6 +110,8 @@ public static class Builder { private String url; + private String mimeType; + /** * Sets the URL for the media content. * @@ -88,13 +124,24 @@ public Builder url(String url) { } /** - * Builds a new URLSource with the configured URL. + * Sets an optional MIME type hint for extension-less URLs. + * + * @param mimeType The MIME type (e.g. "video/mp4") + * @return This builder for chaining + */ + public Builder mimeType(String mimeType) { + this.mimeType = mimeType; + return this; + } + + /** + * Builds a new URLSource with the configured fields. * * @return A new URLSource instance * @throws NullPointerException if url is null */ public URLSource build() { - return new URLSource(url); + return new URLSource(url, mimeType); } } } diff --git a/agentscope-core/src/test/java/io/agentscope/core/formatter/dashscope/DashScopeMediaConverterTest.java b/agentscope-core/src/test/java/io/agentscope/core/formatter/dashscope/DashScopeMediaConverterTest.java index a7e5a461d..2370a011a 100644 --- a/agentscope-core/src/test/java/io/agentscope/core/formatter/dashscope/DashScopeMediaConverterTest.java +++ b/agentscope-core/src/test/java/io/agentscope/core/formatter/dashscope/DashScopeMediaConverterTest.java @@ -18,9 +18,11 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; import io.agentscope.core.formatter.dashscope.dto.DashScopeContentPart; import 
io.agentscope.core.message.Base64Source; +import io.agentscope.core.message.DataBlock; import io.agentscope.core.message.ImageBlock; import io.agentscope.core.message.URLSource; import io.agentscope.core.message.VideoBlock; @@ -348,4 +350,109 @@ void testVideoBlockDefaultConstructorNullParameters() { assertNull(videoBlock.getMaxPixels()); assertNull(videoBlock.getTotalPixels()); } + + @Test + void testConvertDataBlockImageRemoteUrl() throws Exception { + DataBlock block = + DataBlock.builder() + .source(URLSource.builder().url("https://example.com/photo.png").build()) + .build(); + + DashScopeContentPart result = converter.convertDataBlockToContentPart(block); + + assertNotNull(result); + assertEquals("https://example.com/photo.png", result.getImage()); + } + + @Test + void testConvertDataBlockImageBase64() throws Exception { + DataBlock block = + DataBlock.builder() + .source( + Base64Source.builder() + .mediaType("image/png") + .data("iVBORw0KGgo=") + .build()) + .build(); + + DashScopeContentPart result = converter.convertDataBlockToContentPart(block); + + assertNotNull(result); + assertEquals("data:image/png;base64,iVBORw0KGgo=", result.getImage()); + } + + @Test + void testConvertDataBlockVideoRemoteUrl() throws Exception { + DataBlock block = + DataBlock.builder() + .source(URLSource.builder().url("https://example.com/clip.mp4").build()) + .build(); + + DashScopeContentPart result = converter.convertDataBlockToContentPart(block); + + assertNotNull(result); + assertEquals("https://example.com/clip.mp4", result.getVideoAsString()); + } + + @Test + void testConvertDataBlockAudioBase64() throws Exception { + DataBlock block = + DataBlock.builder() + .source( + Base64Source.builder() + .mediaType("audio/mp3") + .data("ZmFrZSBhdWRpbyBkYXRh") + .build()) + .build(); + + DashScopeContentPart result = converter.convertDataBlockToContentPart(block); + + assertNotNull(result); + assertEquals("data:audio/mp3;base64,ZmFrZSBhdWRpbyBkYXRh", result.getAudio()); + } + + @Test 
+ void testConvertDataBlockWithMimeTypeHintOverridesExtension() throws Exception { + // mimeType hint should take precedence over extension-based inference + DataBlock block = + DataBlock.builder() + .source( + URLSource.builder() + .url("https://cdn.example.com/media/abc123") + .mimeType("image/jpeg") + .build()) + .build(); + + DashScopeContentPart result = converter.convertDataBlockToContentPart(block); + + assertNotNull(result); + assertNotNull(result.getImage()); + } + + @Test + void testConvertDataBlockNoExtensionNoHintThrows() { + DataBlock block = + DataBlock.builder() + .source( + URLSource.builder() + .url("https://cdn.example.com/media/abc123") + .build()) + .build(); + + assertThrows(Exception.class, () -> converter.convertDataBlockToContentPart(block)); + } + + @Test + void testConvertDataBlockUnknownMimeTypeThrows() { + DataBlock block = + DataBlock.builder() + .source( + Base64Source.builder() + .mediaType("application/octet-stream") + .data("ZmFrZQ==") + .build()) + .build(); + + assertThrows(Exception.class, () -> converter.convertDataBlockToContentPart(block)); + } } diff --git a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-anthropic/src/main/java/io/agentscope/extensions/model/anthropic/formatter/AnthropicMediaConverter.java b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-anthropic/src/main/java/io/agentscope/extensions/model/anthropic/formatter/AnthropicMediaConverter.java index d4b7b5fde..bd8f55588 100644 --- a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-anthropic/src/main/java/io/agentscope/extensions/model/anthropic/formatter/AnthropicMediaConverter.java +++ b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-anthropic/src/main/java/io/agentscope/extensions/model/anthropic/formatter/AnthropicMediaConverter.java @@ -20,6 +20,7 @@ import com.anthropic.models.messages.UrlImageSource; import 
io.agentscope.core.formatter.MediaUtils; import io.agentscope.core.message.Base64Source; +import io.agentscope.core.message.DataBlock; import io.agentscope.core.message.ImageBlock; import io.agentscope.core.message.Source; import io.agentscope.core.message.URLSource; @@ -79,4 +80,83 @@ public ImageBlockParam convertImageBlock(ImageBlock imageBlock) throws Exception throw new IllegalArgumentException("Unsupported source type: " + source.getClass()); } } + + /** + * Convert DataBlock to Anthropic ImageBlockParam by resolving MIME type and routing to image. + * + *

<p>Anthropic currently supports image modality only via this SDK type. Audio and video
+ * DataBlocks will throw {@link IllegalArgumentException} since the Anthropic API does not
+ * expose a generic binary content block param yet.
+ *
+ * <p>MIME type resolution order:
+ * <ol>
+ *   <li>{@code Base64Source.mediaType} — always explicit</li>
+ *   <li>{@code URLSource.mimeType} — caller-supplied hint for extension-less URLs</li>
+ *   <li>{@code MediaUtils.determineMediaType(url)} — extension-based inference</li>
+ * </ol>
+ * + * @param dataBlock The data block to convert + * @return ImageBlockParam for Anthropic API + * @throws Exception If conversion fails or MIME type resolves to a non-image category + */ + public ImageBlockParam convertDataBlock(DataBlock dataBlock) throws Exception { + Source source = dataBlock.getSource(); + String mimeType = resolveMimeType(source); + + if (!mimeType.startsWith("image/")) { + throw new IllegalArgumentException( + "Anthropic API only supports image DataBlocks; got MIME type: " + mimeType); + } + + if (source instanceof URLSource urlSource) { + String url = urlSource.getUrl(); + if (MediaUtils.isLocalFile(url)) { + String base64Data = MediaUtils.fileToBase64(url); + return ImageBlockParam.builder() + .source( + Base64ImageSource.builder() + .data(base64Data) + .mediaType(Base64ImageSource.MediaType.of(mimeType)) + .build()) + .build(); + } else { + // mimeType already verified to be image/* above; skip extension check + // so that extension-less CDN URLs with an explicit mimeType hint work + return ImageBlockParam.builder() + .source(UrlImageSource.builder().url(url).build()) + .build(); + } + } else if (source instanceof Base64Source base64Source) { + return ImageBlockParam.builder() + .source( + Base64ImageSource.builder() + .data(base64Source.getData()) + .mediaType(Base64ImageSource.MediaType.of(mimeType)) + .build()) + .build(); + } else { + throw new IllegalArgumentException("Unsupported source type: " + source.getClass()); + } + } + + private String resolveMimeType(Source source) { + if (source instanceof Base64Source b64) { + return b64.getMediaType(); + } + if (source instanceof URLSource urlSource) { + String hint = urlSource.getMimeType(); + if (hint != null && !hint.isBlank()) { + return hint; + } + String inferred = MediaUtils.determineMediaType(urlSource.getUrl()); + if (!"application/octet-stream".equals(inferred)) { + return inferred; + } + throw new IllegalArgumentException( + "Cannot determine MIME type for URL '" + + 
urlSource.getUrl() + + "'; set URLSource.mimeType explicitly"); + } + throw new IllegalArgumentException("Unsupported source type: " + source.getClass()); + } } diff --git a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-anthropic/src/main/java/io/agentscope/extensions/model/anthropic/formatter/AnthropicMessageConverter.java b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-anthropic/src/main/java/io/agentscope/extensions/model/anthropic/formatter/AnthropicMessageConverter.java index f616cbe52..09850c006 100644 --- a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-anthropic/src/main/java/io/agentscope/extensions/model/anthropic/formatter/AnthropicMessageConverter.java +++ b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-anthropic/src/main/java/io/agentscope/extensions/model/anthropic/formatter/AnthropicMessageConverter.java @@ -24,6 +24,7 @@ import com.anthropic.models.messages.ToolResultBlockParam; import com.anthropic.models.messages.ToolUseBlockParam; import io.agentscope.core.message.ContentBlock; +import io.agentscope.core.message.DataBlock; import io.agentscope.core.message.HintBlock; import io.agentscope.core.message.ImageBlock; import io.agentscope.core.message.Msg; @@ -155,6 +156,21 @@ private MessageParam convertMessageContent( + "]") .build())); } + } else if (block instanceof DataBlock db) { + try { + ImageBlockParam imageParam = mediaConverter.convertDataBlock(db); + contentBlocks.add(ContentBlockParam.ofImage(imageParam)); + } catch (Exception e) { + log.warn("Failed to process DataBlock: {}", e.getMessage()); + contentBlocks.add( + ContentBlockParam.ofText( + TextBlockParam.builder() + .text( + "[Media - processing failed: " + + e.getMessage() + + "]") + .build())); + } } else if (block instanceof ToolUseBlock tub) { contentBlocks.add( ContentBlockParam.ofToolUse( diff --git 
a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-anthropic/src/test/java/io/agentscope/extensions/model/anthropic/formatter/AnthropicMediaConverterTest.java b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-anthropic/src/test/java/io/agentscope/extensions/model/anthropic/formatter/AnthropicMediaConverterTest.java index 4ca3e04d7..7519b6057 100644 --- a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-anthropic/src/test/java/io/agentscope/extensions/model/anthropic/formatter/AnthropicMediaConverterTest.java +++ b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-anthropic/src/test/java/io/agentscope/extensions/model/anthropic/formatter/AnthropicMediaConverterTest.java @@ -24,6 +24,7 @@ import com.anthropic.models.messages.ImageBlockParam; import com.anthropic.models.messages.UrlImageSource; import io.agentscope.core.message.Base64Source; +import io.agentscope.core.message.DataBlock; import io.agentscope.core.message.ImageBlock; import io.agentscope.core.message.Source; import io.agentscope.core.message.URLSource; @@ -173,4 +174,86 @@ void testConvertImageBlockWithGifMediaType() throws Exception { // Custom source type for testing unsupported sources private static class CustomSource extends Source {} + + @Test + void testConvertDataBlockWithBase64Source() throws Exception { + Base64Source source = + Base64Source.builder() + .data("ZmFrZSBpbWFnZSBjb250ZW50") + .mediaType("image/png") + .build(); + DataBlock block = DataBlock.builder().source(source).build(); + + ImageBlockParam result = converter.convertDataBlock(block); + + assertNotNull(result); + assertTrue(result.source().isBase64()); + Base64ImageSource base64Source = result.source().asBase64(); + assertEquals("ZmFrZSBpbWFnZSBjb250ZW50", base64Source.data()); + assertEquals("image/png", base64Source.mediaType().toString()); + } + + @Test + void testConvertDataBlockWithRemoteURLAndExtension() 
throws Exception { + String remoteUrl = "https://example.com/photo.jpg"; + URLSource source = URLSource.builder().url(remoteUrl).build(); + DataBlock block = DataBlock.builder().source(source).build(); + + ImageBlockParam result = converter.convertDataBlock(block); + + assertNotNull(result); + assertTrue(result.source().isUrl()); + assertEquals(remoteUrl, result.source().asUrl().url()); + } + + @Test + void testConvertDataBlockWithMimeTypeHintExtensionlessUrl() throws Exception { + // Extension-less CDN URL with explicit mimeType hint — the primary use case + String cdnUrl = "https://cdn.example.com/media/abc123"; + URLSource source = URLSource.builder().url(cdnUrl).mimeType("image/png").build(); + DataBlock block = DataBlock.builder().source(source).build(); + + ImageBlockParam result = converter.convertDataBlock(block); + + assertNotNull(result); + assertTrue(result.source().isUrl()); + assertEquals(cdnUrl, result.source().asUrl().url()); + } + + @Test + void testConvertDataBlockWithLocalFile() throws Exception { + URLSource source = URLSource.builder().url(tempImageFile.toString()).build(); + DataBlock block = DataBlock.builder().source(source).build(); + + ImageBlockParam result = converter.convertDataBlock(block); + + assertNotNull(result); + assertTrue(result.source().isBase64()); + byte[] decoded = Base64.getDecoder().decode(result.source().asBase64().data()); + assertEquals("fake image content", new String(decoded)); + } + + @Test + void testConvertDataBlockNonImageMimeTypeThrows() { + // Anthropic only supports image — audio/video DataBlocks must throw + Base64Source source = + Base64Source.builder() + .data("ZmFrZSBhdWRpbyBjb250ZW50") + .mediaType("audio/mp3") + .build(); + DataBlock block = DataBlock.builder().source(source).build(); + + IllegalArgumentException ex = + assertThrows( + IllegalArgumentException.class, () -> converter.convertDataBlock(block)); + assertTrue(ex.getMessage().contains("image")); + } + + @Test + void 
testConvertDataBlockNoExtensionNoHintThrows() { + URLSource source = URLSource.builder().url("https://cdn.example.com/media/abc123").build(); + DataBlock block = DataBlock.builder().source(source).build(); + + assertThrows(Exception.class, () -> converter.convertDataBlock(block)); + } } diff --git a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/main/java/io/agentscope/extensions/model/gemini/formatter/GeminiConversationMerger.java b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/main/java/io/agentscope/extensions/model/gemini/formatter/GeminiConversationMerger.java index 2ed6b3d21..9a8ff8b9c 100644 --- a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/main/java/io/agentscope/extensions/model/gemini/formatter/GeminiConversationMerger.java +++ b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/main/java/io/agentscope/extensions/model/gemini/formatter/GeminiConversationMerger.java @@ -19,6 +19,7 @@ import com.google.genai.types.Part; import io.agentscope.core.message.AudioBlock; import io.agentscope.core.message.ContentBlock; +import io.agentscope.core.message.DataBlock; import io.agentscope.core.message.HintBlock; import io.agentscope.core.message.ImageBlock; import io.agentscope.core.message.Msg; @@ -135,6 +136,15 @@ public Content mergeToContent( } // Add video as separate Part parts.add(mediaConverter.convertToInlineDataPart(vb)); + + } else if (block instanceof DataBlock db) { + // Flush accumulated text as a Part + if (!accumulatedText.isEmpty()) { + parts.add(Part.builder().text(String.join("\n", accumulatedText)).build()); + accumulatedText.clear(); + } + // Add data block as separate Part + parts.add(mediaConverter.convertToInlineDataPart(db)); } } } diff --git 
a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/main/java/io/agentscope/extensions/model/gemini/formatter/GeminiMediaConverter.java b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/main/java/io/agentscope/extensions/model/gemini/formatter/GeminiMediaConverter.java index 1a527bfcc..abaa922ec 100644 --- a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/main/java/io/agentscope/extensions/model/gemini/formatter/GeminiMediaConverter.java +++ b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/main/java/io/agentscope/extensions/model/gemini/formatter/GeminiMediaConverter.java @@ -17,8 +17,10 @@ import com.google.genai.types.Blob; import com.google.genai.types.Part; +import io.agentscope.core.formatter.MediaUtils; import io.agentscope.core.message.AudioBlock; import io.agentscope.core.message.Base64Source; +import io.agentscope.core.message.DataBlock; import io.agentscope.core.message.ImageBlock; import io.agentscope.core.message.Source; import io.agentscope.core.message.URLSource; @@ -88,6 +90,70 @@ public Part convertToInlineDataPart(VideoBlock block) { return convertMediaBlockToInlineDataPart(block.getSource(), "video"); } + /** + * Convert DataBlock to Gemini Part with inline data. + * + *

<p>MIME type resolution order:
+ * <ol>
+ *   <li>{@code Base64Source.mediaType} — always explicit</li>
+ *   <li>{@code URLSource.mimeType} — caller-supplied hint for extension-less URLs</li>
+ *   <li>Extension-based inference via {@link #getMimeType}</li>
+ * </ol>
+ * + * @param block DataBlock to convert + * @return Part object containing inline data + */ + public Part convertToInlineDataPart(DataBlock block) { + Source source = block.getSource(); + byte[] data; + String mimeType; + + if (source instanceof Base64Source base64Source) { + data = Base64.getDecoder().decode(base64Source.getData()); + mimeType = base64Source.getMediaType(); + } else if (source instanceof URLSource urlSource) { + String url = urlSource.getUrl(); + try { + data = readFileAsBytes(url); + } catch (IOException e) { + throw new RuntimeException("Failed to read DataBlock file: " + url, e); + } + String hint = urlSource.getMimeType(); + if (hint != null && !hint.isBlank()) { + mimeType = hint; + } else { + mimeType = resolveMimeTypeFromUrl(url); + } + } else { + throw new IllegalArgumentException( + "Unsupported source type: " + source.getClass().getName()); + } + + Blob blob = Blob.builder().data(data).mimeType(mimeType).build(); + return Part.builder().inlineData(blob).build(); + } + + // infer mimeType from URL extension via MediaUtils (handles query strings correctly) + private String resolveMimeTypeFromUrl(String url) { + String ext = MediaUtils.getExtension(url); + if (ext.isEmpty()) { + throw new IllegalArgumentException( + "Cannot determine MIME type for URL '" + + url + + "'; set URLSource.mimeType explicitly"); + } + for (Map.Entry> entry : SUPPORTED_EXTENSIONS.entrySet()) { + if (entry.getValue().contains(ext)) { + String category = entry.getKey(); + return category + "/" + ("jpg".equals(ext) ? "jpeg" : ext); + } + } + throw new IllegalArgumentException( + "Cannot determine MIME type for URL '" + + url + + "'; set URLSource.mimeType explicitly"); + } + /** * Convert a media source to Gemini Part with inline data. 
* diff --git a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/main/java/io/agentscope/extensions/model/gemini/formatter/GeminiMessageConverter.java b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/main/java/io/agentscope/extensions/model/gemini/formatter/GeminiMessageConverter.java index 14ed08ed3..4ca629ed4 100644 --- a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/main/java/io/agentscope/extensions/model/gemini/formatter/GeminiMessageConverter.java +++ b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/main/java/io/agentscope/extensions/model/gemini/formatter/GeminiMessageConverter.java @@ -22,6 +22,7 @@ import io.agentscope.core.message.AudioBlock; import io.agentscope.core.message.Base64Source; import io.agentscope.core.message.ContentBlock; +import io.agentscope.core.message.DataBlock; import io.agentscope.core.message.HintBlock; import io.agentscope.core.message.ImageBlock; import io.agentscope.core.message.Msg; @@ -172,6 +173,9 @@ public List convertMessages(List msgs) { } else if (block instanceof VideoBlock vb) { parts.add(mediaConverter.convertToInlineDataPart(vb)); + } else if (block instanceof DataBlock db) { + parts.add(mediaConverter.convertToInlineDataPart(db)); + } else if (block instanceof HintBlock hb) { parts.add(Part.builder().text(hb.getHint()).build()); @@ -237,6 +241,10 @@ private String convertToolResultToString(List output) { } else if (block instanceof VideoBlock vb) { String reference = convertMediaBlockToTextReference(vb, "video"); textualOutput.add(reference); + + } else if (block instanceof DataBlock db) { + String reference = convertMediaBlockToTextReference(db, "data"); + textualOutput.add(reference); } // Other block types are ignored } @@ -302,6 +310,8 @@ private Source extractSourceFromBlock(ContentBlock block) { return ab.getSource(); } else if (block instanceof 
VideoBlock vb) { return vb.getSource(); + } else if (block instanceof DataBlock db) { + return db.getSource(); } throw new IllegalArgumentException("Unsupported block type: " + block.getClass()); } diff --git a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/test/java/io/agentscope/extensions/model/gemini/formatter/GeminiMediaConverterTest.java b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/test/java/io/agentscope/extensions/model/gemini/formatter/GeminiMediaConverterTest.java index 10f0ecfc2..716605882 100644 --- a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/test/java/io/agentscope/extensions/model/gemini/formatter/GeminiMediaConverterTest.java +++ b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-gemini/src/test/java/io/agentscope/extensions/model/gemini/formatter/GeminiMediaConverterTest.java @@ -25,6 +25,7 @@ import com.google.genai.types.Part; import io.agentscope.core.message.AudioBlock; import io.agentscope.core.message.Base64Source; +import io.agentscope.core.message.DataBlock; import io.agentscope.core.message.ImageBlock; import io.agentscope.core.message.URLSource; import io.agentscope.core.message.VideoBlock; @@ -166,4 +167,88 @@ void testBase64EncodingDecoding() { assertArrayEquals(originalText.getBytes(), resultData); } + + @Test + void testConvertDataBlockWithBase64Source() { + Base64Source source = + Base64Source.builder() + .data("ZmFrZSBpbWFnZSBjb250ZW50") + .mediaType("image/png") + .build(); + DataBlock block = DataBlock.builder().source(source).build(); + + Part result = converter.convertToInlineDataPart(block); + + assertNotNull(result); + assertTrue(result.inlineData().isPresent()); + Blob blob = result.inlineData().get(); + assertArrayEquals("fake image content".getBytes(), blob.data().get()); + assertEquals("image/png", blob.mimeType().get()); + } + + @Test + void
testConvertDataBlockWithURLSourceAndExtension() { + URLSource source = URLSource.builder().url(tempImageFile.toString()).build(); + DataBlock block = DataBlock.builder().source(source).build(); + + Part result = converter.convertToInlineDataPart(block); + + assertNotNull(result); + assertTrue(result.inlineData().isPresent()); + assertEquals("image/png", result.inlineData().get().mimeType().get()); + } + + @Test + void testConvertDataBlockWithURLSourceMimeTypeHint() { + // The URL keeps its own file extension while an explicit mimeType hint names a + // different type; the converter must report the hinted type. + URLSource sourceWithHint = + URLSource.builder().url(tempAudioFile.toString()).mimeType("image/png").build(); + DataBlock block = DataBlock.builder().source(sourceWithHint).build(); + + Part result = converter.convertToInlineDataPart(block); + + assertNotNull(result); + // mimeType hint overrides extension-based inference + assertEquals("image/png", result.inlineData().get().mimeType().get()); + } + + @Test + void testConvertDataBlockWithURLSourceNoExtensionNoHintThrows() { + URLSource source = URLSource.builder().url("https://cdn.example.com/media/abc123").build(); + DataBlock block = DataBlock.builder().source(source).build(); + + // Remote URL with no extension and no hint — should throw + assertThrows(RuntimeException.class, () -> converter.convertToInlineDataPart(block)); + } + + @Test + void testConvertDataBlockVideoBase64() { + Base64Source source = + Base64Source.builder() + .data("ZmFrZSB2aWRlbyBjb250ZW50") + .mediaType("video/mp4") + .build(); + DataBlock block = DataBlock.builder().source(source).build(); + + Part result = converter.convertToInlineDataPart(block); + + assertNotNull(result); + assertEquals("video/mp4",
result.inlineData().get().mimeType().get()); + } + + @Test + void testConvertDataBlockAudioBase64() { + Base64Source source = + Base64Source.builder() + .data("ZmFrZSBhdWRpbyBjb250ZW50") + .mediaType("audio/mp3") + .build(); + DataBlock block = DataBlock.builder().source(source).build(); + + Part result = converter.convertToInlineDataPart(block); + + assertNotNull(result); + assertEquals("audio/mp3", result.inlineData().get().mimeType().get()); + } }
diff --git a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-openai/src/main/java/io/agentscope/extensions/model/openai/formatter/OpenAIConverterUtils.java b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-openai/src/main/java/io/agentscope/extensions/model/openai/formatter/OpenAIConverterUtils.java index 580efda2f..c914747aa 100644 --- a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-openai/src/main/java/io/agentscope/extensions/model/openai/formatter/OpenAIConverterUtils.java +++ b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-openai/src/main/java/io/agentscope/extensions/model/openai/formatter/OpenAIConverterUtils.java @@ -15,6 +15,7 @@ */ package io.agentscope.extensions.model.openai.formatter; +import io.agentscope.core.formatter.MediaUtils; import io.agentscope.core.message.Base64Source; import io.agentscope.core.message.Source; import io.agentscope.core.message.URLSource; @@ -112,4 +113,50 @@ public static String convertVideoSourceToUrl(Source source) { throw new IllegalArgumentException("Unknown source type: " + source.getClass()); } } + + /** + * Resolve the MIME type from a Source. + * + * <p>Resolution order: + * <ol> + * <li>{@code Base64Source.mediaType} — always explicit</li> + * <li>{@code URLSource.mimeType} — caller-supplied hint for extension-less URLs</li> + * <li>{@code MediaUtils.determineMediaType(url)} — extension-based inference</li> + * </ol> + * + * @param source The source to resolve MIME type from + * @return MIME type string (e.g. "image/jpeg"); never {@code null} + * @throws IllegalArgumentException if {@code source} is null or of an unknown type, or if + * its MIME type cannot be determined + */ + public static String resolveMimeType(Source source) { + if (source == null) { + throw new IllegalArgumentException("Source must not be null"); + } + if (source instanceof Base64Source b64) { + String mediaType = b64.getMediaType(); + // A Base64Source without a media type would otherwise leak null to callers. + if (mediaType == null || mediaType.isBlank()) { + throw new IllegalArgumentException( + "Cannot determine MIME type: Base64Source.mediaType is not set"); + } + return mediaType; + } + if (source instanceof URLSource urlSource) { + String hint = urlSource.getMimeType(); + if (hint != null && !hint.isBlank()) { + return hint; + } + String inferred = MediaUtils.determineMediaType(urlSource.getUrl()); + // "application/octet-stream" is treated as "could not infer a specific type". + if (inferred != null && !"application/octet-stream".equals(inferred)) { + return inferred; + } + throw new IllegalArgumentException( + "Cannot determine MIME type for URL '" + + urlSource.getUrl() + + "'; set URLSource.mimeType explicitly"); + } + throw new IllegalArgumentException("Unsupported source type: " + source.getClass()); + } }
diff --git a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-openai/src/main/java/io/agentscope/extensions/model/openai/formatter/OpenAIMessageConverter.java b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-openai/src/main/java/io/agentscope/extensions/model/openai/formatter/OpenAIMessageConverter.java index 01c4c72ed..d22855f72 100644 --- a/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-openai/src/main/java/io/agentscope/extensions/model/openai/formatter/OpenAIMessageConverter.java +++ b/agentscope-extensions/agentscope-extensions-model/agentscope-extensions-model-openai/src/main/java/io/agentscope/extensions/model/openai/formatter/OpenAIMessageConverter.java @@ -18,6 +18,7 @@ import io.agentscope.core.message.AudioBlock; import io.agentscope.core.message.Base64Source; import io.agentscope.core.message.ContentBlock; +import io.agentscope.core.message.DataBlock; import io.agentscope.core.message.HintBlock; import io.agentscope.core.message.ImageBlock; import io.agentscope.core.message.MessageMetadataKeys; @@
-249,6 +250,46 @@ private List convertContentBlocks(List blocks) OpenAIContentPart.text( "[Video - processing failed: " + errorMsg + "]")); } + } else if (block instanceof DataBlock db) { + try { + Source source = db.getSource(); + if (source == null) { + log.warn("DataBlock has null source, skipping"); + continue; + } + String mimeType = OpenAIConverterUtils.resolveMimeType(source); + if (mimeType.startsWith("image/")) { + contentParts.add( + OpenAIContentPart.imageUrl(convertImageSourceToUrl(source))); + } else if (mimeType.startsWith("video/")) { + contentParts.add( + OpenAIContentPart.videoUrl(convertVideoSourceToUrl(source))); + } else if (mimeType.startsWith("audio/")) { + if (source instanceof Base64Source b64) { + String format = detectAudioFormat(b64.getMediaType()); + contentParts.add(OpenAIContentPart.inputAudio(b64.getData(), format)); + } else { + log.warn( + "URL-based audio DataBlock not supported by OpenAI input_audio;" + + " using text reference"); + contentParts.add( + OpenAIContentPart.text( + "[Audio URL: " + ((URLSource) source).getUrl() + "]")); + } + } else { + log.warn("DataBlock has unroutable MIME type '{}', skipping", mimeType); + contentParts.add( + OpenAIContentPart.text( + "[Media - unsupported MIME type: " + mimeType + "]")); + } + } catch (Exception e) { + String errorMsg = + e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); + log.warn("Failed to process DataBlock: {}", errorMsg); + contentParts.add( + OpenAIContentPart.text( + "[Media - processing failed: " + errorMsg + "]")); + } } else if (block instanceof ToolUseBlock) { log.warn("ToolUseBlock is not supported in user messages"); } else if (block instanceof ToolResultBlock) { @@ -432,7 +473,8 @@ private boolean hasMediaContent(List blocks) { for (ContentBlock block : blocks) { if (block instanceof ImageBlock || block instanceof AudioBlock - || block instanceof VideoBlock) { + || block instanceof VideoBlock + || block instanceof DataBlock) { return true; } }