Skip to content

[Inference] Add image-to-image support for Replicate #1564

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/inference/src/lib/getProviderHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
"text-to-image": new Replicate.ReplicateTextToImageTask(),
"text-to-speech": new Replicate.ReplicateTextToSpeechTask(),
"text-to-video": new Replicate.ReplicateTextToVideoTask(),
"image-to-image": new Replicate.ReplicateImageToImageTask(),
},
sambanova: {
conversational: new Sambanova.SambanovaConversationalTask(),
Expand Down
65 changes: 63 additions & 2 deletions packages/inference/src/providers/replicate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,16 @@
*/
import { InferenceClientProviderOutputError } from "../errors.js";
import { isUrl } from "../lib/isUrl.js";
import type { BodyParams, HeaderParams, UrlParams } from "../types.js";
import type { BodyParams, HeaderParams, RequestArgs, UrlParams } from "../types.js";
import { omit } from "../utils/omit.js";
import { TaskProviderHelper, type TextToImageTaskHelper, type TextToVideoTaskHelper } from "./providerHelper.js";
import {
TaskProviderHelper,
type ImageToImageTaskHelper,
type TextToImageTaskHelper,
type TextToVideoTaskHelper,
} from "./providerHelper.js";
import type { ImageToImageArgs } from "../tasks/cv/imageToImage.js";
import { base64FromBytes } from "../utils/base64FromBytes.js";
/**
 * Shape of the part of a Replicate response body that the task helpers read.
 */
export interface ReplicateOutput {
/**
 * Result of the run, when present: either a single string or a list of strings.
 * The helpers visible in this file treat these strings as locations to download
 * the generated asset from.
 */
output?: string | string[];
}
Expand Down Expand Up @@ -152,3 +159,57 @@ export class ReplicateTextToVideoTask extends ReplicateTask implements TextToVid
throw new InferenceClientProviderOutputError("Received malformed response from Replicate text-to-video API");
}
}

/**
 * Replicate helper for the image-to-image task.
 *
 * Builds the request body from the caller's arguments, encodes the source image
 * as a base64 data URL, and downloads the resulting image referenced by the
 * provider response.
 */
export class ReplicateImageToImageTask extends ReplicateTask implements ImageToImageTaskHelper {
	/**
	 * Builds the JSON body sent to Replicate.
	 *
	 * @param params - Request parameters; `params.args` carries the user arguments
	 * and `params.model` the provider model id.
	 * @returns An object with an `input` record and an optional `version`.
	 */
	override preparePayload(params: BodyParams<ImageToImageArgs>): Record<string, unknown> {
		return {
			input: {
				// Top-level args (minus the ones handled explicitly below) come first so
				// that user `parameters` override them on key collisions.
				...omit(params.args, ["inputs", "parameters"]),
				...params.args.parameters,
				input_image: params.args.inputs, // This will be processed in preparePayloadAsync
			},
			// A model id of the form "<name>:<version>" pins a version; otherwise none is sent.
			version: params.model.includes(":") ? params.model.split(":")[1] : undefined,
		};
	}

	/**
	 * Replaces the binary `inputs` image with a base64 data URL string.
	 *
	 * @param args - Image-to-image arguments whose `inputs` is the source image.
	 * @returns The same arguments with `inputs` swapped for the data URL.
	 */
	async preparePayloadAsync(args: ImageToImageArgs): Promise<RequestArgs> {
		const { inputs, ...restArgs } = args;

		// Convert Blob to base64 data URL
		const bytes = new Uint8Array(await inputs.arrayBuffer());
		const base64 = base64FromBytes(bytes);
		// Fall back to JPEG when the Blob carries no MIME type.
		const imageInput = `data:${inputs.type || "image/jpeg"};base64,${base64}`;

		return {
			...restArgs,
			inputs: imageInput,
		};
	}

	/**
	 * Downloads the generated image referenced by the provider response.
	 *
	 * Accepts `output` either as a non-empty array whose first element is a
	 * string, or as a single string that passes `isUrl`.
	 *
	 * @param response - Parsed response body from Replicate.
	 * @returns The downloaded image.
	 * @throws InferenceClientProviderOutputError when the response carries no usable
	 * output, or when downloading the output fails with a non-2xx HTTP status.
	 */
	override async getResponse(response: ReplicateOutput): Promise<Blob> {
		const output =
			typeof response === "object" && !!response && "output" in response ? response.output : undefined;

		let imageUrl: string | undefined;
		if (Array.isArray(output)) {
			// Only the first element is used; an empty array yields `undefined` here.
			const first: unknown = output[0];
			if (typeof first === "string") {
				imageUrl = first;
			}
		} else if (typeof output === "string" && isUrl(output)) {
			imageUrl = output;
		}

		if (imageUrl === undefined) {
			throw new InferenceClientProviderOutputError("Received malformed response from Replicate image-to-image API");
		}

		const urlResponse = await fetch(imageUrl);
		// fetch() resolves on HTTP error statuses too; without this check an error
		// page would be handed back to the caller as if it were the image.
		if (!urlResponse.ok) {
			throw new InferenceClientProviderOutputError(
				`Failed to download image from Replicate image-to-image API output (HTTP ${urlResponse.status})`
			);
		}
		return await urlResponse.blob();
	}
}
12 changes: 12 additions & 0 deletions packages/inference/test/InferenceClient.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1277,6 +1277,18 @@ describe.skip("InferenceClient", () => {

expect(res).toBeInstanceOf(Blob);
});

it("imageToImage - FLUX Kontext Dev", async () => {
	// Source image is sent as a PNG blob; the prompt describes the requested edit.
	const sourceImage = new Blob([readTestFile("stormtrooper_depth.png")], { type: "image/png" });
	const editPrompt = "Change the stormtrooper armor to golden color while keeping the same pose and helmet design";

	const generated = await client.imageToImage({
		model: "black-forest-labs/flux-kontext-dev",
		provider: "replicate",
		inputs: sourceImage,
		parameters: {
			prompt: editPrompt,
		},
	});

	// The helper is expected to hand back the edited image as binary data.
	expect(generated).toBeInstanceOf(Blob);
});
},
TIMEOUT
);
Expand Down