Skip to content

Commit c3c6313

Browse files
authored
Added "system_prompt" input to Gemini nodes (Comfy-Org#11177)
1 parent 85c4b4a commit c3c6313

File tree

2 files changed

+51
-11
lines changed

2 files changed

+51
-11
lines changed

comfy_api_nodes/apis/gemini_api.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,7 @@ class GeminiSystemInstructionContent(BaseModel):
8484
description="A list of ordered parts that make up a single message. "
8585
"Different parts may have different IANA MIME types.",
8686
)
87-
role: GeminiRole = Field(
88-
...,
89-
description="The identity of the entity that creates the message. "
90-
"The following values are supported: "
91-
"user: This indicates that the message is sent by a real person, typically a user-generated message. "
92-
"model: This indicates that the message is generated by the model. "
93-
"The model value is used to insert messages from model into the conversation during multi-turn conversations. "
94-
"For non-multi-turn conversations, this field can be left blank or unset.",
95-
)
87+
role: GeminiRole | None = Field(..., description="The role field of systemInstruction may be ignored.")
9688

9789

9890
class GeminiFunctionDeclaration(BaseModel):

comfy_api_nodes/nodes_gemini.py

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
GeminiMimeType,
2727
GeminiPart,
2828
GeminiRole,
29+
GeminiSystemInstructionContent,
30+
GeminiTextPart,
2931
Modality,
3032
)
3133
from comfy_api_nodes.util import (
@@ -42,6 +44,14 @@
4244

4345
GEMINI_BASE_ENDPOINT = "/proxy/vertexai/gemini"
4446
GEMINI_MAX_INPUT_FILE_SIZE = 20 * 1024 * 1024 # 20 MB
47+
GEMINI_IMAGE_SYS_PROMPT = (
48+
"You are an expert image-generation engine. You must ALWAYS produce an image.\n"
49+
"Interpret all user input—regardless of "
50+
"format, intent, or abstraction—as literal visual directives for image composition.\n"
51+
"If a prompt is conversational or lacks specific visual details, "
52+
"you must creatively invent a concrete visual scenario that depicts the concept.\n"
53+
"Prioritize generating the visual representation above any text, formatting, or conversational requests."
54+
)
4555

4656

4757
class GeminiModel(str, Enum):
@@ -276,6 +286,13 @@ def define_schema(cls):
276286
tooltip="Optional file(s) to use as context for the model. "
277287
"Accepts inputs from the Gemini Generate Content Input Files node.",
278288
),
289+
IO.String.Input(
290+
"system_prompt",
291+
multiline=True,
292+
default="",
293+
optional=True,
294+
tooltip="Foundational instructions that dictate an AI's behavior.",
295+
),
279296
],
280297
outputs=[
281298
IO.String.Output(),
@@ -348,6 +365,7 @@ async def execute(
348365
audio: Input.Audio | None = None,
349366
video: Input.Video | None = None,
350367
files: list[GeminiPart] | None = None,
368+
system_prompt: str = "",
351369
) -> IO.NodeOutput:
352370
validate_string(prompt, strip_whitespace=False)
353371

@@ -364,7 +382,10 @@ async def execute(
364382
if files is not None:
365383
parts.extend(files)
366384

367-
# Create response
385+
gemini_system_prompt = None
386+
if system_prompt:
387+
gemini_system_prompt = GeminiSystemInstructionContent(parts=[GeminiTextPart(text=system_prompt)], role=None)
388+
368389
response = await sync_op(
369390
cls,
370391
endpoint=ApiEndpoint(path=f"{GEMINI_BASE_ENDPOINT}/{model}", method="POST"),
@@ -374,7 +395,8 @@ async def execute(
374395
role=GeminiRole.user,
375396
parts=parts,
376397
)
377-
]
398+
],
399+
systemInstruction=gemini_system_prompt,
378400
),
379401
response_model=GeminiGenerateContentResponse,
380402
price_extractor=calculate_tokens_price,
@@ -524,6 +546,13 @@ def define_schema(cls):
524546
"'IMAGE+TEXT' to return both the generated image and a text response.",
525547
optional=True,
526548
),
549+
IO.String.Input(
550+
"system_prompt",
551+
multiline=True,
552+
default=GEMINI_IMAGE_SYS_PROMPT,
553+
optional=True,
554+
tooltip="Foundational instructions that dictate an AI's behavior.",
555+
),
527556
],
528557
outputs=[
529558
IO.Image.Output(),
@@ -547,6 +576,7 @@ async def execute(
547576
files: list[GeminiPart] | None = None,
548577
aspect_ratio: str = "auto",
549578
response_modalities: str = "IMAGE+TEXT",
579+
system_prompt: str = "",
550580
) -> IO.NodeOutput:
551581
validate_string(prompt, strip_whitespace=True, min_length=1)
552582
parts: list[GeminiPart] = [GeminiPart(text=prompt)]
@@ -560,6 +590,10 @@ async def execute(
560590
if files is not None:
561591
parts.extend(files)
562592

593+
gemini_system_prompt = None
594+
if system_prompt:
595+
gemini_system_prompt = GeminiSystemInstructionContent(parts=[GeminiTextPart(text=system_prompt)], role=None)
596+
563597
response = await sync_op(
564598
cls,
565599
endpoint=ApiEndpoint(path=f"{GEMINI_BASE_ENDPOINT}/{model}", method="POST"),
@@ -571,6 +605,7 @@ async def execute(
571605
responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]),
572606
imageConfig=None if aspect_ratio == "auto" else image_config,
573607
),
608+
systemInstruction=gemini_system_prompt,
574609
),
575610
response_model=GeminiGenerateContentResponse,
576611
price_extractor=calculate_tokens_price,
@@ -641,6 +676,13 @@ def define_schema(cls):
641676
tooltip="Optional file(s) to use as context for the model. "
642677
"Accepts inputs from the Gemini Generate Content Input Files node.",
643678
),
679+
IO.String.Input(
680+
"system_prompt",
681+
multiline=True,
682+
default=GEMINI_IMAGE_SYS_PROMPT,
683+
optional=True,
684+
tooltip="Foundational instructions that dictate an AI's behavior.",
685+
),
644686
],
645687
outputs=[
646688
IO.Image.Output(),
@@ -665,6 +707,7 @@ async def execute(
665707
response_modalities: str,
666708
images: Input.Image | None = None,
667709
files: list[GeminiPart] | None = None,
710+
system_prompt: str = "",
668711
) -> IO.NodeOutput:
669712
validate_string(prompt, strip_whitespace=True, min_length=1)
670713

@@ -680,6 +723,10 @@ async def execute(
680723
if aspect_ratio != "auto":
681724
image_config.aspectRatio = aspect_ratio
682725

726+
gemini_system_prompt = None
727+
if system_prompt:
728+
gemini_system_prompt = GeminiSystemInstructionContent(parts=[GeminiTextPart(text=system_prompt)], role=None)
729+
683730
response = await sync_op(
684731
cls,
685732
ApiEndpoint(path=f"{GEMINI_BASE_ENDPOINT}/{model}", method="POST"),
@@ -691,6 +738,7 @@ async def execute(
691738
responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]),
692739
imageConfig=image_config,
693740
),
741+
systemInstruction=gemini_system_prompt,
694742
),
695743
response_model=GeminiGenerateContentResponse,
696744
price_extractor=calculate_tokens_price,

0 commit comments

Comments (0)