Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .mcp.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
"args": ["run", "sanzaru"],
"env": {
"OPENAI_API_KEY": "${OPENAI_API_KEY}",
"SANZARU_MEDIA_PATH": "${SANZARU_MEDIA_PATH}"
"SANZARU_MEDIA_PATH": "${SANZARU_MEDIA_PATH}",
"GOOGLE_API_KEY": "${GOOGLE_API_KEY}",
"GOOGLE_GENAI_USE_VERTEXAI": "${GOOGLE_GENAI_USE_VERTEXAI}",
"GOOGLE_CLOUD_PROJECT": "${GOOGLE_CLOUD_PROJECT}",
"GOOGLE_CLOUD_LOCATION": "${GOOGLE_CLOUD_LOCATION}"
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,13 @@ audio = [
image = [
"pillow>=12.0.0",
]
google = [
"google-genai>=1.0.0",
"pillow>=12.0.0",
]
databricks = [] # httpx already a core dep; extra exists for signaling intent
all = [
"sanzaru[video,audio,image]", # databricks intentionally excluded from "all"
"sanzaru[video,audio,image,google]", # databricks intentionally excluded from "all"
]

[dependency-groups]
Expand Down
70 changes: 69 additions & 1 deletion src/sanzaru/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,20 @@
- Logging setup
"""

from __future__ import annotations

import logging
import os
import pathlib
import sys
from functools import lru_cache
from typing import Literal
from typing import TYPE_CHECKING, Literal

from openai import AsyncOpenAI

if TYPE_CHECKING:
from google import genai

# ---------- Logging configuration ----------
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
logging.basicConfig(
Expand All @@ -43,6 +48,69 @@ def get_client() -> AsyncOpenAI:
return AsyncOpenAI(api_key=api_key)


# ---------- Google Gen AI client (stateless) ----------
def get_google_client() -> genai.Client:
    """Build a Google Gen AI client from environment variables.

    Two backends are supported; the flag GOOGLE_GENAI_USE_VERTEXAI selects
    between them, and authentication is driven entirely by the environment
    (no explicit credential objects are constructed here).

    Vertex AI (GOOGLE_GENAI_USE_VERTEXAI=True):
        * Express mode — GOOGLE_API_KEY alone authenticates the client.
          When an API key is present it takes precedence; project/location
          are not passed (the SDK treats api_key and project/location as
          mutually exclusive).
        * Standard mode (ADC) — GOOGLE_CLOUD_PROJECT is required;
          GOOGLE_CLOUD_LOCATION defaults to "us-central1". Credentials come
          from the ambient environment (service account key via
          GOOGLE_APPLICATION_CREDENTIALS, gcloud login, attached SA, WIF).

    Gemini Developer API (flag unset/false):
        GOOGLE_API_KEY is required.

    Returns:
        A configured Google Gen AI ``Client``.

    Raises:
        ImportError: If the google-genai package is not installed.
        RuntimeError: If the required environment variables are missing.
    """
    # Imported lazily so the rest of the server works without the extra.
    try:
        from google import genai
    except ImportError as exc:
        raise ImportError("google-genai package is required. Install with: uv add 'sanzaru[google]'") from exc

    vertex_flag = os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "").lower()

    if vertex_flag in ("true", "1"):
        google_api_key = os.getenv("GOOGLE_API_KEY")

        # Express mode wins whenever an API key is present: the SDK rejects
        # api_key combined with project/location, so pass only the key.
        if google_api_key:
            return genai.Client(vertexai=True, api_key=google_api_key)

        project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
        if not project_id:
            raise RuntimeError(
                "Vertex AI requires GOOGLE_CLOUD_PROJECT (ADC/service-account auth) "
                "or GOOGLE_API_KEY (Express mode) when GOOGLE_GENAI_USE_VERTEXAI=True"
            )

        # Standard mode: project + location, credentials resolved via ADC.
        region = os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1")
        return genai.Client(vertexai=True, project=project_id, location=region)

    # Gemini Developer API path: a bare API key is the only requirement.
    google_api_key = os.getenv("GOOGLE_API_KEY")
    if google_api_key:
        return genai.Client(api_key=google_api_key)
    raise RuntimeError(
        "Google credentials not configured. "
        "Set GOOGLE_GENAI_USE_VERTEXAI=True + GOOGLE_CLOUD_PROJECT (Vertex AI) "
        "or GOOGLE_API_KEY (Gemini Developer API)"
    )


# ---------- Path configuration (runtime) ----------

# Mapping from path_type to (individual env var, subdirectory under SANZARU_MEDIA_PATH)
Expand Down
150 changes: 76 additions & 74 deletions src/sanzaru/descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,98 +185,100 @@

# ==================== IMAGE GENERATION TOOL DESCRIPTIONS ====================

CREATE_IMAGE = """Non-blocking async image generation with gpt-image-1.5 support.
CREATE_IMAGE = """Create an async image generation job via OpenAI Responses API.

Creates images from text prompts OR edits existing images by providing reference images.
Returns immediately with a response_id - use get_image_status() to poll for completion.
Supports iterative refinement via previous_response_id.
Returns immediately with a response_id. Poll with get_image_status() until completed, then download_image().
Best for: parallel generation (multiple images at once) and iterative refinement chains (previous_response_id).

**Best for:** parallel generation (multiple images at once), iterative refinement chains,
and workflows where you need to do other work while images generate.
For simple one-shot generation, generate_image is simpler (no polling needed).

**Text-only generation (no input_images):**
- Generates image from scratch based on prompt

**Image editing (with input_images):**
- Modifies existing images based on prompt
- Combines multiple images into new composition
- First image receives highest detail preservation
- Prompt describes desired changes, not what's already in images
For synchronous one-shot generation (no polling), use generate_image instead.
For Google Nano Banana generation, use create_image_google.

Parameters:
- prompt: Text description (required)
* Without input_images: Describe what to generate
* With input_images: Describe what changes to make
- model: Mainline model - "gpt-5.2" (default), "gpt-5.1", "gpt-5", etc.
- tool_config: Optional ImageGeneration configuration object (optional)
* Supports all fields: model, size, quality, moderation, input_fidelity, etc.
* MCP library handles serialization automatically
* See examples below for common configurations
- previous_response_id: Refine previous image iteratively (optional)
- input_images: List of filenames from IMAGE_PATH (optional)
* Example: ["cat.png"] or ["lotion.jpg", "soap.png", "bomb.jpg"]
* Use list_reference_images() to discover available images
* Supported formats: JPEG, PNG, WEBP
- mask_filename: PNG with alpha channel for inpainting (optional)
* Defines which region of first input image to edit
* Transparent = edit this area, black = keep original
* Requires input_images parameter

**Image generation models (tool_config.model):**
- gpt-image-1.5: STATE-OF-THE-ART (RECOMMENDED) - Best quality, better instruction following, improved text rendering
- gpt-image-1: High quality image generation
- gpt-image-1-mini: Fast, cost-effective generation

Common tool_config examples:

Best quality with GPT Image 1.5:
tool_config={"type": "image_generation", "model": "gpt-image-1.5"}

Fast generation with mini model:
tool_config={"type": "image_generation", "model": "gpt-image-1-mini"}

Lower content moderation:
tool_config={"type": "image_generation", "moderation": "low"}

High-fidelity with custom settings:
tool_config={
"type": "image_generation",
"model": "gpt-image-1.5",
"quality": "high",
"input_fidelity": "high",
"size": "1536x1024"
}
- prompt: Text description of image to generate (required)
- model: OpenAI model ID (default: "gpt-5.2")
- tool_config: ImageGeneration config object to control the image generation tool:
* gpt-image-1.5: STATE-OF-THE-ART (recommended)
* gpt-image-1: High quality
* gpt-image-1-mini: Fast, cost-effective
- previous_response_id: Refine a previous generation iteratively (optional)
- input_images: List of reference image filenames from IMAGE_PATH (optional)
- mask_filename: PNG with alpha channel for inpainting (optional, requires input_images)

Returns ImageResponse with {id, status, created_at} — poll then download.

Workflows:

1. Text-only generation (recommended):
1. Text-only generation:
create_image("sunset over mountains", tool_config={"type": "image_generation", "model": "gpt-image-1.5"})

2. Single image editing:
2. Image editing:
create_image("add a flamingo to the pool", input_images=["lounge.png"])

3. Multi-image composition:
create_image("gift basket with all these items", input_images=["lotion.png", "soap.png", "bomb.jpg"])
create_image("gift basket with all items", input_images=["lotion.png", "soap.png"])

4. High-fidelity logo placement:
create_image(
"add logo to woman's shirt",
input_images=["woman.jpg", "logo.png"],
tool_config={"type": "image_generation", "input_fidelity": "high"}
)

5. Masked inpainting:
4. Masked inpainting:
create_image("add flamingo", input_images=["pool.png"], mask_filename="pool_mask.png")

6. Fast generation with mini model:
create_image("quick sketch of a cat", tool_config={"type": "image_generation", "model": "gpt-image-1-mini"})

7. Iterative refinement:
5. Iterative refinement:
resp1 = create_image("a cyberpunk character")
resp2 = create_image("add neon details", previous_response_id=resp1.id)

Returns ImageResponse with: id, status, created_at"""
tool_config examples:
Best quality: {"type": "image_generation", "model": "gpt-image-1.5"}
Fast: {"type": "image_generation", "model": "gpt-image-1-mini"}
High-fidelity: {"type": "image_generation", "model": "gpt-image-1.5", "quality": "high", "size": "1536x1024"}"""

CREATE_IMAGE_GOOGLE = """Generate an image using Google Nano Banana (Gemini image models). Synchronous — image ready immediately.

No polling required. Returns the saved filename, dimensions, and format directly.
Supports reference images for editing, style transfer, and multi-image composition (up to 14 images).

Models:
- "gemini-3.1-flash-image-preview": Nano Banana 2 (DEFAULT, RECOMMENDED) — Flash speed + Pro quality, thinking-enhanced
- "gemini-3-pro-image-preview": Nano Banana Pro — max quality, complex instructions, slowest
- "gemini-2.5-flash-image": Nano Banana — fastest, high-volume generation

Parameters:
- prompt: Text description (required). When using input_images, describe only the desired edits/transformation.
- model: Google model ID (default: "gemini-3.1-flash-image-preview")
- aspect_ratio: "1:1" (default), "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "21:9", "5:4", "4:5"
- image_size: Output resolution: "1K" (default), "2K", "4K"
- filename: Custom output filename (auto-generated if omitted)
- input_images: List of reference image filenames from IMAGE_PATH (optional, max 14).
Supported formats: JPEG, PNG, WEBP. Use list_reference_images to find available images.
- safety_settings: List of {"category", "threshold"} dicts. All OFF by default.
Categories: HARM_CATEGORY_HATE_SPEECH, HARM_CATEGORY_DANGEROUS_CONTENT,
HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_HARASSMENT
Thresholds: "OFF" (default), "BLOCK_LOW_AND_ABOVE", "BLOCK_MEDIUM_AND_ABOVE", "BLOCK_HIGH_AND_ABOVE"

Returns ImageDownloadResult with {filename, size, format} — ready immediately.

Workflows:

1. Text-only generation:
create_image_google("a futuristic cityscape at dusk")

2. Landscape with high resolution:
create_image_google("mountain vista at golden hour", aspect_ratio="16:9", image_size="4K")

3. Max quality (Nano Banana Pro):
create_image_google("detailed product render", model="gemini-3-pro-image-preview")

4. Image editing with reference:
create_image_google("make this watercolor style", input_images=["photo.png"])

5. Multi-image composition:
create_image_google("combine these into a collage", input_images=["img1.png", "img2.png", "img3.png"])

6. Character consistency (same character, new scene):
create_image_google("place this character in a forest", input_images=["character.png"])

7. Style transfer from reference:
create_image_google("apply this art style to a cityscape", input_images=["style_ref.png"])

8. Custom filename:
create_image_google("a cute robot", filename="robot_concept.png")"""

GET_IMAGE_STATUS = """Check status and progress of image generation.

Expand Down
44 changes: 44 additions & 0 deletions src/sanzaru/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,49 @@ def check_databricks_storage() -> bool:
return True


def check_google_available() -> bool:
    """Report whether Google Nano Banana image generation can be used.

    Availability requires both of:
        1. The google-genai package (installed via the ``google`` extra).
        2. Credentials, via either:
           - GOOGLE_GENAI_USE_VERTEXAI=True plus GOOGLE_CLOUD_PROJECT and/or
             GOOGLE_API_KEY (Vertex AI — ADC or Express mode), or
           - GOOGLE_API_KEY alone (Gemini Developer API).

    Returns:
        True if google-genai is installed and credentials are configured, False otherwise
    """
    # Package check first; without the extra nothing else matters.
    try:
        import google.genai  # noqa: F401
    except ImportError:
        return False

    flag = os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "").lower()

    if flag not in ("true", "1"):
        # Gemini Developer API path: a bare API key is sufficient.
        if os.getenv("GOOGLE_API_KEY"):
            logger.info("Google Nano Banana available via Gemini Developer API")
            return True
        return False

    # Vertex AI path: report which auth mode will be used (an API key takes
    # precedence over project-based ADC, mirroring get_google_client).
    project = os.getenv("GOOGLE_CLOUD_PROJECT")
    api_key = os.getenv("GOOGLE_API_KEY")

    if api_key and project:
        logger.info(
            "Google Nano Banana available via Vertex AI Express (api_key takes precedence, project=%s ignored)",
            project,
        )
        return True
    if project:
        logger.info("Google Nano Banana available via Vertex AI ADC (project=%s)", project)
        return True
    if api_key:
        logger.info("Google Nano Banana available via Vertex AI Express (api_key only)")
        return True

    logger.info("GOOGLE_GENAI_USE_VERTEXAI=True but neither GOOGLE_CLOUD_PROJECT nor GOOGLE_API_KEY set")
    return False


def get_available_features() -> dict[str, bool]:
"""Get a dictionary of available features.

Expand All @@ -130,4 +173,5 @@ def get_available_features() -> dict[str, bool]:
"video": check_video_available(),
"audio": check_audio_available(),
"image": check_image_available(),
"google": check_google_available(),
}
Loading
Loading