11cafe · Weixuanf · Jul 10, 2025 · Jul 11, 2025 · Jul 11, 2025 · Jul 11, 2025
diff --git a/packages/jaaz_ui/package.json b/packages/jaaz_ui/package.json
diff --git a/react/src/api/upload.ts b/react/src/api/upload.ts
@@ -1,4 +1,6 @@
-import { compressImageFile } from '@/utils/imageUtils'
+import { compressImageFile, fileToBase64 } from '@/utils/imageUtils'
+import { BASE_API_URL } from '../constants'
+import { authenticatedFetch } from './auth'
 
 export async function uploadImage(
   file: File
@@ -14,3 +16,55 @@ export async function uploadImage(
   })
   return await response.json()
 }
+
+/**
+ * Compresses the image, base64-encodes it and POSTs it as JSON to `/api/v1/image/upload` via `authenticatedFetch`.
+ * @param file - Image file to upload
+ * @returns `data.s3Url` from the JSON response; rejects on a non-OK status or when `success`/`s3Url` is missing
+ */
+export async function uploadImageToJaaz(file: File): Promise<string> {
+  try {
+    // Compress image before upload
+    const compressedFile = await compressImageFile(file)
+
+    // Convert the compressed file to a base64 data URL
+    const base64Data = await fileToBase64(compressedFile)
+
+    // Prepare request body (name and MIME type are taken from the compressed file, not the original)
+    const requestBody = {
+      base64Data: base64Data.split(',')[1], // Payload after the data-URL comma, e.g. drops `data:image/jpeg;base64,`
+      fileName: compressedFile.name,
+      contentType: compressedFile.type,
+    }
+
+    // Make authenticated request to Jaaz cloud API
+    const response = await authenticatedFetch(
+      `${BASE_API_URL}/api/v1/image/upload`,
+      {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+        },
+        body: JSON.stringify(requestBody),
+      }
+    )
+    // Non-OK status: prefer the body's `error` field; fall back to the status code if the body is not JSON or has none
+    if (!response.ok) {
+      const errorData = await response.json().catch(() => ({}))
+      throw new Error(
+        errorData.error || `Upload failed with status ${response.status}`
+      )
+    }
+
+    const result = await response.json()
+    // An OK response must still report `success` and carry `data.s3Url`
+    if (!result.success || !result.data?.s3Url) {
+      throw new Error(result.error || 'Upload failed - no URL returned')
+    }
+
+    return result.data.s3Url
+  } catch (error) {
+    console.error('Failed to upload image to Jaaz:', error)
+    throw error
+  }
+}
diff --git a/react/src/components/chat/ChatTextarea.tsx b/react/src/components/chat/ChatTextarea.tsx
@@ -1,6 +1,6 @@
 import { cancelChat } from '@/api/chat'
 import { cancelMagicGenerate } from '@/api/magic'
-import { uploadImage } from '@/api/upload'
+import { uploadImage, uploadImageToJaaz } from '@/api/upload'
 import { Button } from '@/components/ui/button'
 import { useConfigs } from '@/contexts/configs'
 import {
@@ -20,7 +20,6 @@ import Textarea, { TextAreaRef } from 'rc-textarea'
 import { useCallback, useEffect, useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import { toast } from 'sonner'
-import ModelSelector from './ModelSelector'
 import ModelSelectorV2 from './ModelSelectorV2'
 import { useAuth } from '@/contexts/AuthContext'
 
@@ -54,33 +53,67 @@ const ChatTextarea: React.FC<ChatTextareaProps> = ({
   const textareaRef = useRef<TextAreaRef>(null)
   const [images, setImages] = useState<
     {
-      file_id: string
+      file_id?: string
       width: number
       height: number
+      url?: string // S3 URL if uploaded to Jaaz
+    }[]
+  >([])
+  const [uploadingImages, setUploadingImages] = useState<
+    {
+      id: string
+      file: File
+      previewUrl: string
     }[]
   >([])
   const [isFocused, setIsFocused] = useState(false)
 
   const imageInputRef = useRef<HTMLInputElement>(null)
 
+  // Uploads a file to the local server and tracks it in `uploadingImages` (with a blob preview URL) until it settles
   const { mutate: uploadImageMutation } = useMutation({
-    mutationFn: (file: File) => uploadImage(file),
-    onSuccess: (data) => {
+    mutationFn: async (file: File) => {
+      // Upload to local server. NOTE(review): the uploadId returned here is computed separately from onMutate's and is not read by the handlers below
+      const result = await uploadImage(file)
+      return { ...result, url: undefined, uploadId: file.name + Date.now() }
+    },
+    onMutate: (file: File) => {
+      // Add to uploading images immediately; the returned uploadId is what onSuccess/onError read as context.uploadId
+      const uploadId = file.name + Date.now()
+      const previewUrl = URL.createObjectURL(file)
+      setUploadingImages((prev) => [
+        ...prev,
+        { id: uploadId, file, previewUrl },
+      ])
+      return { uploadId }
+    },
+    onSuccess: (data, file, context) => {
       console.log('🦄uploadImageMutation onSuccess', data)
+      // Remove the finished upload from the in-flight list
+      setUploadingImages((prev) =>
+        prev.filter((img) => img.id !== context?.uploadId)
+      )
+
+      // Add to completed images; `url` is always undefined here because mutationFn sets it so
       setImages((prev) => [
         ...prev,
         {
           file_id: data.file_id,
           width: data.width,
           height: data.height,
+          url: data.url,
         },
       ])
     },
-    onError: (error) => {
+    onError: (error, file, context) => {
       console.error('🦄uploadImageMutation onError', error)
       toast.error('Failed to upload image', {
         description: <div>{error.toString()}</div>,
       })
+      // Remove the failed upload from the in-flight list
+      setUploadingImages((prev) =>
+        prev.filter((img) => img.id !== context?.uploadId)
+      )
     },
   })
 
@@ -119,48 +152,59 @@ const ChatTextarea: React.FC<ChatTextareaProps> = ({
       toast.warning(t('chat:textarea.selectTool'))
     }
 
-    let value: MessageContent[] | string = prompt
+    let text_content: MessageContent[] | string = prompt
     if (prompt.length === 0 || prompt.trim() === '') {
       toast.error(t('chat:textarea.enterPrompt'))
       return
     }
 
+    // Describe the attached images as XML so the LLM can recognize them more easily
     if (images.length > 0) {
-      images.forEach((image) => {
-        value += `\n\n ![Attached image - width: ${image.width} height: ${image.height} filename: ${image.file_id}](/api/file/${image.file_id})`
+      text_content += `\n\n<input_images count="${images.length}">`
+      images.forEach((image, index) => {
+        const imageId = image.file_id || `image-${index}`
+        text_content += `\n  <image index="${index + 1}" file_id="${imageId}" width="${image.width}" height="${image.height}" />`
       })
+      text_content += `\n</input_images>`
+      text_content += `\n\n<instruction>Please use the input_images as input for image generation or editing.</instruction>`
+    }
 
-      // Fetch images as base64
-      const imagePromises = images.map(async (image) => {
+    // Fetch each image and read it as a base64 data URL
+    const imagePromises = images.map(async (image) => {
+      // console.log('🦄imagePromises', image)
+      if (image.file_id) {
+        // Get local URL and convert to base64
         const response = await fetch(`/api/file/${image.file_id}`)
         const blob = await response.blob()
         return new Promise<string>((resolve) => {
           const reader = new FileReader()
           reader.onloadend = () => resolve(reader.result as string)
           reader.readAsDataURL(blob)
         })
-      })
+      } else {
+        throw new Error('Invalid image data')
+      }
+    })
 
-      const base64Images = await Promise.all(imagePromises)
+    const imageUrlList = await Promise.all(imagePromises)
 
-      value = [
-        {
-          type: 'text',
-          text: value,
+    const final_content = [
+      {
+        type: 'text',
+        text: text_content,
+      },
+      ...images.map((image, index) => ({
+        type: 'image_url',
+        image_url: {
+          url: imageUrlList[index],
         },
-        ...images.map((image, index) => ({
-          type: 'image_url',
-          image_url: {
-            url: base64Images[index],
-          },
-        })),
-      ] as MessageContent[]
-    }
+      })),
+    ] as MessageContent[]
 
     const newMessage = messages.concat([
       {
         role: 'user',
-        content: value,
+        content: final_content,
       },
     ])
 
@@ -262,6 +306,15 @@ const ChatTextarea: React.FC<ChatTextareaProps> = ({
     }
   }, [uploadImageMutation])
 
+  // Revoke a preview URL only after its entry leaves `uploadingImages`; revoking the whole list on every change would also invalidate URLs still uploading
+  const previewUrlsRef = useRef<string[]>([])
+  useEffect(() => {
+    const current = uploadingImages.map((img) => img.previewUrl)
+    previewUrlsRef.current.filter((u) => !current.includes(u)).forEach((u) => URL.revokeObjectURL(u))
+    previewUrlsRef.current = current // remember the live URLs for the next diff and for unmount
+  }, [uploadingImages])
+  useEffect(() => () => previewUrlsRef.current.forEach((u) => URL.revokeObjectURL(u)), []) // unmount: revoke whatever is left
+
   return (
     <motion.div
       ref={dropAreaRef}
@@ -299,26 +352,64 @@ const ChatTextarea: React.FC<ChatTextareaProps> = ({
       </AnimatePresence>
 
       <AnimatePresence>
-        {images.length > 0 && (
+        {(images.length > 0 || uploadingImages.length > 0) && (
           <motion.div
             className="flex items-center gap-2 w-full"
             initial={{ opacity: 0, height: 0 }}
             animate={{ opacity: 1, height: 'auto' }}
             exit={{ opacity: 0, height: 0 }}
             transition={{ duration: 0.2, ease: 'easeInOut' }}
           >
-            {images.map((image) => (
+            {/* Show uploading images first */}
+            {uploadingImages.map((uploadingImage) => (
+              <motion.div
+                key={uploadingImage.id}
+                className="relative size-10"
+                initial={{ opacity: 0, scale: 0.95 }}
+                animate={{ opacity: 1, scale: 1 }}
+                exit={{ opacity: 0, scale: 0.95 }}
+                transition={{ duration: 0.2, ease: 'easeInOut' }}
+              >
+                <img
+                  src={uploadingImage.previewUrl}
+                  alt="Uploading image"
+                  className="w-full h-full object-cover rounded-md opacity-50"
+                  draggable={false}
+                />
+                {/* Upload spinner */}
+                <div className="absolute inset-0 flex items-center justify-center bg-black/20 rounded-md">
+                  <Loader2 className="size-4 animate-spin text-white" />
+                </div>
+                <Button
+                  variant="secondary"
+                  size="icon"
+                  className="absolute -top-1 -right-1 size-4"
+                  onClick={() =>
+                    setUploadingImages((prev) =>
+                      prev.filter((img) => img.id !== uploadingImage.id)
+                    )
+                  }
+                >
+                  <XIcon className="size-3" />
+                </Button>
+              </motion.div>
+            ))}
+
+            {/* Show completed images */}
+            {images.map((image, index) => (
               <motion.div
-                key={image.file_id}
+                key={image.file_id || `image-${index}`}
                 className="relative size-10"
                 initial={{ opacity: 0, scale: 0.95 }}
                 animate={{ opacity: 1, scale: 1 }}
                 exit={{ opacity: 0, scale: 0.95 }}
                 transition={{ duration: 0.2, ease: 'easeInOut' }}
               >
                 <img
-                  key={image.file_id}
-                  src={`/api/file/${image.file_id}`}
+                  src={
+                    image.url ||
+                    (image.file_id ? `/api/file/${image.file_id}` : '')
+                  }
                   alt="Uploaded image"
                   className="w-full h-full object-cover rounded-md"
                   draggable={false}
@@ -328,9 +419,7 @@ const ChatTextarea: React.FC<ChatTextareaProps> = ({
                   size="icon"
                   className="absolute -top-1 -right-1 size-4"
                   onClick={() =>
-                    setImages((prev) =>
-                      prev.filter((i) => i.file_id !== image.file_id)
-                    )
+                    setImages((prev) => prev.filter((_, i) => i !== index))
                   }
                 >
                   <XIcon className="size-3" />

diff --git a/react/src/components/chat/Message/Image.tsx b/react/src/components/chat/Message/Image.tsx
@@ -26,16 +26,16 @@ const MessageImage = ({ content }: MessageImageProps) => {
     excalidrawAPI?.scrollToContent(id, { animate: true })
   }
   const id = filesArray.find((file) =>
-    content.image_url.url?.includes(file.url)
+    content.image_url?.url?.includes(file.url)
   )?.id
 
   return (
     <div>
-      <PhotoView src={content.image_url.url}>
+      <PhotoView src={content.image_url?.url}>
         <div className="relative">
           <img
             className="hover:scale-105 transition-transform duration-300"
-            src={content.image_url.url}
+            src={content.image_url?.url}
             alt="Image"
           />
 

diff --git a/react/src/utils/imageUtils.ts b/react/src/utils/imageUtils.ts
@@ -12,7 +12,7 @@ interface ProcessedImage {
 /**
  * Convert file to base64 data URL
  */
-function fileToBase64(file: File): Promise<string> {
+export function fileToBase64(file: File): Promise<string> {
   return new Promise((resolve, reject) => {
     const reader = new FileReader()
     reader.onload = () => resolve(reader.result as string)

diff --git a/server/services/langgraph_service/configs/image_vide_creator_config.py b/server/services/langgraph_service/configs/image_vide_creator_config.py
@@ -43,8 +43,23 @@
 3. If it is a video generation task, use video generation tools to generate the video. You can choose to generate the necessary images first, and then use the images to generate the video, or directly generate the video using text prompt.
 """
 
+
 class ImageVideoCreatorAgentConfig(BaseAgentConfig):
     def __init__(self, tool_list: List[ToolInfoJson]) -> None:
+
+        image_input_detection_prompt = """
+IMAGE INPUT DETECTION:
+When the user's message contains input images in XML format like:
+<input_images></input_images>
+
+You MUST:
+1. Parse the XML to extract file_id attributes from <image> tags
+2. Use tools that support input_images parameter when images are present
+3. Pass the extracted file_id(s) in the input_images parameter as a list
+4. If input_images count > 1 , only use generate_image_by_gpt_image_1_jaaz (supports multiple images)
+5. For video generation → use video tools with input_images if images are present
+"""
+
         batch_generation_prompt = """
 
 BATCH GENERATION RULES:
@@ -74,6 +89,7 @@ def __init__(self, tool_list: List[ToolInfoJson]) -> None:
 """
 
         full_system_prompt = system_prompt + \
+            image_input_detection_prompt + \
             batch_generation_prompt + error_handling_prompt
 
         # 图像设计智能体不需要切换到其他智能体