CloudhandsAI
diff --git a/‎.github/workflows/build.yml‎
Lines changed: 36 additions & 10 deletions b/‎.github/workflows/build.yml‎
Lines changed: 36 additions & 10 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 29 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 8 additions & 0 deletions b/‎README.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎assets/lens/example.png‎
630 KB b/‎assets/lens/example.png‎
630 KB
diff --git a/‎assets/lens/turbo_example.png‎
555 KB b/‎assets/lens/turbo_example.png‎
555 KB
diff --git a/‎assets/longcat/example.png‎
423 KB b/‎assets/longcat/example.png‎
423 KB
diff --git a/‎assets/pid/example.png‎
9.03 MB b/‎assets/pid/example.png‎
9.03 MB
diff --git a/‎docs/lens.md‎
Lines changed: 32 additions & 0 deletions b/‎docs/lens.md‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎docs/longcat_image.md‎
Lines changed: 30 additions & 0 deletions b/‎docs/longcat_image.md‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎docs/pid.md‎
Lines changed: 39 additions & 0 deletions b/‎docs/pid.md‎
Lines changed: 39 additions & 0 deletions
@@ -545,6 +545,30 @@ jobs:
       - name: Pack artifacts
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         run: |
+          # Bundle the ROCm runtime with the binaries; any failure below must abort the step.
+          $ErrorActionPreference = "Stop"
+          $dst = "build\bin"
+          $rocmBin = Join-Path "${env:HIP_PATH}" "bin"
+          # rocblas.dll and rocblas\library are mandatory; fail early if either is missing.
+          foreach ($path in @((Join-Path $rocmBin "rocblas.dll"), (Join-Path $rocmBin "rocblas\library"))) {
+            if (!(Test-Path $path)) {
+              throw "Missing ROCm runtime dependency: $path"
+            }
+          }
+
+          # A pattern without matches is skipped, but a failed copy of an existing DLL stops the job.
+          foreach ($pattern in @("rocblas*.dll", "hipblas*.dll", "libhipblas*.dll")) {
+            Get-ChildItem -Path (Join-Path $rocmBin $pattern) -File | Copy-Item -Destination $dst -Force
+          }
+
+          # These directories are copied when present (rocblas was verified above).
+          foreach ($dir in @("rocblas", "hipblaslt")) {
+            $src = Join-Path $rocmBin $dir
+            if (Test-Path $src) {
+              Copy-Item -Path $src -Destination $dst -Recurse -Force
+            }
+          }
+
           7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-rocm-${{ env.ROCM_VERSION }}-x64.zip .\build\bin\*
 
       - name: Upload artifacts
@@ -687,16 +711,6 @@ jobs:
         with:
           submodules: recursive
 
-      - name: Setup Node
-        uses: actions/setup-node@v4
-        with:
-          node-version: 20
-
-      - name: Setup pnpm
-        uses: pnpm/action-setup@v4
-        with:
-          version: 10.15.1
-
       - name: ccache
         uses: ggml-org/ccache-action@v1.2.16
         with:
@@ -754,6 +768,18 @@ jobs:
           echo PATH=$PATH:$ROCM_PATH/bin >> $GITHUB_ENV
           echo LD_LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/llvm/lib:$ROCM_PATH/lib/rocprofiler-systems >> $GITHUB_ENV
 
+      # setup-node installs into /opt/hostedtoolcache, which is removed above.
+      # Keep Node/pnpm setup after disk cleanup so the server frontend can be embedded.
+      - name: Setup Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: 20
+
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 10.15.1
+
       - name: Build
         id: cmake_build
         run: |
 
@@ -22,6 +22,31 @@ endif()
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 
+if(APPLE)
+    # Give executables and shared/module libraries relocatable rpaths, because
+    # release artifacts are zipped straight from the build output directory.
+    # Targets of any other type are left untouched.
+    function(sd_set_macos_rpaths target)
+        get_target_property(kind ${target} TYPE)
+        if(kind STREQUAL "EXECUTABLE")
+            set(anchor "@executable_path")
+        elseif(kind STREQUAL "SHARED_LIBRARY" OR kind STREQUAL "MODULE_LIBRARY")
+            set(anchor "@loader_path")
+            # Libraries additionally get an @rpath-based install name.
+            set_property(TARGET ${target} PROPERTY MACOSX_RPATH ON)
+            set_property(TARGET ${target} PROPERTY INSTALL_NAME_DIR "@rpath")
+            set_property(TARGET ${target} PROPERTY BUILD_WITH_INSTALL_NAME_DIR ON)
+        else()
+            return()
+        endif()
+
+        set(search_paths "${anchor}" "${anchor}/../lib")
+        set_property(TARGET ${target} PROPERTY BUILD_RPATH "${search_paths}")
+        set_property(TARGET ${target} PROPERTY INSTALL_RPATH "${search_paths}")
+        set_property(TARGET ${target} PROPERTY BUILD_WITH_INSTALL_RPATH ON)
+    endfunction()
+endif()
+
 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
     set(SD_STANDALONE ON)
 else()
@@ -237,6 +262,10 @@ else()
     add_library(${SD_LIB} STATIC ${SD_LIB_SOURCES})
 endif()
 
+if(APPLE)
+    sd_set_macos_rpaths(${SD_LIB})  # returns early without changes when the target is a static library
+endif()
+
 if(SD_SYCL)
     message("-- Use SYCL as backend stable-diffusion")
     set(GGML_SYCL ON)
 
@@ -15,6 +15,8 @@ API and command-line option may change frequently.***
 
 ## 🔥Important News
 
+* **2026/05/31** 🚀 stable-diffusion.cpp now supports **PiD**
+* **2026/05/27** 🚀 stable-diffusion.cpp now supports **Lens**
 * **2026/05/17** 🚀 stable-diffusion.cpp now supports **LTX-2.3**
 * **2026/04/11** 🚀 stable-diffusion.cpp now uses a brand-new embedded web UI.  
 * **2026/01/18** 🚀 stable-diffusion.cpp now supports **FLUX.2-klein**  
@@ -37,9 +39,12 @@ API and command-line option may change frequently.***
     - [SD3/SD3.5](./docs/sd3.md)
     - [FLUX.1-dev/FLUX.1-schnell](./docs/flux.md)
     - [FLUX.2-dev/FLUX.2-klein](./docs/flux2.md)
+    - [Lens](./docs/lens.md)
     - [Chroma](./docs/chroma.md)
     - [Chroma1-Radiance](./docs/chroma_radiance.md)
     - [Qwen Image](./docs/qwen_image.md)
+    - [PiD](./docs/pid.md)
+    - [LongCat Image](./docs/longcat_image.md)
     - [Z-Image](./docs/z_image.md)
     - [Ovis-Image](./docs/ovis_image.md)
     - [Anima](./docs/anima.md)
@@ -48,6 +53,7 @@ API and command-line option may change frequently.***
   - Image Edit Models
     - [FLUX.1-Kontext-dev](./docs/kontext.md)
     - [Qwen Image Edit series](./docs/qwen_image_edit.md)
+    - [LongCat Image Edit](./docs/longcat_image.md)
   - Video Models
     - [Wan2.1/Wan2.2](./docs/wan.md)
     - [LTX-2.3](./docs/ltx2.md)
@@ -140,6 +146,8 @@ For runtime and parameter backend placement, see the [backend selection guide](.
 - [Anima](./docs/anima.md)
 - [ERNIE-Image](./docs/ernie_image.md)
 - [HiDream-O1-Image](./docs/hidream_o1_image.md)
+- [Lens](./docs/lens.md)
+- [LongCat Image / LongCat Image Edit](./docs/longcat_image.md)
 - [LoRA](./docs/lora.md)
 - [LCM/LCM-LoRA](./docs/lcm.md)
 - [Using PhotoMaker to personalize image generation](./docs/photo_maker.md)
 
@@ -0,0 +1,32 @@
+# How to Use
+
+Lens uses a Lens diffusion transformer, the FLUX.2 VAE, and GPT-OSS-20B as the LLM text encoder.
+
+## Download weights
+
+- Download Lens
+    - safetensors: https://huggingface.co/Comfy-Org/Lens/tree/main/diffusion_models
+- Download Lens Turbo
+    - safetensors: https://huggingface.co/Comfy-Org/Lens/tree/main/diffusion_models
+- Download vae
+    - safetensors: https://huggingface.co/black-forest-labs/FLUX.2-dev/tree/main
+- Download GPT-OSS-20B
+    - gguf: https://huggingface.co/unsloth/gpt-oss-20b-GGUF/tree/main
+
+## Examples
+
+### Lens
+
+```
+.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\lens_bf16.safetensors --llm "..\..\llm\gpt-oss-20b-UD-Q8_K_XL.gguf" --vae ..\..\ComfyUI\models\vae\flux2_ae.safetensors --cfg-scale 5.0  -p "A crystal dragon soaring through an aurora borealis sky, its entire body made of transparent faceted crystal refracting the green and purple aurora light into rainbow spectra, ice particles trailing from its wings, high fantasy digital art" --diffusion-fa -v
+```
+
+<img width="256" alt="Lens example" src="../assets/lens/example.png" />
+
+### Lens Turbo
+
+```
+.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\lens_turbo_bf16.safetensors --llm "..\..\llm\gpt-oss-20b-UD-Q8_K_XL.gguf" --vae ..\..\ComfyUI\models\vae\flux2_ae.safetensors --cfg-scale 1.0  -p "A crystal dragon soaring through an aurora borealis sky, its entire body made of transparent faceted crystal refracting the green and purple aurora light into rainbow spectra, ice particles trailing from its wings, high fantasy digital art" --diffusion-fa -v --steps 4
+```
+
+<img width="256" alt="Lens Turbo example" src="../assets/lens/turbo_example.png" />
@@ -0,0 +1,30 @@
+# How to Use
+
+LongCat-Image uses a LongCat diffusion transformer, the FLUX VAE, and Qwen2.5-VL as the LLM text encoder.
+
+## Download weights
+
+- Download LongCat Image
+    - safetensors: https://huggingface.co/Comfy-Org/LongCat-Image/tree/main/split_files/diffusion_models
+    - gguf: https://huggingface.co/vantagewithai/LongCat-Image-GGUF/tree/main/comfy
+- Download LongCat Image Edit
+    - LongCat Image Edit Turbo: https://huggingface.co/meituan-longcat/LongCat-Image-Edit-Turbo
+    - gguf: https://huggingface.co/vantagewithai/LongCat-Image-Edit-GGUF/tree/main
+- Download vae
+    - safetensors: https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/ae.safetensors
+- Download qwen_2.5_vl 7b
+    - safetensors: https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/text_encoders
+    - gguf: https://huggingface.co/mradermacher/Qwen2.5-VL-7B-Instruct-GGUF/tree/main
+    - For image editing with GGUF text encoders, also download the matching mmproj file and pass it with `--llm_vision`.
+
+## Run
+
+LongCat applies character-level text rendering to quoted text. Put the text you want rendered inside single quotes, double quotes, or Chinese quotes.
+
+### LongCat Image
+
+```
+.\bin\Release\sd-cli.exe --diffusion-model  ..\..\ComfyUI\models\diffusion_models\LongCat-Image-Q4_K_M.gguf --vae ..\..\ComfyUI\models\vae\ae.sft --llm ..\..\ComfyUI\models\text_encoders\Qwen2.5-VL-7B-Instruct-Q8_0.gguf -p "a lovely cat holding a sign says 'longcat.cpp'" --cfg-scale 5.0 --sampling-method euler --flow-shift 3 -v --offload-to-cpu --diffusion-fa
+```
+
+<img alt="longcat example" src="../assets/longcat/example.png" />
@@ -0,0 +1,39 @@
+# How to Use
+
+PiD is NVIDIA's Pixel Diffusion Decoder. It replaces the usual VAE decode or decode-then-upscale path with a pixel-space diffusion decoder conditioned on a
+source latent and text prompt.
+
+In stable-diffusion.cpp, PiD currently runs as an image-edit pipeline: provide a reference image with `-r`/`--ref-image`; the image is encoded with the matching VAE, and the PiD diffusion model then decodes/upscales the resulting latent directly to RGB.
+
+## Download weights
+
+- Download PiD
+    - safetensors: https://huggingface.co/Comfy-Org/PixelDiT/tree/main/diffusion_models
+- Download Gemma 2 2B
+    - safetensors: https://huggingface.co/Comfy-Org/PixelDiT/tree/main/text_encoders
+- Download the VAE that matches the PiD checkpoint backbone
+    - safetensors: https://huggingface.co/nvidia/PiD/tree/main/checkpoints
+    - Flux / Z-Image PiD: use the Flux VAE and pass `--vae-format flux`
+    - SD3 PiD: use the SD3 VAE and pass `--vae-format sd3`
+    - Flux.2 PiD: use the Flux.2 VAE and pass `--vae-format flux2`
+
+Check the official PiD model card before use. At the time of the initial PiD release, the official weights were published under the NSCLv1 non-commercial license.
+
+## Examples
+
+```
+.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\pid_flux1_512_to_2048_4step_bf16.safetensors --llm "..\..\ComfyUI\models\text_encoders\gemma_2_2b_it_elm_bf16.safetensors" --vae ..\..\ComfyUI\models\vae\ae.sft --vae-format flux --cfg-scale 1.0  -p "a lovely cat" -r ..\assets\ernie_image\turbo_example.png --diffusion-fa -v --steps 4 -H 2048 -W 2048 --rng cpu
+```
+
+Before:
+
+<img width="256" alt="ERNIE-Image Turbo example" src="../assets/ernie_image/turbo_example.png" />
+
+After:
+<img width="1024" alt="PiD example" src="../assets/pid/example.png" />
+
+## Notes
+
+- `-r`/`--ref-image` is required. PiD uses the first reference image as the source latent condition.
+- `--vae-format` should match the VAE latent layout used by the PiD checkpoint. This is important when using standalone VAE files because the PiD diffusion
+  checkpoint alone does not identify the VAE format.