leejet
diff --git a/‎README.md‎
Lines changed: 4 additions & 1 deletion b/‎README.md‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎assets/lens/example.png‎
630 KB b/‎assets/lens/example.png‎
630 KB
diff --git a/‎assets/lens/turbo_example.png‎
555 KB b/‎assets/lens/turbo_example.png‎
555 KB
diff --git a/‎docs/lens.md‎
Lines changed: 32 additions & 0 deletions b/‎docs/lens.md‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎src/conditioner.hpp‎
Lines changed: 33 additions & 2 deletions b/‎src/conditioner.hpp‎
Lines changed: 33 additions & 2 deletions
diff --git a/‎src/diffusion_model.hpp‎
Lines changed: 67 additions & 0 deletions b/‎src/diffusion_model.hpp‎
Lines changed: 67 additions & 0 deletions
@@ -15,6 +15,7 @@ API and command-line option may change frequently.***
 
 ## 🔥Important News
 
+* **2026/05/27** 🚀 stable-diffusion.cpp now supports **Lens**
 * **2026/05/17** 🚀 stable-diffusion.cpp now supports **LTX-2.3**
 * **2026/04/11** 🚀 stable-diffusion.cpp now uses a brand-new embedded web UI.  
 * **2026/01/18** 🚀 stable-diffusion.cpp now supports **FLUX.2-klein**  
@@ -37,6 +38,7 @@ API and command-line option may change frequently.***
     - [SD3/SD3.5](./docs/sd3.md)
     - [FLUX.1-dev/FLUX.1-schnell](./docs/flux.md)
     - [FLUX.2-dev/FLUX.2-klein](./docs/flux2.md)
+    - [Lens](./docs/lens.md)
     - [Chroma](./docs/chroma.md)
     - [Chroma1-Radiance](./docs/chroma_radiance.md)
     - [Qwen Image](./docs/qwen_image.md)
@@ -135,14 +137,15 @@ For runtime and parameter backend placement, see the [backend selection guide](.
 - [Chroma](./docs/chroma.md)
 - [🔥Qwen Image](./docs/qwen_image.md)
 - [🔥Qwen Image Edit series](./docs/qwen_image_edit.md)
-- [🔥LongCat Image / LongCat Image Edit](./docs/longcat_image.md)
 - [🔥Wan2.1/Wan2.2](./docs/wan.md)
 - [🔥LTX-2.3](./docs/ltx2.md)
 - [🔥Z-Image](./docs/z_image.md)
 - [Ovis-Image](./docs/ovis_image.md)
 - [Anima](./docs/anima.md)
 - [ERNIE-Image](./docs/ernie_image.md)
 - [HiDream-O1-Image](./docs/hidream_o1_image.md)
+- [Lens](./docs/lens.md)
+- [LongCat Image / LongCat Image Edit](./docs/longcat_image.md)
 - [LoRA](./docs/lora.md)
 - [LCM/LCM-LoRA](./docs/lcm.md)
 - [Using PhotoMaker to personalize image generation](./docs/photo_maker.md)
 
@@ -0,0 +1,32 @@
+# How to Use
+
+Lens uses a Lens diffusion transformer, the FLUX.2 VAE, and GPT-OSS-20B as the LLM text encoder.
+
+## Download weights
+
+- Download Lens
+    - safetensors: https://huggingface.co/Comfy-Org/Lens/tree/main/diffusion_models
+- Download Lens Turbo
+    - safetensors: https://huggingface.co/Comfy-Org/Lens/tree/main/diffusion_models
+- Download the FLUX.2 VAE
+    - safetensors: https://huggingface.co/black-forest-labs/FLUX.2-dev/tree/main
+- Download GPT-OSS-20B
+    - gguf: https://huggingface.co/unsloth/gpt-oss-20b-GGUF/tree/main
+
+## Examples
+
+### Lens
+
+```
+.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\lens_bf16.safetensors --llm "..\..\llm\gpt-oss-20b-UD-Q8_K_XL.gguf" --vae ..\..\ComfyUI\models\vae\flux2_ae.safetensors --cfg-scale 5.0  -p "A crystal dragon soaring through an aurora borealis sky, its entire body made of transparent faceted crystal refracting the green and purple aurora light into rainbow spectra, ice particles trailing from its wings, high fantasy digital art" --diffusion-fa -v
+```
+
+<img width="256" alt="Lens example" src="../assets/lens/example.png" />
+
+### Lens Turbo
+
+```
+.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\lens_turbo_bf16.safetensors --llm "..\..\llm\gpt-oss-20b-UD-Q8_K_XL.gguf" --vae ..\..\ComfyUI\models\vae\flux2_ae.safetensors --cfg-scale 1.0  -p "A crystal dragon soaring through an aurora borealis sky, its entire body made of transparent faceted crystal refracting the green and purple aurora light into rainbow spectra, ice particles trailing from its wings, high fantasy digital art" --diffusion-fa -v --steps 4
+```
+
+<img width="256" alt="Lens Turbo example" src="../assets/lens/turbo_example.png" />
@@ -1696,11 +1696,15 @@ struct LLMEmbedder : public Conditioner {
             arch = LLM::LLMArch::MISTRAL_SMALL_3_2;
         } else if (sd_version_is_ernie_image(version)) {
             arch = LLM::LLMArch::MINISTRAL_3_3B;
+        } else if (sd_version_is_lens(version)) {
+            arch = LLM::LLMArch::GPT_OSS_20B;
         } else if (sd_version_is_z_image(version) || version == VERSION_OVIS_IMAGE || version == VERSION_FLUX2_KLEIN) {
             arch = LLM::LLMArch::QWEN3;
         }
         if (arch == LLM::LLMArch::MISTRAL_SMALL_3_2 || arch == LLM::LLMArch::MINISTRAL_3_3B) {
             tokenizer = std::make_shared<MistralTokenizer>();
+        } else if (arch == LLM::LLMArch::GPT_OSS_20B) {
+            tokenizer = std::make_shared<GPTOSSTokenizer>();
         } else {
             tokenizer = std::make_shared<Qwen2Tokenizer>();
         }
@@ -1871,6 +1875,7 @@ struct LLMEmbedder : public Conditioner {
         std::vector<std::pair<int, sd::Tensor<float>>> image_embeds;
         int prompt_template_encode_start_idx = 34;
         int min_length                       = 0;  // pad tokens
+        int max_length                       = 100000000;
         int hidden_states_min_length         = 0;  // zero pad hidden_states
         bool spell_quotes                    = false;
         std::set<int> out_layers;
@@ -2029,6 +2034,30 @@ struct LLMEmbedder : public Conditioner {
             prompt_attn_range.first = 0;
             prompt += conditioner_params.text;
             prompt_attn_range.second = static_cast<int>(prompt.size());
+        } else if (sd_version_is_lens(version)) {
+            prompt_template_encode_start_idx = 97;
+            min_length                       = 0;
+            max_length                       = 512;
+            out_layers                       = {6, 12, 18, 24};
+
+            prompt =
+                "<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.\n"
+                "Knowledge cutoff: 2024-06\n"
+                "Current date: 2026-05-26\n"  // fix for current date
+                "\n"
+                "Reasoning: medium\n"
+                "\n"
+                "# Valid channels: analysis, commentary, final. Channel must be included for every message.<|end|><|start|>developer<|message|># Instructions\n"
+                "\n"
+                "Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background.\n"
+                "\n"
+                "<|end|><|start|>user<|message|>";
+
+            prompt_attn_range.first = static_cast<int>(prompt.size());
+            prompt += conditioner_params.text;
+            prompt_attn_range.second = static_cast<int>(prompt.size());
+
+            prompt += "<|end|><|start|>assistant<|channel|>analysis<|message|>Need to generate one image according to the description.<|end|><|start|>assistant<|channel|>final<|message|>";
         } else if (sd_version_is_z_image(version)) {
             prompt_template_encode_start_idx = 0;
             out_layers                       = {35};  // -2
@@ -2085,7 +2114,8 @@ struct LLMEmbedder : public Conditioner {
                                            image_embeds,
                                            out_layers,
                                            prompt_template_encode_start_idx,
-                                           spell_quotes);
+                                           spell_quotes,
+                                           max_length);
         std::vector<sd::Tensor<float>> extra_hidden_states_vec;
         for (int i = 0; i < extra_prompts.size(); i++) {
             auto extra_hidden_states = encode_prompt(n_threads,
@@ -2096,7 +2126,8 @@ struct LLMEmbedder : public Conditioner {
                                                      image_embeds,
                                                      out_layers,
                                                      prompt_template_encode_start_idx,
-                                                     spell_quotes);
+                                                     spell_quotes,
+                                                     max_length);
             extra_hidden_states_vec.push_back(std::move(extra_hidden_states));
         }
 
 
@@ -6,6 +6,7 @@
 #include "ernie_image.hpp"
 #include "flux.hpp"
 #include "hidream_o1.hpp"
+#include "lens.hpp"
 #include "ltxv.hpp"
 #include "mmdit.hpp"
 #include "qwen_image.hpp"
@@ -701,6 +702,72 @@ struct ErnieImageModel : public DiffusionModel {
     }
 };
 
+// DiffusionModel implementation for Lens. Every member simply forwards to the
+// owned Lens::LensRunner; no additional state is kept besides the tensor prefix.
+struct LensModel : public DiffusionModel {
+    // Tensor-name prefix; handed to the runner at construction and again in
+    // get_param_tensors().
+    std::string prefix;
+    // Runner that all calls below are delegated to.
+    Lens::LensRunner lens;
+
+    // Stores `prefix` and constructs the runner from both backends, the tensor
+    // storage map and the same prefix.
+    LensModel(ggml_backend_t backend,
+              ggml_backend_t params_backend,
+              const String2TensorStorage& tensor_storage_map = {},
+              const std::string prefix                       = "model.diffusion_model")
+        : prefix(prefix), lens(backend, params_backend, tensor_storage_map, prefix) {
+    }
+
+    // Returns the runner's description string.
+    std::string get_desc() override {
+        return lens.get_desc();
+    }
+
+    // Forwards to the runner's alloc_params_buffer().
+    void alloc_params_buffer() override {
+        lens.alloc_params_buffer();
+    }
+
+    // Forwards to the runner's free_params_buffer().
+    void free_params_buffer() override {
+        lens.free_params_buffer();
+    }
+
+    // Forwards to the runner's free_compute_buffer().
+    void free_compute_buffer() override {
+        lens.free_compute_buffer();
+    }
+
+    // Lets the runner fill `tensors`, passing this model's prefix.
+    void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
+        lens.get_param_tensors(tensors, prefix);
+    }
+
+    // Returns the value reported by the runner's get_params_buffer_size().
+    size_t get_params_buffer_size() override {
+        return lens.get_params_buffer_size();
+    }
+
+    // Hands the weight adapter to the runner.
+    void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) override {
+        lens.set_weight_adapter(adapter);
+    }
+
+    // Always returns 768.
+    // NOTE(review): the reason for this constant is not visible here - confirm
+    // against the other DiffusionModel implementations.
+    int64_t get_adm_in_channels() override {
+        return 768;
+    }
+
+    // Forwards the flash-attention toggle to the runner. Marked `override` like
+    // every other forwarded member of this struct, so the compiler checks the
+    // signature against the base-class declaration instead of silently
+    // introducing a new, unrelated function if the two ever drift apart.
+    void set_flash_attention_enabled(bool enabled) override {
+        lens.set_flash_attention_enabled(enabled);
+    }
+
+    // Forwards the graph VRAM limit (in bytes) to the runner.
+    void set_max_graph_vram_bytes(size_t max_vram_bytes) override {
+        lens.set_max_graph_vram_bytes(max_vram_bytes);
+    }
+
+    // Forwards the per-axis circular flags to the runner.
+    void set_circular_axes(bool circular_x, bool circular_y) override {
+        lens.set_circular_axes(circular_x, circular_y);
+    }
+
+    // Runs the runner's compute() on the inputs in `diffusion_params`.
+    // `x` and `timesteps` are required (asserted non-null, then dereferenced);
+    // `context` goes through tensor_or_empty() - presumably so a missing
+    // context becomes an empty tensor; verify against that helper.
+    sd::Tensor<float> compute(int n_threads,
+                              const DiffusionParams& diffusion_params) override {
+        GGML_ASSERT(diffusion_params.x != nullptr);
+        GGML_ASSERT(diffusion_params.timesteps != nullptr);
+        return lens.compute(n_threads,
+                            *diffusion_params.x,
+                            *diffusion_params.timesteps,
+                            tensor_or_empty(diffusion_params.context));
+    }
+};
+
 struct LTXAVModel : public DiffusionModel {
     std::string prefix;
     LTXV::LTXAVRunner ltxav;