Skip to content

Commit 3b4e474

Browse files
committed
fix: simplify PuLID ID extraction setup
1 parent 93527fd commit 3b4e474

10 files changed

Lines changed: 85 additions & 257 deletions

File tree

docs/pulid.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,15 @@ to a `.pulidembd` binary file (about 131 KB). Run it once per source
5252
person; the same file is reused for any number of generations.
5353

5454
A reference Python script is provided in this repository at
55-
[`scripts/pulid_extract_id.py`](../scripts/pulid_extract_id.py). It
55+
[`script/pulid_extract_id.py`](../script/pulid_extract_id.py). It
5656
requires:
57-
- A working CUDA / CPU PyTorch + diffusers stack
58-
- `insightface`, `facexlib`, `eva-clip`, `torchvision`
57+
- A working CUDA / CPU PyTorch stack
58+
- `insightface`, `facexlib`, `eva-clip`, `torchvision`, `opencv-python`,
59+
`huggingface_hub`, `gguf`
5960
- The PuLID weights file (same one stable-diffusion.cpp will load below)
60-
- The ToTheBeginning/PuLID repo's `pulid/pipeline_flux.py` (and its
61-
dependencies under `pulid/` and `flux/`) -- recommended to vendor
62-
rather than pip-install due to upstream packaging quirks
61+
- The ToTheBeginning/PuLID repo's `pulid/` package (including
62+
`pulid/pipeline_flux.py`) and `eva_clip/` package on `PYTHONPATH`; `flux/`
63+
is not needed for embedding extraction
6364

6465
Run it as:
6566

examples/common/common.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ ArgOptions SDContextParams::get_options() {
417417
&photo_maker_path},
418418
{"",
419419
"--pulid-weights",
420-
"path to PuLID flux weights (e.g. pulid_flux_v0.9.1.safetensors). Identity is injected during the denoise loop when paired with --pulid-id-embedding.",
420+
"path to PuLID Flux weights",
421421
&pulid_weights_path},
422422
{"",
423423
"--upscale-model",
@@ -894,7 +894,7 @@ ArgOptions SDGenerationParams::get_options() {
894894
&pm_id_embed_path},
895895
{"",
896896
"--pulid-id-embedding",
897-
"path to a .pulidembd binary produced by pulid_extract_id.py. Carries a (32, 2048) identity embedding extracted from a source portrait. Pair with --pulid-weights on the context.",
897+
"path to PuLID id embedding",
898898
&pulid_id_embedding_path},
899899
{"",
900900
"--hires-upscaler",
@@ -1048,7 +1048,7 @@ ArgOptions SDGenerationParams::get_options() {
10481048
&pm_style_strength},
10491049
{"",
10501050
"--pulid-id-weight",
1051-
"strength of PuLID identity injection (default: 1.0). 0.7-1.2 are typical; lower lets the prompt override the face more, higher tightens identity match.",
1051+
"strength of PuLID identity injection",
10521052
&pulid_id_weight},
10531053
{"",
10541054
"--control-strength",

examples/common/common.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,6 @@ struct SDContextParams {
133133
std::string control_net_path;
134134
std::string embedding_dir;
135135
std::string photo_maker_path;
136-
// PuLID-Flux identity-preservation context path: the safetensors blob
137-
// carrying the PerceiverAttentionCA cross-attention weights. Loaded
138-
// once with the model. Per-generation pulid_id_embedding_path lives in
139-
// SDGenerationParams below.
140136
std::string pulid_weights_path;
141137
sd_type_t wtype = SD_TYPE_COUNT;
142138
std::string tensor_type_rules;
@@ -239,9 +235,6 @@ struct SDGenerationParams {
239235
std::string pm_id_embed_path;
240236
float pm_style_strength = 20.f;
241237

242-
// PuLID-Flux: per-generation identity embedding (binary file produced by
243-
// runtime-scripts/pulid_extract_id.py). Format documented in
244-
// include/stable-diffusion.h sd_pulid_params_t.
245238
std::string pulid_id_embedding_path;
246239
float pulid_id_weight = 1.0f;
247240

include/stable-diffusion.h

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,6 @@ typedef struct {
195195
const sd_embedding_t* embeddings;
196196
uint32_t embedding_count;
197197
const char* photo_maker_path;
198-
/**
199-
* Path to pulid_flux_v0.9.1.safetensors (the PuLID identity-injection
200-
* cross-attention weights). When set together with sd_img_gen_params_t.
201-
* pulid_params.id_embedding_path, the Flux diffusion model performs PuLID
202-
* cross-attention injection during the denoise loop. Loaded once with
203-
* the model; the embedding is per-generation. Currently only meaningful
204-
* for Flux (depth=19 double, 38 single blocks); silently ignored for
205-
* other model versions.
206-
*/
207198
const char* pulid_weights_path;
208199
const char* tensor_type_rules;
209200
int n_threads;
@@ -282,23 +273,9 @@ typedef struct {
282273
float style_strength;
283274
} sd_pm_params_t; // photo maker
284275

285-
/**
286-
* PuLID-Flux identity preservation params.
287-
*
288-
* Unlike PhotoMaker (which extracts the ID embedding inside the inference
289-
* process from a directory of images), PuLID's ID extraction is a heavy
290-
* Python-only stack (insightface ArcFace + EVA-CLIP-L + IDFormer). To stay
291-
* cross-vendor in C++/Vulkan, sd.cpp consumes a precomputed binary file
292-
* produced by an external tool (runtime-scripts/pulid_extract_id.py in the
293-
* Cloudhands client tree).
294-
*
295-
* Format: a gguf container with a single tensor "pulid_id" of shape
296-
* [token_dim, num_tokens] (ggml order; typically [2048, 32]) in F16/F32/BF16.
297-
* Loaded with the standard gguf reader; see docs/pulid.md.
298-
*/
299276
typedef struct {
300-
const char* id_embedding_path; // path to .pulidembd file produced by pulid_extract_id.py
301-
float id_weight; // strength of the ID injection; typical 0.7-1.2, default 1.0
277+
const char* id_embedding_path;
278+
float id_weight;
302279
} sd_pulid_params_t;
303280

304281
enum sd_cache_mode_t {
Lines changed: 13 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,18 @@
22
Precompute a PuLID-Flux identity embedding from a single source portrait.
33
44
Writes a gguf file (a single tensor `pulid_id`) that stable-diffusion.cpp's
5-
`--pulid-id-embedding` flag consumes. See docs/pulid.md for the format and
6-
overall PuLID-Flux flow.
7-
8-
This script intentionally lives outside the C++ build: identity extraction
9-
needs insightface + EVA-CLIP-L + IDFormer, which are PyTorch-only stacks
10-
that would be impractical to reimplement in ggml just to run once per
11-
source person. The C++ side downstream of this file is cross-vendor and
12-
backend-agnostic.
5+
`--pulid-id-embedding` flag consumes.
136
147
Dependencies (recommended: vendor rather than pip-install due to upstream
158
packaging quirks):
169
- torch + safetensors
17-
- The ToTheBeginning/PuLID repository's `pulid/pipeline_flux.py` and
18-
its sibling packages (`flux/`, `eva_clip/`, `models/`). Put them on
19-
PYTHONPATH or sys.path before running this script.
20-
- insightface, facexlib (PuLID pipeline pulls these in)
10+
- The ToTheBeginning/PuLID repository's `pulid/` package and `eva_clip/`.
11+
Put them on PYTHONPATH or sys.path before running this script.
12+
- insightface, facexlib, torchvision, opencv-python, huggingface_hub, gguf
2113
- numpy, Pillow
2214
2315
Usage:
24-
python pulid_extract_id.py \\
16+
python script/pulid_extract_id.py \\
2517
--portrait /path/to/source-photo.jpg \\
2618
--pulid-weights /path/to/pulid_flux_v0.9.1.safetensors \\
2719
--out /path/to/source.pulidembd
@@ -35,21 +27,7 @@
3527
import argparse
3628
import os
3729
import sys
38-
39-
40-
def _make_minimal_flux_skeleton(device):
41-
"""PuLIDPipeline expects a `dit` (Flux transformer) to attach its
42-
PerceiverAttentionCA modules to during construction. We never run a
43-
forward pass on it -- the encoders alone (which is what we actually
44-
need) live on the pipeline object, not the dit. So we instantiate a
45-
real Flux skeleton with default params and never load its weights."""
46-
import torch
47-
from flux.model import Flux
48-
from flux.util import configs
49-
50-
with torch.device("cpu"):
51-
model = Flux(configs["flux-dev"].params).to(torch.bfloat16)
52-
return model
30+
from types import SimpleNamespace
5331

5432

5533
def extract(portrait_path: str, pulid_weights: str) -> "torch.Tensor":
@@ -65,18 +43,17 @@ def extract(portrait_path: str, pulid_weights: str) -> "torch.Tensor":
6543

6644
print(f"device={device}", flush=True)
6745

68-
print("constructing minimal Flux skeleton (no weights loaded)", flush=True)
69-
dit = _make_minimal_flux_skeleton(device)
70-
71-
print("instantiating PuLIDPipeline", flush=True)
72-
pulid = PuLIDPipeline(dit=dit, device=device,
46+
# PuLIDPipeline only attaches pulid_ca attributes to `dit` during
47+
# construction; get_id_embedding() never runs Flux, so a dummy object is
48+
# enough and avoids importing/building a Flux skeleton.
49+
print("instantiating PuLIDPipeline with a dummy Flux object", flush=True)
50+
dit = SimpleNamespace()
51+
pulid = PuLIDPipeline(dit=dit,
52+
device=device,
7353
weight_dtype=torch.bfloat16,
7454
onnx_provider=onnx_provider)
7555

7656
print(f"loading PuLID weights from {pulid_weights}", flush=True)
77-
# PuLIDPipeline.load_pretrain expects a "version" string used to construct
78-
# the default filename when pretrain_path is None. We pass the file
79-
# directly so the version string is informational only.
8057
pulid.load_pretrain(pretrain_path=pulid_weights, version="v0.9.1")
8158

8259
print(f"extracting ID embedding from {portrait_path}", flush=True)
@@ -100,10 +77,6 @@ def write_embd(tensor, out_path: str, dtype_choice: str) -> None:
10077

10178
os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
10279

103-
# The embedding ships as a standard gguf container holding a single tensor
104-
# named "pulid_id". numpy is row-major (num_tokens, token_dim); gguf stores
105-
# dims reversed, so stable-diffusion.cpp reads it back as
106-
# ne[0]=token_dim, ne[1]=num_tokens (see load_pulid_id_embedding).
10780
writer = gguf.GGUFWriter(out_path, arch="pulid")
10881
writer.add_uint32("pulid.version", 1)
10982

src/extensions/pulid_extension.cpp

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,15 @@
77
#include "core/util.h"
88
#include "gguf.h"
99

10-
// Load the precomputed PuLID identity embedding produced by
11-
// scripts/pulid_extract_id.py into a sd::Tensor<float> (always materialized as
12-
// fp32 for the diffusion path). Returns an empty tensor on any failure (the
13-
// caller treats empty as "PuLID off").
14-
//
15-
// The file is a standard gguf container holding a single tensor named
16-
// "pulid_id" with shape [token_dim, num_tokens] (ggml order; typically
17-
// [2048, 32]) in f16 / bf16 / f32. Using gguf rather than a bespoke header
18-
// means the shape + dtype are self-describing and we reuse ggml's reader.
1910
static sd::Tensor<float> load_pulid_id_embedding(const char* path) {
2011
sd::Tensor<float> empty;
2112
if (path == nullptr || strlen(path) == 0) {
2213
return empty;
2314
}
2415

25-
struct ggml_context* ctx_data = nullptr;
26-
struct gguf_init_params gp = {/*.no_alloc =*/false, /*.ctx =*/&ctx_data};
27-
struct gguf_context* gguf_ctx = gguf_init_from_file(path, gp);
16+
struct ggml_context* ctx_data = nullptr;
17+
struct gguf_init_params gp = {/*.no_alloc =*/false, /*.ctx =*/&ctx_data};
18+
struct gguf_context* gguf_ctx = gguf_init_from_file(path, gp);
2819
if (gguf_ctx == nullptr || ctx_data == nullptr) {
2920
LOG_WARN("PuLID id-embedding: cannot read gguf '%s'", path);
3021
if (gguf_ctx != nullptr)
@@ -83,20 +74,9 @@ static sd::Tensor<float> load_pulid_id_embedding(const char* path) {
8374
return out;
8475
}
8576

86-
// PuLID-Flux identity injection as a generation extension.
87-
//
88-
// Unlike PhotoMaker, PuLID does NOT modify the conditioning -- it injects an
89-
// identity embedding via cross-attention *inside* the Flux denoise forward (the
90-
// pulid_ca.* blocks). Those cross-attention weights are part of the Flux
91-
// diffusion model and are loaded into the model tensor map before the model is
92-
// constructed (see SDImpl ctor, gated on sd_ctx_params.pulid_weights_path), so
93-
// this extension does not own a separate model. Its job is purely runtime:
94-
// - prepare_condition: load the per-generation id-embedding file.
95-
// - before_diffusion: hand that embedding (+ weight) to FluxDiffusionExtra,
96-
// which flux.hpp reads to drive the pulid_ca injection.
9777
struct PuLIDExtension : public GenerationExtension {
9878
bool enabled = false;
99-
sd::Tensor<float> id_embedding; // per-generation; empty when PuLID is off for this request
79+
sd::Tensor<float> id_embedding;
10080
float id_weight = 1.0f;
10181

10282
const char* name() const override {

0 commit comments

Comments
 (0)