Skip to content

Commit be38437

Browse files
committed
Finally adapt to the newest master
1 parent d13cf71 commit be38437

13 files changed

Lines changed: 1593 additions & 94 deletions

File tree

examples/common/common.cpp

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,18 @@ ArgOptions SDContextParams::get_options() {
423423
"--params-backend",
424424
"parameter backend assignment, e.g. cpu or diffusion=cpu,clip=cpu",
425425
&params_backend},
426+
{"",
427+
"--multi-gpu-mode",
428+
"how to split a too-large DiT across GPUs (auto-fit): "
429+
"row (matmul rows, CUDA/SYCL), layer (whole blocks, generic), or off "
430+
"(default: row)",
431+
&multi_gpu_mode},
432+
{"",
433+
"--fit-compute-reserve",
434+
"auto-fit: per-component compute-buffer reserve in MiB as a component "
435+
"map, e.g. dit=2048,vae=1024,cond=512 (missing keys keep the built-in "
436+
"defaults)",
437+
&fit_compute_reserve},
426438
};
427439

428440
options.int_options = {
@@ -439,19 +451,6 @@ ArgOptions SDContextParams::get_options() {
439451
"--fit-target",
440452
"auto-fit: MiB of free memory to leave on each GPU (default: 512)",
441453
&auto_fit_target_mb},
442-
{"",
443-
"--fit-compute-reserve-dit",
444-
"auto-fit: MiB reserved on the DiT's GPU for its compute buffer "
445-
"(0 keeps the built-in default)",
446-
&auto_fit_compute_reserve_dit_mb},
447-
{"",
448-
"--fit-compute-reserve-vae",
449-
"auto-fit: MiB reserved on the VAE's GPU for its compute buffer",
450-
&auto_fit_compute_reserve_vae_mb},
451-
{"",
452-
"--fit-compute-reserve-cond",
453-
"auto-fit: MiB reserved on the conditioner's GPU for its compute buffer",
454-
&auto_fit_compute_reserve_cond_mb},
455454
};
456455

457456
options.float_options = {
@@ -518,6 +517,18 @@ ArgOptions SDContextParams::get_options() {
518517
"--chroma-enable-t5-mask",
519518
"enable t5 mask for chroma",
520519
true, &chroma_use_t5_mask},
520+
{"",
521+
"--control-net-cpu",
522+
"keep controlnet in cpu (deprecated alias for --backend control_net=cpu)",
523+
true, &control_net_cpu},
524+
{"",
525+
"--clip-on-cpu",
526+
"keep clip in cpu (deprecated alias for --backend clip=cpu)",
527+
true, &clip_on_cpu},
528+
{"",
529+
"--vae-on-cpu",
530+
"keep vae in cpu (deprecated alias for --backend vae=cpu)",
531+
true, &vae_on_cpu},
521532
{"",
522533
"--auto-fit",
523534
"automatically pick DiT/VAE/Conditioner device placements based on "
@@ -771,7 +782,9 @@ std::string SDContextParams::to_string() const {
771782
<< " auto_fit: " << (auto_fit ? "true" : "false") << ",\n"
772783
<< " auto_fit_target_mb: " << auto_fit_target_mb << ",\n"
773784
<< " auto_fit_dry_run: " << (auto_fit_dry_run ? "true" : "false") << ",\n"
785+
<< " fit_compute_reserve: \"" << fit_compute_reserve << "\",\n"
774786
<< " auto_multi_gpu: " << (auto_multi_gpu ? "true" : "false") << ",\n"
787+
<< " multi_gpu_mode: \"" << multi_gpu_mode << "\",\n"
775788
<< " flash_attn: " << (flash_attn ? "true" : "false") << ",\n"
776789
<< " diffusion_flash_attn: " << (diffusion_flash_attn ? "true" : "false") << ",\n"
777790
<< " diffusion_conv_direct: " << (diffusion_conv_direct ? "true" : "false") << ",\n"
@@ -791,6 +804,30 @@ std::string SDContextParams::to_string() const {
791804
}
792805

793806
sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool vae_decode_only, bool free_params_immediately, bool taesd_preview) {
807+
// Fold the deprecated --clip-on-cpu / --vae-on-cpu / --control-net-cpu aliases into the generic backend spec.
808+
// They are prepended so explicit --backend entries take precedence.
809+
std::string alias_spec;
810+
if (control_net_cpu) {
811+
alias_spec += "control_net=cpu,";
812+
}
813+
if (clip_on_cpu) {
814+
alias_spec += "clip=cpu,";
815+
}
816+
if (vae_on_cpu) {
817+
alias_spec += "vae=cpu,";
818+
}
819+
if (!alias_spec.empty()) {
820+
backend = alias_spec + backend;
821+
if (backend.back() == ',') {
822+
backend.pop_back();
823+
}
824+
control_net_cpu = false;
825+
clip_on_cpu = false;
826+
vae_on_cpu = false;
827+
printf("warning: --clip-on-cpu / --vae-on-cpu / --control-net-cpu are deprecated, use --backend instead (folded into --backend \"%s\")\n",
828+
backend.c_str());
829+
}
830+
794831
embedding_vec.clear();
795832
embedding_vec.reserve(embedding_map.size());
796833
for (const auto& kv : embedding_map) {
@@ -850,10 +887,9 @@ sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool vae_decode_only, bool f
850887
auto_fit,
851888
auto_fit_target_mb,
852889
auto_fit_dry_run,
853-
auto_fit_compute_reserve_dit_mb,
854-
auto_fit_compute_reserve_vae_mb,
855-
auto_fit_compute_reserve_cond_mb,
890+
fit_compute_reserve.c_str(),
856891
auto_multi_gpu,
892+
multi_gpu_mode.c_str(),
857893
};
858894
return sd_ctx_params;
859895
}

examples/common/common.h

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -165,14 +165,21 @@ struct SDContextParams {
165165
bool qwen_image_zero_cond_t = false;
166166

167167
// Auto-fit defaults — placement is computed automatically based on free
168-
// VRAM. Pass --no-auto-fit to disable and use explicit *-backend-device.
169-
bool auto_fit = true;
170-
int auto_fit_target_mb = 512;
171-
bool auto_fit_dry_run = false;
172-
int auto_fit_compute_reserve_dit_mb = 0;
173-
int auto_fit_compute_reserve_vae_mb = 0;
174-
int auto_fit_compute_reserve_cond_mb = 0;
175-
bool auto_multi_gpu = true;
168+
// VRAM. Pass --no-auto-fit to disable and use explicit --backend specs.
169+
bool auto_fit = true;
170+
int auto_fit_target_mb = 512;
171+
bool auto_fit_dry_run = false;
172+
// Per-component compute-buffer reserve in MiB as a component map,
173+
// e.g. "dit=2048,vae=1024,cond=512"; missing keys keep built-in defaults.
174+
std::string fit_compute_reserve;
175+
bool auto_multi_gpu = true;
176+
std::string multi_gpu_mode = "row";
177+
178+
// Deprecated aliases for --backend <component>=cpu (kept for
179+
// backwards compatibility with the pre-auto-fit CLI).
180+
bool control_net_cpu = false;
181+
bool clip_on_cpu = false;
182+
bool vae_on_cpu = false;
176183

177184
prediction_t prediction = PREDICTION_COUNT;
178185
lora_apply_mode_t lora_apply_mode = LORA_APPLY_AUTO;

ggml

Submodule ggml updated 392 files

include/stable-diffusion.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -230,21 +230,28 @@ typedef struct {
230230
// into the same backend assignment that `backend` / `params_backend` use).
231231
// `auto_fit_target_mb` is the memory to leave free per GPU (default 512).
232232
// `auto_fit_dry_run` prints the plan and aborts init before loading.
233-
// `auto_fit_compute_reserve_{dit,vae,cond}_mb` let the user tune the
234-
// per-component compute-buffer reserve; 0 means use the built-in default.
233+
// `auto_fit_compute_reserve` tunes the per-component compute-buffer
234+
// reserve in MiB as a component map, e.g. "dit=2048,vae=1024,cond=512"
235+
// (same component-key style as `backend`); missing keys / NULL keep the
236+
// built-in defaults.
235237
bool auto_fit;
236238
int auto_fit_target_mb;
237239
bool auto_fit_dry_run;
238-
int auto_fit_compute_reserve_dit_mb;
239-
int auto_fit_compute_reserve_vae_mb;
240-
int auto_fit_compute_reserve_cond_mb;
240+
const char* auto_fit_compute_reserve;
241241

242242
// When more than one GPU device is present, prefer placing different
243243
// components on different GPUs to balance load and fit larger total
244244
// working sets. Set false to keep all components on a single GPU when
245245
// they fit. Defaults to true. Each component still lives entirely on
246-
// one device — no intra-tensor row split.
246+
// one device unless multi_gpu_mode splits it (see below).
247247
bool auto_multi_gpu;
248+
249+
// How to split a single component (currently only the DiT) across GPUs
250+
// when it doesn't fit on one but fits across several: "row" (matmul rows
251+
// split via the backend's stock split buffer type, CUDA/SYCL),
252+
// "layer" (whole blocks per GPU, routed by a scheduler, backend-generic),
253+
// or "off" (never split a single component). NULL / empty => "row".
254+
const char* multi_gpu_mode;
248255
} sd_ctx_params_t;
249256

250257
typedef struct {

0 commit comments

Comments
 (0)