Skip to content

Commit 02f0637

Browse files
wbruna and leejet authored
refactor: call CPU backend functions dynamically (#1591)
Co-authored-by: leejet <leejet714@gmail.com>
1 parent f8935d6 commit 02f0637

17 files changed

Lines changed: 90 additions & 61 deletions

src/convert.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#include "model_io/safetensors_io.h"
99
#include "util.h"
1010

11-
#include "ggml-cpu.h"
11+
#include "ggml_extend_backend.h"
1212

1313
static ggml_type get_export_tensor_type(ModelLoader& model_loader,
1414
const TensorStorage& tensor_storage,
@@ -103,7 +103,7 @@ bool convert(const char* input_path,
103103
bool output_is_safetensors = ends_with(output_path, ".safetensors");
104104
TensorTypeRules type_rules = parse_tensor_type_rules(tensor_type_rules);
105105

106-
auto backend = ggml_backend_cpu_init();
106+
auto backend = sd_backend_cpu_init();
107107
size_t mem_size = 1 * 1024 * 1024; // for padding
108108
mem_size += model_loader.get_tensor_storage_map().size() * ggml_tensor_overhead();
109109
mem_size += model_loader.get_params_mem_size(backend, type);

src/flux.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1567,7 +1567,7 @@ namespace Flux {
15671567

15681568
static void load_from_file_and_test(const std::string& file_path) {
15691569
// ggml_backend_t backend = ggml_backend_cuda_init(0);
1570-
ggml_backend_t backend = ggml_backend_cpu_init();
1570+
ggml_backend_t backend = sd_backend_cpu_init();
15711571
ggml_type model_data_type = GGML_TYPE_COUNT;
15721572

15731573
ModelLoader model_loader;

src/ggml_extend.hpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,7 +1442,7 @@ __STATIC_INLINE__ ggml_tensor* ggml_ext_group_norm(ggml_context* ctx,
14421442

14431443
__STATIC_INLINE__ void ggml_ext_backend_tensor_get_and_sync(ggml_backend_t backend, const ggml_tensor* tensor, void* data, size_t offset, size_t size) {
14441444
if ((sd_backend_is(backend, "ROCm") || sd_backend_is(backend, "CUDA") || sd_backend_is(backend, "SYCL")) &&
1445-
!ggml_backend_is_cpu(backend)) {
1445+
!sd_backend_is_cpu(backend)) {
14461446
ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
14471447
ggml_backend_synchronize(backend);
14481448
return;
@@ -1899,7 +1899,7 @@ struct GGMLRunner {
18991899
LOG_DEBUG("%s compute buffer size: %.2f MB(%s)",
19001900
get_desc().c_str(),
19011901
compute_buffer_size / 1024.0 / 1024.0,
1902-
ggml_backend_is_cpu(runtime_backend) ? "RAM" : "VRAM");
1902+
sd_backend_is_cpu(runtime_backend) ? "RAM" : "VRAM");
19031903
return true;
19041904
}
19051905

@@ -1986,7 +1986,7 @@ struct GGMLRunner {
19861986
LOG_DEBUG("%s cache backend buffer size = % 6.2f MB(%s) (%i tensors)",
19871987
get_desc().c_str(),
19881988
cache_buffer_size / (1024.f * 1024.f),
1989-
ggml_backend_is_cpu(runtime_backend) ? "RAM" : "VRAM",
1989+
sd_backend_is_cpu(runtime_backend) ? "RAM" : "VRAM",
19901990
num_tensors);
19911991
if (old_cache_buffer != nullptr) {
19921992
ggml_backend_buffer_free(old_cache_buffer);
@@ -2293,13 +2293,13 @@ struct GGMLRunner {
22932293
max_graph_vram_bytes > 0 &&
22942294
plan.segments.size() > 1 &&
22952295
params_backend != runtime_backend &&
2296-
!ggml_backend_is_cpu(runtime_backend);
2296+
!sd_backend_is_cpu(runtime_backend);
22972297
}
22982298

22992299
bool can_attempt_graph_cut_segmented_compute() const {
23002300
return max_graph_vram_bytes > 0 &&
23012301
params_backend != runtime_backend &&
2302-
!ggml_backend_is_cpu(runtime_backend);
2302+
!sd_backend_is_cpu(runtime_backend);
23032303
}
23042304

23052305
bool resolve_graph_cut_plan(ggml_cgraph* gf,
@@ -2436,8 +2436,8 @@ struct GGMLRunner {
24362436
int64_t t_copy_begin = ggml_time_ms();
24372437
copy_data_to_backend_tensor(gf, !preserve_backend_tensor_data_map);
24382438
int64_t t_copy_end = ggml_time_ms();
2439-
if (ggml_backend_is_cpu(runtime_backend)) {
2440-
ggml_backend_cpu_set_n_threads(runtime_backend, n_threads);
2439+
if (sd_backend_is_cpu(runtime_backend)) {
2440+
sd_backend_cpu_set_n_threads(runtime_backend, n_threads);
24412441
}
24422442

24432443
int64_t t_compute_begin = ggml_time_ms();
@@ -2679,7 +2679,7 @@ struct GGMLRunner {
26792679
LOG_DEBUG("%s params backend buffer size = % 6.2f MB(%s) (%i tensors)",
26802680
get_desc().c_str(),
26812681
params_buffer_size / (1024.f * 1024.f),
2682-
ggml_backend_is_cpu(params_backend) ? "RAM" : "VRAM",
2682+
sd_backend_is_cpu(params_backend) ? "RAM" : "VRAM",
26832683
num_tensors);
26842684
return true;
26852685
}
@@ -2746,7 +2746,7 @@ struct GGMLRunner {
27462746
return nullptr;
27472747
}
27482748
// it's performing a compute, check if backend isn't cpu
2749-
if (!ggml_backend_is_cpu(runtime_backend) && (tensor->buffer == nullptr || ggml_backend_buffer_is_host(tensor->buffer))) {
2749+
if (!sd_backend_is_cpu(runtime_backend) && (tensor->buffer == nullptr || ggml_backend_buffer_is_host(tensor->buffer))) {
27502750
// pass input tensors to gpu memory
27512751
auto backend_tensor = ggml_dup_tensor(compute_ctx, tensor);
27522752

src/ggml_extend_backend.cpp

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <stdexcept>
99
#include <vector>
1010

11+
#include "stable-diffusion.h"
1112
#include "util.h"
1213

1314
static std::string trim_copy(const std::string& value) {
@@ -300,6 +301,61 @@ static ggml_backend_t init_named_backend(const std::string& name) {
300301
return ggml_backend_init_by_name(resolved.c_str(), nullptr);
301302
}
302303

304+
// Reports whether `backend` is attached to a device of CPU type.
//
// The check goes through the generic device API (ggml_backend_get_device +
// ggml_backend_dev_type) instead of a CPU-backend-specific function.
// A null backend, or a backend with no associated device, yields false.
bool sd_backend_is_cpu(ggml_backend_t backend) {
    if (backend == nullptr) {
        return false;
    }

    ggml_backend_dev_t device = ggml_backend_get_device(backend);
    if (device == nullptr) {
        return false;
    }

    return ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_CPU;
}
311+
312+
// Creates a backend for the CPU device type.
//
// Calls ggml_backend_load_all_once() first, then asks ggml for a backend of
// type GGML_BACKEND_DEVICE_TYPE_CPU with no extra parameters. The result of
// ggml_backend_init_by_type is returned unchanged.
// NOTE(review): presumably this can be null when no CPU device is registered —
// confirm against ggml before relying on a non-null result.
ggml_backend_t sd_backend_cpu_init() {
    ggml_backend_load_all_once();

    ggml_backend_t cpu_backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
    return cpu_backend;
}
316+
317+
// Sets the thread count on a CPU backend.
//
// The setter is not linked directly: it is looked up by name
// ("ggml_backend_set_n_threads") through the registry that owns the
// backend's device, via ggml_backend_reg_get_proc_address.
//
// Returns true only when the setter was found and invoked. Returns false when
// `backend` is null, has no device, its device is not of CPU type, or the
// registry does not expose the setter.
bool sd_backend_cpu_set_n_threads(ggml_backend_t backend, int n_threads) {
    if (backend == nullptr) {
        return false;
    }

    ggml_backend_dev_t device = ggml_backend_get_device(backend);
    if (device == nullptr || ggml_backend_dev_type(device) != GGML_BACKEND_DEVICE_TYPE_CPU) {
        return false;
    }

    auto registry = ggml_backend_dev_backend_reg(device);
    auto set_n_threads = reinterpret_cast<ggml_backend_set_n_threads_t>(
        ggml_backend_reg_get_proc_address(registry, "ggml_backend_set_n_threads"));
    if (set_n_threads == nullptr) {
        return false;
    }

    set_n_threads(backend, n_threads);
    return true;
}
332+
333+
const char* sd_get_system_info() {
334+
static std::string cache_info = []() -> std::string {
335+
ggml_backend_load_all_once();
336+
std::stringstream ss;
337+
ss << "System Info: \n";
338+
auto dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
339+
if (dev != nullptr) {
340+
auto reg = ggml_backend_dev_backend_reg(dev);
341+
auto ggml_backend_get_features_fn = (ggml_backend_get_features_t)ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
342+
if (ggml_backend_get_features_fn != nullptr) {
343+
ggml_backend_feature* feat = ggml_backend_get_features_fn(reg);
344+
while (feat->name && feat->value) {
345+
ss << " " << feat->name << " = " << feat->value << " | ";
346+
feat++;
347+
}
348+
} else {
349+
LOG_WARN("unable to get CPU features");
350+
}
351+
} else {
352+
LOG_WARN("unable to get CPU features");
353+
}
354+
return ss.str();
355+
}();
356+
return cache_info.c_str();
357+
}
358+
303359
static ggml_backend_t sd_get_default_backend() {
304360
ggml_backend_load_all_once();
305361
static std::once_flag once;
@@ -349,10 +405,10 @@ static ggml_backend_t sd_get_default_backend() {
349405

350406
if (!backend) {
351407
LOG_WARN("loading CPU backend");
352-
backend = ggml_backend_cpu_init();
408+
backend = sd_backend_cpu_init();
353409
}
354410

355-
if (ggml_backend_is_cpu(backend)) {
411+
if (sd_backend_is_cpu(backend)) {
356412
LOG_DEBUG("Using CPU backend");
357413
}
358414

@@ -452,19 +508,19 @@ ggml_backend_t SDBackendManager::params_backend(SDBackendModule module) {
452508
}
453509

454510
bool SDBackendManager::runtime_backend_is_cpu(SDBackendModule module) {
455-
return ggml_backend_is_cpu(runtime_backend(module));
511+
return sd_backend_is_cpu(runtime_backend(module));
456512
}
457513

458514
bool SDBackendManager::params_backend_is_cpu(SDBackendModule module) {
459-
return ggml_backend_is_cpu(params_backend(module));
515+
return sd_backend_is_cpu(params_backend(module));
460516
}
461517

462518
bool SDBackendManager::runtime_backend_supports_host_buffer(SDBackendModule module) {
463519
ggml_backend_t backend = runtime_backend(module);
464520
if (backend == nullptr) {
465521
return false;
466522
}
467-
if (ggml_backend_is_cpu(backend)) {
523+
if (sd_backend_is_cpu(backend)) {
468524
return true;
469525
}
470526
ggml_backend_dev_t dev = ggml_backend_get_device(backend);

src/ggml_extend_backend.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
#include <unordered_map>
99

1010
#include "ggml-backend.h"
11-
#include "ggml-cpu.h"
1211
#include "ggml.h"
1312

1413
enum class SDBackendModule {
@@ -72,6 +71,9 @@ class SDBackendManager {
7271
};
7372

7473
bool sd_backend_is(ggml_backend_t backend, const std::string& name);
74+
bool sd_backend_is_cpu(ggml_backend_t backend);
75+
ggml_backend_t sd_backend_cpu_init();
76+
bool sd_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
7577
const char* sd_backend_module_name(SDBackendModule module);
7678
void ggml_ext_im_set_f32_1d(const struct ggml_tensor* tensor, int i, float value);
7779
#endif

src/llm.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2041,7 +2041,7 @@ namespace LLM {
20412041
static void load_from_file_and_test(const std::string& file_path) {
20422042
// cpu f16: pass
20432043
// ggml_backend_t backend = ggml_backend_cuda_init(0);
2044-
ggml_backend_t backend = ggml_backend_cpu_init();
2044+
ggml_backend_t backend = sd_backend_cpu_init();
20452045
ggml_type model_data_type = GGML_TYPE_COUNT;
20462046

20472047
ModelLoader model_loader;

src/lora.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,7 @@ struct LoraModel : public GGMLRunner {
772772
}
773773

774774
ggml_tensor* original_tensor = model_tensor;
775-
if (!ggml_backend_is_cpu(runtime_backend) && ggml_backend_buffer_is_host(original_tensor->buffer)) {
775+
if (!sd_backend_is_cpu(runtime_backend) && ggml_backend_buffer_is_host(original_tensor->buffer)) {
776776
model_tensor = ggml_dup_tensor(compute_ctx, model_tensor);
777777
set_backend_tensor_data(model_tensor, original_tensor->data);
778778
}
@@ -786,7 +786,7 @@ struct LoraModel : public GGMLRunner {
786786
final_tensor = ggml_add_inplace(compute_ctx, model_tensor, diff);
787787
}
788788
ggml_build_forward_expand(gf, final_tensor);
789-
if (!ggml_backend_is_cpu(runtime_backend) && ggml_backend_buffer_is_host(original_tensor->buffer)) {
789+
if (!sd_backend_is_cpu(runtime_backend) && ggml_backend_buffer_is_host(original_tensor->buffer)) {
790790
original_tensor_to_final_tensor[original_tensor] = final_tensor;
791791
}
792792
}

src/ltx_audio_vae.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1052,7 +1052,7 @@ namespace LTXV {
10521052
static void load_from_file_and_test(const std::string& model_path,
10531053
const std::string& input_path,
10541054
const std::string& prefix = "") {
1055-
ggml_backend_t backend = ggml_backend_cpu_init();
1055+
ggml_backend_t backend = sd_backend_cpu_init();
10561056
// ggml_backend_t backend = ggml_backend_cuda_init(0);
10571057
LOG_INFO("loading ltx audio vae from '%s'", model_path.c_str());
10581058

src/ltx_vae.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1517,7 +1517,7 @@ struct LTXVideoVAE : public VAE {
15171517
static void load_from_file_and_test(const std::string& model_path,
15181518
const std::string& input_path) {
15191519
// ggml_backend_t backend = ggml_backend_cuda_init(0);
1520-
ggml_backend_t backend = ggml_backend_cpu_init();
1520+
ggml_backend_t backend = sd_backend_cpu_init();
15211521
LOG_INFO("loading ltx vae from '%s'", model_path.c_str());
15221522

15231523
ModelLoader model_loader;

src/ltxv.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1995,7 +1995,7 @@ namespace LTXV {
19951995
const std::string& audio_x_path = "",
19961996
const std::string& audio_timesteps_path = "") {
19971997
// ggml_backend_t backend = ggml_backend_cuda_init(0);
1998-
ggml_backend_t backend = ggml_backend_cpu_init();
1998+
ggml_backend_t backend = sd_backend_cpu_init();
19991999
LOG_INFO("loading ltxav from '%s'", model_path.c_str());
20002000

20012001
ModelLoader model_loader;

0 commit comments

Comments
 (0)