File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -2999,7 +2999,18 @@ struct GGMLRunner {
29992999 LOG_DEBUG (" %s skipping params allocation (no tensors)" , get_desc ().c_str ());
30003000 return true ;
30013001 }
3002- params_buffer = ggml_backend_alloc_ctx_tensors (params_ctx, params_backend);
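3002+ // When params are CPU-offloaded (params_backend != runtime_backend), prefer the runtime device's pinned host buffer type so host-to-device copies can use DMA directly; otherwise fall back to the params backend's default buffer type.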
3003+ ggml_backend_buffer_type_t params_buft = nullptr ;
3004+ if (params_backend != runtime_backend) {
3005+ ggml_backend_dev_t runtime_dev = ggml_backend_get_device (runtime_backend);
3006+ if (runtime_dev != nullptr ) {
3007+ params_buft = ggml_backend_dev_host_buffer_type (runtime_dev);
3008+ }
3009+ }
3010+ if (params_buft == nullptr ) {
3011+ params_buft = ggml_backend_get_default_buffer_type (params_backend);
3012+ }
3013+ params_buffer = ggml_backend_alloc_ctx_tensors_from_buft (params_ctx, params_buft);
30033014 if (params_buffer == nullptr ) {
30043015 LOG_ERROR (" %s alloc params backend buffer failed, num_tensors = %i" ,
30053016 get_desc ().c_str (),
You can’t perform that action at this time.
0 commit comments