
llama : expose API to retrieve devices associated with the model. #12073

Open · wants to merge 1 commit into master
3 changes: 3 additions & 0 deletions include/llama.h
@@ -470,6 +470,8 @@ extern "C" {
 
     LLAMA_API const struct llama_model * llama_get_model   (const struct llama_context * ctx);
     LLAMA_API enum llama_pooling_type    llama_pooling_type(const struct llama_context * ctx);
+    LLAMA_API size_t                     llama_n_backends  (const struct llama_context * ctx);
+    LLAMA_API size_t                     llama_get_backends(const struct llama_context * ctx, ggml_backend_t * out_buf, size_t out_len);
 
     LLAMA_API const struct llama_vocab * llama_model_get_vocab(const struct llama_model * model);
     LLAMA_API enum llama_rope_type       llama_model_rope_type (const struct llama_model * model);
@@ -479,6 +481,7 @@ extern "C" {
     LLAMA_API int32_t llama_model_n_layer   (const struct llama_model * model);
     LLAMA_API int32_t llama_model_n_head    (const struct llama_model * model);
     LLAMA_API int32_t llama_model_n_head_kv (const struct llama_model * model);
+    LLAMA_API const ggml_backend_dev_t *    llama_model_get_devices(const struct llama_model * model, size_t * out_len);
 
     // Get the model's RoPE frequency scaling factor
     LLAMA_API float llama_model_rope_freq_scale_train(const struct llama_model * model);
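For context, a minimal caller sketch of the new context-level API. This is a sketch only, not part of the PR: the helper name `print_context_backends` is hypothetical, `ctx` is assumed to be an already-created `llama_context`, and `ggml_backend_name` is the existing ggml-backend accessor.

```cpp
// Sketch only: enumerate the backends attached to a context via the proposed API.
#include <cstdio>
#include <vector>

#include "ggml-backend.h" // for ggml_backend_name()
#include "llama.h"

static void print_context_backends(const llama_context * ctx) { // hypothetical helper
    // First call: ask how many backends the context holds.
    std::vector<ggml_backend_t> backends(llama_n_backends(ctx));
    // Second call: copy at most backends.size() handles into the caller's buffer.
    const size_t n = llama_get_backends(ctx, backends.data(), backends.size());
    for (size_t i = 0; i < n; i++) {
        printf("backend %zu: %s\n", i, ggml_backend_name(backends[i]));
    }
}
```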
12 changes: 12 additions & 0 deletions src/llama-context.cpp
@@ -610,6 +610,18 @@ enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx) {
     return ctx->cparams.pooling_type;
 }
 
+size_t llama_n_backends(const struct llama_context * ctx) {
+    return ctx->backends.size();
+}
+
+size_t llama_get_backends(const struct llama_context * ctx, ggml_backend_t * out, size_t out_len) {
+    size_t return_len = std::min(ctx->backends.size(), out_len);
+    for (size_t i = 0; i < return_len; i++) {
+        out[i] = ctx->backends[i].get();
+    }
+    return return_len;
+}
+
 void llama_attach_threadpool(
         struct llama_context * ctx,
         ggml_threadpool_t     threadpool,
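Note the truncation contract in `llama_get_backends`: at most `out_len` handles are copied, and the return value is the number actually written, so a caller can detect a too-small buffer. A sketch of that pattern follows (the helper name and the buffer size of 4 are illustrative only):

```cpp
// Sketch only: llama_get_backends never writes past out_len, and the return
// value is min(n_backends, out_len), so a short fixed-size buffer is safe.
#include "llama.h"

static bool got_all_backends(const llama_context * ctx) { // hypothetical helper
    ggml_backend_t buf[4]; // deliberately small; 4 is an arbitrary choice
    const size_t written = llama_get_backends(ctx, buf, 4);
    // If the list was truncated, a real caller would grow the buffer and retry.
    return written == llama_n_backends(ctx);
}
```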
5 changes: 5 additions & 0 deletions src/llama-model.cpp
@@ -3845,6 +3845,11 @@ int32_t llama_model_n_head_kv(const struct llama_model * model) {
     return model->hparams.n_head_kv();
 }
 
+const ggml_backend_dev_t * llama_model_get_devices(const struct llama_model * model, size_t * out_len) {
+    *out_len = model->devices.size();
+    return model->devices.data();
+}
+
 // deprecated
 int32_t llama_n_ctx_train(const struct llama_model * model) {
     return llama_model_n_ctx_train(model);
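Unlike the context accessor, the model-side getter returns a pointer that aliases the model's internal device list, so the caller neither allocates nor frees anything. A sketch of how a caller might enumerate devices (the helper name is hypothetical; `ggml_backend_dev_name` and `ggml_backend_dev_description` are existing ggml-backend accessors):

```cpp
// Sketch only: list the devices the model's weights were distributed across.
#include <cstdio>

#include "ggml-backend.h" // for ggml_backend_dev_name()/_description()
#include "llama.h"

static void print_model_devices(const llama_model * model) { // hypothetical helper
    size_t n_devices = 0;
    // The returned pointer is owned by the model; do not free it.
    const ggml_backend_dev_t * devices = llama_model_get_devices(model, &n_devices);
    for (size_t i = 0; i < n_devices; i++) {
        printf("device %zu: %s (%s)\n", i,
               ggml_backend_dev_name(devices[i]),
               ggml_backend_dev_description(devices[i]));
    }
}
```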