Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions ggml/include/ggml-virtgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
extern "C" {
#endif

#define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend"

GGML_BACKEND_API ggml_backend_reg_t ggml_backend_virtgpu_reg();

#ifdef __cplusplus
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor) {
result.data = reinterpret_cast<uint64_t>(tensor->data);
if (tensor->data) {
if (!tensor->buffer) {
GGML_ABORT("tensor has data but not buffer");
GGML_ABORT("%s: tensor has data but not buffer", __func__);
}
// tensor->data is serialized as an offset to the buffer base address
result.data -= reinterpret_cast<uint64_t>(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base);
Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v

const void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_data) {
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
apir_decoder_set_fatal(dec);
return 1;
}
Expand All @@ -45,7 +45,7 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v
if (dev->iface.supports_op(dev, op)) {
continue;
}
GGML_LOG_ERROR("Graph node %d (%s) not supported by the backend\n", idx, ggml_op_desc(op));
// The leading "%s:" consumes __func__; idx and the op description then match "%d" and "(%s)".
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Graph node %d (%s) not supported by the backend\n", __func__, idx, ggml_op_desc(op));

status = GGML_STATUS_ABORTED;
apir_encode_ggml_status(enc, &status);
Expand Down
12 changes: 8 additions & 4 deletions ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,22 @@ uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec
ggml_backend_buffer_type_t buft;
buft = apir_decode_ggml_buffer_type(dec);

size_t value = buft->iface.get_max_size(buft);
size_t value = SIZE_MAX;
if (buft->iface.get_max_size) {
value = buft->iface.get_max_size(buft);
}

apir_encode_size_t(enc, &value);

return 0;
}

/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST is deprecated. Keeping the handler for backward compatibility. */
uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
ggml_backend_buffer_type_t buft;
buft = apir_decode_ggml_buffer_type(dec);
GGML_UNUSED(dec);
const bool is_host = false;

bool is_host = buft->iface.is_host(buft);
apir_encode_bool_t(enc, &is_host);

return 0;
Expand Down
6 changes: 3 additions & 3 deletions ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl
void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);

if (!shmem_data) {
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
return 1;
}

Expand Down Expand Up @@ -71,7 +71,7 @@ uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl

void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_data) {
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
return 1;
}

Expand Down Expand Up @@ -121,7 +121,7 @@ uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virg
buffer = apir_decode_ggml_buffer(dec);

if (!apir_untrack_backend_buffer(buffer)) {
GGML_LOG_WARN("%s: unknown buffer %p\n", __func__, (void *) buffer);
GGML_LOG_WARN(GGML_VIRTGPU_BCK "%s: unknown buffer %p\n", __func__, (void *) buffer);
return 1;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec,

void * shmem_ptr = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_ptr) {
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
apir_decoder_set_fatal(dec);
return 1;
}
Expand Down
8 changes: 4 additions & 4 deletions ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,26 @@ uint64_t timer_count = 0;

uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p) {
if (reg != NULL) {
GGML_LOG_WARN("%s: already initialized\n", __func__);
GGML_LOG_WARN(GGML_VIRTGPU_BCK "%s: already initialized\n", __func__);
return APIR_BACKEND_INITIALIZE_ALREADY_INITED;
}
ggml_backend_reg_t (*ggml_backend_reg_fct)(void) = (ggml_backend_reg_t (*)()) ggml_backend_reg_fct_p;

reg = ggml_backend_reg_fct();
if (reg == NULL) {
GGML_LOG_ERROR("%s: backend registration failed\n", __func__);
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend registration failed\n", __func__);
return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED;
}

if (!reg->iface.get_device_count(reg)) {
GGML_LOG_ERROR("%s: backend initialization failed: no device found\n", __func__);
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device found\n", __func__);
return APIR_BACKEND_INITIALIZE_NO_DEVICE;
}

dev = reg->iface.get_device(reg, 0);

if (!dev) {
GGML_LOG_ERROR("%s: backend initialization failed: no device received\n", __func__);
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device received\n", __func__);
return APIR_BACKEND_INITIALIZE_NO_DEVICE;
}

Expand Down
5 changes: 3 additions & 2 deletions ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec,
uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST is deprecated. Keeping the handler for backward compatibility. */
uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
Expand Down Expand Up @@ -62,7 +63,7 @@ static inline const char * backend_dispatch_command_name(ApirBackendCommandType
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE:
return "backend_buffer_type_get_max_size";
case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST:
return "backend_buffer_type_is_host";
return "backend_buffer_type_is_host (DEPRECATED)";
case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER:
return "backend_buffer_type_alloc_buffer";
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE:
Expand Down Expand Up @@ -110,7 +111,7 @@ static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATC
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = */ backend_buffer_type_get_name,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = */ backend_buffer_type_get_alignment,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = */ backend_buffer_type_get_max_size,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host /* DEPRECATED */,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = */ backend_buffer_type_alloc_buffer,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = */ backend_buffer_type_get_alloc_size,

Expand Down
2 changes: 2 additions & 0 deletions ggml/src/ggml-virtgpu/backend/backend-dispatched.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "shared/apir_cs.h"
#include "shared/apir_cs_ggml.h"

/* String-literal prefix concatenated in front of the format string of backend log messages. */
#define GGML_VIRTGPU_BCK "ggml-virtgpu-backend: "

struct virgl_apir_context {
uint32_t ctx_id;
virgl_apir_callbacks * iface;
Expand Down
32 changes: 18 additions & 14 deletions ggml/src/ggml-virtgpu/backend/backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,8 @@ void apir_backend_deinit(uint32_t virgl_ctx_id) {
buffer->iface.free_buffer(buffer);
}

if (dev) {
size_t free, total;
dev->iface.get_memory(dev, &free, &total);
GGML_LOG_INFO("%s: free memory: %ld MB\n", __func__, (size_t) free / 1024 / 1024);
}

if (backend_library_handle) {
GGML_LOG_INFO("%s: The GGML backend library was loaded. Unloading it.\n", __func__);
GGML_LOG_INFO(GGML_VIRTGPU_BCK "The GGML backend library was loaded. Unloading it.\n");
dlclose(backend_library_handle);
backend_library_handle = NULL;
}
Expand All @@ -65,7 +59,7 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct
if (apir_logfile) {
ggml_log_set(log_to_file_callback, apir_logfile);
} else {
GGML_LOG_INFO("Could not open the log file at '%s'\n", apir_log_to_file);
GGML_LOG_INFO(GGML_VIRTGPU_BCK "Could not open the log file at '%s'\n", apir_log_to_file);
}
}

Expand All @@ -74,30 +68,38 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct
const char * library_reg = virgl_library_reg ? virgl_library_reg : GGML_DEFAULT_BACKEND_REG;

if (!library_name) {
GGML_LOG_ERROR("cannot open the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV);
GGML_LOG_ERROR(GGML_VIRTGPU_BCK
"%s: cannot open the GGML library: env var '%s' not defined\n",
__func__, APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV);


return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
}

backend_library_handle = dlopen(library_name, RTLD_LAZY);

if (!backend_library_handle) {
GGML_LOG_ERROR("cannot open the GGML library: %s\n", dlerror());
GGML_LOG_ERROR(GGML_VIRTGPU_BCK
"%s: cannot open the GGML library: %s\n", __func__, dlerror());

return APIR_LOAD_LIBRARY_CANNOT_OPEN;
}

if (!library_reg) {
GGML_LOG_ERROR("cannot register the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV);
GGML_LOG_ERROR(GGML_VIRTGPU_BCK
"%s: cannot register the GGML library: env var '%s' not defined\n",
__func__, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV);

return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
}

void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg);
dlsym_error = dlerror();
if (dlsym_error) {
GGML_LOG_ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s\n", library_reg,
APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error);
GGML_LOG_ERROR(GGML_VIRTGPU_BCK
"%s: cannot find the GGML backend registration symbol '%s' (from %s): %s\n",
__func__, library_reg, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error);


return APIR_LOAD_LIBRARY_SYMBOL_MISSING;
}
Expand Down Expand Up @@ -134,7 +136,9 @@ uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id,
};

if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) {
GGML_LOG_ERROR("Received an invalid dispatch index (%d >= %d)\n", cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT);
GGML_LOG_ERROR(GGML_VIRTGPU_BCK
"%s: Received an invalid dispatch index (%d >= %d)\n",
__func__, cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT);
return APIR_BACKEND_FORWARD_INDEX_INVALID;
}

Expand Down
11 changes: 6 additions & 5 deletions ggml/src/ggml-virtgpu/backend/shared/apir_cs.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ static inline bool apir_decoder_peek_internal(apir_decoder * dec,
assert(val_size <= size);

if (unlikely(size > (size_t) (dec->end - dec->cur))) {
GGML_LOG_ERROR("reading too much from the decoder ...\n");
GGML_LOG_ERROR("%s: reading too much from the decoder ...\n", __func__);
apir_decoder_set_fatal(dec);
memset(val, 0, val_size);
return false;
Expand All @@ -103,7 +103,7 @@ static inline void apir_decoder_peek(apir_decoder * dec, size_t size, void * val

static inline const void * apir_decoder_use_inplace(apir_decoder * dec, size_t size) {
if (unlikely(size > (size_t) (dec->end - dec->cur))) {
GGML_LOG_ERROR("reading too much from the decoder ...\n");
GGML_LOG_ERROR("%s: reading too much from the decoder ...\n", __func__);
apir_decoder_set_fatal(dec);
return NULL;
}
Expand Down Expand Up @@ -221,7 +221,7 @@ static inline uint64_t apir_decode_array_size(apir_decoder * dec, uint64_t expec
uint64_t size;
apir_decode_uint64_t(dec, &size);
if (size != expected_size) {
GGML_LOG_ERROR("Couldn't decode array from the decoder\n");
GGML_LOG_ERROR("%s: Couldn't decode array from the decoder\n", __func__);
apir_decoder_set_fatal(dec);
size = 0;
}
Expand Down Expand Up @@ -322,7 +322,7 @@ static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t
if (size) {
val[size - 1] = '\0';
} else {
GGML_LOG_ERROR("Couldn't decode the blog array\n");
GGML_LOG_ERROR("%s: Couldn't decode the blog array\n", __func__);
apir_decoder_set_fatal(dec);
}
}
Expand All @@ -332,7 +332,8 @@ static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t
static inline void * apir_decoder_alloc_array(size_t size, size_t count) {
size_t alloc_size;
if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) {
GGML_LOG_ERROR("overflow in array allocation of %zu * %zu bytes\n", size, count);
GGML_LOG_ERROR("%s: overflow in array allocation of %zu * %zu bytes\n",
__func__, size, count);
return NULL;
}

Expand Down
14 changes: 12 additions & 2 deletions ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,17 @@ static inline void apir_encode_ggml_tensor(apir_encoder * enc, const ggml_tensor

static inline const ggml_tensor * apir_decode_ggml_tensor(apir_decoder * dec) {
const apir_rpc_tensor * apir_rpc_tensor = apir_decode_apir_rpc_tensor_inplace(dec);

if (!apir_rpc_tensor) {
return NULL;
}

ggml_init_params params{
/*.mem_size =*/ ggml_tensor_overhead(),
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ true,
};

ggml_context * ctx = ggml_init(params);

const ggml_tensor * tensor = apir_deserialize_tensor(ctx, apir_rpc_tensor);
Expand Down Expand Up @@ -71,6 +77,10 @@ static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(apir_decod
return (ggml_backend_buffer_type_t) handle;
}

/*
 * Encode a buffer-type host handle.
 *
 * The handle is passed by value and handed to apir_encoder_write() as a raw
 * object of sizeof(handle) bytes; the same size is given for both size arguments.
 */
static inline void apir_encode_apir_buffer_type_host_handle(apir_encoder * enc, apir_buffer_type_host_handle_t handle) {
    const size_t handle_size = sizeof(handle);

    apir_encoder_write(enc, handle_size, &handle, handle_size);
}

static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(apir_decoder * dec) {
apir_buffer_type_host_handle_t handle;

Expand Down Expand Up @@ -154,13 +164,13 @@ static inline void apir_encode_ggml_tensor_inline(apir_encoder * enc, const ggml
size_t tensor_size = sizeof(*tensor);

if (tensor->extra) {
GGML_ABORT("Cannot pass tensors with extra");
GGML_ABORT("%s: Cannot pass tensors with extra", __func__);
}

if (tensor->src[0] && tensor->buffer) {
static int first = 1;
if (first) {
GGML_LOG_WARN("Cannot pass tensors with src and buffer\n");
GGML_LOG_WARN("%s: Cannot pass tensors with src and buffer\n", __func__);
first = 0;
}
}
Expand Down
Loading
Loading