metal : add perf-metal tool + fix build
ggerganov committed Oct 1, 2024
1 parent b302031 commit 6b30c17
Showing 12 changed files with 163 additions and 43 deletions.
4 changes: 4 additions & 0 deletions examples/CMakeLists.txt
@@ -25,3 +25,7 @@ add_subdirectory(sam)
 add_subdirectory(yolo)
 add_subdirectory(simple)
 add_subdirectory(magika)
+
+if (GGML_METAL)
+    add_subdirectory(perf-metal)
+endif()
6 changes: 0 additions & 6 deletions examples/gpt-2/main-backend.cpp
@@ -758,12 +758,6 @@ bool gpt2_eval(
         ggml_backend_cpu_set_n_threads(model.backend, n_threads);
     }
 
-#ifdef GGML_USE_METAL
-    if (ggml_backend_is_metal(model.backend)) {
-        ggml_backend_metal_set_n_cb(model.backend, n_threads);
-    }
-#endif
-
     // run the computation
     ggml_backend_graph_compute(model.backend, gf);
 
5 changes: 0 additions & 5 deletions examples/gpt-2/main-batched.cpp
@@ -942,11 +942,6 @@ int gpt2_decode(
     if (ggml_backend_is_cpu(model.backend)) {
         ggml_backend_cpu_set_n_threads(model.backend, n_threads);
     }
-#ifdef GGML_USE_METAL
-    if (ggml_backend_is_metal(model.backend)) {
-        ggml_backend_metal_set_n_cb(model.backend, n_threads);
-    }
-#endif
     ggml_backend_graph_compute(model.backend, gf);
 
     //if (n_past%100 == 0) {
2 changes: 0 additions & 2 deletions examples/gpt-2/main-sched.cpp
@@ -126,8 +126,6 @@ void init_backends(gpt2_model & model, const gpt_params & params) {
        gpu_backend = ggml_backend_metal_init();
        if (!gpu_backend) {
            fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
-        } else {
-            ggml_backend_metal_set_n_cb(gpu_backend, params.n_threads);
        }
    }
 #endif
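
Across the examples and tests, the call sites now follow one pattern: the Metal-specific ggml_backend_metal_set_n_cb() call is dropped, and only the CPU backend still receives a thread-count hint before dispatch. A minimal sketch of the resulting pattern (run_graph is a hypothetical helper name; the backend and graph names follow the diffs above):

static void run_graph(ggml_backend_t backend, struct ggml_cgraph * gf, int n_threads) {
    // only the CPU backend needs a thread-count hint; Metal and the other
    // backends are driven entirely through the generic backend interface
    if (ggml_backend_is_cpu(backend)) {
        ggml_backend_cpu_set_n_threads(backend, n_threads);
    }
    ggml_backend_graph_compute(backend, gf);
}
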
7 changes: 7 additions & 0 deletions examples/perf-metal/CMakeLists.txt
@@ -0,0 +1,7 @@
#
# perf-metal

set(TEST_TARGET perf-metal)
add_executable(${TEST_TARGET} perf-metal.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml)

152 changes: 152 additions & 0 deletions examples/perf-metal/perf-metal.cpp
@@ -0,0 +1,152 @@
// basic tool to experiment with the Metal backend
//
// 1. Get GPU trace of a dummy graph:
//
// rm -rf /tmp/perf-metal.gputrace
// make -j perf-metal && METAL_CAPTURE_ENABLED=1 ./bin/perf-metal
// open /tmp/perf-metal.gputrace
//
// https://github.com/ggerganov/llama.cpp/issues/9507
//
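// 2. Adjust the workload (both arguments are optional):
//
//   ./bin/perf-metal [n_op] [n_iter]
//
//   n_op   - number of mul_mat/scale pairs in the graph (default: 1024)
//   n_iter - number of timed graph computations (default: 128)
//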

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-metal.h"

#include <cstdio>
#include <cstdlib> // std::atoi
#include <chrono>  // for the (commented-out) sleep_for calls below
#include <thread>
#include <vector>

int main(int argc, char ** argv) {
    int n_op = 1024;
    int n_iter = 128;

    if (argc > 1) {
        n_op = std::atoi(argv[1]);
    }

    if (argc > 2) {
        n_iter = std::atoi(argv[2]);
    }

    printf("%s: n_op = %d, n_iter = %d\n", __func__, n_op, n_iter);

    const int ne00 = 8;
    const int ne01 = 8;
    const int ne11 = 8;

    std::vector<float> data0(ne00*ne01, 1.0f);
    std::vector<float> data1(ne00*ne11, 1.0f/ne00); // sized to match t1 (ne00 x ne11)

    ggml_backend_t backend = ggml_backend_metal_init();
    if (!backend) {
        fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
        return 1;
    }

    const size_t ctx_size = 2 * ggml_tensor_overhead();

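    // with .no_alloc = true the context below only holds tensor metadata;
    // the tensor data itself is placed in a Metal backend buffer afterwards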
    struct ggml_init_params params = {
        /*.mem_size   =*/ ctx_size,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * t0 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne00, ne01);
    struct ggml_tensor * t1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne00, ne11);

    ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx, backend);

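    // upload the host-side data into the backend buffer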
    ggml_backend_tensor_set(t0, data0.data(), 0, ggml_nbytes(t0));
    ggml_backend_tensor_set(t1, data1.data(), 0, ggml_nbytes(t1));

    struct ggml_cgraph * gf = NULL;

    struct ggml_context * ctx_cgraph = NULL;

    // create a dummy compute graph:
    //
    //   x = mul_mat(t0, t1)
    //   x = x * 1.0f
    //   x = mul_mat(x, t1)
    //   x = x * 1.0f
    //   ... repeat n_op times ...
    //   x = x * 42.0f
    //
    {
        struct ggml_init_params params0 = {
            /*.mem_size   =*/ 4*n_op*ggml_tensor_overhead() + ggml_graph_overhead(),
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ true,
        };
        ctx_cgraph = ggml_init(params0);

        gf = ggml_new_graph_custom(ctx_cgraph, 4*n_op, false);
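        // note: each mul_mat/scale repetition adds two nodes, so a capacity
        // of 4*n_op leaves comfortable headroom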

        struct ggml_tensor * cur = ggml_mul_mat(ctx_cgraph, t0, t1);
        cur = ggml_scale(ctx_cgraph, cur, 1.0f);

        for (int i = 0; i < n_op - 1; i++) {
            cur = ggml_mul_mat(ctx_cgraph, cur, t1);
            cur = ggml_scale(ctx_cgraph, cur, 1.0f);
        }

        cur = ggml_scale(ctx_cgraph, cur, 42.0f);

        ggml_build_forward_expand(gf, cur);
    }

    printf("%s: graph nodes = %d\n", __func__, ggml_graph_n_nodes(gf));

    ggml_gallocr_t allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(backend));
    ggml_gallocr_alloc_graph(allocr, gf);
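    // all intermediate tensors of the graph are now backed by memory from the
    // backend's default buffer type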

    {
        // warm-up
        ggml_backend_graph_compute(backend, gf);

        const int64_t t_start = ggml_time_us();

        for (int iter = 0; iter < n_iter; iter++) {
            ggml_backend_graph_compute(backend, gf);
        }

        const int64_t t_end = ggml_time_us();

        // capture a GPU trace of each of the next three graph computations
        ggml_backend_metal_capture_next_compute(backend);
        ggml_backend_graph_compute(backend, gf);
        //std::this_thread::sleep_for(std::chrono::milliseconds(1000)); // NOTE: these intervals do not appear in the Xcode trace!
        ggml_backend_metal_capture_next_compute(backend);
        ggml_backend_graph_compute(backend, gf);
        //std::this_thread::sleep_for(std::chrono::milliseconds(1000)); // NOTE: these intervals do not appear in the Xcode trace!
        ggml_backend_metal_capture_next_compute(backend);
        ggml_backend_graph_compute(backend, gf);

        printf("%s: time = %f ms\n", __func__, (t_end - t_start) / 1000.0 / n_iter);
    }

    {
        struct ggml_tensor * res = ggml_graph_node(gf, -1);

        std::vector<float> data(res->ne[0] * res->ne[1], 0.0f);

        ggml_backend_tensor_get(res, data.data(), 0, ggml_nbytes(res));

        for (int i1 = 0; i1 < res->ne[1]; i1++) {
            for (int i0 = 0; i0 < res->ne[0]; i0++) {
                printf("%f ", data[i1*res->ne[0] + i0]);
            }
            printf("\n");
        }
    }

    ggml_free(ctx_cgraph);
    ggml_gallocr_free(allocr);
    ggml_free(ctx);
    ggml_backend_buffer_free(buffer);
    ggml_backend_free(backend);

    return 0;
}
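
A quick sanity check on the printed result: t0 is all ones and t1 is all 1/ne00, so every dot product in the graph evaluates to ne00 * 1.0 * (1/ne00) = 1.0. Each mul_mat/scale pair therefore leaves the values unchanged, and after the final ggml_scale by 42.0f the tool should print an 8x8 block of 42.000000.
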
6 changes: 0 additions & 6 deletions examples/simple/simple-backend.cpp
@@ -131,12 +131,6 @@ struct ggml_tensor * compute(const simple_model & model, ggml_gallocr_t allocr)
        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
    }
 
-#ifdef GGML_USE_METAL
-    if (ggml_backend_is_metal(model.backend)) {
-        ggml_backend_metal_set_n_cb(model.backend, n_threads);
-    }
-#endif
-
    ggml_backend_graph_compute(model.backend, gf);
 
    // in this case, the output tensor is the last one in the graph
1 change: 0 additions & 1 deletion src/ggml-metal.m
@@ -3042,7 +3042,6 @@ static enum ggml_status ggml_metal_graph_compute(
            NSError * error = nil;
            if (![[MTLCaptureManager sharedCaptureManager] startCaptureWithDescriptor:descriptor error:&error]) {
                GGML_METAL_LOG_ERROR("%s: error: unable to start capture '%s'\n", __func__, [[error localizedDescription] UTF8String]);
-                GGML_ABORT("capture failed");
            } else {
                [ctx->capture_scope beginScope];
                ctx->capture_started = true;
6 changes: 0 additions & 6 deletions tests/test-conv-transpose-1d.cpp
@@ -377,12 +377,6 @@ struct ggml_cgraph* compute_graph(const test_model & model, ggml_gallocr_t alloc
        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
    }
 
-#ifdef GGML_USE_METAL
-    if (ggml_backend_is_metal(model.backend)) {
-        ggml_backend_metal_set_n_cb(model.backend, n_threads);
-    }
-#endif
-
    ggml_backend_graph_compute(model.backend, gf);
 
    //ggml_graph_print(gf);
6 changes: 0 additions & 6 deletions tests/test-conv1d.cpp
@@ -179,12 +179,6 @@ struct ggml_cgraph* compute_graph(const test_model & model, ggml_gallocr_t alloc
        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
    }
 
-#ifdef GGML_USE_METAL
-    if (ggml_backend_is_metal(model.backend)) {
-        ggml_backend_metal_set_n_cb(model.backend, n_threads);
-    }
-#endif
-
    ggml_backend_graph_compute(model.backend, gf);
 
    //ggml_graph_print(gf);
6 changes: 0 additions & 6 deletions tests/test-conv2d.cpp
@@ -182,12 +182,6 @@ struct ggml_cgraph * compute_graph(const test_model & model, ggml_gallocr_t allo
        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
    }
 
-#ifdef GGML_USE_METAL
-    if (ggml_backend_is_metal(model.backend)) {
-        ggml_backend_metal_set_n_cb(model.backend, n_threads);
-    }
-#endif
-
    ggml_backend_graph_compute(model.backend, gf);
 
    //ggml_graph_print(gf);
5 changes: 0 additions & 5 deletions tests/test-mul-mat.cpp
@@ -150,11 +150,6 @@ struct ggml_tensor* compute(const test_model & model, ggml_gallocr_t allocr) {
        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
    }
 
-#ifdef GGML_USE_METAL
-    if (ggml_backend_is_metal(model.backend)) {
-        ggml_backend_metal_set_n_cb(model.backend, n_threads);
-    }
-#endif
 
    ggml_backend_graph_compute(model.backend, gf);
 
