Skip to content

Commit bbac6a2

Browse files
ggml: fix CUDA kernel launch configuration for k_compute_batched_ptrs to support large batch sizes (#16744)
* fix k_compute_batched_ptrs
* add backend ops test
* Update ggml/src/ggml-cuda/ggml-cuda.cu

Co-authored-by: Johannes Gäßler <[email protected]>

* reduce the batch size

---------

Co-authored-by: Johannes Gäßler <[email protected]>
1 parent 73a48c9 commit bbac6a2

File tree

2 files changed

+12
-2
lines changed

2 files changed

+12
-2
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1957,8 +1957,15 @@ static void ggml_cuda_mul_mat_batched_cublas_impl(ggml_backend_cuda_context & ct
19571957

19581958
size_t src1_stride_size = sizeof(cuda_t);
19591959

1960-
dim3 block_dims(ne13, ne12);
1961-
k_compute_batched_ptrs<<<1, block_dims, 0, main_stream>>>(
1960+
const int threads_x = 16;
1961+
const int threads_y = 16;
1962+
dim3 block_dims(threads_x, threads_y);
1963+
1964+
dim3 grid_dims(
1965+
(ne13 + threads_x - 1) / threads_x,
1966+
(ne12 + threads_y - 1) / threads_y
1967+
);
1968+
k_compute_batched_ptrs<<<grid_dims, block_dims, 0, main_stream>>>(
19621969
src0_ptr, src1_ptr, dst_t,
19631970
ptrs_src.get(), ptrs_dst.get(),
19641971
ne12, ne13,

tests/test-backend-ops.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6697,6 +6697,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
66976697
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 1024, {3, 2}, {1, 1}));
66986698
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 1024, {3, 2}, {1, 1}));
66996699
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 1024, {3, 2}, {1, 1}));
6700+
6701+
// test cases with large batch size
6702+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, {1536, 1}, {1, 1}));
67006703
}
67016704
}
67026705
for (ggml_type type_a : other_types) {

0 commit comments

Comments
 (0)