diff --git a/src/basic/MAT_MAT_SHARED-Hip.cpp b/src/basic/MAT_MAT_SHARED-Hip.cpp index 9c58d9267..9ccc011da 100644 --- a/src/basic/MAT_MAT_SHARED-Hip.cpp +++ b/src/basic/MAT_MAT_SHARED-Hip.cpp @@ -86,7 +86,7 @@ void MAT_MAT_SHARED::runHipVariantImpl(VariantID vid) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { auto mat_mat_shared_lambda = [=] __device__() { - + for (RepIndex_type extra_rep = 0; extra_rep < extra_kernel_reps; ++extra_rep) { auto outer_y = [&](Index_type by) { auto outer_x = [&](Index_type bx) { MAT_MAT_SHARED_BODY_0(tile_size) @@ -173,6 +173,7 @@ void MAT_MAT_SHARED::runHipVariantImpl(VariantID vid) Index_type by = blockIdx.y; if(by < Ny) outer_y(by); } + } }; RPlaunchHipKernel( (lambda_hip >( res, RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) { - TRIAD_BODY; - }); + for (RepIndex_type extra_rep = 0; extra_rep < extra_kernel_reps; ++extra_rep) { + TRIAD_BODY; + } + + }); } + stopTimer(); } else { diff --git a/src/stream/TRIAD.hpp b/src/stream/TRIAD.hpp index f901314a4..c555db7bb 100644 --- a/src/stream/TRIAD.hpp +++ b/src/stream/TRIAD.hpp @@ -26,6 +26,7 @@ #define TRIAD_BODY \ a[i] = b[i] + alpha * c[i] ; +constexpr int extra_kernel_reps = 20; #include "common/KernelBase.hpp"