diff --git a/Intro_Tutorial/lessons/06_raja_umpire_host_device/06_raja_umpire_host_device.cpp b/Intro_Tutorial/lessons/06_raja_umpire_host_device/06_raja_umpire_host_device.cpp index 9d68433..f06ab5d 100644 --- a/Intro_Tutorial/lessons/06_raja_umpire_host_device/06_raja_umpire_host_device.cpp +++ b/Intro_Tutorial/lessons/06_raja_umpire_host_device/06_raja_umpire_host_device.cpp @@ -29,12 +29,11 @@ int main() a_h = static_cast<double*>(host_allocator.allocate(N*sizeof(double))); b_h = static_cast<double*>(host_allocator.allocate(N*sizeof(double))); - //TODO: fill in the forall statement with the CUDA execution policy. - //TODO: and its block size argument. Then be sure to use RAJA_DEVICE - RAJA::forall< ????? < ?????> >( - RAJA::TypedRangeSegment<int>(0, N), [=] ????? (int i) { - a[i] = 1.0; - b[i] = 1.0; + //TODO: fill in the forall statement with the sequential execution policy. + RAJA::forall< ????? >( + RAJA::TypedRangeSegment<int>(0, N), [=] (int i) { + a_h[i] = 1.0; + b_h[i] = 1.0; } ); diff --git a/Intro_Tutorial/lessons/06_raja_umpire_host_device/06_raja_umpire_host_device_solution.cpp b/Intro_Tutorial/lessons/06_raja_umpire_host_device/06_raja_umpire_host_device_solution.cpp index 686ef91..a38c8a4 100644 --- a/Intro_Tutorial/lessons/06_raja_umpire_host_device/06_raja_umpire_host_device_solution.cpp +++ b/Intro_Tutorial/lessons/06_raja_umpire_host_device/06_raja_umpire_host_device_solution.cpp @@ -28,10 +28,9 @@ int main() a_h = static_cast<double*>(host_allocator.allocate(N*sizeof(double))); b_h = static_cast<double*>(host_allocator.allocate(N*sizeof(double))); - //TODO: fill in the forall statement with the CUDA execution policy. - //TODO: and its block size argument. Then be sure to use RAJA_DEVICE - RAJA::forall< RAJA::cuda_exec<CUDA_BLOCK_SIZE>>( - RAJA::TypedRangeSegment<int>(0, N), [=] RAJA_DEVICE (int i) { + //TODO: fill in the forall statement with the sequential execution policy. 
+ RAJA::forall<RAJA::seq_exec>( + RAJA::TypedRangeSegment<int>(0, N), [=] (int i) { a_h[i] = 1.0; b_h[i] = 1.0; } diff --git a/Intro_Tutorial/lessons/06_raja_umpire_host_device/README.md b/Intro_Tutorial/lessons/06_raja_umpire_host_device/README.md index 08ffa7a..c8965a8 100644 --- a/Intro_Tutorial/lessons/06_raja_umpire_host_device/README.md +++ b/Intro_Tutorial/lessons/06_raja_umpire_host_device/README.md @@ -37,7 +37,7 @@ memory resources at the bottom of this README! Now, let's learn how to use Umpire's operations to copy data between CPU and GPU memory in a portable way, using Umpire's memory resources. -In `07_raja_umpire_host_device.cpp`, we create an allocator for the GPU with: +In `06_raja_umpire_host_device.cpp`, we create an allocator for the GPU with: ``` auto allocator = rm.getAllocator("DEVICE"); ``` @@ -66,7 +66,7 @@ void umpire::ResourceManager::copy (void* dst_ptr, void * src_ptr, std::size_t s *Note:* The destination is the first argument. -In the file `07_raja_umpire_host_device.cpp`, there is a `TODO` comment where you should insert two copy +In the file `06_raja_umpire_host_device.cpp`, there is a `TODO` comment where you should insert two copy calls to copy data from the CPU memory to the DEVICE memory. You will also find that we are adjusting the `RAJA::forall` to now work on the GPU. @@ -88,8 +88,8 @@ be sure to check out the links at the bottom of this README. When you are done editing the file, compile and run it: ``` -$ make 07_raja_umpire_host_device -$ ./bin/07_raja_umpire_host_device +$ make 06_raja_umpire_host_device +$ ./bin/06_raja_umpire_host_device ``` Want to learn more about Umpire memory resources? Check out the list below! 
You can also learn more by going to our online documentation on diff --git a/README.md b/README.md index 64aeec6..c30606d 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ cmake -DCMAKE_CXX_COMPILER=g++ -DBLT_CXX_STD=c++17 -DENABLE_CUDA=Off -DENABLE_OP module load cmake/3.23.1 module load gcc/8.3.1 module load cuda/11.2.0 -cmake -DBLT_CXX_STD=c++14 -DENABLE_CUDA=On -DENABLE_OPENMP=On -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-11.2.0/bin/nvcc -DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-11.2.0 -DCMAKE_CUDA_FLAGS=--extended-lambda -DRAJA_ENABLE_EXERCISES=Off -DCMAKE_BUILD_TYPE=Release .. +cmake -DBLT_CXX_STD=c++17 -DENABLE_CUDA=On -DENABLE_OPENMP=On -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-11.2.0/bin/nvcc -DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-11.2.0 -DCMAKE_CUDA_FLAGS=--extended-lambda -DRAJA_ENABLE_EXERCISES=Off -DCMAKE_BUILD_TYPE=Release .. ``` License