ICLDisco · therault · Jun 14, 2023 · Nov 7, 2023 · Nov 7, 2023 · abouteiller
@@ -0,0 +1,36 @@
+spack:
+  definitions:
+  - pkgs:
+    - [email protected]
+    - git
+    - patch
+    - flex
+    - bison
+    - hwloc
+    - unzip
+    - python@3
+    - py-pip
+    - py-pandas
+    - py-matplotlib
+    - py-tables
+    - py-networkx
+    - py-cython
+    - py-wheel
+    - cmake
+    - ninja
+    - [email protected]
+    - openmpi
+    - [email protected]+headers
+    - hip
+
+  view: true
+  specs:
+    - matrix:
+      - [$pkgs]
+  packages:
+    binutils:
+      buildable: false        # never build binutils from source in this environment
+      externals:              # use the copy already installed on the runner instead
+      - spec: [email protected]
+        prefix: /usr
+
@@ -0,0 +1,35 @@
+spack:
+  definitions:
+  - pkgs:
+    - [email protected]
+    - git
+    - patch
+    - flex
+    - bison
+    - hwloc
+    - unzip
+    - python@3
+    - py-pip
+    - py-pandas
+    - py-matplotlib
+    - py-tables
+    - py-networkx
+    - py-cython
+    - py-wheel
+    - cmake
+    - ninja
+    - [email protected]
+    - openmpi
+    - cuda@12
+
+  view: true
+  specs:
+    - matrix:
+      - [$pkgs]
+  packages:
+    binutils:
+      buildable: false        # never build binutils from source in this environment
+      externals:              # use the copy already installed on the runner instead
+      - spec: [email protected]
+        prefix: /usr
+
@@ -19,18 +19,22 @@ env:
 
 jobs:
   debug:
-    runs-on: [self-hosted, Linux]
     strategy:
       fail-fast: false
       matrix:
         build_type : [ Debug ]
         shared_type : [ OFF, ON ]
         profiling : [ ON ]
+        device : [cpu, gpu_nvidia, gpu_amd]
 
-    name: "Type=${{ matrix.build_type }} shared=${{ matrix.shared_type }} profiling=${{matrix.profiling}}"
+    runs-on: ${{matrix.device}}
+
+    name: "Type=${{ matrix.build_type }} device=${{matrix.device}} shared=${{ matrix.shared_type }} profiling=${{matrix.profiling}}"
     env:
       BUILD_DIRECTORY : "${{github.workspace}}/build/${{ matrix.build_type }}/shared_${{matrix.shared_type}}/profile_${{matrix.profiling}}"
       INSTALL_DIRECTORY : "${{github.workspace}}/install/${{ matrix.build_type }}/shared_${{matrix.shared_type}}/profile_${{matrix.profiling}}"
+      RUNNER_ENV : github_runner-${{matrix.device}}
+      DEVICE_ENV : ${{matrix.device}}
       BUILD_CONFIG : >
         -G Ninja
         -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
@@ -40,6 +44,7 @@ jobs:
         -DPARSEC_PROF_TRACE=${{ matrix.profiling }}
         -DMPIEXEC_PREFLAGS='--bind-to;none;--oversubscribe'
         -DCMAKE_INSTALL_PREFIX=$INSTALL_DIRECTORY
+        -DPARSEC_REQUIRE_DEVICE_TEST=${{matrix.device}}
 
     steps:
     - uses: actions/checkout@v2
@@ -104,18 +109,22 @@ jobs:
         path: ${{ env.BUILD_DIRECTORY }}/CMakeFiles/CMakeError.log
   release:
     needs: debug
-    runs-on: [self-hosted, Linux]
     strategy:
       fail-fast: false
       matrix:
         build_type : [ Release ]
         shared_type : [ ON ]
         profiling : [ OFF, ON ]
+        device : [cpu, gpu_nvidia, gpu_amd]
+
+    runs-on: ${{matrix.device}}
 
-    name: "Type=${{ matrix.build_type }} shared=${{ matrix.shared_type }} profiling=${{matrix.profiling}}"
+    name: "Type=${{ matrix.build_type }} device=${{matrix.device}} shared=${{ matrix.shared_type }} profiling=${{matrix.profiling}}"
     env:
       BUILD_DIRECTORY : "${{github.workspace}}/build/${{ matrix.build_type }}/shared_${{matrix.shared_type}}/profile_${{matrix.profiling}}"
       INSTALL_DIRECTORY : "${{github.workspace}}/install/${{ matrix.build_type }}/shared_${{matrix.shared_type}}/profile_${{matrix.profiling}}"
+      RUNNER_ENV : github_runner-${{matrix.device}}
+      DEVICE_ENV : ${{matrix.device}}
       BUILD_CONFIG : >
         -G Ninja
         -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
@@ -124,6 +133,7 @@ jobs:
         -DPARSEC_PROF_TRACE=${{ matrix.profiling }}
         -DMPIEXEC_PREFLAGS='--bind-to;none;--oversubscribe'
         -DCMAKE_INSTALL_PREFIX=$INSTALL_DIRECTORY
+        -DPARSEC_REQUIRE_DEVICE_TEST=${{matrix.device}}
 
     steps:
     - uses: actions/checkout@v2
@@ -159,7 +169,14 @@ jobs:
       # The CMake binaries on the Github Actions machines are (as of this writing) 3.12
       run: |
         source ${{github.workspace}}/.github/CI/spack_setup.sh
-        cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE $BUILD_CONFIG
+        # Pick the accelerator backend flags matching the matrix device; anything else builds CPU-only
+        case "${{matrix.device}}" in
+          gpu_amd)    GPU_BACKEND_FLAGS="-DPARSEC_GPU_WITH_HIP=ON -DPARSEC_GPU_WITH_CUDA=OFF" ;;
+          gpu_nvidia) GPU_BACKEND_FLAGS="-DPARSEC_GPU_WITH_HIP=OFF -DPARSEC_GPU_WITH_CUDA=ON" ;;
+          *)          GPU_BACKEND_FLAGS="-DPARSEC_GPU_WITH_HIP=OFF -DPARSEC_GPU_WITH_CUDA=OFF" ;;
+        esac
+        cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE $BUILD_CONFIG $GPU_BACKEND_FLAGS
+
 
     - name: Build
       working-directory: ${{ env.BUILD_DIRECTORY }}

@@ -59,6 +59,9 @@ if(POLICY CMP0098)
   # CMP0098: New in version 3.17, FindFLEX runs flex in directory CMAKE_CURRENT_BINARY_DIR when executing.
   cmake_policy(SET CMP0098 NEW)
 endif(POLICY CMP0098)
+if(POLICY CMP0104 AND NOT CMAKE_CUDA_ARCHITECTURES) # CMP0104 (CMake 3.18+): CUDA architecture flags are driven by CMAKE_CUDA_ARCHITECTURES
+  set(CMAKE_CUDA_ARCHITECTURES OFF) # unless the user chose architectures, tell CMake not to add any architecture flags itself
+endif()
 
 set(CMAKE_NO_SYSTEM_FROM_IMPORTED True)
 # On OSX only find the Apple frameworks is nothing else is available.
@@ -75,6 +78,11 @@ include(CTest)
 # ccmake tunable parameters
 #####
 
+# CTest related options; the accepted values include the CI matrix device names (cpu, gpu_amd, gpu_nvidia)
+set(PARSEC_REQUIRE_DEVICE_TEST "NONE" CACHE STRING "Make tests fail if specified device support is disabled (default NONE, valid values are HIP or gpu_amd, CUDA or gpu_nvidia, or NONE or cpu). The intended use is to ensure that device tests are passed in CI, and avoid failing silently if there is no GPU on the target system.")
+set_property(CACHE PARSEC_REQUIRE_DEVICE_TEST PROPERTY STRINGS "NONE" "HIP" "CUDA" "cpu" "gpu_amd" "gpu_nvidia")
+mark_as_advanced(PARSEC_REQUIRE_DEVICE_TEST)
+
 ## Check for the support of additional languages and capabilities
 option(SUPPORT_FORTRAN
        "Enable support for Fortran bindings (default ON)" ON)
@@ -123,6 +131,9 @@ mark_as_advanced(BUILD_PARSEC)
 ### Misc options
 option(BUILD_SHARED_LIBS
     "Build shared libraries" ON)
+if(BUILD_SHARED_LIBS) # only when shared libraries are requested
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON) # default targets created after this point to position-independent code
+endif(BUILD_SHARED_LIBS)
 option(BUILD_64bits
   "Build 64 bits mode" ON)
 if(NOT CMAKE_BUILD_TYPE)
@@ -717,16 +728,16 @@ int main(int argc, char *argv[]) {
       if(CMAKE_CUDA_COMPILER)
         enable_language(CUDA)
       endif(CMAKE_CUDA_COMPILER)
+      cmake_pop_check_state()
     endif (CUDAToolkit_FOUND)
     set(PARSEC_HAVE_CU_COMPILER ${CMAKE_CUDA_COMPILER} CACHE BOOL "True if PaRSEC provide support for compiling .cu files")
   endif( PARSEC_GPU_WITH_CUDA )
 
   if( PARSEC_GPU_WITH_HIP )
     # This is kinda ugly but the PATH and HINTS don't get transmitted to sub-dependents
     set(CMAKE_SYSTEM_PREFIX_PATH_save ${CMAKE_SYSTEM_PREFIX_PATH})
-    list(APPEND CMAKE_SYSTEM_PREFIX_PATH /opt/rocm)
-    find_package(HIP 5 QUIET) #quiet because hip-config.cmake is not part of core-cmake and will spam a loud warning when hip/rocm is not installed
-    set(CMAKE_SYSTEM_PREFIX_PATH ${CMAKE_SYSTEM_PREFIX_PATH_save})
+    list(APPEND CMAKE_SYSTEM_PREFIX_PATH $ENV{ROCM_PATH}/lib/cmake)
+    find_package(HIP QUIET) #quiet because hip-config.cmake is not part of core-cmake and will spam a loud warning when hip/rocm is not installed
     if(HIP_FOUND AND PARSEC_HAVE_CUDA)
       # the underlying reason is that the generated ptg code cannot include at the same time
       # cuda_runtime.h and hip_runtime.h, so we need to modify the dev_cuda.h to not expose any
@@ -738,6 +749,8 @@ int main(int argc, char *argv[]) {
       get_target_property(extra_hip_libs hip::host INTERFACE_LINK_LIBRARIES)
       list(APPEND EXTRA_LIBS ${extra_hip_libs})
       set(HIP_NOT_CUDA_FOUND TRUE)
+      enable_language(HIP)
     else()
       set(HIP_NOT_CUDA_FOUND FALSE)
     endif()
+    set(CMAKE_SYSTEM_PREFIX_PATH ${CMAKE_SYSTEM_PREFIX_PATH_save}) # restore on every path (HIP found or not) so the ROCm prefix does not leak into later find_package calls
@@ -747,8 +760,8 @@ int main(int argc, char *argv[]) {
   if( PARSEC_GPU_WITH_LEVEL_ZERO )
     find_package(level-zero)
     find_package(DPCPP)
-    set(PARSEC_HAVE_LEVEL_ZERO ${LEVEL_ZERO_FOUND} CACHE BOOL "True if PaRSEC provide support for Intel level-zero")
     if (LEVEL_ZERO_FOUND AND PARSEC_HAVE_DPCPP)
+      # PARSEC_HAVE_LEVEL_ZERO is cached only here, i.e. once both level-zero and DPC++ are available
       include_directories("${LEVEL_ZERO_INCLUDE_DIR}/level_zero/")
       set(PARSEC_HAVE_LEVEL_ZERO ${LEVEL_ZERO_FOUND} CACHE BOOL "True if PaRSEC provide support for Intel Level Zero")
       message(STATUS "Found Intel level-zero ${LEVEL_ZERO_VERSION} in -I${LEVEL_ZERO_INCLUDE_DIR} / -L${LEVEL_ZERO_LIBRARY_DIR}")
@@ -939,6 +952,7 @@ add_subdirectory(parsec)
 # Add dependency to Level-Zero if it is enabled
 #
 if(PARSEC_HAVE_LEVEL_ZERO)
+    message(STATUS "parsec depends on ze_loader")
     target_link_libraries(parsec PRIVATE level_zero::ze_loader)
 endif(PARSEC_HAVE_LEVEL_ZERO)
 

@@ -41,6 +41,9 @@
 #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
 #include "parsec/mca/device/cuda/device_cuda.h"
 #endif  /* defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) */
+#if defined(PARSEC_HAVE_DEV_HIP_SUPPORT)
+#include "parsec/mca/device/hip/device_hip.h"
+#endif  /* defined(PARSEC_HAVE_DEV_HIP_SUPPORT) */
 
 #include "parsec/mca/mca_repository.h"
 #include "parsec/constants.h"
@@ -1491,9 +1494,8 @@ parsec_dtd_startup(parsec_context_t *context,
         parsec_device_module_t *device = parsec_mca_device_get(_i);
         if( NULL == device ) continue;
         if( !(tp->devices_index_mask & (1 << device->device_index))) continue;  /* not supported */
-        // If CUDA is enabled, let the CUDA device activated for this
-        // taskpool.
-        if( PARSEC_DEV_CUDA == device->type ) continue;
+        // If a GPU is enabled, let the device be activated for this taskpool.
+        if( PARSEC_DEV_IS_GPU(device->type) ) continue;
         if( NULL != device->taskpool_register )
             if( PARSEC_SUCCESS !=
                 device->taskpool_register(device, (parsec_taskpool_t *)tp)) {
@@ -2327,7 +2329,7 @@ static parsec_hook_return_t parsec_dtd_gpu_task_submit(parsec_execution_stream_t
     }
 
     parsec_device_module_t *device = parsec_mca_device_get(dev_index);
-    assert(NULL != device);
+    assert(NULL != device);
     /* We already know the device is a GPU device from the test above */
     gpu_task->stage_in  = parsec_default_gpu_stage_in;
     gpu_task->stage_out = parsec_default_gpu_stage_out;
@@ -2400,7 +2402,7 @@ int parsec_dtd_task_class_add_chore(parsec_taskpool_t *tp,
     }
 
     incarnations[i].type = device_type;
-    if(PARSEC_DEV_CUDA == device_type) {
+    if(PARSEC_DEV_IS_GPU(device_type)) {
         incarnations[i].hook = parsec_dtd_gpu_task_submit;
         dtd_tc->gpu_func_ptr = (parsec_advance_task_function_t)function;
     }
@@ -2998,11 +3000,11 @@ parsec_insert_dtd_task(parsec_task_t *__this_task)
                         FLOW_OF(last_user.task, last_user.flow_index)->flags &= ~RELEASE_OWNERSHIP_SPECIAL;
 
                         if( this_task->super.data[flow_index].data_in != NULL) {
-/* #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) */
+/* #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) */
 /*                            parsec_atomic_lock(&this_task->super.data[flow_index].data_in->original->lock); */
 /* #endif */
                             (void)parsec_atomic_fetch_dec_int32(&this_task->super.data[flow_index].data_in->readers);
-/* #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) */
+/* #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) */
 /*                            parsec_atomic_unlock(&this_task->super.data[flow_index].data_in->original->lock); */
 /* #endif */
                         }
@@ -3287,8 +3289,8 @@ __parsec_dtd_taskpool_create_task(parsec_taskpool_t *tp,
 
             __parsec_chore_t **incarnations = (__parsec_chore_t **)&tc->incarnations;
             (*incarnations)[0].type = device_type;
-            if( device_type == PARSEC_DEV_CUDA ) {
-                /* Special case for CUDA: we need an intermediate */
+            if( PARSEC_DEV_IS_GPU(device_type) ) {
+                /* Special case for GPUs: we need an intermediate */
                 (*incarnations)[0].hook = parsec_dtd_gpu_task_submit;
                 dtd_tc->gpu_func_ptr = (parsec_advance_task_function_t)fpointer;
             }

@@ -21,10 +21,6 @@
 #include "parsec/execution_stream.h"
 #include "parsec/mca/device/device_gpu.h"
 
-#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
-#include "parsec/mca/device/cuda/device_cuda.h"
-#endif /* PARSEC_HAVE_DEV_CUDA_SUPPORT */
-
 BEGIN_C_DECLS
 
 #define PARSEC_DTD_NB_TASK_CLASSES  25 /*< Max number of task classes allowed */

@@ -4,6 +4,9 @@
 #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h"
 #include "parsec/interfaces/dtd/insert_function_internal.h"
 #include "tests/tests_data.h"
+#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
+#include "parsec/mca/device/cuda/device_cuda_internal.h"
+#endif  /* defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) */
 
 #if defined(PARSEC_HAVE_MPI)
 #include <mpi.h>

@@ -9,6 +9,9 @@
 #include "tests/tests_timing.h"
 #include "parsec/interfaces/dtd/insert_function_internal.h"
 #include "parsec/utils/debug.h"
+#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
+#include "parsec/mca/device/cuda/device_cuda_internal.h"
+#endif
 
 #if defined(PARSEC_HAVE_STRING_H)
 #include <string.h>

@@ -1,5 +1,5 @@
 add_subdirectory(scheduling)
-add_Subdirectory(cuda)
+add_subdirectory(gpu)
 
 if( MPI_C_FOUND )
   parsec_addtest_executable(C multichain)

@@ -1,2 +1,2 @@
 include(runtime/scheduling/Testings.cmake)
-include(runtime/cuda/Testings.cmake)
+include(runtime/gpu/Testings.cmake)
@@ -25,4 +25,24 @@ if(PARSEC_HAVE_CUDA)
   parsec_addtest_executable(C testing_get_best_device SOURCES "testing_get_best_device.c")
   target_include_directories(testing_get_best_device PRIVATE $<$<NOT:${PARSEC_BUILD_INPLACE}>:${CMAKE_CURRENT_SOURCE_DIR}>)
   target_ptg_sources(testing_get_best_device PRIVATE "get_best_device_check.jdf")
+
+  if(CMAKE_CUDA_COMPILER)
+    set_source_files_properties(ping_kernel.cu PROPERTIES LANGUAGE CUDA)
+    parsec_addtest_executable(C dtd_pingpong SOURCES dtd_pingpong.c)
+    target_sources(dtd_pingpong PRIVATE ping_kernel.cu)
+
+    parsec_addtest_executable(C ptg_pingpong SOURCES ping_kernel.cu)
+    target_ptg_sources(ptg_pingpong PRIVATE "ptg_pingpong.jdf")
+  endif(CMAKE_CUDA_COMPILER)
 endif(PARSEC_HAVE_CUDA)
+
+if(PARSEC_HAVE_HIP) # HIP flavor of the dtd_pingpong / ptg_pingpong tests
+  if(CMAKE_HIP_COMPILER) # the kernel source is compiled as HIP, so a HIP compiler must be configured
+    include(ParsecCompilePTG) # presumably provides target_ptg_sources used below -- TODO confirm
+    set_source_files_properties(ping_kernel.hip.c PROPERTIES LANGUAGE HIP) # explicitly mark the .c kernel file as HIP source
+    parsec_addtest_executable(C dtd_pingpong SOURCES dtd_pingpong.c ping_kernel.hip.c)
+
+    parsec_addtest_executable(C ptg_pingpong SOURCES ping_kernel.hip.c)
+    target_ptg_sources(ptg_pingpong PRIVATE "ptg_pingpong.jdf")
+  endif(CMAKE_HIP_COMPILER)
+endif(PARSEC_HAVE_HIP)