Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
36 changes: 36 additions & 0 deletions .github/CI/github_runner-gpu_amd.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Spack environment for the gpu_amd CI runner (.github/CI/github_runner-gpu_amd.yaml).
# NOTE(review): this view has lost the YAML indentation, and several version specs show up
# as "[email protected]" (looks like an extraction artifact) - restore both from the
# repository copy before using this file.
spack:
definitions:
# Package list, referenced further down as $pkgs.
- pkgs:
- [email protected]
- git
- patch
- flex
- bison
- hwloc
- unzip
- python@3
- py-pip
- py-pandas
- py-matplotlib
- py-tables
- py-networkx
- py-cython
- py-wheel
- cmake
- ninja
- [email protected]
- openmpi
# AMD-specific entries of this environment.
- [email protected]+headers
- hip

view: true
# The specs to install are the expansion of the $pkgs definition.
specs:
- matrix:
- [$pkgs]
packages:
# binutils is declared as an external found under /usr and is marked non-buildable.
binutils:
buildable: false
externals:
- spec: [email protected]
prefix: /usr

35 changes: 35 additions & 0 deletions .github/CI/github_runner-gpu_nvidia.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Spack environment for the gpu_nvidia CI runner (.github/CI/github_runner-gpu_nvidia.yaml).
# NOTE(review): this view has lost the YAML indentation, and several version specs show up
# as "[email protected]" (looks like an extraction artifact) - restore both from the
# repository copy before using this file.
spack:
definitions:
# Package list, referenced further down as $pkgs.
- pkgs:
- [email protected]
- git
- patch
- flex
- bison
- hwloc
- unzip
- python@3
- py-pip
- py-pandas
- py-matplotlib
- py-tables
- py-networkx
- py-cython
- py-wheel
- cmake
- ninja
- [email protected]
- openmpi
# NVIDIA-specific entry of this environment.
- cuda@12

view: true
# The specs to install are the expansion of the $pkgs definition.
specs:
- matrix:
- [$pkgs]
packages:
# binutils is declared as an external found under /usr and is marked non-buildable.
binutils:
buildable: false
externals:
- spec: [email protected]
prefix: /usr

27 changes: 22 additions & 5 deletions .github/workflows/build_cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,22 @@ env:

jobs:
debug:
runs-on: [self-hosted, Linux]
strategy:
fail-fast: false
matrix:
build_type : [ Debug ]
shared_type : [ OFF, ON ]
profiling : [ ON ]
device : [cpu, gpu_nvidia, gpu_amd]

name: "Type=${{ matrix.build_type }} shared=${{ matrix.shared_type }} profiling=${{matrix.profiling}}"
runs-on: ${{matrix.device}}

name: "Type=${{ matrix.build_type }} device=${{matrix.device}} shared=${{ matrix.shared_type }} profiling=${{matrix.profiling}}"
env:
BUILD_DIRECTORY : "${{github.workspace}}/build/${{ matrix.build_type }}/shared_${{matrix.shared_type}}/profile_${{matrix.profiling}}"
INSTALL_DIRECTORY : "${{github.workspace}}/install/${{ matrix.build_type }}/shared_${{matrix.shared_type}}/profile_${{matrix.profiling}}"
RUNNER_ENV : github_runner-${{matrix.device}}
DEVICE_ENV : ${{matrix.device}}
BUILD_CONFIG : >
-G Ninja
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
Expand All @@ -40,6 +44,7 @@ jobs:
-DPARSEC_PROF_TRACE=${{ matrix.profiling }}
-DMPIEXEC_PREFLAGS='--bind-to;none;--oversubscribe'
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIRECTORY
-DPARSEC_REQUIRE_DEVICE_TEST=${{matrix.device}}

steps:
- uses: actions/checkout@v2
Expand Down Expand Up @@ -104,18 +109,22 @@ jobs:
path: ${{ env.BUILD_DIRECTORY }}/CMakeFiles/CMakeError.log
release:
needs: debug
runs-on: [self-hosted, Linux]
strategy:
fail-fast: false
matrix:
build_type : [ Release ]
shared_type : [ ON ]
profiling : [ OFF, ON ]
device : [cpu, gpu_nvidia, gpu_amd]

runs-on: ${{matrix.device}}

name: "Type=${{ matrix.build_type }} shared=${{ matrix.shared_type }} profiling=${{matrix.profiling}}"
name: "Type=${{ matrix.build_type }} device=${{matrix.device}} shared=${{ matrix.shared_type }} profiling=${{matrix.profiling}}"
env:
BUILD_DIRECTORY : "${{github.workspace}}/build/${{ matrix.build_type }}/shared_${{matrix.shared_type}}/profile_${{matrix.profiling}}"
INSTALL_DIRECTORY : "${{github.workspace}}/install/${{ matrix.build_type }}/shared_${{matrix.shared_type}}/profile_${{matrix.profiling}}"
RUNNER_ENV : github_runner-${{matrix.device}}
DEVICE_ENV : ${{matrix.device}}
BUILD_CONFIG : >
-G Ninja
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
Expand All @@ -124,6 +133,7 @@ jobs:
-DPARSEC_PROF_TRACE=${{ matrix.profiling }}
-DMPIEXEC_PREFLAGS='--bind-to;none;--oversubscribe'
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIRECTORY
-DPARSEC_REQUIRE_DEVICE_TEST=${{matrix.device}}

steps:
- uses: actions/checkout@v2
Expand Down Expand Up @@ -159,7 +169,14 @@ jobs:
# The CMake binaries on the Github Actions machines are (as of this writing) 3.12
run: |
source ${{github.workspace}}/.github/CI/spack_setup.sh
cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE $BUILD_CONFIG
# Pick the GPU backend switches matching the runner's device label, then configure once.
# Anything that is neither gpu_amd nor gpu_nvidia is configured with both GPU backends off.
case "${{matrix.device}}" in
  gpu_amd)
    gpu_backend_flags="-DPARSEC_GPU_WITH_HIP=ON -DPARSEC_GPU_WITH_CUDA=OFF"
    ;;
  gpu_nvidia)
    gpu_backend_flags="-DPARSEC_GPU_WITH_HIP=OFF -DPARSEC_GPU_WITH_CUDA=ON"
    ;;
  *)
    gpu_backend_flags="-DPARSEC_GPU_WITH_HIP=OFF -DPARSEC_GPU_WITH_CUDA=OFF"
    ;;
esac
# $gpu_backend_flags is intentionally unquoted so it splits into two separate -D arguments.
cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE $BUILD_CONFIG $gpu_backend_flags


- name: Build
working-directory: ${{ env.BUILD_DIRECTORY }}
Expand Down
22 changes: 18 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ if(POLICY CMP0098)
# CMP0098: New in version 3.17, FindFLEX runs flex in directory CMAKE_CURRENT_BINARY_DIR when executing.
cmake_policy(SET CMP0098 NEW)
endif(POLICY CMP0098)
# When this CMake knows policy CMP0104 (the CUDA_ARCHITECTURES policy) and no
# CUDA_ARCHITECTURES value is set, default the variable to OFF.
# NOTE(review): CMake initializes the CUDA_ARCHITECTURES target property from
# CMAKE_CUDA_ARCHITECTURES, not from a plain CUDA_ARCHITECTURES variable - confirm whether
# CMAKE_CUDA_ARCHITECTURES was intended here, or whether this variable is consumed
# elsewhere in the project.
if(POLICY CMP0104 AND NOT CUDA_ARCHITECTURES)
set(CUDA_ARCHITECTURES OFF)
endif()

set(CMAKE_NO_SYSTEM_FROM_IMPORTED True)
# On OSX only find the Apple frameworks if nothing else is available.
Expand All @@ -75,6 +78,11 @@ include(CTest)
# ccmake tunable parameters
#####

# CTest related options
#
# PARSEC_REQUIRE_DEVICE_TEST names the device whose support must be enabled; it is an
# advanced cache entry defaulting to "NONE". The accepted spellings are the ones listed in
# the STRINGS property below; "cpu", "gpu_amd" and "gpu_nvidia" match the device labels
# used by the CI workflow, which passes its matrix device name directly to this option.
set(PARSEC_REQUIRE_DEVICE_TEST "NONE" CACHE STRING "Make tests fail if specified device support is disabled (default NONE, valid values are HIP or gpu_amd, CUDA or gpu_nvidia, or NONE or cpu). The intended use is to ensure that device tests are passed in CI, and avoid failing silently if there is no GPU on the target system.")
# The STRINGS property only feeds the drop-down in cmake-gui/ccmake; it does not validate the value.
set_property(CACHE PARSEC_REQUIRE_DEVICE_TEST PROPERTY STRINGS "NONE" "HIP" "CUDA" "cpu" "gpu_amd" "gpu_nvidia")
mark_as_advanced(PARSEC_REQUIRE_DEVICE_TEST)

## Check for the support of additional languages and capabilities
option(SUPPORT_FORTRAN
"Enable support for Fortran bindings (default ON)" ON)
Expand Down Expand Up @@ -123,6 +131,9 @@ mark_as_advanced(BUILD_PARSEC)
### Misc options
option(BUILD_SHARED_LIBS
"Build shared libraries" ON)
# For shared builds, turn on CMAKE_POSITION_INDEPENDENT_CODE, the default CMake uses for
# the POSITION_INDEPENDENT_CODE property of targets created after this point.
if(BUILD_SHARED_LIBS)
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
endif()
option(BUILD_64bits
"Build 64 bits mode" ON)
if(NOT CMAKE_BUILD_TYPE)
Expand Down Expand Up @@ -717,16 +728,16 @@ int main(int argc, char *argv[]) {
if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
endif(CMAKE_CUDA_COMPILER)
cmake_pop_check_state()
endif (CUDAToolkit_FOUND)
set(PARSEC_HAVE_CU_COMPILER ${CMAKE_CUDA_COMPILER} CACHE BOOL "True if PaRSEC provide support for compiling .cu files")
endif( PARSEC_GPU_WITH_CUDA )

if( PARSEC_GPU_WITH_HIP )
# This is kinda ugly but the PATH and HINTS don't get transmitted to sub-dependents
set(CMAKE_SYSTEM_PREFIX_PATH_save ${CMAKE_SYSTEM_PREFIX_PATH})
list(APPEND CMAKE_SYSTEM_PREFIX_PATH /opt/rocm)
find_package(HIP 5 QUIET) #quiet because hip-config.cmake is not part of core-cmake and will spam a loud warning when hip/rocm is not installed
set(CMAKE_SYSTEM_PREFIX_PATH ${CMAKE_SYSTEM_PREFIX_PATH_save})
list(APPEND CMAKE_SYSTEM_PREFIX_PATH $ENV{ROCM_PATH}/lib/cmake)
find_package(HIP QUIET) #quiet because hip-config.cmake is not part of core-cmake and will spam a loud warning when hip/rocm is not installed
if(HIP_FOUND AND PARSEC_HAVE_CUDA)
# the underlying reason is that the generated ptg code cannot include at the same time
# cuda_runtime.h and hip_runtime.h, so we need to modify the dev_cuda.h to not expose any
Expand All @@ -738,6 +749,8 @@ int main(int argc, char *argv[]) {
get_target_property(extra_hip_libs hip::host INTERFACE_LINK_LIBRARIES)
list(APPEND EXTRA_LIBS ${extra_hip_libs})
set(HIP_NOT_CUDA_FOUND TRUE)
enable_language(HIP)
set(CMAKE_SYSTEM_PREFIX_PATH ${CMAKE_SYSTEM_PREFIX_PATH_save})
else()
set(HIP_NOT_CUDA_FOUND FALSE)
endif()
Expand All @@ -747,8 +760,8 @@ int main(int argc, char *argv[]) {
if( PARSEC_GPU_WITH_LEVEL_ZERO )
find_package(level-zero)
find_package(DPCPP)
set(PARSEC_HAVE_LEVEL_ZERO ${LEVEL_ZERO_FOUND} CACHE BOOL "True if PaRSEC provide support for Intel level-zero")
if (LEVEL_ZERO_FOUND AND PARSEC_HAVE_DPCPP)
set(PARSEC_HAVE_LEVEL_ZERO ${LEVEL_ZERO_FOUND} CACHE BOOL "True if PaRSEC provide support for Intel level-zero")
include_directories("${LEVEL_ZERO_INCLUDE_DIR}/level_zero/")
set(PARSEC_HAVE_LEVEL_ZERO ${LEVEL_ZERO_FOUND} CACHE BOOL "True if PaRSEC provide support for Intel Level Zero")
message(STATUS "Found Intel level-zero ${LEVEL_ZERO_VERSION} in -I${LEVEL_ZERO_INCLUDE_DIR} / -L${LEVEL_ZERO_LIBRARY_DIR}")
Expand Down Expand Up @@ -939,6 +952,7 @@ add_subdirectory(parsec)
# Add dependency to Level-Zero if it is enabled
#
if(PARSEC_HAVE_LEVEL_ZERO)
  # Report the extra dependency at configure time, then link the Level-Zero loader
  # privately into the parsec target.
  message(STATUS "parsec depends on ze_loader")
  target_link_libraries(parsec PRIVATE level_zero::ze_loader)
endif()

Expand Down
20 changes: 11 additions & 9 deletions parsec/interfaces/dtd/insert_function.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@
#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
#include "parsec/mca/device/cuda/device_cuda.h"
#endif /* defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) */
#if defined(PARSEC_HAVE_DEV_HIP_SUPPORT)
#include "parsec/mca/device/hip/device_hip.h"
#endif /* defined(PARSEC_HAVE_DEV_HIP_SUPPORT) */

#include "parsec/mca/mca_repository.h"
#include "parsec/constants.h"
Expand Down Expand Up @@ -1491,9 +1494,8 @@ parsec_dtd_startup(parsec_context_t *context,
parsec_device_module_t *device = parsec_mca_device_get(_i);
if( NULL == device ) continue;
if( !(tp->devices_index_mask & (1 << device->device_index))) continue; /* not supported */
// If CUDA is enabled, let the CUDA device activated for this
// taskpool.
if( PARSEC_DEV_CUDA == device->type ) continue;
// If a GPU is enabled, let the device be activated for this taskpool.
if( PARSEC_DEV_IS_GPU(device->type) ) continue;
if( NULL != device->taskpool_register )
if( PARSEC_SUCCESS !=
device->taskpool_register(device, (parsec_taskpool_t *)tp)) {
Expand Down Expand Up @@ -2327,7 +2329,7 @@ static parsec_hook_return_t parsec_dtd_gpu_task_submit(parsec_execution_stream_t
}

parsec_device_module_t *device = parsec_mca_device_get(dev_index);
assert(NULL != device);
assert(NULL != device);
/* We already know the device is a GPU device from the test above */
gpu_task->stage_in = parsec_default_gpu_stage_in;
gpu_task->stage_out = parsec_default_gpu_stage_out;
Expand Down Expand Up @@ -2400,7 +2402,7 @@ int parsec_dtd_task_class_add_chore(parsec_taskpool_t *tp,
}

incarnations[i].type = device_type;
if(PARSEC_DEV_CUDA == device_type) {
if(PARSEC_DEV_IS_GPU(device_type)) {
incarnations[i].hook = parsec_dtd_gpu_task_submit;
dtd_tc->gpu_func_ptr = (parsec_advance_task_function_t)function;
}
Expand Down Expand Up @@ -2998,11 +3000,11 @@ parsec_insert_dtd_task(parsec_task_t *__this_task)
FLOW_OF(last_user.task, last_user.flow_index)->flags &= ~RELEASE_OWNERSHIP_SPECIAL;

if( this_task->super.data[flow_index].data_in != NULL) {
/* #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) */
/* #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) */
/* parsec_atomic_lock(&this_task->super.data[flow_index].data_in->original->lock); */
/* #endif */
(void)parsec_atomic_fetch_dec_int32(&this_task->super.data[flow_index].data_in->readers);
/* #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) */
/* #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) */
/* parsec_atomic_unlock(&this_task->super.data[flow_index].data_in->original->lock); */
/* #endif */
}
Expand Down Expand Up @@ -3287,8 +3289,8 @@ __parsec_dtd_taskpool_create_task(parsec_taskpool_t *tp,

__parsec_chore_t **incarnations = (__parsec_chore_t **)&tc->incarnations;
(*incarnations)[0].type = device_type;
if( device_type == PARSEC_DEV_CUDA ) {
/* Special case for CUDA: we need an intermediate */
if( PARSEC_DEV_IS_GPU(device_type) ) {
/* Special case for GPUs: we need an intermediate */
(*incarnations)[0].hook = parsec_dtd_gpu_task_submit;
dtd_tc->gpu_func_ptr = (parsec_advance_task_function_t)fpointer;
}
Expand Down
4 changes: 0 additions & 4 deletions parsec/interfaces/dtd/insert_function_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@
#include "parsec/execution_stream.h"
#include "parsec/mca/device/device_gpu.h"

#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
#include "parsec/mca/device/cuda/device_cuda.h"
#endif /* PARSEC_HAVE_DEV_CUDA_SUPPORT */

BEGIN_C_DECLS

#define PARSEC_DTD_NB_TASK_CLASSES 25 /*< Max number of task classes allowed */
Expand Down
1 change: 1 addition & 0 deletions tests/dsl/dtd/dtd_test_cuda_task_insert.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h"
#include "parsec/interfaces/dtd/insert_function_internal.h"
#include "tests/tests_data.h"
#include "parsec/mca/device/cuda/device_cuda_internal.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this? This header is internal and should not spill over into user code.


#if defined(PARSEC_HAVE_MPI)
#include <mpi.h>
Expand Down
3 changes: 3 additions & 0 deletions tests/dsl/dtd/dtd_test_new_tile.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#include "tests/tests_timing.h"
#include "parsec/interfaces/dtd/insert_function_internal.h"
#include "parsec/utils/debug.h"
#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
#include "parsec/mca/device/cuda/device_cuda_internal.h"
#endif

#if defined(PARSEC_HAVE_STRING_H)
#include <string.h>
Expand Down
2 changes: 1 addition & 1 deletion tests/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
add_subdirectory(scheduling)
add_Subdirectory(cuda)
add_Subdirectory(gpu)

if( MPI_C_FOUND )
parsec_addtest_executable(C multichain)
Expand Down
2 changes: 1 addition & 1 deletion tests/runtime/Testings.cmake
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
include(runtime/scheduling/Testings.cmake)
include(runtime/cuda/Testings.cmake)
include(runtime/gpu/Testings.cmake)
13 changes: 0 additions & 13 deletions tests/runtime/cuda/Testings.cmake

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,24 @@ if(PARSEC_HAVE_CUDA)
parsec_addtest_executable(C testing_get_best_device SOURCES "testing_get_best_device.c")
target_include_directories(testing_get_best_device PRIVATE $<$<NOT:${PARSEC_BUILD_INPLACE}>:${CMAKE_CURRENT_SOURCE_DIR}>)
target_ptg_sources(testing_get_best_device PRIVATE "get_best_device_check.jdf")

if(CMAKE_CUDA_COMPILER)
  # Mark the shared ping kernel as a CUDA source.
  set_source_files_properties(ping_kernel.cu PROPERTIES LANGUAGE CUDA)

  # dtd_pingpong: C driver, with the CUDA kernel attached as a private source.
  parsec_addtest_executable(C dtd_pingpong SOURCES dtd_pingpong.c)
  target_sources(dtd_pingpong PRIVATE ping_kernel.cu)

  # ptg_pingpong: CUDA kernel plus the ptg_pingpong.jdf PTG source.
  parsec_addtest_executable(C ptg_pingpong SOURCES ping_kernel.cu)
  target_ptg_sources(ptg_pingpong PRIVATE "ptg_pingpong.jdf")
endif()
endif(PARSEC_HAVE_CUDA)

# HIP flavour of the ping-pong tests: only added when HIP support is enabled and a HIP
# compiler is set.
if(PARSEC_HAVE_HIP AND CMAKE_HIP_COMPILER)
  include(ParsecCompilePTG)

  # Mark the shared ping kernel as a HIP source.
  set_source_files_properties(ping_kernel.hip.c PROPERTIES LANGUAGE HIP)

  # dtd_pingpong: C driver built together with the HIP kernel.
  parsec_addtest_executable(C dtd_pingpong SOURCES dtd_pingpong.c ping_kernel.hip.c)

  # ptg_pingpong: HIP kernel plus the ptg_pingpong.jdf PTG source.
  parsec_addtest_executable(C ptg_pingpong SOURCES ping_kernel.hip.c)
  target_ptg_sources(ptg_pingpong PRIVATE "ptg_pingpong.jdf")
endif()
Loading