diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..6e16cc238 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,8 @@ +[submodule "tpls/Caliper"] + path = tpls/Caliper + url = https://github.com/NexGenAnalytics/Caliper.git + branch = feature/make-multitool-safe # Until Caliper gets full support for Kokkos EventSet +[submodule "tpls/apex"] + path = tpls/apex + url = https://github.com/NexGenAnalytics/apex.git + branch = develop diff --git a/Build.md b/Build.md new file mode 100644 index 000000000..2fffc7650 --- /dev/null +++ b/Build.md @@ -0,0 +1,22 @@ +# How to Build + +# With CMake + +1. Create your build directory and go to it + +2. Type `ccmake ..` and change any options, including tools you want turned on (some are by default off). (Optional) + +3. Type `cmake ..` + +4. Type `make` + +5. Specify the generated shared library (.so on Linux, .dylib on macOS) in the environment variable KOKKOS_TOOLS_LIBRARY when running your Kokkos-based application. + + +# With Makefile (recommended) + +1. Go into the directory of the particular tool, e.g., `cd debugging/kernel-logger` + +2. Type `make` + +3. Specify the generated .so file in the environment variable KOKKOS_TOOLS_LIBRARY when running your Kokkos-based application. diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..f12d6bdb5 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,227 @@ +cmake_minimum_required(VERSION 3.20 FATAL_ERROR) # cmake_path() needs 3.20; CMP0111 and file(REAL_PATH) need 3.19 + +project(KokkosTools CXX) + +# Include utilities +include(cmake/utils.cmake) +include(cmake/configure_tpls.cmake) + +# Set policies +cmake_policy(SET CMP0111 NEW) # an imported target without a location is an error at generate time + +# Disable in-source builds to prevent source tree corruption. +if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) + message(FATAL_ERROR "FATAL: In-source builds are not allowed. 
You should create a separate directory for build files.") +endif() + +list(INSERT CMAKE_MODULE_PATH 0 ${PROJECT_SOURCE_DIR}/cmake) + +message(STATUS) +message(STATUS "Configuring Kokkos-Tools") +message(STATUS) + +# Common settings +option(BUILD_SHARED_LIBS "Build shared libraries" ON) # must be option(): set() would store the list "Build shared libraries;ON", which is not a true value +if(WIN32) + set(BUILD_SHARED_LIBS OFF) # We need to add __declspec(dllexport/dllimport) for Windows DLLs +endif() + +# Tools settings +option(KokkosTools_ENABLE_SINGLE "Build single library interfacing all profilers and dispatching at runtime" OFF) +if(WIN32) + set(KokkosTools_ENABLE_SINGLE ON) +endif() + +option(KokkosTools_ENABLE_PAPI "Enable PAPI support" OFF) +option(KokkosTools_ENABLE_MPI "Enable MPI support" OFF) +option(KokkosTools_ENABLE_CALIPER "Enable building Caliper library" OFF) +option(KokkosTools_ENABLE_APEX "Enable building Apex library" OFF) +option(KokkosTools_ENABLE_EXAMPLES "Build examples" OFF) +# Advanced settings +option(KokkosTools_REUSE_KOKKOS_COMPILER "Set the compiler and flags based on installed Kokkos settings" OFF) +mark_as_advanced(KokkosTools_REUSE_KOKKOS_COMPILER) + +# Fetch Kokkos options: +acquire_kokkos_config() +if(DEFINED Kokkos_FOUND_MSG) + message(STATUS "${Kokkos_FOUND_MSG}: ${Kokkos_INSTALL_DIR}\n" + "\t\tDevices: ${Kokkos_DEVICES}\n" + "\t\tArchitecture: ${Kokkos_ARCH}\n" + "\t\tTPLs: ${Kokkos_TPLS}\n" + "\t\tCompiler: ${Kokkos_CXX_COMPILER} (${Kokkos_CXX_COMPILER_ID})\n" + "\t\tCMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}\n" + "\t\tOptions: ${Kokkos_OPTIONS}") + # Synchronize compiler and flags (only when explicitly requested) + if(KokkosTools_REUSE_KOKKOS_COMPILER) + set(CMAKE_CXX_COMPILER "${Kokkos_CXX_COMPILER}" CACHE STRING "C++ Compiler") + set(CMAKE_CXX_STANDARD "${CMAKE_CXX_STANDARD_DEFAULT}" CACHE STRING "C++ Standard: 98, 11, 14, 17, 20 or 23") + endif() +else() + if(KokkosTools_REUSE_KOKKOS_COMPILER) + message(FATAL_ERROR "Kokkos not found: can't reuse Kokkos compiler (which was explicitly " + "requested with 
KokkosTools_REUSE_KOKKOS_COMPILER=ON)") + endif() + message(STATUS "Kokkos NOT found") +endif() + +# Libraries +if(KokkosTools_ENABLE_PAPI) + find_package(PAPI REQUIRED) # TODO: papi-connector requires v6.0 or newer + cmake_path(GET PAPI_INCLUDE_DIR PARENT_PATH PAPI_ROOT) + message(STATUS "Found PAPI ${PAPI_VERSION_STRING} at ${PAPI_ROOT}") + set(KokkosTools_HAS_PAPI ON) +else() + message(STATUS "PAPI support disabled") + set(KokkosTools_HAS_PAPI OFF) +endif() + +if(KokkosTools_ENABLE_MPI) + find_package(MPI REQUIRED) + message(STATUS "Found MPI ${MPI_CXX_VERSION}: ${MPI_CXX_LIBRARIES}") + set(KOKKOSTOOLS_HAS_MPI 1) +else() + message(STATUS "MPI not available. MPI disabled.") + set(KOKKOSTOOLS_HAS_MPI 0) +endif() +set(USE_MPI ${KOKKOSTOOLS_HAS_MPI}) # legacy 0|1 name substituted into common/kp_config.hpp.in (@USE_MPI@) and tested by example/ and configure_caliper() +include(cmake/configure_variorum.cmake) + +set(KOKKOSTOOLS_HAS_CALIPER ${KokkosTools_ENABLE_CALIPER}) +set(KOKKOSTOOLS_HAS_NVPROF ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvprof should be available + +if(DEFINED ENV{VTUNE_HOME}) + set(VTune_ROOT $ENV{VTUNE_HOME}) +endif() +if(VTune_ROOT) + find_package(ITT REQUIRED) + set(KOKKOSTOOLS_HAS_VTUNE ON) +else() + message(WARNING "Set VTUNE_HOME in environment or VTune_ROOT in build options to build VTune connectors") + set(VTune_ROOT "" CACHE STRING "Path to VTune Intel compiler") + set(KOKKOSTOOLS_HAS_VTUNE OFF) +endif() + +# make Kokkos profiling interface available for native profilers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/profiling/all) + +# Config file +configure_file(common/kp_config.hpp.in common/kp_config.hpp) +set(COMMON_HEADERS_PATH ${CMAKE_CURRENT_BINARY_DIR}/common) +include_directories(${COMMON_HEADERS_PATH}) + +set(SINGLELIB_PROFILERS "" CACHE STRING "" FORCE) + +# Export settings +include(GNUInstallDirs) +set(EXPORT_NAME KokkosToolsConfig) +set(EXPORT_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}) +set(EXPORT_LIB_DIR ${CMAKE_INSTALL_LIBDIR}) +set(EXPORT_TARGETS "" CACHE STRING "" FORCE) + +if(WIN32) + message(STATUS "Windows target detected - skipping 
Unix-only tools.") +endif() + +if(APPLE) + message(STATUS "Apple OSX target detected.") +endif() + +# Utilities +if(NOT WIN32) + add_subdirectory(common/kernel-filter) +endif() +add_subdirectory(debugging/kernel-logger) + +# Profilers +if(NOT WIN32) + add_subdirectory(profiling/simple-kernel-timer) + add_subdirectory(profiling/memory-hwm) + if(KOKKOSTOOLS_HAS_MPI) # set to 0|1 by the MPI detection above + add_subdirectory(profiling/memory-hwm-mpi) + else() + message(STATUS "Skipping memory-hwm-mpi (MPI disabled)") + endif() + add_subdirectory(profiling/memory-events) + add_subdirectory(profiling/memory-usage) + add_subdirectory(profiling/chrome-tracing) + add_subdirectory(profiling/space-time-stack) +endif() + +# External lib connectors +if(KokkosTools_ENABLE_PAPI) + add_subdirectory(profiling/papi-connector) +endif() + +if(NOT WIN32 AND NOT APPLE) + add_subdirectory(profiling/systemtap-connector) +endif() + +if(KOKKOSTOOLS_HAS_VARIORUM) + add_subdirectory(profiling/variorum-connector) +endif() + +# GPU profilers +if(Kokkos_ENABLE_CUDA) + add_subdirectory(profiling/nvprof-connector) + add_subdirectory(profiling/nvprof-focused-connector) +endif() +if(Kokkos_ENABLE_HIP) + #add_subdirectory(profiling/roctx-connector) +endif() + +if(KOKKOSTOOLS_HAS_VTUNE) + add_subdirectory(profiling/vtune-connector) + add_subdirectory(profiling/vtune-focused-connector) +endif() + +# Find Caliper (an existing installation is required) +if(KokkosTools_ENABLE_CALIPER) + find_package(caliper QUIET) + if(caliper_INCLUDE_DIR) + cmake_path(GET caliper_INCLUDE_DIR PARENT_PATH Caliper_INSTALL_DIR) + file(REAL_PATH ${Caliper_INSTALL_DIR} Caliper_INSTALL_DIR) + message(STATUS "Caliper installation found in: ${Caliper_INSTALL_DIR}") + list(APPEND SINGLELIB_PROFILERS caliper) + else() + # Building Caliper from the git submodule is not supported. Users who don't have Caliper can install it themselves and point the build at it. + message(FATAL_ERROR "FATAL: Required Caliper installation not found! 
Exiting.") + endif() +endif() + +# Find Apex (an existing installation is required) +if(KokkosTools_ENABLE_APEX) + find_package(Apex QUIET) + if(Apex_FOUND) + message(STATUS "Apex installation found in: ${Apex_DIR}") + list(APPEND SINGLELIB_PROFILERS "apex") + else() + # Building Apex from the git submodule is not supported. Users who don't have Apex can install it themselves and point the build at it. + message(FATAL_ERROR "FATAL: Required Apex installation not found! Exiting.") + endif() +endif() + +# Build single library interface (once we have everything set up) +if(KokkosTools_ENABLE_SINGLE) + message(STATUS "Building Monolithic KokkosTools library with profilers: ${SINGLELIB_PROFILERS}") + add_subdirectory(profiling/all) +else() + message(STATUS "Monolithic KokkosTools library skipped") +endif() + +# Build examples +if(KokkosTools_ENABLE_EXAMPLES) + if(NOT KokkosTools_ENABLE_SINGLE) + message(WARNING "This example requires KokkosTools built with monolothic library interface (KokkosTools_ENABLE_SINGLE=ON)") + else() + enable_testing() + add_subdirectory(example) + endif() +endif() + +# Install exports +install(TARGETS ${EXPORT_TARGETS} EXPORT ${EXPORT_NAME}) +install(EXPORT ${EXPORT_NAME} + NAMESPACE KokkosTools:: + DESTINATION ${EXPORT_LIB_DIR}/cmake) +install(CODE "SET(KokkosTools_HAS_MPI ${KOKKOSTOOLS_HAS_MPI})") # expand the 0|1 flag set by the MPI detection in this file + diff --git a/cmake/FindApex.cmake b/cmake/FindApex.cmake new file mode 100644 index 000000000..4f6be521d --- /dev/null +++ b/cmake/FindApex.cmake @@ -0,0 +1,34 @@ +find_package(PkgConfig REQUIRED) + +# backup current CMAKE_PREFIX_PATH and PKG_CONFIG_USE_CMAKE_PREFIX_PATH +if(DEFINED CMAKE_PREFIX_PATH) + set(_old_def ON) + set(_old_val ${CMAKE_PREFIX_PATH}) +else() + set(_old_def OFF) +endif() +set(_old_use ${PKG_CONFIG_USE_CMAKE_PREFIX_PATH}) +set(PKG_CONFIG_USE_CMAKE_PREFIX_PATH ON) + +# add Apex_DIR / Apex_ROOT to module search path +if(Apex_DIR) + set(CMAKE_PREFIX_PATH ${Apex_DIR}) +elseif(Apex_ROOT) + set(CMAKE_PREFIX_PATH ${Apex_ROOT}) +endif() + +# find Apex 
+pkg_check_modules(Apex QUIET IMPORTED_TARGET apex) +if(Apex_FOUND) + # create "apex" target like it would be created by Apex setup + add_library(apex ALIAS PkgConfig::Apex) + file(REAL_PATH ${Apex_PREFIX} Apex_DIR) +endif() + +# restore original variables +if(_old_def) + set(CMAKE_PREFIX_PATH ${_old_val}) +else() + unset(CMAKE_PREFIX_PATH) +endif() +set(PKG_CONFIG_USE_CMAKE_PREFIX_PATH ${_old_use}) diff --git a/cmake/FindITT.cmake b/cmake/FindITT.cmake new file mode 100644 index 000000000..753e48878 --- /dev/null +++ b/cmake/FindITT.cmake @@ -0,0 +1,57 @@ +# Note: Package is named "ITT" here because we reuse Caliper's FindITTAPI.cmake find module +# and it calls find_package_handle_standard_args() with "ITT" package name internally, so CMake +# expects find_package() calls to use "ITT" package name as well. + +function(is_architecture_x64 OUT_ARCH64) + # heuristic to catch x86_64 on Unix and AMD64 on Windows: sets ${OUT_ARCH64} to ON when CMAKE_SYSTEM_PROCESSOR ends in "64", OFF otherwise + string(REGEX MATCH "64$" ARCH64 "${CMAKE_SYSTEM_PROCESSOR}") + if("${ARCH64}" STREQUAL "64") # quoted: ARCH64 is empty when there is no match, and an unquoted empty expansion is a syntax error + set(${OUT_ARCH64} ON PARENT_SCOPE) + else() + set(${OUT_ARCH64} OFF PARENT_SCOPE) + endif() +endfunction() + +#--------------------------------------------------------------------------------# +# 2022-02-14 On some x64 platforms (encountered on Ubuntu 20.04 in Win11/WSL2) +# CMake does NOT enable FIND_LIBRARY_USE_LIB64_PATHS as it should, which leads to +# Intel oneAPI libs not being found in .../lib64 folders. 
+# See: https://cmake.org/cmake/help/latest/command/find_library.html +get_property(USE_LIB32 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB32_PATHS) +get_property(USE_LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS) +is_architecture_x64(ARCH64) +if(ARCH64 AND NOT USE_LIB32) + set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON) +elseif(NOT USE_LIB64) + set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB32_PATHS ON) +endif() +#--------------------------------------------------------------------------------# + +if(MSVC) + + # 2022-02-14: find_library() can't locate libittnotify.lib on Windows - not sure why... + # using find_file() instead as a workaround + get_property(USE_LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS) + if(USE_LIB64) + find_file(ITT_LIBRARY libittnotify.lib ${VTune_ROOT}/lib64) + else() + find_file(ITT_LIBRARY libittnotify.lib ${VTune_ROOT}/lib32) + endif() + find_path(ITT_INCLUDE_DIR NAMES ittnotify.h HINTS ${VTune_ROOT}/include) + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(ITT DEFAULT_MSG ITT_LIBRARY ITT_INCLUDE_DIR) + +else() + + # Just reuse find module implemented in Caliper + set(ITT_PREFIX ${VTune_ROOT}) + include(${PROJECT_SOURCE_DIR}/tpls/Caliper/cmake/FindITTAPI.cmake) + +endif() + +# Set up imported target +if(NOT TARGET ittapi) # Note: "ittnotify" is a target created by Apex + add_library(ittapi INTERFACE IMPORTED) + target_include_directories(ittapi INTERFACE ${ITT_INCLUDE_DIR}) + target_link_libraries(ittapi INTERFACE ${ITT_LIBRARY}) +endif() diff --git a/cmake/FindPAPI.cmake b/cmake/FindPAPI.cmake new file mode 100644 index 000000000..d2573fa8c --- /dev/null +++ b/cmake/FindPAPI.cmake @@ -0,0 +1,82 @@ +#[=======================================================================[.rst: +FindPAPI +-------- + +Find the native PAPI headers and libraries. + +IMPORTED Targets +^^^^^^^^^^^^^^^^ + +This module defines :prop_tgt:`IMPORTED` target ``PAPI::PAPI``, if PAPI has been found. 
+ +Result Variables +^^^^^^^^^^^^^^^^ + +This module defines the following variables: + +``PAPI_FOUND`` + "True" if ``papi`` found. + +``PAPI_INCLUDE_DIR`` + where to find ``papi``/``papi.h``, etc. + +``PAPI_LIBRARY`` + List of libraries when using ``papi``. + +``PAPI_VERSION_STRING`` + The version of ``papi`` found. + +This module defines ``PAPI::PAPI`` target for PAPI library. + +#]=======================================================================] + +# Look for the header file. +find_path( + PAPI_INCLUDE_DIR + NAMES papi.h + HINTS /usr/include /usr/local/include) +mark_as_advanced(PAPI_INCLUDE_DIR) + +# Look for the library (sorted from most current/relevant entry to least). +find_library( + PAPI_LIBRARY + NAMES papi + HINTS /usr/lib /usr/local/lib) +mark_as_advanced(PAPI_LIBRARY) + +#define PAPI_VERSION PAPI_VERSION_NUMBER(6,0,0,1) + +if(PAPI_INCLUDE_DIR AND NOT PAPI_VERSION_STRING AND EXISTS "${PAPI_INCLUDE_DIR}/papi.h") + file( + STRINGS "${PAPI_INCLUDE_DIR}/papi.h" + PAPI_VERSION_STRING + REGEX "^#define[\t ]+PAPI_VERSION[\t ]+PAPI_VERSION_NUMBER\(.*\)") + string( + REGEX REPLACE + "^#define[\t ]+PAPI_VERSION[\t ]+PAPI_VERSION_NUMBER\\((.*)\\)" + "\\1" + PAPI_VERSION_STRING + "${PAPI_VERSION_STRING}") + string(REPLACE "," "." 
PAPI_VERSION_STRING "${PAPI_VERSION_STRING}") +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + PAPI + REQUIRED_VARS PAPI_LIBRARY PAPI_INCLUDE_DIR + VERSION_VAR PAPI_VERSION_STRING) + + +# Skip target if already defined +if(TARGET PAPI::PAPI) + return() +endif() + +# Set up imported target +add_library(PAPI::PAPI INTERFACE IMPORTED) + +target_include_directories(PAPI::PAPI INTERFACE ${PAPI_INCLUDE_DIR}) +target_link_libraries(PAPI::PAPI INTERFACE ${PAPI_LIBRARY}) + +set(PAPI_INCLUDE_DIRS ${PAPI_INCLUDE_DIR}) +set(PAPI_LIBRARIES ${PAPI_LIBRARY}) diff --git a/cmake/configure_tpls.cmake b/cmake/configure_tpls.cmake new file mode 100644 index 000000000..398eabba6 --- /dev/null +++ b/cmake/configure_tpls.cmake @@ -0,0 +1,67 @@ +# Alter some Caliper defaults, based on Kokkos and Tools settings +# see https://software.llnl.gov/Caliper/build.html +macro(configure_caliper) + set_cache(CALIPER_OPTION_PREFIX ON) + set_cache(CALIPER_WITH_KOKKOS ON) + if(USE_MPI) + set_cache(CALIPER_WITH_MPI ON) + endif() + if(KokkosTools_HAS_PAPI) + set(PAPI_PREFIX ${PAPI_ROOT}) + set_cache(CALIPER_WITH_PAPI ON) + endif() + if(KOKKOSTOOLS_HAS_VARIORUM) + set_cache(CALIPER_WITH_VARIORUM ON) + set(VARIORUM_PREFIX ${Variorum_ROOT}) + endif() + if(KOKKOSTOOLS_HAS_VTUNE) + set_cache(CALIPER_WITH_VTUNE ON) + set(ITT_PREFIX ${VTune_ROOT}) + endif() + if(Kokkos_FOUND) + if(Kokkos_ENABLE_CUDA) + # TODO: check if this works... + set_cache(CALIPER_WITH_NVTX ON) + set_cache(CALIPER_WITH_CUPTI ON) + endif() + if(Kokkos_ENABLE_HIP) + # TODO: check if this works... 
+ set_cache(CALIPER_WITH_ROCTX ON) + set_cache(CALIPER_WITH_ROCTRACER ON) + endif() + endif() +endmacro() + +# Alter some Apex defaults, based on Kokkos and Tools settings +# See http://uo-oaciss.github.io/apex/install/#standalone_installation +macro(configure_apex) + if(BUILD_SHARED_LIBS) + set_cache(BUILD_STATIC_EXECUTABLES OFF) + else() + set_cache(BUILD_STATIC_EXECUTABLES ON) + endif() + set_cache(APEX_WITH_PAPI ${KokkosTools_ENABLE_PAPI}) + set_cache(APEX_WITH_MPI ${KokkosTools_ENABLE_MPI}) + + ## TODO: Build Binutils if not installed (detect?) and the compiler is NOT gcc/clang/icc (check CMake vars) + # set(BFD_ROOT /path/to/binutils) + # option(APEX_BUILD_BFD "Build Binutils library if not found" ON) + + ## TODO: Build OMPT if compilers >= [gcc/clang/icc] and we're NOT offloading to GPU + ## Note: OMPT should work nice with Intel compiler + # option(APEX_BUILD_OMPT "Build OpenMP runtime with OMPT if support not found" ON) + + if(Kokkos_ENABLE_CUDA) + option(APEX_WITH_CUDA "Enable CUDA (CUPTI) support" ON) + # TODO: check if we need to set CUPTI_ROOT and/or NVML_ROOT here + endif() + + if(Kokkos_ENABLE_HIP) + option(APEX_WITH_HIP "Enable HIP (ROCTRACER) support" ON) + ## TODO: check/set paths (we can skip roctracer, rocprofiler, rocm_smi if they're located in ${ROCM_PATH}) + # set(ROCM_ROOT ${ROCM_PATH}) + # set(ROCTX_ROOT ${ROCM_PATH}/roctracer) + # set(ROCTRACER_ROOT ${ROCM_PATH}/roctracer) + # set(RSMI_ROOT ${ROCM_PATH}/rocm_smi) + endif() +endmacro() diff --git a/cmake/configure_variorum.cmake b/cmake/configure_variorum.cmake new file mode 100644 index 000000000..b6280dc0a --- /dev/null +++ b/cmake/configure_variorum.cmake @@ -0,0 +1,24 @@ +# Based on Makefile authored by Zachary S. 
Frye (CASC at LLNL) in July 2020 + +set(KOKKOSTOOLS_HAS_VARIORUM OFF) + +# Set Variorum_ROOT for find_package() based on VARIORUM_ROOT (CMake variable first, then environment variable) +set(MSG_NOTFOUND "set Variorum_ROOT CMake variable or VARIORUM_ROOT environment variable to build Variorum connector") +if(NOT DEFINED Variorum_ROOT) + set(Variorum_ROOT ${VARIORUM_ROOT}) # CMake variable takes precedence; leaves Variorum_ROOT unset when VARIORUM_ROOT is not given + if(NOT Variorum_ROOT AND DEFINED ENV{VARIORUM_ROOT}) + set(Variorum_ROOT $ENV{VARIORUM_ROOT}) + set(MSG_NOTFOUND "check VARIORUM_ROOT environment variable ($ENV{VARIORUM_ROOT})") + endif() +else() + set(MSG_NOTFOUND "check Variorum_ROOT (${Variorum_ROOT})") +endif() + +set(Variorum_DIR ${Variorum_ROOT}/lib/cmake) +find_package(Variorum QUIET) + +if(Variorum_FOUND) + set(KOKKOSTOOLS_HAS_VARIORUM TRUE) +else() + message(WARNING "Variorum not found: ${MSG_NOTFOUND}") +endif() diff --git a/cmake/utils.cmake b/cmake/utils.cmake new file mode 100644 index 000000000..549ec8d82 --- /dev/null +++ b/cmake/utils.cmake @@ -0,0 +1,44 @@ +function(kp_add_library TARGET) + add_library(${TARGET} ${ARGN}) # SOURCES = ${ARGN} + + # add this library to the list of profilers linked to single library + list(APPEND SINGLELIB_PROFILERS ${TARGET}) + set(SINGLELIB_PROFILERS ${SINGLELIB_PROFILERS} CACHE STRING "" FORCE) + + # add this library to exported targets + list(APPEND EXPORT_TARGETS ${TARGET}) + set(EXPORT_TARGETS ${EXPORT_TARGETS} CACHE STRING "" FORCE) +endfunction() + +macro(set_cache NAME VAL) + set(${NAME} ${VAL} CACHE BOOL "") # store the caller's value (was hard-coded to ON, so e.g. APEX_WITH_PAPI could never be turned off) +endmacro() + +function(acquire_kokkos_config) + if(NOT TARGET Kokkos::kokkos) + find_package(Kokkos QUIET) + if(Kokkos_FOUND) + set(Kokkos_FOUND_MSG "Found Kokkos installation") + get_property(Kokkos_INSTALL_DIR TARGET Kokkos::kokkoscore PROPERTY INTERFACE_INCLUDE_DIRECTORIES) + cmake_path(GET Kokkos_INSTALL_DIR PARENT_PATH Kokkos_INSTALL_DIR) + endif() + elseif(DEFINED Kokkos_DEVICES) + set(Kokkos_FOUND_MSG "Found Kokkos package already imported by superproject") + get_property(Kokkos_INSTALL_DIR TARGET Kokkos::kokkoscore 
PROPERTY INTERFACE_INCLUDE_DIRECTORIES) + cmake_path(GET Kokkos_INSTALL_DIR PARENT_PATH Kokkos_INSTALL_DIR) + else() + set(Kokkos_FOUND_MSG "Found Kokkos included by source in superproject") + get_property(Kokkos_INSTALL_DIR TARGET Kokkos::kokkos PROPERTY BINARY_DIR) + # Include Kokkos exported settings like we would have them from find_package(Kokkos) + set(Kokkos_FIND_QUIETLY ON) + include(${Kokkos_INSTALL_DIR}/KokkosConfigCommon.cmake) + endif() + foreach(VAR_NAME Kokkos_FOUND_MSG Kokkos_INSTALL_DIR + # Settings exported by Kokkos + Kokkos_DEVICES Kokkos_ARCH Kokkos_TPLS Kokkos_CXX_COMPILER Kokkos_CXX_COMPILER_ID Kokkos_OPTIONS + Kokkos_ENABLE_OPENMP Kokkos_ENABLE_CUDA Kokkos_ENABLE_HIP + # Kokkos exports the flags as well + CMAKE_CXX_FLAGS) + set(${VAR_NAME} ${${VAR_NAME}} PARENT_SCOPE) + endforeach() +endfunction() diff --git a/common/kernel-filter/CMakeLists.txt b/common/kernel-filter/CMakeLists.txt new file mode 100644 index 000000000..ae5cad488 --- /dev/null +++ b/common/kernel-filter/CMakeLists.txt @@ -0,0 +1 @@ +add_library(kp_kernel_filter ${KOKKOSTOOLS_LIBRARY_MODE} kp_kernel_filter.cpp) \ No newline at end of file diff --git a/common/kp_config.hpp.in b/common/kp_config.hpp.in new file mode 100644 index 000000000..44eca1359 --- /dev/null +++ b/common/kp_config.hpp.in @@ -0,0 +1,7 @@ +// Note: keep legacy 0|1 or update `#if USE_MPI` checks +#define USE_MPI @USE_MPI@ + +#cmakedefine KOKKOSTOOLS_HAS_NVPROF +#cmakedefine KOKKOSTOOLS_HAS_CALIPER +#cmakedefine KOKKOSTOOLS_HAS_VARIORUM +#cmakedefine KOKKOSTOOLS_HAS_VTUNE \ No newline at end of file diff --git a/debugging/kernel-logger/CMakeLists.txt b/debugging/kernel-logger/CMakeLists.txt new file mode 100644 index 000000000..6f95865d0 --- /dev/null +++ b/debugging/kernel-logger/CMakeLists.txt @@ -0,0 +1 @@ +add_library(kp_kernel_logger ${KOKKOSTOOLS_LIBRARY_MODE} kp_kernel_logger.cpp) \ No newline at end of file diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt new file mode 100644 index 
000000000..749b4464d --- /dev/null +++ b/example/CMakeLists.txt @@ -0,0 +1,54 @@ +# Find Kokkos +find_package(Kokkos QUIET) +if(NOT Kokkos_FOUND) + message(FATAL_ERROR "Kokkos not found, set Kokkos_ROOT properly (current Kokkos_ROOT=${Kokkos_ROOT})") +endif() +foreach(_i RANGE 1 3) # cut .../lib/cmake/Kokkos suffix: three iterations (a quoted "1;2;3" is a single item and loops only once) +cmake_path(GET Kokkos_DIR PARENT_PATH Kokkos_DIR) +endforeach() +message(STATUS "Found installed Kokkos at ${Kokkos_DIR}") + +# Create target executable +set(TEST_APP kp_example) +add_executable(${TEST_APP} main.cpp) +set(LIBS "Kokkos::kokkos;kokkostools") +if(KOKKOSTOOLS_HAS_MPI) # 0|1 flag set by the top-level MPI detection + list(APPEND LIBS MPI::MPI_CXX) +endif() +target_link_libraries(${TEST_APP} PRIVATE ${LIBS}) + +# Create tests +macro(add_kp_test NAME) + add_test(NAME test_kokkos_tools_${NAME} + COMMAND "kp_example" ${ARGN} + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/..") +endmacro() +# TODO: Read profiling results and check if the profiler had successfully run +# and exported output in expected format, fail the test otherwise. 
+if(NOT WIN32) + add_kp_test(kernel_timer "kernel-timer") + add_kp_test(kernel_timer_json "kernel-timer-json") + add_kp_test(memory_events "memory-events") + add_kp_test(memory_usage "memory-usage") + add_kp_test(chrome_tracing "chrome-tracing") + add_kp_test(space_time_stack "space-time-stack") + add_kp_test(systemtap_connector "systemtap-connector") + add_kp_test(highwater_mark "highwater-mark") + if(KOKKOSTOOLS_HAS_MPI) # 0|1 flag set by the top-level MPI detection + add_kp_test(highwater_mark_mpi "highwater-mark-mpi") + endif() +endif() +if(KokkosTools_ENABLE_CALIPER) + add_kp_test(caliper "caliper" "runtime-report(profile.kokkos)") +endif() +if(KOKKOSTOOLS_HAS_VARIORUM) + add_kp_test(variorum "variorum") +endif() +if(KOKKOSTOOLS_HAS_VTUNE) + add_kp_test(vtune_connector "vtune-connector") + add_kp_test(vtune_focused_connector "vtune-focused-connector") +endif() +if(KOKKOSTOOLS_HAS_NVPROF) + add_kp_test(nvprof_connector "nvprof-connector") + add_kp_test(nvprof_focused_connector "nvprof-focused-connector") +endif() diff --git a/example/kernels.hpp b/example/kernels.hpp new file mode 100644 index 000000000..2f37a765b --- /dev/null +++ b/example/kernels.hpp @@ -0,0 +1,35 @@ +#pragma once +//-------------------------------------------------------------------------------------// + +// Sample computation: S(N) = 1 + 2 + 3 + ... 
+ N +// Tests: regions, allocation, parallel for and reduction +template +int run_calculation(const data_type SIZE) +{ + Kokkos::Profiling::pushRegion("Computation"); + + Kokkos::View data(Kokkos::ViewAllocateWithoutInitializing("data"), SIZE); + Kokkos::parallel_for("initialize()", SIZE, KOKKOS_LAMBDA(data_type i) { + data(i) = i; + }); + Kokkos::fence(); + + data_type sum = 0; + Kokkos::parallel_reduce("accumulate()", SIZE, KOKKOS_LAMBDA(data_type i, data_type &lsum) { + lsum += 1 + data(i); + }, sum); + Kokkos::fence(); + + Kokkos::Profiling::popRegion(); + + // check results + const data_type check = (SIZE + 1) * SIZE / 2; + if (sum != check) { + std::cout << "BAD result, got S(" << SIZE << ") = " << sum << " (expected " << check << ")" << std::endl; + return 1; + } + std::cout << "Result OK: S(" << SIZE << ") = " << sum << std::endl; + return 0; +} + +//-------------------------------------------------------------------------------------// diff --git a/example/main.cpp b/example/main.cpp new file mode 100644 index 000000000..66fb50117 --- /dev/null +++ b/example/main.cpp @@ -0,0 +1,41 @@ +#include +#include +#include "kp_all.hpp" +#include "kernels.hpp" + +#if USE_MPI +#include +#endif + +//-------------------------------------------------------------------------------------// + +int main(int argc, char *argv[]) +{ +#if USE_MPI + MPI_Init(&argc, &argv); +#endif + + const char *profiler_name = argc >= 2 ? argv[1] : ""; + const char *profiler_config = argc >= 3 ? 
argv[2] : ""; + + auto eventSet = KokkosTools::get_event_set(profiler_name, profiler_config); + + // Note: callbacks must be set before Kokkos::initialize() + Kokkos::Tools::Experimental::set_callbacks(eventSet); + Kokkos::initialize(argc, argv); + + Kokkos::print_configuration(std::cout); + + std::cout << std::endl; + int ret_code = run_calculation(100000); + std::cout << std::endl; + + Kokkos::finalize(); +#if USE_MPI + MPI_Finalize(); +#endif + + return ret_code; +} + +//-------------------------------------------------------------------------------------// diff --git a/profiling/all/CMakeLists.txt b/profiling/all/CMakeLists.txt new file mode 100644 index 000000000..ce8b13e27 --- /dev/null +++ b/profiling/all/CMakeLists.txt @@ -0,0 +1,22 @@ +set(LIBNAME kokkostools) + +#if(NOT SINGLELIB_PROFILERS) +# message(FATAL_ERROR "Can't build ${kokkostools}: no profilers enabled") +# return() +#endif() + +add_library(${LIBNAME} ${KOKKOSTOOLS_LIBRARY_MODE} kp_all.cpp) + +target_include_directories(${LIBNAME} + PUBLIC $ + $ + $) + +if(SINGLELIB_PROFILERS) + target_link_libraries(${LIBNAME} PUBLIC ${SINGLELIB_PROFILERS}) +endif() + +file(GLOB_RECURSE HEADER_FILES CONFIGURE_DEPENDS kp_all.hpp "${COMMON_HEADERS_PATH}/*.hpp") + +install(FILES ${HEADER_FILES} DESTINATION ${EXPORT_INCLUDE_DIR}) +install(TARGETS ${LIBNAME} EXPORT ${EXPORT_NAME}) \ No newline at end of file diff --git a/profiling/all/impl/Kokkos_Profiling_C_Interface.h b/profiling/all/impl/Kokkos_Profiling_C_Interface.h new file mode 100644 index 000000000..2c8d1428f --- /dev/null +++ b/profiling/all/impl/Kokkos_Profiling_C_Interface.h @@ -0,0 +1,296 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_PROFILING_C_INTERFACE_HPP +#define KOKKOS_PROFILING_C_INTERFACE_HPP + +#ifdef __cplusplus +#include +#include +#else +#include +#include +#include +#endif + +#define KOKKOSP_INTERFACE_VERSION 20210623 + +// Profiling + +struct Kokkos_Profiling_KokkosPDeviceInfo { + size_t deviceID; +}; + +struct Kokkos_Profiling_SpaceHandle { + char name[64]; +}; + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_initFunction)( + const int, const uint64_t, const uint32_t, + struct Kokkos_Profiling_KokkosPDeviceInfo*); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_finalizeFunction)(); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_parseArgsFunction)(int, char**); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_printHelpFunction)(char*); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_beginFunction)(const char*, const uint32_t, + uint64_t*); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_endFunction)(uint64_t); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_pushFunction)(const char*); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_popFunction)(); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_allocateDataFunction)( + const struct Kokkos_Profiling_SpaceHandle, const char*, const void*, + const uint64_t); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_deallocateDataFunction)( + const struct Kokkos_Profiling_SpaceHandle, const char*, const void*, + const uint64_t); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility 
+typedef void (*Kokkos_Profiling_createProfileSectionFunction)(const char*, + uint32_t*); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_startProfileSectionFunction)(const uint32_t); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_stopProfileSectionFunction)(const uint32_t); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_destroyProfileSectionFunction)(const uint32_t); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_profileEventFunction)(const char*); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_beginDeepCopyFunction)( + struct Kokkos_Profiling_SpaceHandle, const char*, const void*, + struct Kokkos_Profiling_SpaceHandle, const char*, const void*, uint64_t); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_endDeepCopyFunction)(); +typedef void (*Kokkos_Profiling_beginFenceFunction)(const char*, const uint32_t, + uint64_t*); +typedef void (*Kokkos_Profiling_endFenceFunction)(uint64_t); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_dualViewSyncFunction)(const char*, + const void* const, bool); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_dualViewModifyFunction)(const char*, + const void* const, + bool); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_declareMetadataFunction)(const char*, + const char*); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Tools_toolInvokedFenceFunction)(const uint32_t); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Tools_functionPointer)(); +struct Kokkos_Tools_ToolProgrammingInterface { + Kokkos_Tools_toolInvokedFenceFunction fence; + // allow addition of more actions + 
Kokkos_Tools_functionPointer padding[31]; +}; + +struct Kokkos_Tools_ToolSettings { + bool requires_global_fencing; + bool padding[255]; +}; + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Tools_provideToolProgrammingInterfaceFunction)( + const uint32_t, struct Kokkos_Tools_ToolProgrammingInterface); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Tools_requestToolSettingsFunction)( + const uint32_t, struct Kokkos_Tools_ToolSettings*); + +// Tuning + +#define KOKKOS_TOOLS_TUNING_STRING_LENGTH 64 +typedef char Kokkos_Tools_Tuning_String[KOKKOS_TOOLS_TUNING_STRING_LENGTH]; +union Kokkos_Tools_VariableValue_ValueUnion { + int64_t int_value; + double double_value; + Kokkos_Tools_Tuning_String string_value; +}; + +union Kokkos_Tools_VariableValue_ValueUnionSet { + int64_t* int_value; + double* double_value; + Kokkos_Tools_Tuning_String* string_value; +}; + +struct Kokkos_Tools_ValueSet { + size_t size; + union Kokkos_Tools_VariableValue_ValueUnionSet values; +}; + +enum Kokkos_Tools_OptimizationType { + Kokkos_Tools_Minimize, + Kokkos_Tools_Maximize +}; + +struct Kokkos_Tools_OptimzationGoal { + size_t type_id; + enum Kokkos_Tools_OptimizationType goal; +}; + +struct Kokkos_Tools_ValueRange { + union Kokkos_Tools_VariableValue_ValueUnion lower; + union Kokkos_Tools_VariableValue_ValueUnion upper; + union Kokkos_Tools_VariableValue_ValueUnion step; + bool openLower; + bool openUpper; +}; + +enum Kokkos_Tools_VariableInfo_ValueType { + kokkos_value_double, + kokkos_value_int64, + kokkos_value_string, +}; + +enum Kokkos_Tools_VariableInfo_StatisticalCategory { + kokkos_value_categorical, // unordered distinct objects + kokkos_value_ordinal, // ordered distinct objects + kokkos_value_interval, // ordered distinct objects for which distance matters + kokkos_value_ratio // ordered distinct objects for which distance matters, + // division matters, and the concept of zero exists +}; + +enum 
Kokkos_Tools_VariableInfo_CandidateValueType { + kokkos_value_set, // I am one of [2,3,4,5] + kokkos_value_range, // I am somewhere in [2,12) + kokkos_value_unbounded // I am [text/int/float], but we don't know at + // declaration time what values are appropriate. Only + // valid for Context Variables +}; + +union Kokkos_Tools_VariableInfo_SetOrRange { + struct Kokkos_Tools_ValueSet set; + struct Kokkos_Tools_ValueRange range; +}; + +struct Kokkos_Tools_VariableInfo { + enum Kokkos_Tools_VariableInfo_ValueType type; + enum Kokkos_Tools_VariableInfo_StatisticalCategory category; + enum Kokkos_Tools_VariableInfo_CandidateValueType valueQuantity; + union Kokkos_Tools_VariableInfo_SetOrRange candidates; + void* toolProvidedInfo; +}; + +struct Kokkos_Tools_VariableValue { + size_t type_id; + union Kokkos_Tools_VariableValue_ValueUnion value; + struct Kokkos_Tools_VariableInfo* metadata; +}; + +typedef void (*Kokkos_Tools_outputTypeDeclarationFunction)( + const char*, const size_t, struct Kokkos_Tools_VariableInfo* info); +typedef void (*Kokkos_Tools_inputTypeDeclarationFunction)( + const char*, const size_t, struct Kokkos_Tools_VariableInfo* info); + +typedef void (*Kokkos_Tools_requestValueFunction)( + const size_t, const size_t, const struct Kokkos_Tools_VariableValue*, + const size_t count, struct Kokkos_Tools_VariableValue*); +typedef void (*Kokkos_Tools_contextBeginFunction)(const size_t); +typedef void (*Kokkos_Tools_contextEndFunction)( + const size_t, struct Kokkos_Tools_VariableValue); +typedef void (*Kokkos_Tools_optimizationGoalDeclarationFunction)( + const size_t, const struct Kokkos_Tools_OptimzationGoal goal); + +struct Kokkos_Profiling_EventSet { + Kokkos_Profiling_initFunction init; + Kokkos_Profiling_finalizeFunction finalize; + Kokkos_Profiling_parseArgsFunction parse_args; + Kokkos_Profiling_printHelpFunction print_help; + Kokkos_Profiling_beginFunction begin_parallel_for; + Kokkos_Profiling_endFunction end_parallel_for; + 
Kokkos_Profiling_beginFunction begin_parallel_reduce; + Kokkos_Profiling_endFunction end_parallel_reduce; + Kokkos_Profiling_beginFunction begin_parallel_scan; + Kokkos_Profiling_endFunction end_parallel_scan; + Kokkos_Profiling_pushFunction push_region; + Kokkos_Profiling_popFunction pop_region; + Kokkos_Profiling_allocateDataFunction allocate_data; + Kokkos_Profiling_deallocateDataFunction deallocate_data; + Kokkos_Profiling_createProfileSectionFunction create_profile_section; + Kokkos_Profiling_startProfileSectionFunction start_profile_section; + Kokkos_Profiling_stopProfileSectionFunction stop_profile_section; + Kokkos_Profiling_destroyProfileSectionFunction destroy_profile_section; + Kokkos_Profiling_profileEventFunction profile_event; + Kokkos_Profiling_beginDeepCopyFunction begin_deep_copy; + Kokkos_Profiling_endDeepCopyFunction end_deep_copy; + Kokkos_Profiling_beginFenceFunction begin_fence; + Kokkos_Profiling_endFenceFunction end_fence; + Kokkos_Profiling_dualViewSyncFunction sync_dual_view; + Kokkos_Profiling_dualViewModifyFunction modify_dual_view; + Kokkos_Profiling_declareMetadataFunction declare_metadata; + Kokkos_Tools_provideToolProgrammingInterfaceFunction + provide_tool_programming_interface; + Kokkos_Tools_requestToolSettingsFunction request_tool_settings; + char profiling_padding[9 * sizeof(Kokkos_Tools_functionPointer)]; + Kokkos_Tools_outputTypeDeclarationFunction declare_output_type; + Kokkos_Tools_inputTypeDeclarationFunction declare_input_type; + Kokkos_Tools_requestValueFunction request_output_values; + Kokkos_Tools_contextBeginFunction begin_tuning_context; + Kokkos_Tools_contextEndFunction end_tuning_context; + Kokkos_Tools_optimizationGoalDeclarationFunction declare_optimization_goal; + char padding[232 * + sizeof( + Kokkos_Tools_functionPointer)]; // allows us to add another + // 256 events to the Tools + // interface without + // changing struct layout +}; + +#endif // KOKKOS_PROFILING_C_INTERFACE_HPP diff --git 
a/profiling/all/impl/Kokkos_Profiling_DeviceInfo.hpp b/profiling/all/impl/Kokkos_Profiling_DeviceInfo.hpp new file mode 100644 index 000000000..be6f756d0 --- /dev/null +++ b/profiling/all/impl/Kokkos_Profiling_DeviceInfo.hpp @@ -0,0 +1,56 @@ +/* + //@HEADER + // ************************************************************************ + // + // Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). + // + // Under the terms of Contract DE-NA0003525 with NTESS, + // the U.S. Government retains certain rights in this software. + // + // Redistribution and use in source and binary forms, with or without + // modification, are permitted provided that the following conditions are + // met: + // + // 1. Redistributions of source code must retain the above copyright + // notice, this list of conditions and the following disclaimer. + // + // 2. Redistributions in binary form must reproduce the above copyright + // notice, this list of conditions and the following disclaimer in the + // documentation and/or other materials provided with the distribution. + // + // 3. Neither the name of the Corporation nor the names of the + // contributors may be used to endorse or promote products derived from + // this software without specific prior written permission. + // + // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY + // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE + // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // + // Questions? Contact Christian R. Trott (crtrott@sandia.gov) + // + // ************************************************************************ + //@HEADER +*/ + +#ifndef KOKKOSP_DEVICE_INFO_HPP +#define KOKKOSP_DEVICE_INFO_HPP + +#include +#include +namespace Kokkos { +namespace Profiling { +using KokkosPDeviceInfo = Kokkos_Profiling_KokkosPDeviceInfo; +} // namespace Profiling +} // namespace Kokkos + +#endif diff --git a/profiling/all/impl/Kokkos_Profiling_Interface.hpp b/profiling/all/impl/Kokkos_Profiling_Interface.hpp new file mode 100644 index 000000000..deeab829d --- /dev/null +++ b/profiling/all/impl/Kokkos_Profiling_Interface.hpp @@ -0,0 +1,268 @@ +/* + //@HEADER + // ************************************************************************ + // + // Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). + // + // Under the terms of Contract DE-NA0003525 with NTESS, + // the U.S. Government retains certain rights in this software. + // + // Redistribution and use in source and binary forms, with or without + // modification, are permitted provided that the following conditions are + // met: + // + // 1. Redistributions of source code must retain the above copyright + // notice, this list of conditions and the following disclaimer. + // + // 2. 
Redistributions in binary form must reproduce the above copyright + // notice, this list of conditions and the following disclaimer in the + // documentation and/or other materials provided with the distribution. + // + // 3. Neither the name of the Corporation nor the names of the + // contributors may be used to endorse or promote products derived from + // this software without specific prior written permission. + // + // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY + // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE + // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // + // Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) + // + // ************************************************************************ + //@HEADER + */ + +#ifndef KOKKOSP_INTERFACE_HPP +#define KOKKOSP_INTERFACE_HPP + +#include +#include + +#include + +// NOTE: in this Kokkos::Profiling block, do not define anything that shouldn't +// exist should Profiling be disabled + +namespace Kokkos { +namespace Tools { +namespace Experimental { + +constexpr const uint32_t NumReservedDeviceIDs = 1; + +enum SpecialSynchronizationCases : int { + GlobalDeviceSynchronization = 1, + DeepCopyResourceSynchronization = 2, +}; + +enum struct DeviceType { + Serial, + OpenMP, + Cuda, + HIP, + OpenMPTarget, + HPX, + Threads, + SYCL, + Unknown +}; + +struct ExecutionSpaceIdentifier { + DeviceType type; + uint32_t device_id; + uint32_t instance_id; +}; +inline DeviceType devicetype_from_uint32t(const uint32_t in) { + switch (in) { + case 0: return DeviceType::Serial; + case 1: return DeviceType::OpenMP; + case 2: return DeviceType::Cuda; + case 3: return DeviceType::HIP; + case 4: return DeviceType::OpenMPTarget; + case 5: return DeviceType::HPX; + case 6: return DeviceType::Threads; + case 7: return DeviceType::SYCL; + default: return DeviceType::Unknown; // TODO: error out? 
+ } +} + +inline ExecutionSpaceIdentifier identifier_from_devid(const uint32_t in) { + // ExecutionSpaceIdentifier out; + // out.type = in >> 24; + // out.device_id = in >> 17; + // out.instance_id = ((uint32_t(-1)) << 17 ) & in; + return {devicetype_from_uint32t(in >> 24), + (~((uint32_t(-1)) << 24)) & (in >> 17), + (~((uint32_t(-1)) << 17)) & in}; +} + +template +struct DeviceTypeTraits; + +constexpr const size_t device_type_bits = 8; +constexpr const size_t instance_bits = 24; +template +constexpr uint32_t device_id_root() { + /** uncomment when C++14 is enabled + constexpr auto device_id = + static_cast(DeviceTypeTraits::id); + return (device_id << instance_bits); + */ + return 0; +} +template +inline uint32_t device_id(ExecutionSpace const& space) noexcept { + return device_id_root() + space.impl_instance_id(); +} +} // namespace Experimental +} // namespace Tools +} // end namespace Kokkos + +#if defined(KOKKOS_ENABLE_LIBDL) +// We check at configure time that libdl is available. +#include +#endif + +#include +#include + +namespace Kokkos { +namespace Tools { + +using SpaceHandle = Kokkos_Profiling_SpaceHandle; + +} // namespace Tools + +namespace Tools { + +namespace Experimental { +using EventSet = Kokkos_Profiling_EventSet; +static_assert(sizeof(EventSet) / sizeof(Kokkos_Tools_functionPointer) == 275, + "sizeof EventSet has changed, this is an error on the part of a " + "Kokkos developer"); +static_assert(sizeof(Kokkos_Tools_ToolSettings) / sizeof(bool) == 256, + "sizeof EventSet has changed, this is an error on the part of a " + "Kokkos developer"); +static_assert(sizeof(Kokkos_Tools_ToolProgrammingInterface) / + sizeof(Kokkos_Tools_functionPointer) == + 32, + "sizeof EventSet has changed, this is an error on the part of a " + "Kokkos developer"); + +using toolInvokedFenceFunction = Kokkos_Tools_toolInvokedFenceFunction; +using provideToolProgrammingInterfaceFunction = + Kokkos_Tools_provideToolProgrammingInterfaceFunction; +using 
requestToolSettingsFunction = Kokkos_Tools_requestToolSettingsFunction; +using ToolSettings = Kokkos_Tools_ToolSettings; +using ToolProgrammingInterface = Kokkos_Tools_ToolProgrammingInterface; +} // namespace Experimental +using initFunction = Kokkos_Profiling_initFunction; +using finalizeFunction = Kokkos_Profiling_finalizeFunction; +using parseArgsFunction = Kokkos_Profiling_parseArgsFunction; +using printHelpFunction = Kokkos_Profiling_printHelpFunction; +using beginFunction = Kokkos_Profiling_beginFunction; +using endFunction = Kokkos_Profiling_endFunction; +using pushFunction = Kokkos_Profiling_pushFunction; +using popFunction = Kokkos_Profiling_popFunction; +using allocateDataFunction = Kokkos_Profiling_allocateDataFunction; +using deallocateDataFunction = Kokkos_Profiling_deallocateDataFunction; +using createProfileSectionFunction = + Kokkos_Profiling_createProfileSectionFunction; +using startProfileSectionFunction = + Kokkos_Profiling_startProfileSectionFunction; +using stopProfileSectionFunction = Kokkos_Profiling_stopProfileSectionFunction; +using destroyProfileSectionFunction = + Kokkos_Profiling_destroyProfileSectionFunction; +using profileEventFunction = Kokkos_Profiling_profileEventFunction; +using beginDeepCopyFunction = Kokkos_Profiling_beginDeepCopyFunction; +using endDeepCopyFunction = Kokkos_Profiling_endDeepCopyFunction; +using beginFenceFunction = Kokkos_Profiling_beginFenceFunction; +using endFenceFunction = Kokkos_Profiling_endFenceFunction; +using dualViewSyncFunction = Kokkos_Profiling_dualViewSyncFunction; +using dualViewModifyFunction = Kokkos_Profiling_dualViewModifyFunction; +using declareMetadataFunction = Kokkos_Profiling_declareMetadataFunction; + +} // namespace Tools + +} // namespace Kokkos + +// Profiling + +namespace Kokkos { + +namespace Profiling { + +/** The Profiling namespace is being renamed to Tools. 
+ * This is reexposing the contents of what used to be the Profiling + * Interface with their original names, to avoid breaking old code + */ + +namespace Experimental { + +using Kokkos::Tools::Experimental::device_id; +using Kokkos::Tools::Experimental::DeviceType; +using Kokkos::Tools::Experimental::DeviceTypeTraits; + +} // namespace Experimental + +using Kokkos::Tools::allocateDataFunction; +using Kokkos::Tools::beginDeepCopyFunction; +using Kokkos::Tools::beginFunction; +using Kokkos::Tools::createProfileSectionFunction; +using Kokkos::Tools::deallocateDataFunction; +using Kokkos::Tools::destroyProfileSectionFunction; +using Kokkos::Tools::endDeepCopyFunction; +using Kokkos::Tools::endFunction; +using Kokkos::Tools::finalizeFunction; +using Kokkos::Tools::initFunction; +using Kokkos::Tools::parseArgsFunction; +using Kokkos::Tools::popFunction; +using Kokkos::Tools::printHelpFunction; +using Kokkos::Tools::profileEventFunction; +using Kokkos::Tools::pushFunction; +using Kokkos::Tools::SpaceHandle; +using Kokkos::Tools::startProfileSectionFunction; +using Kokkos::Tools::stopProfileSectionFunction; + +} // namespace Profiling +} // namespace Kokkos + +// Tuning + +namespace Kokkos { +namespace Tools { +namespace Experimental { +using ValueSet = Kokkos_Tools_ValueSet; +using ValueRange = Kokkos_Tools_ValueRange; +using StatisticalCategory = Kokkos_Tools_VariableInfo_StatisticalCategory; +using ValueType = Kokkos_Tools_VariableInfo_ValueType; +using CandidateValueType = Kokkos_Tools_VariableInfo_CandidateValueType; +using SetOrRange = Kokkos_Tools_VariableInfo_SetOrRange; +using VariableInfo = Kokkos_Tools_VariableInfo; +using OptimizationGoal = Kokkos_Tools_OptimzationGoal; +using TuningString = Kokkos_Tools_Tuning_String; +using VariableValue = Kokkos_Tools_VariableValue; + +using outputTypeDeclarationFunction = + Kokkos_Tools_outputTypeDeclarationFunction; +using inputTypeDeclarationFunction = Kokkos_Tools_inputTypeDeclarationFunction; +using 
requestValueFunction = Kokkos_Tools_requestValueFunction; +using contextBeginFunction = Kokkos_Tools_contextBeginFunction; +using contextEndFunction = Kokkos_Tools_contextEndFunction; +using optimizationGoalDeclarationFunction = + Kokkos_Tools_optimizationGoalDeclarationFunction; +} // end namespace Experimental +} // end namespace Tools + +} // end namespace Kokkos + +#endif diff --git a/profiling/all/kp_all.cpp b/profiling/all/kp_all.cpp new file mode 100644 index 000000000..c9959846e --- /dev/null +++ b/profiling/all/kp_all.cpp @@ -0,0 +1,132 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact David Poliakoff (dzpolia@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include + +#include "kp_all.hpp" + +#define KOKKOSTOOLS_EXTERN_EVENT_SET(NAMESPACE) \ +namespace KokkosTools { namespace NAMESPACE { \ + extern Kokkos::Tools::Experimental::EventSet get_event_set(); \ +}} + +#ifndef WIN32 +KOKKOSTOOLS_EXTERN_EVENT_SET(KernelTimer) +KOKKOSTOOLS_EXTERN_EVENT_SET(KernelTimerJSON) +KOKKOSTOOLS_EXTERN_EVENT_SET(MemoryEvents) +KOKKOSTOOLS_EXTERN_EVENT_SET(MemoryUsage) +KOKKOSTOOLS_EXTERN_EVENT_SET(HighwaterMark) +KOKKOSTOOLS_EXTERN_EVENT_SET(HighwaterMarkMPI) +KOKKOSTOOLS_EXTERN_EVENT_SET(ChromeTracing) +KOKKOSTOOLS_EXTERN_EVENT_SET(SpaceTimeStack) +KOKKOSTOOLS_EXTERN_EVENT_SET(SystemtapConnector) +#endif +#ifdef KOKKOSTOOLS_HAS_VTUNE + KOKKOSTOOLS_EXTERN_EVENT_SET(VTuneConnector) + KOKKOSTOOLS_EXTERN_EVENT_SET(VTuneFocusedConnector) +#endif +#ifdef KOKKOSTOOLS_HAS_VARIORUM + KOKKOSTOOLS_EXTERN_EVENT_SET(VariorumConnector) +#endif +#ifdef KOKKOSTOOLS_HAS_NVPROF + KOKKOSTOOLS_EXTERN_EVENT_SET(NVProfConnector) + KOKKOSTOOLS_EXTERN_EVENT_SET(NVProfFocusedConnector) +#endif +#ifdef KOKKOSTOOLS_HAS_CALIPER +namespace cali { + extern Kokkos::Tools::Experimental::EventSet get_kokkos_event_set(const char* config_str); +} +#endif + +using EventSet = Kokkos::Tools::Experimental::EventSet; + +namespace KokkosTools { + +EventSet 
get_event_set(const char* profiler, const char* config_str) +{ + std::map handlers; +#ifndef WIN32 + handlers["kernel-timer"] = KernelTimer::get_event_set(); + handlers["kernel-timer-json"] = KernelTimerJSON::get_event_set(); + handlers["memory-events"] = MemoryEvents::get_event_set(); + handlers["memory-usage"] = MemoryUsage::get_event_set(); +#if USE_MPI + handlers["highwater-mark-mpi"] = HighwaterMarkMPI::get_event_set(); +#endif + handlers["highwater-mark"] = HighwaterMark::get_event_set(); + handlers["chrome-tracing"] = ChromeTracing::get_event_set(); + handlers["space-time-stack"] = SpaceTimeStack::get_event_set(); + handlers["systemtap-connector"] = SystemtapConnector::get_event_set(); +#endif +#ifdef KOKKOSTOOLS_HAS_VARIORUM + handlers["variorum"] = VariorumConnector::get_event_set(); +#endif +#ifdef KOKKOSTOOLS_HAS_VTUNE + handlers["vtune-connector"] = VTuneConnector::get_event_set(); + handlers["vtune-focused-connector"] = VTuneFocusedConnector::get_event_set(); +#endif +#ifdef KOKKOSTOOLS_HAS_CALIPER + handlers["caliper"] = cali::get_kokkos_event_set(config_str); +#endif +#ifdef KOKKOSTOOLS_HAS_NVPROF + handlers["nvprof-connector"] = NVProfConnector::get_event_set(); + handlers["nvprof-focused-connector"] = NVProfFocusedConnector::get_event_set(); +#endif + auto e = handlers.find(profiler); + if (e != handlers.end()) + return e->second; + + if (strlen(profiler) > 0) { + const auto msg = std::string("Profiler not supported: ") + profiler + " (unknown tool)"; + throw std::runtime_error(msg); + } + + // default = no profiling + EventSet eventSet; + memset(&eventSet, 0, sizeof(eventSet)); + return eventSet; +} + +} diff --git a/profiling/memory-usage/kp_timer.hpp b/profiling/all/kp_all.hpp similarity index 72% rename from profiling/memory-usage/kp_timer.hpp rename to profiling/all/kp_all.hpp index 299bbc61c..3294b8ae3 100644 --- a/profiling/memory-usage/kp_timer.hpp +++ b/profiling/all/kp_all.hpp @@ -1,4 +1,3 @@ -/* //@HEADER // 
************************************************************************ // @@ -40,37 +39,17 @@ // // ************************************************************************ //@HEADER -*/ -#ifndef KOKKOS_TIMER_HPP -#define KOKKOS_TIMER_HPP +#ifndef KOKKOSTOOLS_ALL_HPP +#define KOKKOSTOOLS_ALL_HPP -#include +#include "kp_config.hpp" +#include "impl/Kokkos_Profiling_Interface.hpp" // Note: impl/... is used inside the header -namespace Kokkos { +namespace KokkosTools { -/** \brief Time since construction */ +Kokkos::Tools::Experimental::EventSet get_event_set(const char *profiler, const char *options); -class Timer { - private: - std::chrono::high_resolution_clock::time_point m_old; - Timer(const Timer&); - Timer& operator=(const Timer&); +} - public: - void reset() { m_old = std::chrono::high_resolution_clock::now(); } - - Timer() { reset(); } - - double seconds() const { - std::chrono::high_resolution_clock::time_point m_new = - std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast>(m_new - - m_old) - .count(); - } -}; - -} // namespace Kokkos - -#endif /* #ifndef KOKKOS_TIMER_HPP */ +#endif diff --git a/profiling/all/kp_core.hpp b/profiling/all/kp_core.hpp new file mode 100644 index 000000000..cda18c0c2 --- /dev/null +++ b/profiling/all/kp_core.hpp @@ -0,0 +1,185 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact David Poliakoff (dzpolia@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOSTOOLS_KOKKOSINTERFACE_HPP +#define KOKKOSTOOLS_KOKKOSINTERFACE_HPP + +#include + +#include "kp_config.hpp" +#include "impl/Kokkos_Profiling_Interface.hpp" // Note: impl/... 
is used inside the header + +using Kokkos::Tools::SpaceHandle; + +#ifdef WIN32 + + #define EXPOSE_INIT(FUNC_NAME) + #define EXPOSE_NOARGFUNCTION(HANDLER_NAME, FUNC_NAME) + #define EXPOSE_FINALIZE(FUNC_NAME) + #define EXPOSE_ALLOCATE(FUNC_NAME) + #define EXPOSE_DEALLOCATE(FUNC_NAME) + #define EXPOSE_PUSH_REGION(FUNC_NAME) + #define EXPOSE_POP_REGION(FUNC_NAME) + #define EXPOSE_BEGIN_PARALLEL_FOR(FUNC_NAME) + #define EXPOSE_END_PARALLEL_FOR(FUNC_NAME) + #define EXPOSE_BEGIN_PARALLEL_SCAN(FUNC_NAME) + #define EXPOSE_END_PARALLEL_SCAN(FUNC_NAME) + #define EXPOSE_BEGIN_PARALLEL_REDUCE(FUNC_NAME) + #define EXPOSE_END_PARALLEL_REDUCE(FUNC_NAME) + #define EXPOSE_BEGIN_DEEP_COPY(FUNC_NAME) + #define EXPOSE_END_DEEP_COPY(FUNC_NAME) + #define EXPOSE_CREATE_PROFILE_SECTION(FUNC_NAME) + #define EXPOSE_START_PROFILE_SECTION(FUNC_NAME) + #define EXPOSE_STOP_PROFILE_SECTION(FUNC_NAME) + #define EXPOSE_DESTROY_PROFILE_SECTION(FUNC_NAME) + #define EXPOSE_PROFILE_EVENT(FUNC_NAME) + +#else + +#define EXPOSE_TOOL_SETTINGS(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_request_tool_settings(const uint32_t num_actions, \ + Kokkos_Tools_ToolSettings* settings) { \ + FUNC_NAME(num_actions, settings); \ +} + +#define EXPOSE_INIT(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_init_library(const int loadSeq, \ + const uint64_t interfaceVer, \ + const uint32_t devInfoCount, \ + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { \ + FUNC_NAME(loadSeq, interfaceVer, devInfoCount, deviceInfo); \ +} + +#define EXPOSE_NOARGFUNCTION(HANDLER_NAME, FUNC_NAME) \ +__attribute__((weak)) void HANDLER_NAME() { FUNC_NAME(); } + +#define EXPOSE_FINALIZE(FUNC_NAME) EXPOSE_NOARGFUNCTION(kokkosp_finalize_library, FUNC_NAME) + +#define EXPOSE_ALLOCATE(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_allocate_data(const SpaceHandle space, const char* label, const void* const ptr, const uint64_t size) { \ + FUNC_NAME(space, label, ptr, size); \ +} + +#define EXPOSE_DEALLOCATE(FUNC_NAME) \ 
+__attribute__((weak)) \ +void kokkosp_deallocate_data(const SpaceHandle space, const char* label, const void* const ptr, const uint64_t size) { \ + FUNC_NAME(space, label, ptr, size); \ +} + +#define EXPOSE_PUSH_REGION(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_push_profile_region(const char* name) { FUNC_NAME(name); } + +#define EXPOSE_POP_REGION(FUNC_NAME) EXPOSE_NOARGFUNCTION(kokkosp_pop_profile_region, FUNC_NAME) + +#define EXPOSE_BEGIN_PARALLEL_FOR(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { \ + FUNC_NAME(name, devID, kID); \ +} + +#define EXPOSE_END_PARALLEL_FOR(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_end_parallel_for(const uint64_t kID) { \ + FUNC_NAME(kID); \ +} + +#define EXPOSE_BEGIN_PARALLEL_SCAN(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { \ + FUNC_NAME(name, devID, kID); \ +} + +#define EXPOSE_END_PARALLEL_SCAN(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_end_parallel_scan(const uint64_t kID) { \ + FUNC_NAME(kID); \ +} + +#define EXPOSE_BEGIN_PARALLEL_REDUCE(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { \ + FUNC_NAME(name, devID, kID); \ +} + +#define EXPOSE_END_PARALLEL_REDUCE(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_end_parallel_reduce(const uint64_t kID) { \ + FUNC_NAME(kID); \ +} + +#define EXPOSE_BEGIN_DEEP_COPY(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_begin_deep_copy(SpaceHandle dst_handle, \ + const char *dst_name, const void *dst_ptr, \ + SpaceHandle src_handle, const char *src_name, \ + const void *src_ptr, uint64_t size) { \ + FUNC_NAME(dst_handle, dst_name, dst_ptr, src_handle, src_name, src_ptr, size); \ +} + +#define EXPOSE_END_DEEP_COPY(FUNC_NAME) EXPOSE_NOARGFUNCTION(kokkosp_end_deep_copy, FUNC_NAME) + +#define 
EXPOSE_CREATE_PROFILE_SECTION(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_create_profile_section(const char* name, uint32_t* sec_id) { FUNC_NAME(name, sec_id); } + +#define EXPOSE_START_PROFILE_SECTION(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_start_profile_section(const uint32_t sec_id) { FUNC_NAME(sec_id); } + +#define EXPOSE_STOP_PROFILE_SECTION(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_stop_profile_section(const uint32_t sec_id) { FUNC_NAME(sec_id); } + +#define EXPOSE_DESTROY_PROFILE_SECTION(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_destroy_profile_section(const uint32_t sec_id) { FUNC_NAME(sec_id); } + +#define EXPOSE_PROFILE_EVENT(FUNC_NAME) \ +__attribute__((weak)) \ +void kokkosp_profile_event(const char* name) { FUNC_NAME(name); } +#endif + +#endif // KOKKOSTOOLS_KOKKOSINTERFACE_HPP diff --git a/profiling/chrome-tracing/CMakeLists.txt b/profiling/chrome-tracing/CMakeLists.txt new file mode 100644 index 000000000..2653affd3 --- /dev/null +++ b/profiling/chrome-tracing/CMakeLists.txt @@ -0,0 +1,5 @@ +kp_add_library(kp_chrome_tracing kp_chrome_tracing.cpp) + +if(USE_MPI) + target_link_libraries(kp_chrome_tracing PRIVATE MPI::MPI_CXX) +endif() diff --git a/profiling/chrome-tracing/kp_chrome_tracing.cpp b/profiling/chrome-tracing/kp_chrome_tracing.cpp index 8caf859ff..6a06e1b32 100644 --- a/profiling/chrome-tracing/kp_chrome_tracing.cpp +++ b/profiling/chrome-tracing/kp_chrome_tracing.cpp @@ -15,9 +15,7 @@ #include #include -#ifndef USE_MPI -#define USE_MPI 1 -#endif +#include "kp_core.hpp" #if USE_MPI #include @@ -25,14 +23,8 @@ #include -namespace { - -struct SpaceHandle { - char name[64]; -}; -struct KokkosPDeviceInfo { - std::uint32_t deviceID; -}; +namespace KokkosTools { +namespace ChromeTracing { enum Space { SPACE_HOST, @@ -187,69 +179,59 @@ struct State { State *global_state = nullptr; -} // end anonymous namespace - -extern "C" void kokkosp_init_library(int loadseq, uint64_t, uint32_t ndevinfos, - 
KokkosPDeviceInfo *devinfos) { +void kokkosp_init_library(int loadseq, uint64_t, uint32_t ndevinfos, + Kokkos_Profiling_KokkosPDeviceInfo *devinfos) { (void)loadseq; (void)ndevinfos; (void)devinfos; global_state = new State(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { delete global_state; global_state = nullptr; } -extern "C" void kokkosp_begin_parallel_for(const char *name, +void kokkosp_begin_parallel_for(const char *name, std::uint32_t devid, std::uint64_t *kernid) { (void)devid; *kernid = global_state->begin_kernel(name, STACK_FOR); } -extern "C" void kokkosp_begin_parallel_reduce(const char *name, +void kokkosp_begin_parallel_reduce(const char *name, std::uint32_t devid, std::uint64_t *kernid) { (void)devid; *kernid = global_state->begin_kernel(name, STACK_REDUCE); } -extern "C" void kokkosp_begin_parallel_scan(const char *name, +void kokkosp_begin_parallel_scan(const char *name, std::uint32_t devid, std::uint64_t *kernid) { (void)devid; *kernid = global_state->begin_kernel(name, STACK_SCAN); } -extern "C" void kokkosp_end_parallel_for(std::uint64_t kernid) { +void kokkosp_end_parallel_for(std::uint64_t kernid) { global_state->end_kernel(kernid); } -extern "C" void kokkosp_end_parallel_reduce(std::uint64_t kernid) { +void kokkosp_end_parallel_reduce(std::uint64_t kernid) { global_state->end_kernel(kernid); } -extern "C" void kokkosp_end_parallel_scan(std::uint64_t kernid) { +void kokkosp_end_parallel_scan(std::uint64_t kernid) { global_state->end_kernel(kernid); } -extern "C" void kokkosp_push_profile_region(const char *name) { +void kokkosp_push_profile_region(const char *name) { global_state->push_region(name); } -extern "C" void kokkosp_pop_profile_region() { global_state->pop_region(); } - -extern "C" void kokkosp_allocate_data(SpaceHandle , const char *, - void *, uint64_t ) { -} +void kokkosp_pop_profile_region() { global_state->pop_region(); } -extern "C" void kokkosp_deallocate_data(SpaceHandle , const char *, - 
void *, uint64_t ) { -} - -extern "C" void kokkosp_begin_deep_copy(SpaceHandle dst_handle, +void kokkosp_begin_deep_copy(SpaceHandle dst_handle, const char *dst_name, const void *dst_ptr, SpaceHandle src_handle, @@ -261,4 +243,43 @@ extern "C" void kokkosp_begin_deep_copy(SpaceHandle dst_handle, src_name, src_ptr, size); } -extern "C" void kokkosp_end_deep_copy() { global_state->end_deep_copy(); } +void kokkosp_end_deep_copy() { global_state->end_deep_copy(); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + my_event_set.begin_deep_copy = kokkosp_begin_deep_copy; + my_event_set.end_deep_copy = kokkosp_end_deep_copy; + return my_event_set; +} + +}} // namespace KokkosTools::ChromeTracing + +extern "C" { + +namespace impl = KokkosTools::ChromeTracing; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) 
+EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_BEGIN_DEEP_COPY(impl::kokkosp_begin_deep_copy) +EXPOSE_END_DEEP_COPY(impl::kokkosp_end_deep_copy) + +} // extern "C" \ No newline at end of file diff --git a/profiling/memory-events/CMakeLists.txt b/profiling/memory-events/CMakeLists.txt new file mode 100644 index 000000000..db88966b4 --- /dev/null +++ b/profiling/memory-events/CMakeLists.txt @@ -0,0 +1 @@ +kp_add_library(kp_memory_events kp_memory_events.cpp) \ No newline at end of file diff --git a/profiling/memory-events/kp_memory_events.cpp b/profiling/memory-events/kp_memory_events.cpp index 37b1a1be1..5af70a689 100644 --- a/profiling/memory-events/kp_memory_events.cpp +++ b/profiling/memory-events/kp_memory_events.cpp @@ -53,9 +53,15 @@ #include #include +#include "kp_core.hpp" #include "kp_memory_events.hpp" #include "kp_timer.hpp" +namespace KokkosTools { +namespace MemoryEvents { + +char space_name[16][64]; + std::vector events; int num_spaces; @@ -73,10 +79,9 @@ double max_mem_usage() { return max_rssKB*1024; } -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, - const uint32_t devInfoCount, - void* deviceInfo) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { num_spaces = 0; for(int i=0; i<16; i++) @@ -87,7 +92,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, timer.reset(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { char* hostname = (char*) malloc(sizeof(char) * 256); gethostname(hostname, 256); int pid = getpid(); @@ -129,7 +134,7 @@ extern "C" void kokkosp_finalize_library() { free(hostname); } -extern "C" void kokkosp_allocate_data(const SpaceHandle space, const char* label, const void* const ptr, const uint64_t size) { +void kokkosp_allocate_data(const SpaceHandle 
space, const char* label, const void* const ptr, const uint64_t size) { std::lock_guard lock(m); double time = timer.seconds(); @@ -151,7 +156,7 @@ extern "C" void kokkosp_allocate_data(const SpaceHandle space, const char* label } -extern "C" void kokkosp_deallocate_data(const SpaceHandle space, const char* label, const void* const ptr, const uint64_t size) { +void kokkosp_deallocate_data(const SpaceHandle space, const char* label, const void* const ptr, const uint64_t size) { std::lock_guard lock(m); double time = timer.seconds(); @@ -174,15 +179,41 @@ extern "C" void kokkosp_deallocate_data(const SpaceHandle space, const char* lab events.push_back(EventRecord(ptr,size,MEMOP_DEALLOCATE,space_i,time,label)); } -extern "C" void kokkosp_push_profile_region(const char* name) { +void kokkosp_push_profile_region(const char* name) { std::lock_guard lock(m); double time = timer.seconds(); events.push_back(EventRecord(nullptr,0,MEMOP_PUSH_REGION,0,time,name)); } -extern "C" void kokkosp_pop_profile_region() { +void kokkosp_pop_profile_region() { std::lock_guard lock(m); double time = timer.seconds(); events.push_back(EventRecord(nullptr,0,MEMOP_POP_REGION,0,time,"")); } +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.allocate_data = kokkosp_allocate_data; + my_event_set.deallocate_data = kokkosp_deallocate_data; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + return my_event_set; +} + +}} // namespace KokkosTools::MemoryEvents + +extern "C" { + +namespace impl = KokkosTools::MemoryEvents; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_ALLOCATE(impl::kokkosp_allocate_data) 
+EXPOSE_DEALLOCATE(impl::kokkosp_deallocate_data) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) + +} // extern "C" \ No newline at end of file diff --git a/profiling/memory-events/kp_memory_events.hpp b/profiling/memory-events/kp_memory_events.hpp index 6a809c9a5..a5dd7b235 100644 --- a/profiling/memory-events/kp_memory_events.hpp +++ b/profiling/memory-events/kp_memory_events.hpp @@ -47,15 +47,14 @@ #define MEMOP_PUSH_REGION 3 #define MEMOP_POP_REGION 4 -#include #include #include -struct SpaceHandle { - char name[64]; -}; +#include "kp_core.hpp" + +namespace KokkosTools::MemoryEvents { -char space_name[16][64]; +extern char space_name[16][64]; struct EventRecord { const void* ptr; @@ -88,3 +87,4 @@ struct EventRecord { } }; +} // namespace KokkosTools::MemoryEvents diff --git a/profiling/memory-hwm-mpi/CMakeLists.txt b/profiling/memory-hwm-mpi/CMakeLists.txt new file mode 100644 index 000000000..7deaa5ebf --- /dev/null +++ b/profiling/memory-hwm-mpi/CMakeLists.txt @@ -0,0 +1,7 @@ +if(NOT MPI_FOUND OR NOT TARGET MPI::MPI_CXX) + message(FATAL_ERROR "kp_hwm_mpi requires MPI") +endif() + +kp_add_library(kp_hwm_mpi kp_hwm_mpi.cpp) + +target_link_libraries(kp_hwm_mpi PRIVATE MPI::MPI_CXX) diff --git a/profiling/memory-hwm-mpi/kp_hwm_mpi.cpp b/profiling/memory-hwm-mpi/kp_hwm_mpi.cpp index 3bc9bc955..3bfb7a2a4 100644 --- a/profiling/memory-hwm-mpi/kp_hwm_mpi.cpp +++ b/profiling/memory-hwm-mpi/kp_hwm_mpi.cpp @@ -46,6 +46,11 @@ #include #include +#include "kp_core.hpp" + +namespace KokkosTools { +namespace HighwaterMarkMPI { + static int world_rank = 0; static int world_size = 1; @@ -56,10 +61,10 @@ static int world_size = 1; # define RU_MAXRSS_UNITS 1 #endif -extern "C" void kokkosp_init_library(const int loadSeq, +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, - void* deviceInfo) { + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { int 
mpi_is_initialized; MPI_Initialized(&mpi_is_initialized); if (!mpi_is_initialized) { @@ -75,7 +80,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, } } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { if (world_rank == 0) { printf("\n"); printf("KokkosP: Finalization of profiling library.\n"); @@ -106,3 +111,24 @@ extern "C" void kokkosp_finalize_library() { printf("\n"); } } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + return my_event_set; +} + +}} // namespace KokkosTools::HighwaterMarkMPI + + +extern "C" { + +namespace impl = KokkosTools::HighwaterMarkMPI; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) + + +} // extern "C" \ No newline at end of file diff --git a/profiling/memory-hwm/CMakeLists.txt b/profiling/memory-hwm/CMakeLists.txt new file mode 100644 index 000000000..b5c362e5f --- /dev/null +++ b/profiling/memory-hwm/CMakeLists.txt @@ -0,0 +1 @@ +kp_add_library(kp_hwm kp_hwm.cpp) \ No newline at end of file diff --git a/profiling/memory-hwm/Makefile b/profiling/memory-hwm/Makefile index 1a6473c76..709209e54 100644 --- a/profiling/memory-hwm/Makefile +++ b/profiling/memory-hwm/Makefile @@ -1,3 +1,5 @@ + + CXX=g++ CFLAGS=-shared -O3 -fPIC -std=c++11 diff --git a/profiling/memory-hwm/kp_hwm.cpp b/profiling/memory-hwm/kp_hwm.cpp index 9f039a7b3..703e7d76b 100644 --- a/profiling/memory-hwm/kp_hwm.cpp +++ b/profiling/memory-hwm/kp_hwm.cpp @@ -54,12 +54,17 @@ #include #include +#include "kp_core.hpp" + +namespace KokkosTools { +namespace HighwaterMark { + static uint64_t uniqID = 0; -extern "C" void kokkosp_init_library(const int loadSeq, +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t 
devInfoCount, - void* deviceInfo) { + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { printf("KokkosP: Example Library Initialized (sequence is %d, version: %llu)\n", loadSeq, interfaceVer); } @@ -71,7 +76,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, # define RU_MAXRSS_UNITS 1 #endif -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { printf("\n"); printf("KokkosP: Finalization of profiling library.\n"); @@ -82,3 +87,26 @@ extern "C" void kokkosp_finalize_library() { (long) sys_resources.ru_maxrss * RU_MAXRSS_UNITS); printf("\n"); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + return my_event_set; +} + +// static auto event_set = get_event_set(); + +}} // namespace KokkosTools::HighwaterMark + +extern "C" { + +namespace impl = KokkosTools::HighwaterMark; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) + +// EXPOSE_KOKKOS_INTERFACE(KokkosTools::HighwaterMark::event_set) + +} // extern "C" \ No newline at end of file diff --git a/profiling/memory-usage/CMakeLists.txt b/profiling/memory-usage/CMakeLists.txt new file mode 100644 index 000000000..e3d0969f1 --- /dev/null +++ b/profiling/memory-usage/CMakeLists.txt @@ -0,0 +1,5 @@ +kp_add_library(kp_memory_usage kp_memory_usage.cpp) + +# enable headers from memory-events (kp_timer.hpp) +target_include_directories(kp_memory_usage + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../memory-events) \ No newline at end of file diff --git a/profiling/memory-usage/Makefile b/profiling/memory-usage/Makefile index 34d206a5f..8b1378917 100644 --- a/profiling/memory-usage/Makefile +++ b/profiling/memory-usage/Makefile @@ -1,15 +1 @@ -CXX=g++ -CXXFLAGS=-O3 -std=c++11 -g -fopenmp -SHARED_CXXFLAGS=-shared -fPIC 
-all: kp_memory_usage.so - -MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) - -CXXFLAGS+=-I${MAKEFILE_PATH} - -kp_memory_usage.so: ${MAKEFILE_PATH}kp_memory_usage.cpp ${MAKEFILE_PATH}kp_memory_events.hpp ${MAKEFILE_PATH}kp_timer.hpp - $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) -o $@ ${MAKEFILE_PATH}kp_memory_usage.cpp - -clean: - rm *.so diff --git a/profiling/memory-usage/kp_memory_usage.cpp b/profiling/memory-usage/kp_memory_usage.cpp index 8bbae74d9..3e66c807a 100644 --- a/profiling/memory-usage/kp_memory_usage.cpp +++ b/profiling/memory-usage/kp_memory_usage.cpp @@ -53,9 +53,14 @@ #include #include -#include "kp_memory_events.hpp" +#include "kp_core.hpp" #include "kp_timer.hpp" +namespace KokkosTools { +namespace MemoryUsage { + +char space_name[16][64]; + int num_spaces; std::vector > space_size_track[16]; uint64_t space_size[16]; @@ -71,24 +76,24 @@ double max_mem_usage() { return max_rssKB*1024; } -extern "C" void kokkosp_init_library(const int loadSeq, +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, - void* deviceInfo) { + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { num_spaces = 0; for(int i=0; i<16; i++) space_size[i] = 0; - + timer.reset(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { char* hostname = (char*) malloc(sizeof(char) * 256); gethostname(hostname, 256); int pid = getpid(); - for(int s = 0; s(space_size_track[s][i])/1024/1024); } - fclose(ofile); + fclose(ofile); } free(hostname); } -extern "C" void kokkosp_allocate_data(const SpaceHandle space, const char* label, const void* const ptr, const uint64_t size) { +void kokkosp_allocate_data(const SpaceHandle space, const char* label, const void* const ptr, const uint64_t size) { std::lock_guard lock(m); - + double time = timer.seconds(); - + int space_i = num_spaces; for(int s = 0; s lock(m); double time = timer.seconds(); @@ -150,3 +154,25 @@ extern "C" void 
kokkosp_deallocate_data(const SpaceHandle space, const char* lab } } +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.allocate_data = kokkosp_allocate_data; + my_event_set.deallocate_data = kokkosp_deallocate_data; + return my_event_set; +} + +}} // namespace KokkosTools::MemoryUsage + +extern "C" { + +namespace impl = KokkosTools::MemoryUsage; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_ALLOCATE(impl::kokkosp_allocate_data) +EXPOSE_DEALLOCATE(impl::kokkosp_deallocate_data) + +} // extern "C" \ No newline at end of file diff --git a/profiling/nvprof-connector/CMakeLists.txt b/profiling/nvprof-connector/CMakeLists.txt new file mode 100644 index 000000000..eae33dc2d --- /dev/null +++ b/profiling/nvprof-connector/CMakeLists.txt @@ -0,0 +1,4 @@ +find_package(CUDAToolkit REQUIRED) +kp_add_library(kp_nvprof_connector kp_nvprof_connector.cpp) + +target_link_libraries(kp_nvprof_connector CUDA::nvToolsExt) \ No newline at end of file diff --git a/profiling/nvprof-connector/kp_nvprof_connector.cpp b/profiling/nvprof-connector/kp_nvprof_connector.cpp index 4d96871c1..be91c5aff 100644 --- a/profiling/nvprof-connector/kp_nvprof_connector.cpp +++ b/profiling/nvprof-connector/kp_nvprof_connector.cpp @@ -51,22 +51,21 @@ #include "nvToolsExt.h" -struct Kokkos_Tools_ToolSettings -{ - bool requires_global_fencing; - bool padding[255]; -}; +#include "kp_core.hpp" -extern "C" void kokkosp_request_tool_settings(const uint32_t, Kokkos_Tools_ToolSettings* settings) { +namespace KokkosTools { +namespace NVProfConnector { + +void kokkosp_request_tool_settings(const uint32_t, Kokkos_Tools_ToolSettings* settings) { settings->requires_global_fencing = false; } static uint64_t 
nextKernelID; -extern "C" void kokkosp_init_library(const int loadSeq, +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, - void* deviceInfo) { + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { printf("-----------------------------------------------------------\n"); printf("KokkosP: NVTX Analyzer Connector (sequence is %d, version: %llu)\n", loadSeq, interfaceVer); @@ -77,7 +76,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, nvtxMarkA("Kokkos::Initialization Complete"); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); printf("KokkosP: Finalization of NVTX Connector. Complete.\n"); printf("-----------------------------------------------------------\n"); @@ -85,38 +84,75 @@ extern "C" void kokkosp_finalize_library() { nvtxMarkA("Kokkos::Finalization Complete"); } -extern "C" void kokkosp_begin_parallel_for(const char* name, +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { nvtxRangePush(name); } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { nvtxRangePop(); } +void kokkosp_end_parallel_for(const uint64_t kID) { nvtxRangePop(); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { nvtxRangePush(name); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { +void kokkosp_end_parallel_scan(const uint64_t kID) { nvtxRangePop(); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { nvtxRangePush(name); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { nvtxRangePop(); } -extern "C" void kokkosp_push_profile_region(char* regionName) { +void 
kokkosp_push_profile_region(const char* regionName) { nvtxRangePush(regionName); } -extern "C" void kokkosp_pop_profile_region() { +void kokkosp_pop_profile_region() { nvtxRangePop(); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.request_tool_settings = kokkosp_request_tool_settings; + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +}} // namespace KokkosTools::NVProfConnector + +extern "C" { + +namespace impl = KokkosTools::NVProfConnector; + +EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git a/profiling/nvprof-focused-connector/CMakeLists.txt b/profiling/nvprof-focused-connector/CMakeLists.txt new file mode 100644 index 
000000000..072198bf5 --- /dev/null +++ b/profiling/nvprof-focused-connector/CMakeLists.txt @@ -0,0 +1,4 @@ +find_package(CUDAToolkit REQUIRED) +kp_add_library(kp_nvprof_focused_connector kp_nvprof_focused_connector.cpp) + +target_link_libraries(kp_nvprof_focused_connector CUDA::nvToolsExt) \ No newline at end of file diff --git a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp b/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp index b3a82f890..865fa5621 100644 --- a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp +++ b/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp @@ -48,16 +48,22 @@ #include #include #include + #include "kp_nvprof_focused_connector_domain.h" +#include "kp_core.hpp" + +namespace KokkosTools { +namespace NVProfFocusedConnector { + static KernelNVProfFocusedConnectorInfo* currentKernel; static std::unordered_map domain_map; static uint64_t nextKernelID; -extern "C" void kokkosp_init_library(const int loadSeq, +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, - void* deviceInfo) { + struct Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { printf("-----------------------------------------------------------\n"); printf("KokkosP: NVProf Analyzer Focused Connector (sequence is %d, version: %llu)\n", loadSeq, interfaceVer); @@ -95,42 +101,73 @@ void focusedConnectorExecuteEnd() { currentKernel = NULL; } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); printf("KokkosP: Finalization of NVProf Connector. 
Complete.\n"); printf("-----------------------------------------------------------\n"); } -extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_FOR); focusedConnectorExecuteStart(); } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { +void kokkosp_end_parallel_for(const uint64_t kID) { focusedConnectorExecuteEnd(); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_SCAN); focusedConnectorExecuteStart(); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { +void kokkosp_end_parallel_scan(const uint64_t kID) { focusedConnectorExecuteEnd(); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_REDUCE); focusedConnectorExecuteStart(); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { focusedConnectorExecuteEnd(); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + 
my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +}} // KokkosTools::NVProfFocusedConnector + +extern "C" { + +namespace impl = KokkosTools::NVProfFocusedConnector; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h b/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h index d0e6ae2cf..1c62b53e1 100644 --- a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h +++ b/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h @@ -49,6 +49,9 @@ #include "nvToolsExt.h" +namespace KokkosTools { +namespace NVProfFocusedConnector { + enum KernelExecutionType { PARALLEL_FOR = 0, PARALLEL_REDUCE = 1, @@ -113,3 +116,5 @@ class KernelNVProfFocusedConnectorInfo { }; #endif + +}} // KokkosTools::NVProfFocusedConnector \ No newline at end of file diff --git a/profiling/papi-connector/CMakeLists.txt b/profiling/papi-connector/CMakeLists.txt new file mode 100644 index 000000000..478e996b1 --- /dev/null +++ b/profiling/papi-connector/CMakeLists.txt @@ -0,0 +1,3 @@ +add_library(kp_papi_connector SHARED kp_papi_connector.cpp) + +target_link_libraries(kp_papi_connector PRIVATE PAPI::PAPI) \ No newline at end of file diff --git a/profiling/simple-kernel-timer-json/Makefile b/profiling/simple-kernel-timer-json/Makefile deleted file mode 100644 index 
e676b3581..000000000 --- a/profiling/simple-kernel-timer-json/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -CXX=g++ -CXXFLAGS=-O3 -std=c++11 -g -SHARED_CXXFLAGS=-shared -fPIC - -all: kp_kernel_timer_json.so - -MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) - -CXXFLAGS+=-I${MAKEFILE_PATH} - -kp_kernel_timer_json.so: ${MAKEFILE_PATH}kp_kernel_timer.cpp ${MAKEFILE_PATH}kp_kernel_info.h - $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) -o $@ ${MAKEFILE_PATH}kp_kernel_timer.cpp - -clean: - rm *.so diff --git a/profiling/simple-kernel-timer-json/kp_kernel_info.h b/profiling/simple-kernel-timer-json/kp_kernel_info.h deleted file mode 100644 index 5c75d1bf0..000000000 --- a/profiling/simple-kernel-timer-json/kp_kernel_info.h +++ /dev/null @@ -1,230 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact David Poliakoff (dzpolia@sandia.gov) -// -// ************************************************************************ -//@HEADER - -#ifndef _H_KOKKOSP_KERNEL_INFO -#define _H_KOKKOSP_KERNEL_INFO - -#include -#include -#include - -#if defined(__GXX_ABI_VERSION) -#define HAVE_GCC_ABI_DEMANGLE -#endif - -#if defined(HAVE_GCC_ABI_DEMANGLE) -#include -#endif // HAVE_GCC_ABI_DEMANGLE - -char* demangleName(char* kernelName) -{ -#if defined(HAVE_GCC_ABI_DEMANGLE) - int status = -1; - char* demangledKernelName = abi::__cxa_demangle(kernelName, NULL, NULL, &status); - if (status==0) { - free(kernelName); - kernelName = demangledKernelName; - } -#endif // HAVE_GCC_ABI_DEMANGLE - return kernelName; -} - -double seconds() { - struct timeval now; - gettimeofday(&now, NULL); - - return (double) (now.tv_sec + (now.tv_usec * 1.0e-6)); -} - -enum KernelExecutionType { - PARALLEL_FOR = 0, - PARALLEL_REDUCE = 1, - PARALLEL_SCAN = 2 -}; - -class KernelPerformanceInfo { - public: - KernelPerformanceInfo(std::string kName, KernelExecutionType kernelType) : - kType(kernelType) { - - kernelName = (char*) malloc(sizeof(char) * (kName.size() + 1)); - regionName = ""; - strcpy(kernelName, kName.c_str()); - - callCount = 
0; - time = 0; - } - - ~KernelPerformanceInfo() { - free(kernelName); - } - - KernelExecutionType getKernelType() { - return kType; - } - - void incrementCount() { - callCount++; - } - - void addTime(double t) { - time += t; - timeSq += (t*t); - } - - void addFromTimer() { - addTime(seconds() - startTime); - - incrementCount(); - } - - void startTimer() { - startTime = seconds(); - } - - uint64_t getCallCount() { - return callCount; - } - - double getTime() { - return time; - } - - double getTimeSq() { - return timeSq; - } - - char* getName() { - return kernelName; - } - - void addCallCount(const uint64_t newCalls) { - callCount += newCalls; - } - - bool readFromFile(FILE* input) { - uint32_t recordLen = 0; - uint32_t actual_read = fread(&recordLen, sizeof(recordLen), 1, input); - if(actual_read != 1) return false; - - char* entry = (char*) malloc(recordLen); - fread(entry, recordLen, 1, input); - - uint32_t nextIndex = 0; - uint32_t kernelNameLength; - copy((char*) &kernelNameLength, &entry[nextIndex], sizeof(kernelNameLength)); - nextIndex += sizeof(kernelNameLength); - - if(strlen(kernelName) > 0) { - free(kernelName); - } - - kernelName = (char*) malloc( sizeof(char) * (kernelNameLength + 1)); - copy(kernelName, &entry[nextIndex], kernelNameLength); - kernelName[kernelNameLength] = '\0'; - - kernelName = demangleName(kernelName); - - nextIndex += kernelNameLength; - - copy((char*) &callCount, &entry[nextIndex], sizeof(callCount)); - nextIndex += sizeof(callCount); - - copy((char*) &time, &entry[nextIndex], sizeof(time)); - nextIndex += sizeof(time); - - copy((char*) &timeSq, &entry[nextIndex], sizeof(timeSq)); - nextIndex += sizeof(timeSq); - - uint32_t kernelT = 0; - copy((char*) &kernelT, &entry[nextIndex], sizeof(kernelT)); - nextIndex += sizeof(kernelT); - - if(kernelT == 0) { - kType = PARALLEL_FOR; - } else if(kernelT == 1) { - kType = PARALLEL_REDUCE; - } else if(kernelT == 2) { - kType = PARALLEL_SCAN; - } - - free(entry); - return true; - } - - void 
writeToFile(FILE* output, char* indent) { - fprintf(output, "%s{\n", indent); - - char* indentBuffer = (char*) malloc( sizeof(char) * 256 ); - sprintf(indentBuffer, "%s ", indent); - - fprintf(output, "%s\"kernel-name\" : \"%s\",\n", indentBuffer, kernelName); - fprintf(output, "%s\"region\" : \"%s\",\n", indentBuffer, regionName); - fprintf(output, "%s\"call-count\" : %lu,\n", indentBuffer, callCount); - fprintf(output, "%s\"total-time\" : %f,\n", indentBuffer, time); - fprintf(output, "%s\"time-per-call\" : %16.8f,\n", indentBuffer, (time / - static_cast(std::max( - static_cast(1), callCount)))); - fprintf(output, "%s\"kernel-type\" : \"%s\"\n", indentBuffer, - (kType == PARALLEL_FOR) ? "PARALLEL-FOR" : - (kType == PARALLEL_REDUCE) ? "PARALLEL-REDUCE" : "PARALLEL-SCAN"); - - fprintf(output, "%s}", indent); - } - - private: - void copy(char* dest, const char* src, uint32_t len) { - for(uint32_t i = 0; i < len; i++) { - dest[i] = src[i]; - } - } - - char* kernelName; - char* regionName; - uint64_t callCount; - double time; - double timeSq; - double startTime; - KernelExecutionType kType; -}; - -#endif diff --git a/profiling/simple-kernel-timer/CMakeLists.txt b/profiling/simple-kernel-timer/CMakeLists.txt new file mode 100644 index 000000000..74518bedd --- /dev/null +++ b/profiling/simple-kernel-timer/CMakeLists.txt @@ -0,0 +1,23 @@ +# shared global objects +add_library(kp_kernel_shared STATIC kp_shared.cpp) +list(APPEND EXPORT_TARGETS kp_kernel_shared) +set(EXPORT_TARGETS ${EXPORT_TARGETS} CACHE STRING "" FORCE) + +if(NOT MSVC) + set_property(TARGET kp_kernel_shared PROPERTY POSITION_INDEPENDENT_CODE ON) +endif() + +# Add JSON kernel-timer +kp_add_library(kp_kernel_timer_json kp_kernel_timer_json.cpp) +target_link_libraries(kp_kernel_timer_json PRIVATE kp_kernel_shared) + +# Add binary kernel-timer +kp_add_library(kp_kernel_timer kp_kernel_timer.cpp) +target_link_libraries(kp_kernel_timer PRIVATE kp_kernel_shared) + +# Add binary utilities +add_executable(kp_reader 
kp_reader.cpp) +target_link_libraries(kp_reader PRIVATE kp_kernel_timer) + +add_executable(kp_json_writer kp_json_writer.cpp) +target_link_libraries(kp_json_writer PRIVATE kp_kernel_timer) diff --git a/profiling/simple-kernel-timer/kp_json_writer.cpp b/profiling/simple-kernel-timer/kp_json_writer.cpp index 31a8b2adc..ceab113af 100644 --- a/profiling/simple-kernel-timer/kp_json_writer.cpp +++ b/profiling/simple-kernel-timer/kp_json_writer.cpp @@ -50,7 +50,9 @@ #include #include -#include "kp_kernel_info.h" +#include "kp_shared.h" + +using namespace KokkosTools::KernelTimer; // clang-format on bool is_region(KernelPerformanceInfo const& kp) { @@ -86,10 +88,6 @@ inline void write_json(std::ostream& os, KernelPerformanceInfo const& kp, } // clang-format off -bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, KernelPerformanceInfo* right) { - return left->getTime() > right->getTime(); -}; - int find_index(std::vector& kernels, const char* kernelName) { diff --git a/profiling/simple-kernel-timer/kp_kernel_info.h b/profiling/simple-kernel-timer/kp_kernel_info.h index 0fe3ec537..b1a78ad62 100644 --- a/profiling/simple-kernel-timer/kp_kernel_info.h +++ b/profiling/simple-kernel-timer/kp_kernel_info.h @@ -56,7 +56,9 @@ #include #endif // HAVE_GCC_ABI_DEMANGLE -char* demangleName(char* kernelName) +namespace KokkosTools::KernelTimer { + +inline char* demangleName(char* kernelName) { #if defined(HAVE_GCC_ABI_DEMANGLE) int status = -1; @@ -69,7 +71,7 @@ char* demangleName(char* kernelName) return kernelName; } -double seconds() { +inline double seconds() { struct timeval now; gettimeofday(&now, NULL); @@ -80,7 +82,7 @@ enum KernelExecutionType { PARALLEL_FOR = 0, PARALLEL_REDUCE = 1, PARALLEL_SCAN = 2, - REGION = 3 + REGION = 3 }; class KernelPerformanceInfo { @@ -90,6 +92,7 @@ class KernelPerformanceInfo { kernelName = (char*) malloc(sizeof(char) * (kName.size() + 1)); strcpy(kernelName, kName.c_str()); + // regionName = ""; callCount = 0; time = 0; @@ -194,7 
+197,7 @@ class KernelPerformanceInfo { return true; } - void writeToFile(FILE* output) { + void writeToBinaryFile(FILE* output) { const uint32_t kernelNameLen = (uint32_t) strlen(kernelName); const uint32_t recordLen = @@ -232,6 +235,26 @@ class KernelPerformanceInfo { free(entry); } + void writeToJSONFile(FILE* output, const char* indent) { + fprintf(output, "%s{\n", indent); + + char* indentBuffer = (char*) malloc( sizeof(char) * 256 ); + sprintf(indentBuffer, "%s ", indent); + + fprintf(output, "%s\"kernel-name\" : \"%s\",\n", indentBuffer, kernelName); + // fprintf(output, "%s\"region\" : \"%s\",\n", indentBuffer, regionName); + fprintf(output, "%s\"call-count\" : %lu,\n", indentBuffer, callCount); + fprintf(output, "%s\"total-time\" : %f,\n", indentBuffer, time); + fprintf(output, "%s\"time-per-call\" : %16.8f,\n", indentBuffer, (time / + static_cast<double>(std::max( + static_cast<uint64_t>(1), callCount)))); + fprintf(output, "%s\"kernel-type\" : \"%s\"\n", indentBuffer, + (kType == PARALLEL_FOR) ? "PARALLEL-FOR" : + (kType == PARALLEL_REDUCE) ? 
"PARALLEL-REDUCE" : "PARALLEL-SCAN"); + + fprintf(output, "%s}", indent); + } + private: void copy(char* dest, const char* src, uint32_t len) { for(uint32_t i = 0; i < len; i++) { @@ -240,6 +263,7 @@ class KernelPerformanceInfo { } char* kernelName; + // const char* regionName; uint64_t callCount; double time; double timeSq; @@ -247,4 +271,6 @@ class KernelPerformanceInfo { KernelExecutionType kType; }; +} // namespace KokkosTools::KernelTimer + #endif diff --git a/profiling/simple-kernel-timer/kp_kernel_timer.cpp b/profiling/simple-kernel-timer/kp_kernel_timer.cpp index aeabe5073..71aeb85b2 100644 --- a/profiling/simple-kernel-timer/kp_kernel_timer.cpp +++ b/profiling/simple-kernel-timer/kp_kernel_timer.cpp @@ -40,70 +40,21 @@ // ************************************************************************ //@HEADER -#include -#include -#include -#include -#include -#include #include -#include #include -#include #include - #include -#include "kp_kernel_info.h" - -bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, KernelPerformanceInfo* right) { - return left->getTime() > right->getTime(); -}; - -static uint64_t uniqID = 0; -static KernelPerformanceInfo* currentEntry; -static std::map count_map; -static double initTime; -static char* outputDelimiter; -static int current_region_level = 0; -static KernelPerformanceInfo* regions[512]; - -#define MAX_STACK_SIZE 128 - -void increment_counter(const char* name, KernelExecutionType kType) { - std::string nameStr(name); - - if(count_map.find(name) == count_map.end()) { - KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, kType); - count_map.insert(std::pair(nameStr, info)); - - currentEntry = info; - } else { - currentEntry = count_map[nameStr]; - } - - currentEntry->startTimer(); -} - -void increment_counter_region(const char* name, KernelExecutionType kType) { - std::string nameStr(name); - if(count_map.find(name) == count_map.end()) { - KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, 
kType); - count_map.insert(std::pair<std::string, KernelPerformanceInfo*>(nameStr, info)); +#include "kp_core.hpp" +#include "kp_shared.h" - regions[current_region_level] = info; - } else { - regions[current_region_level] = count_map[nameStr]; - } - - regions[current_region_level]->startTimer(); - current_region_level++; -} +namespace KokkosTools { +namespace KernelTimer { -extern "C" void kokkosp_init_library(const int loadSeq, +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, - void* deviceInfo) { + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { const char* output_delim_env = getenv("KOKKOSP_OUTPUT_DELIM"); if(NULL == output_delim_env) { @@ -117,12 +68,13 @@ extern "C" void kokkosp_init_library(const int loadSeq, // initialize regions to 0s so we know if there is an object there memset(&regions[0], 0, 512 * sizeof(KernelPerformanceInfo*)); - printf("KokkosP: Example Library Initialized (sequence is %d, version: %llu)\n", loadSeq, interfaceVer); + printf("KokkosP: Example Library Initialized (sequence is %d, version: %llu)\n", + loadSeq, (long long unsigned int)interfaceVer); initTime = seconds(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { double finishTime = seconds(); double kernelTimes = 0; @@ -141,7 +93,7 @@ extern "C" void kokkosp_finalize_library() { std::vector<KernelPerformanceInfo*> kernelList; for(auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); kernel_itr++) { - kernel_itr->second->writeToFile(output_data); + kernel_itr->second->writeToBinaryFile(output_data); } fclose(output_data); @@ -265,7 +217,7 @@ extern "C" void kokkosp_finalize_library() { } -extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { *kID = uniqID++; if( (NULL == name) || (strcmp("", name) == 0) ) { @@ -276,11 +228,11 @@ extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devI 
increment_counter(name, PARALLEL_FOR); } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { +void kokkosp_end_parallel_for(const uint64_t kID) { currentEntry->addFromTimer(); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { *kID = uniqID++; if( (NULL == name) || (strcmp("", name) == 0) ) { @@ -291,11 +243,11 @@ extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t dev increment_counter(name, PARALLEL_SCAN); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { +void kokkosp_end_parallel_scan(const uint64_t kID) { currentEntry->addFromTimer(); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { *kID = uniqID++; if( (NULL == name) || (strcmp("", name) == 0) ) { @@ -306,29 +258,29 @@ extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t d increment_counter(name, PARALLEL_REDUCE); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { currentEntry->addFromTimer(); } -extern "C" void kokkosp_push_profile_region(char* regionName) { +void kokkosp_push_profile_region(char* regionName) { increment_counter_region(regionName, REGION); } -extern "C" void kokkosp_pop_profile_region() { +void kokkosp_pop_profile_region() { current_region_level--; - - // current_region_level is out of bounds, inform the user they + + // current_region_level is out of bounds, inform the user they // called popRegion too many times. if (current_region_level < 0) { current_region_level = 0; std::cerr << "WARNING:: Kokkos::Profiling::popRegion() called outside " << " of an actve region. 
Previous regions: "; - /* This code block will walk back through the non-null regions + /* This code block will walk back through the non-null regions * pointers and print the names. This takes advantage of a slight - * issue with regions logic: we never actually delete the + * issue with regions logic: we never actually delete the * KernelPerformanceInfo objects. If that ever changes this needs - * to be updated. + * to be updated. */ for (int i = 0; i < 5; i++) { if (regions[i] != 0 ) { @@ -343,3 +295,34 @@ extern "C" void kokkosp_pop_profile_region() { regions[current_region_level]->addFromTimer(); } } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +}} // namespace KokkosTools::KernelTimer + +extern "C" { + +namespace impl = KokkosTools::KernelTimer; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git a/profiling/simple-kernel-timer-json/kp_kernel_timer.cpp 
b/profiling/simple-kernel-timer/kp_kernel_timer_json.cpp similarity index 69% rename from profiling/simple-kernel-timer-json/kp_kernel_timer.cpp rename to profiling/simple-kernel-timer/kp_kernel_timer_json.cpp index 8bab668e0..eab62714b 100644 --- a/profiling/simple-kernel-timer-json/kp_kernel_timer.cpp +++ b/profiling/simple-kernel-timer/kp_kernel_timer_json.cpp @@ -40,51 +40,23 @@ // ************************************************************************ //@HEADER -#include -#include -#include -#include -#include -#include #include #include #include -#include - #include -#include "kp_kernel_info.h" - -bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, KernelPerformanceInfo* right) { - return left->getTime() > right->getTime(); -}; - -static uint64_t uniqID = 0; -static KernelPerformanceInfo* currentEntry; -static std::map count_map; -static double initTime; -static char* outputDelimiter; -#define MAX_STACK_SIZE 128 +#include "kp_core.hpp" +#include "kp_shared.h" -void increment_counter(const char* name, KernelExecutionType kType) { - std::string nameStr(name); - - if(count_map.find(name) == count_map.end()) { - KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, kType); - count_map.insert(std::pair(nameStr, info)); - - currentEntry = info; - } else { - currentEntry = count_map[nameStr]; - } +using namespace KokkosTools::KernelTimer; - currentEntry->startTimer(); -} +namespace KokkosTools { +namespace KernelTimerJSON { -extern "C" void kokkosp_init_library(const int loadSeq, +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, - void* deviceInfo) { + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { const char* output_delim_env = getenv("KOKKOSP_OUTPUT_DELIM"); if(NULL == output_delim_env) { @@ -95,30 +67,31 @@ extern "C" void kokkosp_init_library(const int loadSeq, sprintf(outputDelimiter, "%s", output_delim_env); } - printf("KokkosP: LDMS JSON Connector Initialized (sequence is %d, 
version: %llu)\n", loadSeq, interfaceVer); + printf("KokkosP: LDMS JSON Connector Initialized (sequence is %d, version: %llu)\n", + loadSeq, (long long unsigned int)interfaceVer); initTime = seconds(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { double finishTime = seconds(); double kernelTimes = 0; - + char* mpi_rank = getenv("OMPI_COMM_WORLD_RANK"); - + char* hostname = (char*) malloc(sizeof(char) * 256); gethostname(hostname, 256); - + char* fileOutput = (char*) malloc(sizeof(char) * 256); sprintf(fileOutput, "%s-%d-%s.json", hostname, (int) getpid(), (NULL == mpi_rank) ? "0" : mpi_rank); - + free(hostname); FILE* output_data = fopen(fileOutput, "w"); const double totalExecuteTime = (finishTime - initTime); std::vector kernelList; - + for(auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); kernel_itr++) { kernelList.push_back(kernel_itr->second); kernelTimes += kernel_itr->second->getTime(); @@ -127,26 +100,26 @@ extern "C" void kokkosp_finalize_library() { std::sort(kernelList.begin(), kernelList.end(), compareKernelPerformanceInfo); fprintf(output_data, "{\n\"kokkos-kernel-data\" : {\n"); - fprintf(output_data, " \"mpi-rank\" : %s,\n", + fprintf(output_data, " \"mpi-rank\" : %s,\n", (NULL == mpi_rank) ? 
"0" : mpi_rank); fprintf(output_data, " \"total-app-time\" : %10.3f,\n", totalExecuteTime); fprintf(output_data, " \"total-kernel-times\" : %10.3f,\n", kernelTimes); fprintf(output_data, " \"total-non-kernel-times\" : %10.3f,\n", (totalExecuteTime - kernelTimes)); - + const double percentKokkos = (kernelTimes / totalExecuteTime) * 100.0; fprintf(output_data, " \"percent-in-kernels\" : %6.2f,\n", percentKokkos); fprintf(output_data, " \"unique-kernel-calls\" : %22lu,\n", (uint64_t) count_map.size()); fprintf(output_data, "\n"); - + fprintf(output_data, " \"kernel-perf-info\" : [\n"); - + #define KERNEL_INFO_INDENT " " - + bool print_comma = false; for(auto const& kernel : count_map) { if (print_comma) fprintf(output_data, ",\n"); - kernel.second->writeToFile(output_data, KERNEL_INFO_INDENT); + kernel.second->writeToJSONFile(output_data, KERNEL_INFO_INDENT); print_comma = true; } @@ -156,7 +129,7 @@ extern "C" void kokkosp_finalize_library() { fclose(output_data); } -extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { *kID = uniqID++; if( (NULL == name) || (strcmp("", name) == 0) ) { @@ -167,11 +140,11 @@ extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devI increment_counter(name, PARALLEL_FOR); } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { +void kokkosp_end_parallel_for(const uint64_t kID) { currentEntry->addFromTimer(); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { *kID = uniqID++; if( (NULL == name) || (strcmp("", name) == 0) ) { @@ -182,11 +155,11 @@ extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t dev increment_counter(name, PARALLEL_SCAN); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { 
+void kokkosp_end_parallel_scan(const uint64_t kID) { currentEntry->addFromTimer(); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { *kID = uniqID++; if( (NULL == name) || (strcmp("", name) == 0) ) { @@ -197,7 +170,37 @@ extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t d increment_counter(name, PARALLEL_REDUCE); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { currentEntry->addFromTimer(); } +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +}} // namespace KokkosTools::KernelTimerJSON + +extern "C" { + +namespace impl = KokkosTools::KernelTimerJSON; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git 
a/profiling/simple-kernel-timer/kp_reader.cpp b/profiling/simple-kernel-timer/kp_reader.cpp index 09aa11134..6f9acd767 100644 --- a/profiling/simple-kernel-timer/kp_reader.cpp +++ b/profiling/simple-kernel-timer/kp_reader.cpp @@ -47,11 +47,9 @@ #include #include -#include "kp_kernel_info.h" +#include "kp_shared.h" -bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, KernelPerformanceInfo* right) { - return left->getTime() > right->getTime(); -}; +using namespace KokkosTools::KernelTimer; int find_index(std::vector& kernels, const char* kernelName) { diff --git a/profiling/memory-usage/kp_memory_events.hpp b/profiling/simple-kernel-timer/kp_shared.cpp similarity index 59% rename from profiling/memory-usage/kp_memory_events.hpp rename to profiling/simple-kernel-timer/kp_shared.cpp index 16c7e2cba..1d8c313d5 100644 --- a/profiling/memory-usage/kp_memory_events.hpp +++ b/profiling/simple-kernel-timer/kp_shared.cpp @@ -1,14 +1,13 @@ -/* //@HEADER // ************************************************************************ -// +// // Kokkos v. 3.0 // Copyright (2020) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). -// +// // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,52 +35,22 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact David Poliakoff (dzpolia@sandia.gov) -// +// Questions? 
Contact David Poliakoff (dzpolia@sandia.gov) +// // ************************************************************************ //@HEADER -// */ - -#define MEMOP_ALLOCATE 1 -#define MEMOP_DEALLOCATE 2 -#include - -struct SpaceHandle { - char name[64]; -}; - -char space_name[16][64]; - -struct EventRecord { - const void* ptr; - uint64_t size; - int operation; - int space; - double time; - char name[256]; +#include "kp_shared.h" - EventRecord(const void* const ptr_, const uint64_t size_, const int operation_, - const int space_, const double time_, const char* const name_) { - ptr = ptr_; - size = size_; - operation = operation_; - space = space_; - time = time_; - strncpy(name,name_,256); - } +namespace KokkosTools { +namespace KernelTimer { - void print_record() const { - if(operation == MEMOP_ALLOCATE) - printf("%lf %16p %14d %16s Allocate %s\n",time,ptr,size,space<0?"":space_name[space],name); - if(operation == MEMOP_DEALLOCATE) - printf("%lf %16p %14d %16s DeAllocate %s\n",time,ptr,-size,space<0?"":space_name[space],name); - } - void print_record(FILE* ofile) const { - if(operation == MEMOP_ALLOCATE) - fprintf(ofile,"%lf %16p %14d %16s Allocate %s\n",time,ptr,size,space<0?"":space_name[space],name); - if(operation == MEMOP_DEALLOCATE) - fprintf(ofile,"%lf %16p %14d %16s DeAllocate %s\n",time,ptr,-size,space<0?"":space_name[space],name); - } -}; +uint64_t uniqID = 0; +KernelPerformanceInfo* currentEntry; +std::map count_map; +double initTime; +char* outputDelimiter; +int current_region_level = 0; +KernelPerformanceInfo* regions[512]; +}} // namespace KokkosTools::KernelTimer \ No newline at end of file diff --git a/profiling/simple-kernel-timer/kp_shared.h b/profiling/simple-kernel-timer/kp_shared.h new file mode 100644 index 000000000..0d73cd86c --- /dev/null +++ b/profiling/simple-kernel-timer/kp_shared.h @@ -0,0 +1,98 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact David Poliakoff (dzpolia@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef _H_KOKKOSP_KERNEL_SHARED +#define _H_KOKKOSP_KERNEL_SHARED + +#include +#include +#include "kp_kernel_info.h" + +namespace KokkosTools::KernelTimer { + +extern uint64_t uniqID; +extern KernelPerformanceInfo* currentEntry; +extern std::map count_map; +extern double initTime; +extern char* outputDelimiter; +extern int current_region_level; +extern KernelPerformanceInfo* regions[512]; + +inline void increment_counter(const char* name, KernelExecutionType kType) { + std::string nameStr(name); + + if(count_map.find(name) == count_map.end()) { + KernelPerformanceInfo *info = new KernelPerformanceInfo(nameStr, kType); + count_map.insert(std::pair(nameStr, info)); + + currentEntry = info; + } else { + currentEntry = count_map[nameStr]; + } + + currentEntry->startTimer(); +} + +inline void increment_counter_region(const char* name, KernelExecutionType kType) { + std::string nameStr(name); + + if(count_map.find(name) == count_map.end()) { + KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, kType); + count_map.insert(std::pair(nameStr, info)); + + regions[current_region_level] = info; + } else { + regions[current_region_level] = count_map[nameStr]; + } + + regions[current_region_level]->startTimer(); + current_region_level++; +} + +inline bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, KernelPerformanceInfo* right) +{ + return left->getTime() > right->getTime(); +}; + +} // namespace KokkosTools::KernelTimer + +#endif // _H_KOKKOSP_KERNEL_SHARED \ No newline at end of file diff --git a/profiling/space-time-stack/CMakeLists.txt b/profiling/space-time-stack/CMakeLists.txt new file mode 100644 index 000000000..2cc44373d --- /dev/null +++ b/profiling/space-time-stack/CMakeLists.txt @@ -0,0 +1,5 @@ +kp_add_library(kp_space_time_stack kp_space_time_stack.cpp) + +if(USE_MPI) + 
target_link_libraries(kp_space_time_stack PRIVATE MPI::MPI_CXX) +endif() \ No newline at end of file diff --git a/profiling/space-time-stack/kp_space_time_stack.cpp b/profiling/space-time-stack/kp_space_time_stack.cpp index 6b9ad39aa..eb00fd987 100644 --- a/profiling/space-time-stack/kp_space_time_stack.cpp +++ b/profiling/space-time-stack/kp_space_time_stack.cpp @@ -57,9 +57,7 @@ #include #include -#ifndef USE_MPI -#define USE_MPI 1 -#endif +#include "kp_core.hpp" #if USE_MPI #include @@ -67,15 +65,8 @@ #include -namespace { - -struct KokkosPDeviceInfo { - std::uint32_t deviceID; -}; - -struct SpaceHandle { - char name[64]; -}; +namespace KokkosTools { +namespace SpaceTimeStack { enum Space { SPACE_HOST, @@ -516,10 +507,10 @@ struct StackNode { struct Allocation { std::string name; - void* ptr; + const void* ptr; std::uint64_t size; StackNode* frame; - Allocation(std::string&& name_in, void* ptr_in, std::uint64_t size_in, + Allocation(std::string&& name_in, const void* ptr_in, std::uint64_t size_in, StackNode* frame_in): name(std::move(name_in)),ptr(ptr_in),size(size_in),frame(frame_in) { } @@ -533,14 +524,14 @@ struct Allocations { std::uint64_t total_size; std::set alloc_set; Allocations():total_size(0) {} - void allocate(std::string&& name, void* ptr, std::uint64_t size, + void allocate(std::string&& name, const void* ptr, std::uint64_t size, StackNode* frame) { auto res = alloc_set.emplace( Allocation(std::move(name), ptr, size, frame)); assert(res.second); total_size += size; } - void deallocate(std::string&& name, void* ptr, std::uint64_t size, + void deallocate(std::string&& name, const void* ptr, std::uint64_t size, StackNode* frame) { auto key = Allocation(std::move(name), ptr, size, frame); auto it = alloc_set.find(key); @@ -724,14 +715,14 @@ struct State { void pop_region() { end_frame(now()); } - void allocate(Space space, const char* name, void* ptr, std::uint64_t size) { + void allocate(Space space, const char* name, const void* ptr, std::uint64_t 
size) { current_allocations[space].allocate( std::string(name), ptr, size, stack_frame); if (current_allocations[space].total_size > hwm_allocations[space].total_size) { hwm_allocations[space] = current_allocations[space]; } } - void deallocate(Space space, const char* name, void* ptr, std::uint64_t size) { + void deallocate(Space space, const char* name, const void* ptr, std::uint64_t size) { current_allocations[space].deallocate( std::string(name), ptr, size, stack_frame); } @@ -754,72 +745,70 @@ struct State { State* global_state = nullptr; -} // end anonymous namespace - -extern "C" void kokkosp_init_library( - int loadseq, uint64_t, uint32_t ndevinfos, KokkosPDeviceInfo* devinfos) { - (void)loadseq; - (void)ndevinfos; - (void)devinfos; +void kokkosp_init_library( + int /* loadseq */, + uint64_t /* interfaceVer */, + uint32_t /* ndevinfos */, + Kokkos_Profiling_KokkosPDeviceInfo* /* devinfos */) { global_state = new State(); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { delete global_state; global_state = nullptr; } -extern "C" void kokkosp_begin_parallel_for( +void kokkosp_begin_parallel_for( const char* name, std::uint32_t devid, std::uint64_t* kernid) { (void) devid; *kernid = global_state->begin_kernel(name, STACK_FOR); } -extern "C" void kokkosp_begin_parallel_reduce( +void kokkosp_begin_parallel_reduce( const char* name, std::uint32_t devid, std::uint64_t* kernid) { (void) devid; *kernid = global_state->begin_kernel(name, STACK_REDUCE); } -extern "C" void kokkosp_begin_parallel_scan( +void kokkosp_begin_parallel_scan( const char* name, std::uint32_t devid, std::uint64_t* kernid) { (void) devid; *kernid = global_state->begin_kernel(name, STACK_SCAN); } -extern "C" void kokkosp_end_parallel_for(std::uint64_t kernid) { +void kokkosp_end_parallel_for(std::uint64_t kernid) { global_state->end_kernel(kernid); } -extern "C" void kokkosp_end_parallel_reduce(std::uint64_t kernid) { +void 
kokkosp_end_parallel_reduce(std::uint64_t kernid) { global_state->end_kernel(kernid); } -extern "C" void kokkosp_end_parallel_scan(std::uint64_t kernid) { +void kokkosp_end_parallel_scan(std::uint64_t kernid) { global_state->end_kernel(kernid); } -extern "C" void kokkosp_push_profile_region(const char* name) { +void kokkosp_push_profile_region(const char* name) { global_state->push_region(name); } -extern "C" void kokkosp_pop_profile_region() { +void kokkosp_pop_profile_region() { global_state->pop_region(); } -extern "C" void kokkosp_allocate_data( - SpaceHandle handle, const char* name, void* ptr, uint64_t size) { +void kokkosp_allocate_data( + SpaceHandle handle, const char* name, const void* ptr, uint64_t size) { auto space = get_space(handle); global_state->allocate(space, name, ptr, size); } -extern "C" void kokkosp_deallocate_data( - SpaceHandle handle, const char* name, void* ptr, uint64_t size) { +void kokkosp_deallocate_data( + SpaceHandle handle, const char* name, const void* ptr, uint64_t size) { auto space = get_space(handle); global_state->deallocate(space, name, ptr, size); } -extern "C" void kokkosp_begin_deep_copy( +void kokkosp_begin_deep_copy( SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr, SpaceHandle src_handle, const char* src_name, const void* src_ptr, uint64_t size) { @@ -829,6 +818,47 @@ extern "C" void kokkosp_begin_deep_copy( src_space, src_name, src_ptr, size); } -extern "C" void kokkosp_end_deep_copy() { +void kokkosp_end_deep_copy() { global_state->end_deep_copy(); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + my_event_set.allocate_data = 
kokkosp_allocate_data; + my_event_set.deallocate_data = kokkosp_deallocate_data; + my_event_set.begin_deep_copy = kokkosp_begin_deep_copy; + my_event_set.end_deep_copy = kokkosp_end_deep_copy; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +}} // KokkosTools::SpaceTimeStack + +extern "C" { + +namespace impl = KokkosTools::SpaceTimeStack; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_ALLOCATE(impl::kokkosp_allocate_data) +EXPOSE_DEALLOCATE(impl::kokkosp_deallocate_data) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" \ No newline at end of file diff --git a/profiling/systemtap-connector/CMakeLists.txt b/profiling/systemtap-connector/CMakeLists.txt new file mode 100644 index 000000000..6fac2e459 --- /dev/null +++ b/profiling/systemtap-connector/CMakeLists.txt @@ -0,0 +1,20 @@ +set(PROBES_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/probes.d) +set(PROBES_HEADER ${CMAKE_CURRENT_BINARY_DIR}/probes.h) +set(PROBES_OBJECT ${CMAKE_CURRENT_BINARY_DIR}/probes.o) + +# Note: connect external/generated object file via imported object library +add_custom_command(OUTPUT ${PROBES_OBJECT} + COMMAND dtrace -C -G -s ${PROBES_SOURCE} -o 
${PROBES_OBJECT} + DEPENDS ${PROBES_SOURCE} VERBATIM) +add_library(kp_systemtap_probe OBJECT IMPORTED) +set_property(TARGET kp_systemtap_probe PROPERTY IMPORTED_OBJECTS ${PROBES_OBJECT}) + +kp_add_library(kp_systemtap_connector kp_systemtap_connector.cpp ${PROBES_HEADER} + $<TARGET_OBJECTS:kp_systemtap_probe>) +set_property(SOURCE ${PROBES_HEADER} PROPERTY HEADER_FILE_ONLY ON) + +# Note: connect generated header +target_include_directories(kp_systemtap_connector PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +add_custom_command(OUTPUT ${PROBES_HEADER} + COMMAND dtrace -C -h -s ${PROBES_SOURCE} -o ${PROBES_HEADER} + DEPENDS ${PROBES_SOURCE} VERBATIM) diff --git a/profiling/systemtap-connector/kp_systemtap_connector.cpp b/profiling/systemtap-connector/kp_systemtap_connector.cpp index 410121156..57de9aa16 100644 --- a/profiling/systemtap-connector/kp_systemtap_connector.cpp +++ b/profiling/systemtap-connector/kp_systemtap_connector.cpp @@ -51,15 +51,15 @@ #include #include "probes.h" +#include "kp_core.hpp" -struct SpaceHandle { - char name[64]; -}; +namespace KokkosTools { +namespace SystemtapConnector { static uint64_t next_kernid; static uint32_t next_sec_id; -extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devid, uint64_t* kernid) +void kokkosp_begin_parallel_for(const char* name, const uint32_t devid, uint64_t* kernid) { *kernid = next_kernid++; - if ( KOKKOS_END_PARALLEL_FOR_ENABLED()) { + if (KOKKOS_BEGIN_PARALLEL_FOR_ENABLED()) { @@ -67,7 +67,7 @@ extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devi } } -extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t devid, uint64_t* kernid) +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devid, uint64_t* kernid) { *kernid = next_kernid++; if (KOKKOS_BEGIN_PARALLEL_SCAN_ENABLED()) { @@ -75,7 +75,7 @@ extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t dev } } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devid, uint64_t* kernid) +void 
kokkosp_begin_parallel_reduce(const char* name, const uint32_t devid, uint64_t* kernid) { *kernid = next_kernid++; if (KOKKOS_BEGIN_PARALLEL_REDUCE_ENABLED()) { @@ -83,73 +83,73 @@ extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t d } } -extern "C" void kokkosp_end_parallel_scan(uint64_t kernid) +void kokkosp_end_parallel_scan(uint64_t kernid) { if (KOKKOS_END_PARALLEL_SCAN_ENABLED()) { KOKKOS_END_PARALLEL_SCAN(kernid); } } -extern "C" void kokkosp_end_parallel_for(uint64_t kernid) +void kokkosp_end_parallel_for(uint64_t kernid) { if (KOKKOS_END_PARALLEL_FOR_ENABLED()) { KOKKOS_END_PARALLEL_FOR(kernid); } } -extern "C" void kokkosp_end_parallel_reduce(uint64_t kernid) +void kokkosp_end_parallel_reduce(uint64_t kernid) { if (KOKKOS_END_PARALLEL_REDUCE_ENABLED()) { KOKKOS_END_PARALLEL_REDUCE(kernid); } } -extern "C" void kokkosp_init_library(const int loadseq, +void kokkosp_init_library(const int loadseq, const uint64_t version, const uint32_t ndevinfos, - void* deviceinfos) + Kokkos_Profiling_KokkosPDeviceInfo* deviceinfos) { if (KOKKOS_INIT_LIBRARY_ENABLED()) { KOKKOS_INIT_LIBRARY(loadseq, version, ndevinfos, deviceinfos); } } -extern "C" void kokkosp_finalize_library() +void kokkosp_finalize_library() { if (KOKKOS_FINALIZE_LIBRARY_ENABLED()) { KOKKOS_FINALIZE_LIBRARY(); } } -extern "C" void kokkosp_push_profile_region(const char* name) +void kokkosp_push_profile_region(const char* name) { if (KOKKOS_PUSH_PROFILE_REGION_ENABLED()) { KOKKOS_PUSH_PROFILE_REGION(name); } } -extern "C" void kokkosp_pop_profile_region() +void kokkosp_pop_profile_region() { if (KOKKOS_POP_PROFILE_REGION_ENABLED()) { KOKKOS_POP_PROFILE_REGION(); } } -extern "C" void kokkosp_allocate_data(SpaceHandle handle, const char* name, void* ptr, uint64_t size) +void kokkosp_allocate_data(SpaceHandle handle, const char* name, const void* ptr, uint64_t size) { if (KOKKOS_ALLOCATE_DATA_ENABLED()) { KOKKOS_ALLOCATE_DATA(handle, name, ptr, size); } } -extern "C" void 
kokkosp_deallocate_data(SpaceHandle handle, const char* name, void* ptr, uint64_t size) +void kokkosp_deallocate_data(SpaceHandle handle, const char* name, const void* ptr, uint64_t size) { if (KOKKOS_DEALLOCATE_DATA_ENABLED()) { KOKKOS_DEALLOCATE_DATA(handle, name, ptr, size); } } -extern "C" void kokkosp_begin_deep_copy( +void kokkosp_begin_deep_copy( SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr, SpaceHandle src_handle, const char* src_name, const void* src_ptr, uint64_t size) @@ -161,14 +161,14 @@ extern "C" void kokkosp_begin_deep_copy( } } -extern "C" void kokkosp_end_deep_copy() +void kokkosp_end_deep_copy() { if (KOKKOS_END_DEEP_COPY_ENABLED()) { KOKKOS_END_DEEP_COPY(); } } -extern "C" void kokkosp_create_profile_section(const char* name, uint32_t* sec_id) +void kokkosp_create_profile_section(const char* name, uint32_t* sec_id) { *sec_id = next_sec_id++; if (KOKKOS_CREATE_PROFILE_SECTION_ENABLED()) { @@ -176,30 +176,82 @@ extern "C" void kokkosp_create_profile_section(const char* name, uint32_t* sec_i } } -extern "C" void kokkosp_start_profile_section(const uint32_t sec_id) +void kokkosp_start_profile_section(const uint32_t sec_id) { if (KOKKOS_START_PROFILE_SECTION_ENABLED()) { KOKKOS_START_PROFILE_SECTION(sec_id); } } -extern "C" void kokkosp_stop_profile_section(const uint32_t sec_id) +void kokkosp_stop_profile_section(const uint32_t sec_id) { if (KOKKOS_STOP_PROFILE_SECTION_ENABLED()) { KOKKOS_STOP_PROFILE_SECTION(sec_id); } } -extern "C" void kokkosp_destroy_profile_section(const uint32_t sec_id) +void kokkosp_destroy_profile_section(const uint32_t sec_id) { if (KOKKOS_DESTROY_PROFILE_SECTION_ENABLED()) { KOKKOS_DESTROY_PROFILE_SECTION(sec_id); } } -extern "C" void kokkosp_profile_event(const char* name) +void kokkosp_profile_event(const char* name) { if (KOKKOS_PROFILE_EVENT_ENABLED()) { KOKKOS_PROFILE_EVENT(name); } } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + 
memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.push_region = kokkosp_push_profile_region; + my_event_set.pop_region = kokkosp_pop_profile_region; + my_event_set.allocate_data = kokkosp_allocate_data; + my_event_set.deallocate_data = kokkosp_deallocate_data; + my_event_set.begin_deep_copy = kokkosp_begin_deep_copy; + my_event_set.end_deep_copy = kokkosp_end_deep_copy; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + my_event_set.create_profile_section = kokkosp_create_profile_section; + my_event_set.start_profile_section = kokkosp_start_profile_section; + my_event_set.stop_profile_section = kokkosp_stop_profile_section; + my_event_set.destroy_profile_section = kokkosp_destroy_profile_section; + my_event_set.profile_event = kokkosp_profile_event; + return my_event_set; +} + +}} // namespace KokkosTools::SystemtapConnector + + +extern "C" { + +namespace impl = KokkosTools::SystemtapConnector; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) +EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_ALLOCATE(impl::kokkosp_allocate_data) +EXPOSE_DEALLOCATE(impl::kokkosp_deallocate_data) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) 
+EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_CREATE_PROFILE_SECTION(impl::kokkosp_create_profile_section) +EXPOSE_START_PROFILE_SECTION(impl::kokkosp_start_profile_section) +EXPOSE_STOP_PROFILE_SECTION(impl::kokkosp_stop_profile_section) +EXPOSE_DESTROY_PROFILE_SECTION(impl::kokkosp_destroy_profile_section) +EXPOSE_PROFILE_EVENT(impl::kokkosp_profile_event) + +} // extern "C" \ No newline at end of file diff --git a/profiling/variorum-connector/CMakeLists.txt b/profiling/variorum-connector/CMakeLists.txt new file mode 100644 index 000000000..ce74690a2 --- /dev/null +++ b/profiling/variorum-connector/CMakeLists.txt @@ -0,0 +1,9 @@ +# Based on Makefile authored by Zachary S. Frye (CASC at LLNL) in July 2020 + +kp_add_library(kp_variorum_connector variorum-connector.cpp) + +target_link_libraries(kp_variorum_connector PRIVATE variorum::variorum) + +if(USE_MPI) + target_link_libraries(kp_variorum_connector PRIVATE MPI::MPI_CXX) +endif() diff --git a/profiling/variorum-connector/Makefile b/profiling/variorum-connector/Makefile index 8018ba6c5..3bc9964b7 100644 --- a/profiling/variorum-connector/Makefile +++ b/profiling/variorum-connector/Makefile @@ -1,4 +1,4 @@ -#Author: Zachary S. Frye +# Author: Zachary S. 
Frye #Organization: CASC at LLNL #Date: July 2020 #Description: This is a simple makefile for testing and developing the Kokkos-variorum connector diff --git a/profiling/variorum-connector/variorum-connector.cpp b/profiling/variorum-connector/variorum-connector.cpp index b8928494b..61c19a2a3 100644 --- a/profiling/variorum-connector/variorum-connector.cpp +++ b/profiling/variorum-connector/variorum-connector.cpp @@ -66,14 +66,14 @@ extern "C" { #include } -#ifndef USE_MPI -#define USE_MPI 1 -#endif +#include "kp_core.hpp" #if USE_MPI #include #endif +namespace KokkosTools { +namespace VariorumConnector { bool filterKernels; uint64_t nextKernelID; @@ -181,6 +181,7 @@ char * variorum_json_call() { //Pre: None //Post: An output message if variourum returned an error or if it functioned correctly void variorum_call_mpi() { +#if USE_MPI if(usingMPI == true) { int rank; std::string output; @@ -239,7 +240,7 @@ void variorum_call_mpi() { } file.close(); } - +#endif } //Function: variorum_call @@ -266,8 +267,8 @@ void variorum_call() { } -extern "C" void kokkosp_init_library(const int loadSeq, - const uint64_t interfaceVer, const uint32_t devInfoCount, void* deviceInfo) { +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, + const uint32_t devInfoCount, Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { char * outputPathChar; try { @@ -337,6 +338,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, throw 20; } if (strcmp(usingMPIstr, "true") == 0) { +#if USE_MPI usingMPI = true; try{ char* perRankOutput = getenv("RANKED_OUTPUT"); @@ -355,6 +357,10 @@ extern "C" void kokkosp_init_library(const int loadSeq, std::cout << "Ranked output will no be used, error setting paramters" << std::endl; mpiOutPut = false; } +#else + usingMPI = false; + std::cout << "Ignoring MPI enabled in Variorum: the connector was built without MPI support" << std::endl; +#endif } } catch (int e) { std::cout << "No MPI Option provided, not using per rank output" << 
std::endl; @@ -368,7 +374,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { if(usingMPI) { variorum_call_mpi(); @@ -385,7 +391,7 @@ extern "C" void kokkosp_finalize_library() { std::cout << "The kokkos library was alive for " << total_time << " seconds." << std::endl; } -extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { std::cout << "Device ID: " << devID << "\n"; if(usingMPI) { variorum_call_mpi(); @@ -395,7 +401,7 @@ extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devI } } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { +void kokkosp_end_parallel_for(const uint64_t kID) { if(usingMPI) { variorum_call_mpi(); } @@ -404,7 +410,7 @@ extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { } } -extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { std::cout << "Device ID: " << devID << "\n"; if(usingMPI) { variorum_call_mpi(); @@ -414,7 +420,7 @@ extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t dev } } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { +void kokkosp_end_parallel_scan(const uint64_t kID) { if(usingMPI) { variorum_call_mpi(); } @@ -423,7 +429,7 @@ extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { } } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { std::cout << "Device ID: " << devID << "\n"; if(usingMPI) { variorum_call_mpi(); @@ -433,7 +439,7 @@ extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t d } } -extern 
"C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { if(usingMPI) { variorum_call_mpi(); } @@ -442,4 +448,33 @@ extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { } } +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +}} // namespace KokkosTools::VariorumConnector + +extern "C" { + +namespace impl = KokkosTools::VariorumConnector; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +} // extern "C" \ No newline at end of file diff --git a/profiling/vtune-connector/CMakeLists.txt b/profiling/vtune-connector/CMakeLists.txt new file mode 100644 index 000000000..8a795e518 --- /dev/null +++ b/profiling/vtune-connector/CMakeLists.txt @@ -0,0 +1,6 @@ +find_package(ITT REQUIRED) + +kp_add_library(kp_vtune_connector kp_vtune_connector.cpp) + +# Use the keyword signature so the link visibility is explicit. +target_link_libraries(kp_vtune_connector PRIVATE ittapi) diff --git a/profiling/vtune-connector/kp_vtune_connector.cpp 
b/profiling/vtune-connector/kp_vtune_connector.cpp index 531827470..aaf5d3016 100644 --- a/profiling/vtune-connector/kp_vtune_connector.cpp +++ b/profiling/vtune-connector/kp_vtune_connector.cpp @@ -46,18 +46,21 @@ #include #include #include -#include +#include "kp_core.hpp" #include "kp_vtune_connector_domain.h" +namespace KokkosTools { +namespace VTuneConnector { + static KernelVTuneConnectorInfo* currentKernel; static std::unordered_map domain_map; static uint64_t nextKernelID; -extern "C" void kokkosp_init_library(const int loadSeq, +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, - void* deviceInfo) { + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { printf("-----------------------------------------------------------\n"); printf("KokkosP: VTune Analyzer Connector (sequence is %d, version: %llu)\n", loadSeq, interfaceVer); @@ -69,7 +72,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, __itt_event_start(startEv); } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); printf("KokkosP: Finalization of VTune Connector. 
Complete.\n"); printf("-----------------------------------------------------------\n"); @@ -79,7 +82,7 @@ extern "C" void kokkosp_finalize_library() { __itt_event_start(finalEv); } -extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { *kID = nextKernelID++; std::string nameStr(name); @@ -97,12 +100,12 @@ extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devI __itt_frame_begin_v3(currentKernel->getDomain(), NULL); } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { +void kokkosp_end_parallel_for(const uint64_t kID) { __itt_frame_end_v3(currentKernel->getDomain(), NULL); currentKernel = NULL; } -extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { *kID = nextKernelID++; std::string nameStr(name); @@ -121,12 +124,12 @@ extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t dev } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { +void kokkosp_end_parallel_scan(const uint64_t kID) { __itt_frame_end_v3(currentKernel->getDomain(), NULL); currentKernel = NULL; } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { *kID = nextKernelID++; std::string nameStr(name); @@ -145,8 +148,38 @@ extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t d } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { __itt_frame_end_v3(currentKernel->getDomain(), NULL); currentKernel = NULL; } +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + 
memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +}} // namespace KokkosTools::VTuneConnector + +extern "C" { + +namespace impl = KokkosTools::VTuneConnector; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git a/profiling/vtune-connector/kp_vtune_connector_domain.h b/profiling/vtune-connector/kp_vtune_connector_domain.h index 7438715af..bb8483cc9 100644 --- a/profiling/vtune-connector/kp_vtune_connector_domain.h +++ b/profiling/vtune-connector/kp_vtune_connector_domain.h @@ -44,11 +44,13 @@ #define _H_KOKKOSP_KERNEL_VTUNE_CONNECTOR_INFO #include -#include #include #include "ittnotify.h" +namespace KokkosTools { +namespace VTuneConnector { + enum KernelExecutionType { PARALLEL_FOR = 0, PARALLEL_REDUCE = 1, @@ -94,4 +96,6 @@ class KernelVTuneConnectorInfo { __itt_string_handle* domainNameHandle; }; -#endif +}} // namespace KokkosTools::VTuneConnector + +#endif // _H_KOKKOSP_KERNEL_VTUNE_CONNECTOR_INFO diff --git a/profiling/vtune-focused-connector/CMakeLists.txt 
b/profiling/vtune-focused-connector/CMakeLists.txt new file mode 100644 index 000000000..192a43add --- /dev/null +++ b/profiling/vtune-focused-connector/CMakeLists.txt @@ -0,0 +1,6 @@ +find_package(ITT REQUIRED) + +kp_add_library(kp_vtune_focused_connector kp_vtune_focused_connector.cpp) + +# Use the keyword signature so the link visibility is explicit. +target_link_libraries(kp_vtune_focused_connector PRIVATE ittapi) diff --git a/profiling/vtune-focused-connector/kp_vtune_focused_connector.cpp b/profiling/vtune-focused-connector/kp_vtune_focused_connector.cpp index d2b56a15b..7fc5a28f2 100644 --- a/profiling/vtune-focused-connector/kp_vtune_focused_connector.cpp +++ b/profiling/vtune-focused-connector/kp_vtune_focused_connector.cpp @@ -46,18 +46,21 @@ #include #include #include -#include +#include "kp_core.hpp" #include "kp_vtune_focused_connector_domain.h" +namespace KokkosTools { +namespace VTuneFocusedConnector { + static KernelVTuneFocusedConnectorInfo* currentKernel; static std::unordered_map domain_map; static uint64_t nextKernelID; -extern "C" void kokkosp_init_library(const int loadSeq, +void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, - void* deviceInfo) { + Kokkos_Profiling_KokkosPDeviceInfo* deviceInfo) { printf("-----------------------------------------------------------\n"); printf("KokkosP: VTune Analyzer Connector (sequence is %d, version: %llu)\n", loadSeq, interfaceVer); @@ -97,7 +100,7 @@ void focusedConnectorExecuteEnd() { currentKernel = NULL; } -extern "C" void kokkosp_finalize_library() { +void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); printf("KokkosP: Finalization of VTune Connector. 
Complete.\n"); printf("-----------------------------------------------------------\n"); @@ -105,35 +108,66 @@ extern "C" void kokkosp_finalize_library() { __itt_detach(); } -extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_FOR); focusedConnectorExecuteStart(); } -extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { +void kokkosp_end_parallel_for(const uint64_t kID) { focusedConnectorExecuteEnd(); } -extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_SCAN); focusedConnectorExecuteStart(); } -extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) { +void kokkosp_end_parallel_scan(const uint64_t kID) { focusedConnectorExecuteEnd(); } -extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { +void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_REDUCE); focusedConnectorExecuteStart(); } -extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { +void kokkosp_end_parallel_reduce(const uint64_t kID) { focusedConnectorExecuteEnd(); } + +Kokkos::Tools::Experimental::EventSet get_event_set() { + Kokkos::Tools::Experimental::EventSet my_event_set; + memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.init = kokkosp_init_library; + my_event_set.finalize = kokkosp_finalize_library; + my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; + my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; + 
my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; + my_event_set.end_parallel_for = kokkosp_end_parallel_for; + my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; + my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; + return my_event_set; +} + +}} // namespace KokkosTools::VTuneFocusedConnector + +extern "C" { + +namespace impl = KokkosTools::VTuneFocusedConnector; + +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) + +} // extern "C" diff --git a/profiling/vtune-focused-connector/kp_vtune_focused_connector_domain.h b/profiling/vtune-focused-connector/kp_vtune_focused_connector_domain.h index 916ec0119..79026ac37 100644 --- a/profiling/vtune-focused-connector/kp_vtune_focused_connector_domain.h +++ b/profiling/vtune-focused-connector/kp_vtune_focused_connector_domain.h @@ -44,11 +44,13 @@ #define _H_KOKKOSP_KERNEL_VTUNE_CONNECTOR_INFO #include -#include #include #include "ittnotify.h" +namespace KokkosTools { +namespace VTuneFocusedConnector { + enum KernelExecutionType { PARALLEL_FOR = 0, PARALLEL_REDUCE = 1, @@ -93,5 +95,6 @@ class KernelVTuneFocusedConnectorInfo { __itt_domain* domain; __itt_string_handle* domainNameHandle; }; +}} // namespace KokkosTools::VTuneFocusedConnector #endif diff --git a/tpls/Caliper b/tpls/Caliper new file mode 160000 index 000000000..b4314be9d --- /dev/null +++ b/tpls/Caliper @@ -0,0 +1 @@ +Subproject commit b4314be9dcdfcc1c28854e545a7cf1bcd34141d4 diff --git a/tpls/apex b/tpls/apex new file mode 160000 index 000000000..48b7831c3 --- /dev/null +++ b/tpls/apex @@ -0,0 +1 @@ +Subproject commit 
48b7831c30c4202bc3c655c2bc4f552217b1eb00