Skip to content

Commit fbd08fb

Browse files
pruthvistony authored and
pytorchmergebot committed
Introduce TORCH_DISABLE_GPU_ASSERTS (pytorch#84190)
- Asserts for CUDA are enabled by default - Disabled for ROCm by default by setting `TORCH_DISABLE_GPU_ASSERTS` to `ON` - Can be enabled for ROCm by setting the above variable to `OFF` during the build, or can be forcefully enabled by setting `ROCM_FORCE_ENABLE_GPU_ASSERTS:BOOL=ON` This is a follow-up change as per the comment in PR pytorch#81790, comment [link](pytorch#81790 (comment)) Pull Request resolved: pytorch#84190 Approved by: https://github.com/jeffdaily, https://github.com/malfet
1 parent 70b00b1 commit fbd08fb

File tree

7 files changed

+31
-21
lines changed

7 files changed

+31
-21
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ if(NOT USE_XNNPACK AND CMAKE_VERSION VERSION_LESS ${XNNPACK_MIN_CMAKE_VER})
285285
endif()
286286
option(USE_ZMQ "Use ZMQ" OFF)
287287
option(USE_ZSTD "Use ZSTD" OFF)
288+
option(TORCH_DISABLE_GPU_ASSERTS "Disable GPU asserts by default" OFF)
288289
# Ensure that an ITT build is the default for x86 CPUs
289290
cmake_dependent_option(
290291
USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON

c10/macros/Macros.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -326,9 +326,8 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
326326
// CUDA_KERNEL_ASSERT checks the assertion
327327
// even when NDEBUG is defined. This is useful for important assertions in CUDA
328328
// code that would otherwise be suppressed when building Release.
329-
#if defined(__ANDROID__) || defined(__APPLE__) || \
330-
(defined(USE_ROCM) && ROCM_VERSION < 40100) || \
331-
(defined(USE_ROCM) && defined(ROCM_DISABLE_GPU_ASSERTS))
329+
#if defined(__ANDROID__) || defined(__APPLE__) || \
330+
(defined(USE_ROCM) && ROCM_VERSION < 40100)
332331
// Those platforms do not support assert()
333332
#define CUDA_KERNEL_ASSERT(cond)
334333
#define SYCL_KERNEL_ASSERT(cond)
@@ -368,7 +367,9 @@ extern SYCL_EXTERNAL void __assert_fail(
368367
unsigned int line,
369368
const char* func);
370369
#else // __SYCL_DEVICE_ONLY__
371-
#if (defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)))
370+
#if ( \
371+
defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)) && \
372+
!defined(TORCH_DISABLE_GPU_ASSERTS))
372373
// CUDA supports the __assert_fail function, which is common to both device
373374
// and host side code.
374375
__host__ __device__
@@ -386,7 +387,7 @@ __host__ __device__
386387
const char* function) throw() __attribute__((__noreturn__));
387388

388389
#if (defined(__HIP_ARCH__) || defined(__HIP__)) && \
389-
!defined(ROCM_DISABLE_GPU_ASSERTS)
390+
!defined(TORCH_DISABLE_GPU_ASSERTS)
390391
// ROCm supports __assert_fail only as a device side function.
391392
__device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail(
392393
const char* assertion,

caffe2/core/macros.h.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ static_assert(
4444
#cmakedefine CAFFE2_USE_NVTX
4545
#cmakedefine CAFFE2_USE_ITT
4646
#cmakedefine CAFFE2_USE_TRT
47+
#cmakedefine TORCH_DISABLE_GPU_ASSERTS
4748

4849
#ifndef EIGEN_MPL2_ONLY
4950
#cmakedefine EIGEN_MPL2_ONLY
@@ -85,4 +86,5 @@ static_assert(
8586
{"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \
8687
{"USE_ITT", "${CAFFE2_USE_ITT}"}, \
8788
{"USE_TRT", "${CAFFE2_USE_TRT}"}, \
89+
{"TORCH_DISABLE_GPU_ASSERTS", "${TORCH_DISABLE_GPU_ASSERTS}"}, \
8890
}

cmake/Dependencies.cmake

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1248,6 +1248,16 @@ if(ANDROID)
12481248
list(APPEND Caffe2_DEPENDENCY_LIBS log)
12491249
endif()
12501250

1251+
# ---[ Kernel asserts
1252+
# Kernel asserts are enabled by default for CUDA and disabled for ROCm.
1253+
# For ROCm, they can be enabled by setting ROCM_FORCE_ENABLE_GPU_ASSERTS to ON.
1254+
if(USE_ROCM AND ROCM_FORCE_ENABLE_GPU_ASSERTS)
1255+
message(STATUS "Forcefully enabling kernel asserts on ROCM")
1256+
elseif(USE_ROCM AND NOT ROCM_FORCE_ENABLE_GPU_ASSERTS)
1257+
message(STATUS "Disabling kernel asserts for ROCm")
1258+
caffe2_update_option(TORCH_DISABLE_GPU_ASSERTS ON)
1259+
endif()
1260+
12511261
# ---[ LLVM
12521262
if(USE_LLVM)
12531263
message(STATUS "Looking for LLVM in ${USE_LLVM}")

cmake/Summary.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,4 +199,5 @@ function(caffe2_print_configuration_summary)
199199
# coreml
200200
message(STATUS " USE_COREML_DELEGATE : ${USE_COREML_DELEGATE}")
201201
message(STATUS " BUILD_LAZY_TS_BACKEND : ${BUILD_LAZY_TS_BACKEND}")
202+
message(STATUS " TORCH_DISABLE_GPU_ASSERTS : ${TORCH_DISABLE_GPU_ASSERTS}")
202203
endfunction()

cmake/public/LoadHIP.cmake

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,6 @@ message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}")
143143
# Add HIP to the CMAKE Module Path
144144
set(CMAKE_MODULE_PATH ${HIP_PATH}/cmake ${CMAKE_MODULE_PATH})
145145

146-
#Disable kernel assert due to performance regression
147-
set(ROCM_ENABLE_KERNEL_ASSERTS FALSE CACHE BOOL "Kernel asserts are disabled by default for ROCm")
148-
149146
macro(find_package_and_print_version PACKAGE_NAME)
150147
find_package("${PACKAGE_NAME}" ${ARGN})
151148
message("${PACKAGE_NAME} VERSION: ${${PACKAGE_NAME}_VERSION}")
@@ -286,19 +283,6 @@ if(HIP_FOUND)
286283
find_package_and_print_version(hipcub REQUIRED)
287284
find_package_and_print_version(rocthrust REQUIRED)
288285

289-
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0")
290-
if(ROCM_ENABLE_KERNEL_ASSERTS)
291-
message("ROCm version >= 4.1; enabling asserts")
292-
else()
293-
add_definitions(-DROCM_DISABLE_GPU_ASSERTS)
294-
message("ROCm version >= 4.1; kernel asserts are disabled")
295-
endif()
296-
else()
297-
# Disable Asserts In Code (Can't use asserts on HIP stack.)
298-
add_definitions(-DNDEBUG)
299-
message("ROCm version < 4.1; disablng asserts")
300-
endif()
301-
302286
if(HIP_COMPILER STREQUAL clang)
303287
set(hip_library_name amdhip64)
304288
else()

docs/source/notes/hip.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,3 +144,14 @@ Refer to CUDA Semantics doc
144144
---------------------------
145145

146146
For any sections not listed here, please refer to the CUDA semantics doc: :ref:`cuda-semantics`
147+
148+
149+
Enabling kernel asserts
150+
-----------------------
151+
152+
Kernel asserts are supported on ROCm, but they are disabled by default due to performance overhead. They can be enabled
153+
by recompiling PyTorch from source.
154+
155+
Please add below line as an argument to cmake command parameters::
156+
157+
-DROCM_FORCE_ENABLE_GPU_ASSERTS:BOOL=ON

0 commit comments

Comments
 (0)