Skip to content

Commit d125694

Browse files
malfet authored and facebook-github-bot committed
Move CUDA async warning to suffix (pytorch#59467)
Summary: After the change, async error warnings look as follows:

```
$ python -c "import torch;torch.eye(3,3,device='cuda:777')"
Traceback (most recent call last):
  File "<string>", line 1, in <module>
RuntimeError: CUDA error: invalid device ordinal
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
```

Pull Request resolved: pytorch#59467
Reviewed By: ngimel
Differential Revision: D28904360
Pulled By: malfet
fbshipit-source-id: 2a8fa5affed5b4ffcaa602c8ab2669061cde7db0
1 parent f23c45b commit d125694

File tree

3 files changed

+21
-18
lines changed

3 files changed

+21
-18
lines changed

c10/cuda/CUDAException.h

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -33,17 +33,21 @@ class C10_CUDA_API CUDAError : public c10::Error {
3333
} \
3434
} while (0)
3535
#else
36-
#define C10_CUDA_CHECK(EXPR) \
37-
do { \
38-
cudaError_t __err = EXPR; \
39-
if (__err != cudaSuccess) { \
40-
auto error_unused C10_UNUSED = cudaGetLastError(); \
41-
auto _cuda_check_prefix = c10::cuda::get_cuda_check_prefix(); \
42-
throw c10::CUDAError( \
43-
{__func__, __FILE__, static_cast<uint32_t>(__LINE__)}, \
44-
TORCH_CHECK_MSG( \
45-
false, "", _cuda_check_prefix, cudaGetErrorString(__err))); \
46-
} \
36+
#define C10_CUDA_CHECK(EXPR) \
37+
do { \
38+
cudaError_t __err = EXPR; \
39+
if (__err != cudaSuccess) { \
40+
auto error_unused C10_UNUSED = cudaGetLastError(); \
41+
auto _cuda_check_suffix = c10::cuda::get_cuda_check_suffix(); \
42+
throw c10::CUDAError( \
43+
{__func__, __FILE__, static_cast<uint32_t>(__LINE__)}, \
44+
TORCH_CHECK_MSG( \
45+
false, \
46+
"", \
47+
"CUDA error: ", \
48+
cudaGetErrorString(__err), \
49+
_cuda_check_suffix)); \
50+
} \
4751
} while (0)
4852
#endif
4953

c10/cuda/CUDAFunctions.cpp

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -141,17 +141,16 @@ void device_synchronize() {
141141
C10_CUDA_CHECK(cudaDeviceSynchronize());
142142
}
143143

144-
const char* get_cuda_check_prefix() noexcept {
144+
const char* get_cuda_check_suffix() noexcept {
145145
static char* device_blocking_flag = getenv("CUDA_LAUNCH_BLOCKING");
146146
static bool blocking_enabled =
147147
(device_blocking_flag && atoi(device_blocking_flag));
148148
if (blocking_enabled) {
149-
return "CUDA error: ";
149+
return "";
150150
} else {
151-
return "CUDA kernel errors might be "
152-
"asynchronously reported at some other API call,so the "
153-
"stacktrace below might be incorrect. For debugging "
154-
"consider passing CUDA_LAUNCH_BLOCKING=1. CUDA error: ";
151+
return "\nCUDA kernel errors might be asynchronously reported at some"
152+
" other API call,so the stacktrace below might be incorrect."
153+
"\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1.";
155154
}
156155
}
157156

c10/cuda/CUDAFunctions.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ C10_CUDA_API void set_device(DeviceIndex device);
3030

3131
C10_CUDA_API void device_synchronize();
3232

33-
C10_CUDA_API const char* get_cuda_check_prefix() noexcept;
33+
C10_CUDA_API const char* get_cuda_check_suffix() noexcept;
3434

3535
} // namespace cuda
3636
} // namespace c10

0 commit comments

Comments
 (0)