Move CUDA async warning to suffix (pytorch#59467)

malfet · facebook-github-bot · commit d125694d0bc4 · 2021-06-04T17:26:28.000-07:00
Summary: After the change, async error warnings look as follows:
```
$ python -c "import torch;torch.eye(3,3,device='cuda:777')"
Traceback (most recent call last):
  File "<string>", line 1, in <module>
RuntimeError: CUDA error: invalid device ordinal
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
```
Pull Request resolved: pytorch#59467
Reviewed By: ngimel
Differential Revision: D28904360
Pulled By: malfet
fbshipit-source-id: 2a8fa5affed5b4ffcaa602c8ab2669061cde7db0
diff --git a/c10/cuda/CUDAException.h b/c10/cuda/CUDAException.h
@@ -33,17 +33,21 @@ class C10_CUDA_API CUDAError : public c10::Error {
     }                                                            \
   } while (0)
 #else
-#define C10_CUDA_CHECK(EXPR)                                              \
-  do {                                                                    \
-    cudaError_t __err = EXPR;                                             \
-    if (__err != cudaSuccess) {                                           \
-      auto error_unused C10_UNUSED = cudaGetLastError();                  \
-      auto _cuda_check_prefix = c10::cuda::get_cuda_check_prefix();       \
-      throw c10::CUDAError(                                               \
-          {__func__, __FILE__, static_cast<uint32_t>(__LINE__)},          \
-          TORCH_CHECK_MSG(                                                \
-              false, "", _cuda_check_prefix, cudaGetErrorString(__err))); \
-    }                                                                     \
+#define C10_CUDA_CHECK(EXPR)                                        \
+  do {                                                              \
+    cudaError_t __err = EXPR;                                       \
+    if (__err != cudaSuccess) {                                     \
+      auto error_unused C10_UNUSED = cudaGetLastError();            \
+      auto _cuda_check_suffix = c10::cuda::get_cuda_check_suffix(); \
+      throw c10::CUDAError(                                         \
+          {__func__, __FILE__, static_cast<uint32_t>(__LINE__)},    \
+          TORCH_CHECK_MSG(                                          \
+              false,                                                \
+              "",                                                   \
+              "CUDA error: ",                                       \
+              cudaGetErrorString(__err),                            \
+              _cuda_check_suffix));                                 \
+    }                                                               \
   } while (0)
 #endif
 
diff --git a/c10/cuda/CUDAFunctions.cpp b/c10/cuda/CUDAFunctions.cpp
@@ -141,17 +141,16 @@ void device_synchronize() {
   C10_CUDA_CHECK(cudaDeviceSynchronize());
 }
 
-const char* get_cuda_check_prefix() noexcept {
+const char* get_cuda_check_suffix() noexcept {
   static char* device_blocking_flag = getenv("CUDA_LAUNCH_BLOCKING");
   static bool blocking_enabled =
       (device_blocking_flag && atoi(device_blocking_flag));
   if (blocking_enabled) {
-    return "CUDA error: ";
+    return "";
   } else {
-    return "CUDA kernel errors might be "
-           "asynchronously reported at some other API call,so the "
-           "stacktrace below might be incorrect. For debugging "
-           "consider passing CUDA_LAUNCH_BLOCKING=1. CUDA error: ";
+    return "\nCUDA kernel errors might be asynchronously reported at some"
+           " other API call,so the stacktrace below might be incorrect."
+           "\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1.";
   }
 }
 
diff --git a/c10/cuda/CUDAFunctions.h b/c10/cuda/CUDAFunctions.h
@@ -30,7 +30,7 @@ C10_CUDA_API void set_device(DeviceIndex device);
 
 C10_CUDA_API void device_synchronize();
 
-C10_CUDA_API const char* get_cuda_check_prefix() noexcept;
+C10_CUDA_API const char* get_cuda_check_suffix() noexcept;
 
 } // namespace cuda
 } // namespace c10