Skip to content

Commit e7698ff

Browse files
kurtamohler authored and
pytorchmergebot committed
[MPS] Move abs op to Metal (pytorch#155474)
Pull Request resolved: pytorch#155474 Approved by: https://github.com/Skylion007, https://github.com/malfet
1 parent 7a48cc6 commit e7698ff

File tree

5 files changed

+24
-34
lines changed

5 files changed

+24
-34
lines changed

aten/src/ATen/native/mps/kernels/UnaryKernel.metal

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,17 @@ struct sigmoid_functor {
4343
}
4444
};
4545

46+
struct abs_functor {
47+
template <typename T, enable_if_t<!is_complex_v<T>, bool> = true>
48+
inline T operator()(const T x) {
49+
return static_cast<T>(precise::abs(x));
50+
}
51+
template <typename T, enable_if_t<is_complex_v<T>, bool> = true>
52+
inline T operator()(const T x) {
53+
return T(::precise::sqrt(dot(x, x)), 0);
54+
}
55+
};
56+
4657
struct sin_functor {
4758
template <typename T>
4859
inline enable_if_t<is_scalar_floating_point_v<T>, T> operator()(const T x) {
@@ -321,6 +332,14 @@ REGISTER_UNARY_OP(bitwise_not, char, char);
321332
REGISTER_UNARY_OP(bitwise_not, uchar, uchar);
322333
REGISTER_UNARY_OP(bitwise_not, bool, bool);
323334

335+
REGISTER_UNARY_OP(abs, int, int);
336+
REGISTER_UNARY_OP(abs, long, long);
337+
REGISTER_UNARY_OP(abs, short, short);
338+
REGISTER_UNARY_OP(abs, char, char);
339+
REGISTER_UNARY_OP(abs, uchar, uchar);
340+
REGISTER_UNARY_OP(abs, float, float);
341+
REGISTER_UNARY_OP(abs, half, half);
342+
324343
#define INSTANTIATE_UNARY_KERNELS2(DTYPE0, DTYPE1) \
325344
REGISTER_UNARY_OP(erf, DTYPE1, DTYPE0); \
326345
REGISTER_UNARY_OP(erfc, DTYPE1, DTYPE0); \
@@ -343,6 +362,7 @@ REGISTER_UNARY_OP(bitwise_not, bool, bool);
343362
#if __METAL_VERSION__ >= 310
344363
INSTANTIATE_UNARY_KERNELS2(bfloat, bfloat);
345364
REGISTER_UNARY_OP(neg, bfloat, bfloat);
365+
REGISTER_UNARY_OP(abs, bfloat, bfloat);
346366
#endif
347367
INSTANTIATE_UNARY_KERNELS2(half, half);
348368
INSTANTIATE_UNARY_KERNELS2(float, float);
@@ -357,6 +377,7 @@ INSTANTIATE_UNARY_KERNELS2(float, long);
357377
REGISTER_UNARY_OP(neg, DTYPE##2, DTYPE##2); \
358378
REGISTER_UNARY_OP(exp, DTYPE##2, DTYPE##2); \
359379
REGISTER_UNARY_OP(sigmoid, DTYPE##2, DTYPE##2); \
380+
REGISTER_UNARY_OP(abs, DTYPE##2, DTYPE##2); \
360381
REGISTER_UNARY_OP(exp2, DTYPE##2, DTYPE##2); \
361382
REGISTER_UNARY_OP(log, DTYPE##2, DTYPE##2); \
362383
REGISTER_UNARY_OP(log10, DTYPE##2, DTYPE##2); \

aten/src/ATen/native/mps/operations/UnaryKernel.mm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ static void round_decimals_kernel(TensorIteratorBase& iter, int64_t decimals) {
3030
REGISTER_UNARY_TI_DISPATCH(erfinv);
3131
REGISTER_UNARY_TI_DISPATCH(sinc);
3232
REGISTER_UNARY_TI_DISPATCH(tanh);
33+
REGISTER_UNARY_TI_DISPATCH(abs);
3334
REGISTER_UNARY_TI_DISPATCH(sin);
3435
REGISTER_UNARY_TI_DISPATCH(cos);
3536
REGISTER_UNARY_TI_DISPATCH(tan);

aten/src/ATen/native/mps/operations/UnaryOps.mm

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
#include <ATen/NativeFunctions.h>
1313
#else
1414
#include <ATen/ops/_copy_from_and_resize.h>
15-
#include <ATen/ops/abs_native.h>
1615
#include <ATen/ops/acos_native.h>
1716
#include <ATen/ops/acosh_native.h>
1817
#include <ATen/ops/angle_native.h>
@@ -206,36 +205,6 @@ static void unary_op(const Tensor& self,
206205
CREATE_MPS_STRUCTURED_UNARY_TORCH_IMPL_FUNC(acosh_out_mps, acosh)
207206
CREATE_MPS_STRUCTURED_UNARY_TORCH_IMPL_FUNC(atanh_out_mps, atanh)
208207

209-
Tensor& abs_out_mps(const Tensor& self, Tensor& output) {
210-
using namespace mps;
211-
212-
if (!output.is_same_size(self)) {
213-
output.resize_(self.sizes());
214-
}
215-
216-
if (self.numel() == 0) {
217-
return output;
218-
}
219-
220-
if (supportsComplex() || !self.is_complex()) {
221-
unary_op_noresize(self, output, "abs_out_mps", ^MPSGraphTensor*(MPSGraph* mpsGraph, MPSGraphTensor* inputTensor) {
222-
auto rc = [mpsGraph absoluteWithTensor:inputTensor name:nil];
223-
if (self.is_complex()) {
224-
rc = [mpsGraph realPartOfTensor:rc name:nil];
225-
}
226-
return rc;
227-
});
228-
} else {
229-
Tensor realInput = at::view_as_real(self);
230-
unary_op_noresize(
231-
realInput, output, "abs_out_mps", ^MPSGraphTensor*(MPSGraph* mpsGraph, MPSGraphTensor* inputTensor) {
232-
auto rc = lengthOfComplexAsReal(mpsGraph, inputTensor);
233-
return [mpsGraph reshapeTensor:rc withShape:getMPSShape(output) name:nil];
234-
});
235-
}
236-
return output;
237-
}
238-
239208
Tensor& logical_not_out_mps(const Tensor& self, Tensor& output) {
240209
auto bool_self = self.to(ScalarType::Bool);
241210
mps::unary_op(bool_self, output, "logical_not_out_mps", [](MPSGraph* mpsGraph, MPSGraphTensor* inputTensor) {

aten/src/ATen/native/native_functions.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -357,8 +357,7 @@
357357
- func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
358358
device_check: NoCheck # TensorIterator
359359
dispatch:
360-
CPU, CUDA: abs_out
361-
MPS: abs_out_mps
360+
CPU, CUDA, MPS: abs_out
362361
SparseCPU, SparseCUDA: abs_sparse_out
363362
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_out
364363
tags: pointwise

torch/testing/_internal/common_modules.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3852,7 +3852,7 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad
38523852
unittest.expectedFailure,
38533853
'TestModule',
38543854
'test_memory_format',
3855-
active_if=operator.itemgetter('training'),
3855+
active_if=operator.itemgetter('training') and not _macos15_or_newer,
38563856
device_type='mps',
38573857
),)
38583858
),

0 commit comments

Comments (0)