|
| 1 | +#include <ATen/ATen.h> |
| 2 | +#include <ATen/native/mps/OperationUtils.h> |
| 3 | +#include <ATen/native/mps/MPSGraphVenturaOps.h> |
| 4 | +#include <torch/library.h> |
| 5 | +#include <c10/util/Optional.h> |
| 6 | + |
| 7 | + |
| 8 | +namespace at { |
| 9 | +namespace native { |
| 10 | + |
// Computes the inverse of a (batched) square matrix `A` into `result`,
// writing per-batch LAPACK-style status codes into `info`.
//
// MPS path (macOS 13+): uses MPSGraph's inverseOfTensor: op. NOTE(review):
// this path never populates `info` — it is unconditionally zeroed below,
// because MPSGraph reports no singularity information; `check_errors` is
// therefore a no-op on the graph path. Confirm whether a post-hoc
// singularity check is required for strict parity with CPU/CUDA.
//
// CPU fallback (macOS < 13): delegates to at::linalg_inv_ex_out on CPU
// copies and copies the results back to the MPS tensors.
TORCH_IMPL_FUNC(linalg_inv_ex_out_mps)(const Tensor& A, bool check_errors, const Tensor& result, const Tensor& info)
{
  TORCH_CHECK(result.is_mps(), "Output tensor is not MPS");
  if (!is_macos_13_or_newer()) {
    TORCH_WARN_ONCE("torch.linalg_inv_ex.inverse is supported by MPS on MacOS 13+, please upgrade. Falling back to CPU.");
    // linalg_inv_ex_out resizes `info` as needed, so an empty int tensor suffices.
    auto cpu_info = at::empty({0}, kInt, c10::nullopt, kCPU, c10::nullopt, c10::nullopt);
    auto cpu_result = result.clone().to("cpu");
    // Forward `check_errors` so the fallback raises on singular inputs when
    // the caller asked for error checking (previously it was silently dropped
    // and the default `false` was always used).
    at::linalg_inv_ex_out(cpu_result, cpu_info, A.to("cpu"), check_errors);
    info.copy_(cpu_info);
    result.copy_(cpu_result);
    return;
  }

  using namespace mps;
  MPSStream* stream = getCurrentMPSStream();
  // The graph path has no error reporting; mark every batch entry as success.
  info.zero_();

  // Cached-graph record: one placeholder for the input matrix and one for
  // its inverse.
  struct CachedGraph : public MPSCachedGraph
  {
    CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
    MPSGraphTensor* inputTensor_ = nil;
    MPSGraphTensor* outputTensor_ = nil;
  };

  // MPSGraph writes into a contiguous buffer; stage through a contiguous
  // temporary when `result` is not contiguous and copy back afterwards.
  Tensor output = result;
  bool isContiguous = true;
  if (!result.is_contiguous()) {
    output = result.contiguous();
    isContiguous = false;
  }

  MPSGraphCache* cache_ = MPSGraphCache::getInstance();

  @autoreleasepool {
    // Key the cached graph on the input tensor's shape/dtype signature.
    string key = "inv_out_mps" + getTensorsStringKey({A});
    CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
    if(!cachedGraph)
    {
      MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {

        CachedGraph *newCachedGraph = nil;
        @autoreleasepool {
          MPSGraph* mpsGraph = make_mps_graph();
          newCachedGraph = new CachedGraph(mpsGraph);
          MPSGraphTensor* inputTensor= mpsGraphRankedPlaceHolder(mpsGraph, A);
          // inverseOfTensor: requires macOS 13 (Ventura); guarded above.
          MPSGraphTensor* outputTensor = [mpsGraph inverseOfTensor: inputTensor
                                                              name: nil];

          newCachedGraph->inputTensor_ = inputTensor;
          newCachedGraph->outputTensor_ = outputTensor;
        }

        return newCachedGraph;

      });
      cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
    }

    Placeholder inputPlaceholder = Placeholder(cachedGraph->inputTensor_, A);
    Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor_, isContiguous ? result : output);

    NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
      inputPlaceholder.getMPSGraphTensor() : inputPlaceholder.getMPSGraphTensorData()
    };

    NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
      outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
    };

    runMPSGraph(stream, cachedGraph->graph(), feeds, results);
    // Materialize the staged contiguous inverse into the caller's strided output.
    if (!isContiguous) {
      result.copy_(output);
    }
  }
}
| 86 | +} |
| 87 | +} |
0 commit comments