Rename potrf to cholesky (pytorch#12699)

vishwakftw · facebook-github-bot · commit d714ecf879ae · 2018-11-01T15:10:55.000-07:00
Summary:
This PR renames the function `potrf`, which computes the Cholesky decomposition
of positive definite matrices, to `cholesky`, matching the name used by NumPy and TF.

List of changes:
- make potrf cname for cholesky in Declarations.cwrap
- modify the function names in ATen/core
- modify the function names in Python frontend
- issue warnings when `potrf` is called to notify users of the change
- note: `cholesky` defaults to `upper=False`, whereas `potrf` defaulted to `upper=True`

Reviewed By: soumith

Differential Revision: D10528361

Pulled By: zou3519

fbshipit-source-id: 19d9bcf8ffb38def698ae5acf30743884dda0d88
diff --git a/aten/src/ATen/core/Tensor.h b/aten/src/ATen/core/Tensor.h
@@ -747,7 +747,7 @@ class CAFFE2_API Tensor {
   std::tuple<Tensor,Tensor> symeig(bool eigenvectors=false, bool upper=true) const;
   std::tuple<Tensor,Tensor> eig(bool eigenvectors=false) const;
   std::tuple<Tensor,Tensor,Tensor> svd(bool some=true, bool compute_uv=true) const;
-  Tensor potrf(bool upper=true) const;
+  Tensor cholesky(bool upper=false) const;
   Tensor potrs(const Tensor & input2, bool upper=true) const;
   Tensor potri(bool upper=true) const;
   std::tuple<Tensor,Tensor> pstrf(bool upper=true, Scalar tol=-1) const;
diff --git a/aten/src/ATen/core/TensorMethods.h b/aten/src/ATen/core/TensorMethods.h
@@ -1508,8 +1508,8 @@ inline std::tuple<Tensor,Tensor> Tensor::eig(bool eigenvectors) const {
 inline std::tuple<Tensor,Tensor,Tensor> Tensor::svd(bool some, bool compute_uv) const {
     return type().svd(*this, some, compute_uv);
 }
-inline Tensor Tensor::potrf(bool upper) const {
-    return type().potrf(*this, upper);
+inline Tensor Tensor::cholesky(bool upper) const {
+    return type().cholesky(*this, upper);
 }
 inline Tensor Tensor::potrs(const Tensor & input2, bool upper) const {
     return type().potrs(*this, input2, upper);
diff --git a/aten/src/ATen/core/Type.h b/aten/src/ATen/core/Type.h
@@ -704,7 +704,7 @@ struct CAFFE2_API Type {
   virtual std::tuple<Tensor,Tensor> symeig(const Tensor & self, bool eigenvectors, bool upper) const = 0;
   virtual std::tuple<Tensor,Tensor> eig(const Tensor & self, bool eigenvectors) const = 0;
   virtual std::tuple<Tensor,Tensor,Tensor> svd(const Tensor & self, bool some, bool compute_uv) const = 0;
-  virtual Tensor potrf(const Tensor & self, bool upper) const = 0;
+  virtual Tensor cholesky(const Tensor & self, bool upper) const = 0;
   virtual Tensor potrs(const Tensor & self, const Tensor & input2, bool upper) const = 0;
   virtual Tensor potri(const Tensor & self, bool upper) const = 0;
   virtual std::tuple<Tensor,Tensor> pstrf(const Tensor & self, bool upper, Scalar tol) const = 0;
diff --git a/aten/src/ATen/core/aten_interned_strings.h b/aten/src/ATen/core/aten_interned_strings.h
@@ -236,6 +236,7 @@ _(aten, cauchy) \
 _(aten, ceil) \
 _(aten, celu) \
 _(aten, chain_matmul) \
+_(aten, cholesky) \
 _(aten, chunk) \
 _(aten, clamp) \
 _(aten, clamp_max) \
@@ -510,7 +511,6 @@ _(aten, pinverse) \
 _(aten, pixel_shuffle) \
 _(aten, poisson) \
 _(aten, polygamma) \
-_(aten, potrf) \
 _(aten, potri) \
 _(aten, potrs) \
 _(aten, pow) \
diff --git a/aten/src/ATen/native/LegacyDefinitions.cpp b/aten/src/ATen/native/LegacyDefinitions.cpp
@@ -491,11 +491,11 @@ std::tuple<Tensor,Tensor,Tensor> svd(const Tensor & self, bool some, bool comput
   return at::_th_svd(self, some, compute_uv);
 }
 
-Tensor & potrf_out(Tensor & result, const Tensor & self, bool upper) {
+Tensor & cholesky_out(Tensor & result, const Tensor & self, bool upper) {
   return at::_th_potrf_out(result, self, upper);
 }
 
-Tensor potrf(const Tensor & self, bool upper) {
+Tensor cholesky(const Tensor & self, bool upper) {
   return at::_th_potrf(self, upper);
 }
 
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
@@ -2785,10 +2785,10 @@
   variants: method, function
   device_guard: false
 
-- func: potrf_out(Tensor result, Tensor self, bool upper=true) -> Tensor
+- func: cholesky_out(Tensor result, Tensor self, bool upper=false) -> Tensor
   device_guard: false
 
-- func: potrf(Tensor self, bool upper=true) -> Tensor
+- func: cholesky(Tensor self, bool upper=false) -> Tensor
   variants: method, function
   device_guard: false
 
diff --git a/docs/source/tensors.rst b/docs/source/tensors.rst
@@ -180,6 +180,7 @@ view of a storage and defines numeric operations on it.
    .. automethod:: ceil
    .. automethod:: ceil_
    .. automethod:: char
+   .. automethod:: cholesky
    .. automethod:: chunk
    .. automethod:: clamp
    .. automethod:: clamp_
diff --git a/docs/source/torch.rst b/docs/source/torch.rst
@@ -288,6 +288,7 @@ BLAS and LAPACK Operations
 .. autofunction:: btrisolve
 .. autofunction:: btriunpack
 .. autofunction:: chain_matmul
+.. autofunction:: cholesky
 .. autofunction:: dot
 .. autofunction:: eig
 .. autofunction:: gels
diff --git a/test/test_autograd.py b/test/test_autograd.py
@@ -2025,13 +2025,13 @@ def test_cat_empty(self):
                               True, f_args_variable, f_args_tensor)
 
     @skipIfNoLapack
-    def test_potrf(self):
-        root = Variable(torch.tril(torch.rand(S, S)), requires_grad=True)
+    def test_cholesky(self):
+        root = torch.tril(torch.rand(S, S)).requires_grad_()
 
         def run_test(upper):
             def func(root):
                 x = torch.mm(root, root.t())
-                return torch.potrf(x, upper)
+                return torch.cholesky(x, upper)
 
             gradcheck(func, [root])
             gradgradcheck(func, [root])
diff --git a/test/test_distributions.py b/test/test_distributions.py
@@ -1721,14 +1721,14 @@ def test_multivariate_normal_shape(self):
         tmp = torch.randn(3, 10)
         cov = (torch.matmul(tmp, tmp.t()) / tmp.size(-1)).requires_grad_()
         prec = cov.inverse().requires_grad_()
-        scale_tril = torch.potrf(cov, upper=False).requires_grad_()
+        scale_tril = torch.cholesky(cov, upper=False).requires_grad_()
 
         # construct batch of PSD covariances
         tmp = torch.randn(6, 5, 3, 10)
         cov_batched = (tmp.unsqueeze(-2) * tmp.unsqueeze(-3)).mean(-1).requires_grad_()
         prec_batched = [C.inverse() for C in cov_batched.view((-1, 3, 3))]
         prec_batched = torch.stack(prec_batched).view(cov_batched.shape)
-        scale_tril_batched = [torch.potrf(C, upper=False) for C in cov_batched.view((-1, 3, 3))]
+        scale_tril_batched = [torch.cholesky(C, upper=False) for C in cov_batched.view((-1, 3, 3))]
         scale_tril_batched = torch.stack(scale_tril_batched).view(cov_batched.shape)
 
         # ensure that sample, batch, event shapes all handled correctly
@@ -1764,7 +1764,7 @@ def test_multivariate_normal_log_prob(self):
         tmp = torch.randn(3, 10)
         cov = (torch.matmul(tmp, tmp.t()) / tmp.size(-1)).requires_grad_()
         prec = cov.inverse().requires_grad_()
-        scale_tril = torch.potrf(cov, upper=False).requires_grad_()
+        scale_tril = torch.cholesky(cov, upper=False).requires_grad_()
 
         # check that logprob values match scipy logpdf,
         # and that covariance and scale_tril parameters are equivalent
@@ -1802,7 +1802,7 @@ def test_multivariate_normal_sample(self):
         tmp = torch.randn(3, 10)
         cov = (torch.matmul(tmp, tmp.t()) / tmp.size(-1)).requires_grad_()
         prec = cov.inverse().requires_grad_()
-        scale_tril = torch.potrf(cov, upper=False).requires_grad_()
+        scale_tril = torch.cholesky(cov, upper=False).requires_grad_()
 
         self._check_sampler_sampler(MultivariateNormal(mean, cov),
                                     scipy.stats.multivariate_normal(mean.detach().numpy(), cov.detach().numpy()),
@@ -1823,7 +1823,7 @@ def test_multivariate_normal_properties(self):
         m = MultivariateNormal(loc=loc, scale_tril=scale_tril)
         self.assertEqual(m.covariance_matrix, m.scale_tril.mm(m.scale_tril.t()))
         self.assertEqual(m.covariance_matrix.mm(m.precision_matrix), torch.eye(m.event_shape[0]))
-        self.assertEqual(m.scale_tril, torch.potrf(m.covariance_matrix, upper=False))
+        self.assertEqual(m.scale_tril, torch.cholesky(m.covariance_matrix, upper=False))
 
     def test_multivariate_normal_moments(self):
         set_rng_seed(0)  # see Note [Randomized statistical tests]
diff --git a/test/test_torch.py b/test/test_torch.py
@@ -5303,19 +5303,19 @@ def test_cholesky(self):
         A = torch.mm(x, x.t())
 
         # default Case
-        C = torch.potrf(A)
-        B = torch.mm(C.t(), C)
+        C = torch.cholesky(A)
+        B = torch.mm(C, C.t())
         self.assertEqual(A, B, 1e-14)
 
         # test Upper Triangular
-        U = torch.potrf(A, True)
+        U = torch.cholesky(A, True)
         B = torch.mm(U.t(), U)
-        self.assertEqual(A, B, 1e-14, 'potrf (upper) did not allow rebuilding the original matrix')
+        self.assertEqual(A, B, 1e-14, 'cholesky (upper) did not allow rebuilding the original matrix')
 
         # test Lower Triangular
-        L = torch.potrf(A, False)
+        L = torch.cholesky(A, False)
         B = torch.mm(L, L.t())
-        self.assertEqual(A, B, 1e-14, 'potrf (lower) did not allow rebuilding the original matrix')
+        self.assertEqual(A, B, 1e-14, 'cholesky (lower) did not allow rebuilding the original matrix')
 
     @skipIfNoLapack
     def test_potrs(self):
@@ -5332,12 +5332,12 @@ def test_potrs(self):
         a = torch.mm(a, a.t())
 
         # upper Triangular Test
-        U = torch.potrf(a)
-        x = torch.potrs(b, U)
+        U = torch.cholesky(a, True)
+        x = torch.potrs(b, U, True)
         self.assertLessEqual(b.dist(torch.mm(a, x)), 1e-12)
 
         # lower Triangular Test
-        L = torch.potrf(a, False)
+        L = torch.cholesky(a, False)
         x = torch.potrs(b, L, False)
         self.assertLessEqual(b.dist(torch.mm(a, x)), 1e-12)
 
@@ -5356,17 +5356,17 @@ def test_potri(self):
         inv0 = torch.inverse(a)
 
         # default case
-        chol = torch.potrf(a)
-        inv1 = torch.potri(chol)
+        chol = torch.cholesky(a)
+        inv1 = torch.potri(chol, False)
         self.assertLessEqual(inv0.dist(inv1), 1e-12)
 
         # upper Triangular Test
-        chol = torch.potrf(a, True)
+        chol = torch.cholesky(a, True)
         inv1 = torch.potri(chol, True)
         self.assertLessEqual(inv0.dist(inv1), 1e-12)
 
         # lower Triangular Test
-        chol = torch.potrf(a, False)
+        chol = torch.cholesky(a, False)
         inv1 = torch.potri(chol, False)
         self.assertLessEqual(inv0.dist(inv1), 1e-12)
 
diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml
@@ -184,6 +184,9 @@
 - name: ceil(Tensor self)
   self: zeros_like(grad)
 
+- name: cholesky(Tensor self, bool upper)
+  self: cholesky_backward(grad, upper, result)
+
 # For clamp, gradient is not defined at the boundaries. But empirically it's helpful
 # to be able to get gradient on min and max, so we return the subgradient 1 for these cases.
 - name: clamp(Tensor self, Scalar? min, Scalar? max)
@@ -563,9 +566,6 @@
 - name: poisson(Tensor self, Generator generator)
   self: zeros_like(self)
 
-- name: potrf(Tensor self, bool upper)
-  self: potrf_backward(grad, upper, result)
-
 - name: potri(Tensor self, bool upper)
   self: not_implemented("potri")
 
diff --git a/tools/autograd/templates/Functions.cpp b/tools/autograd/templates/Functions.cpp
@@ -626,7 +626,7 @@ Tensor masked_scatter_backward(const Tensor & grad, const Tensor & mask, IntList
   return mask_selected.view(sizes);
 }
 
-Tensor potrf_backward(Tensor grad, bool upper, Tensor L) {
+Tensor cholesky_backward(Tensor grad, bool upper, Tensor L) {
   // cf. Iain Murray (2016); arXiv 1602.07527
   if (upper) {
     L = L.t();
diff --git a/torch/_tensor_docs.py b/torch/_tensor_docs.py
@@ -525,6 +525,13 @@ def add_docstr_all(method, docstr):
 In-place version of :meth:`~Tensor.ceil`
 """)
 
+add_docstr_all('cholesky',
+               r"""
+cholesky(upper=False) -> Tensor
+
+See :func:`torch.cholesky`
+""")
+
 add_docstr_all('clamp',
                r"""
 clamp(min, max) -> Tensor
@@ -1619,13 +1626,6 @@ def callable(a, b) -> number
     torch.Size([5, 2, 3])
 """)
 
-add_docstr_all('potrf',
-               r"""
-potrf(upper=True) -> Tensor
-
-See :func:`torch.potrf`
-""")
-
 add_docstr_all('potri',
                r"""
 potri(upper=True) -> Tensor
diff --git a/torch/_torch_docs.py b/torch/_torch_docs.py
@@ -825,6 +825,51 @@ def parse_kwargs(desc):
     tensor([-2.1763, -0.4713, -0.6986,  1.3702])
 """)
 
+add_docstr(torch.cholesky, r"""
+cholesky(a, upper=False, out=None) -> Tensor
+
+Computes the Cholesky decomposition of a symmetric positive-definite
+matrix :math:`A`.
+
+If :attr:`upper` is ``True``, the returned matrix `U` is upper-triangular, and
+the decomposition has the form:
+
+.. math::
+
+  A = U^TU
+
+If :attr:`upper` is ``False``, the returned matrix `L` is lower-triangular, and
+the decomposition has the form:
+
+.. math::
+
+    A = LL^T
+
+Args:
+    a (Tensor): the input 2-D tensor, a symmetric positive-definite matrix
+    upper (bool, optional): flag that indicates whether to return the
+                            upper or lower triangular matrix. Default: ``False``
+    out (Tensor, optional): the output matrix
+
+Example::
+
+    >>> a = torch.randn(3, 3)
+    >>> a = torch.mm(a, a.t()) # make symmetric positive definite
+    >>> l = torch.cholesky(a)
+    >>> a
+    tensor([[ 2.4112, -0.7486,  1.4551],
+            [-0.7486,  1.3544,  0.1294],
+            [ 1.4551,  0.1294,  1.6724]])
+    >>> l
+    tensor([[ 1.5528,  0.0000,  0.0000],
+            [-0.4821,  1.0592,  0.0000],
+            [ 0.9371,  0.5487,  0.7023]])
+    >>> torch.mm(l, l.t())
+    tensor([[ 2.4112, -0.7486,  1.4551],
+            [-0.7486,  1.3544,  0.1294],
+            [ 1.4551,  0.1294,  1.6724]])
+""")
+
 add_docstr(torch.clamp,
            r"""
 clamp(input, min, max, out=None) -> Tensor
@@ -3249,51 +3294,6 @@ def parse_kwargs(desc):
 
 """)
 
-add_docstr(torch.potrf, r"""
-potrf(a, upper=True, out=None) -> Tensor
-
-Computes the Cholesky decomposition of a symmetric positive-definite
-matrix :math:`A`.
-
-If :attr:`upper` is ``True``, the returned matrix `U` is upper-triangular, and
-the decomposition has the form:
-
-.. math::
-
-  A = U^TU
-
-If :attr:`upper` is ``False``, the returned matrix `L` is lower-triangular, and
-the decomposition has the form:
-
-.. math::
-
-    A = LL^T
-
-Args:
-    a (Tensor): the input 2-D tensor, a symmetric positive-definite matrix
-    upper (bool, optional): flag that indicates whether to return the
-                            upper or lower triangular matrix
-    out (Tensor, optional): the output matrix
-
-Example::
-
-    >>> a = torch.randn(3, 3)
-    >>> a = torch.mm(a, a.t()) # make symmetric positive definite
-    >>> u = torch.potrf(a)
-    >>> a
-    tensor([[ 2.4112, -0.7486,  1.4551],
-            [-0.7486,  1.3544,  0.1294],
-            [ 1.4551,  0.1294,  1.6724]])
-    >>> u
-    tensor([[ 1.5528, -0.4821,  0.9371],
-            [ 0.0000,  1.0592,  0.5486],
-            [ 0.0000,  0.0000,  0.7023]])
-    >>> torch.mm(u.t(), u)
-    tensor([[ 2.4112, -0.7486,  1.4551],
-            [-0.7486,  1.3544,  0.1294],
-            [ 1.4551,  0.1294,  1.6724]])
-""")
-
 add_docstr(torch.potri, r"""
 potri(u, upper=True, out=None) -> Tensor
 
@@ -3322,7 +3322,7 @@ def parse_kwargs(desc):
 
     >>> a = torch.randn(3, 3)
     >>> a = torch.mm(a, a.t()) # make symmetric positive definite
-    >>> u = torch.potrf(a)
+    >>> u = torch.cholesky(a)
     >>> a
     tensor([[  0.9935,  -0.6353,   1.5806],
             [ -0.6353,   0.8769,  -1.7183],
@@ -3367,7 +3367,7 @@ def parse_kwargs(desc):
 
     >>> a = torch.randn(3, 3)
     >>> a = torch.mm(a, a.t()) # make symmetric positive definite
-    >>> u = torch.potrf(a)
+    >>> u = torch.cholesky(a)
     >>> a
     tensor([[ 0.7747, -1.9549,  1.3086],
             [-1.9549,  6.7546, -5.4114],
diff --git a/torch/distributions/multivariate_normal.py b/torch/distributions/multivariate_normal.py
diff --git a/torch/functional.py b/torch/functional.py
diff --git a/torch/tensor.py b/torch/tensor.py

Original file line number	Diff line number	Diff line change
`@@ -1508,8 +1508,8 @@ inline std::tuple<Tensor,Tensor> Tensor::eig(bool eigenvectors) const {`
`1508`	`1508`	`inline std::tuple<Tensor,Tensor,Tensor> Tensor::svd(bool some, bool compute_uv) const {`
`1509`	`1509`	`return type().svd(*this, some, compute_uv);`
`1510`	`1510`	`}`
`1511`		`-inline Tensor Tensor::potrf(bool upper) const {`
`1512`		`- return type().potrf(*this, upper);`
	`1511`	`+inline Tensor Tensor::cholesky(bool upper) const {`
	`1512`	`+ return type().cholesky(*this, upper);`
`1513`	`1513`	`}`
`1514`	`1514`	`inline Tensor Tensor::potrs(const Tensor & input2, bool upper) const {`
`1515`	`1515`	`return type().potrs(*this, input2, upper);`
Original file line number	Diff line number	Diff line change
`@@ -491,11 +491,11 @@ std::tuple<Tensor,Tensor,Tensor> svd(const Tensor & self, bool some, bool comput`
`491`	`491`	`return at::_th_svd(self, some, compute_uv);`
`492`	`492`	`}`
`493`	`493`
`494`		`-Tensor & potrf_out(Tensor & result, const Tensor & self, bool upper) {`
	`494`	`+Tensor & cholesky_out(Tensor & result, const Tensor & self, bool upper) {`
`495`	`495`	`return at::_th_potrf_out(result, self, upper);`
`496`	`496`	`}`
`497`	`497`
`498`		`-Tensor potrf(const Tensor & self, bool upper) {`
	`498`	`+Tensor cholesky(const Tensor & self, bool upper) {`
`499`	`499`	`return at::_th_potrf(self, upper);`
`500`	`500`	`}`
`501`	`501`
Original file line number	Diff line number	Diff line change
`@@ -626,7 +626,7 @@ Tensor masked_scatter_backward(const Tensor & grad, const Tensor & mask, IntList`
`626`	`626`	`return mask_selected.view(sizes);`
`627`	`627`	`}`
`628`	`628`
`629`		`-Tensor potrf_backward(Tensor grad, bool upper, Tensor L) {`
	`629`	`+Tensor cholesky_backward(Tensor grad, bool upper, Tensor L) {`
`630`	`630`	`// cf. Iain Murray (2016); arXiv 1602.07527`
`631`	`631`	`if (upper) {`
`632`	`632`	`L = L.t();`