
Commit 7b8f73d

jbschlosser authored and facebook-github-bot committed
No-batch-dim support for ConvNd (pytorch#70506)

Summary: Pull Request resolved: pytorch#70506
Test Plan: Imported from OSS
Reviewed By: albanD
Differential Revision: D33355034
Pulled By: jbschlosser
fbshipit-source-id: 5a42645299b1d82cee7d461826acca1c5b35a71c
1 parent 6896b2d commit 7b8f73d
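In short, the change lets the convNd / conv_transposeNd functionals (and thus the corresponding modules) accept inputs without a leading batch dimension: the input is batched internally, convolved, and squeezed back. A minimal Python sketch of the new behavior (shapes are illustrative, not from the commit):

import torch
import torch.nn.functional as F

# Unbatched conv2d input: (channels, height, width), no leading batch dim.
x = torch.randn(3, 28, 28)
w = torch.randn(8, 3, 5, 5)  # (out_channels, in_channels, kH, kW)

out = F.conv2d(x, w)
print(out.shape)  # torch.Size([8, 24, 24]) -- output is unbatched too

# Batched inputs behave exactly as before.
out_b = F.conv2d(x.unsqueeze(0), w)
print(out_b.shape)  # torch.Size([1, 8, 24, 24])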

File tree: 7 files changed (+318 −99 lines)

aten/src/ATen/native/Convolution.cpp

Lines changed: 81 additions & 24 deletions
@@ -575,6 +575,30 @@ static void check_shape_backward(
   check_shape_forward(input, weight_sizes, /*bias=*/ Tensor(), params);
 }
 
+// Given an input tensor and an expected number of spatial dimensions, checks that the
+// input is a valid shape and returns the batched form of the input.
+//
+// Args:
+//     input (Tensor): Input tensor
+//     num_spatial_dims (int): Number of spatial dimensions expected for the input
+//     func_name (string): Function name to produce a nice error message for invalid input
+//
+// Returns a std::tuple containing:
+//     batched_input (Tensor): Input with a batch dimension
+//     is_batched (bool): Indicates whether the original input was already batched
+static std::tuple<Tensor, bool> batchify(
+    const Tensor& input,
+    const int64_t num_spatial_dims,
+    const std::string& func_name) {
+  const auto dim_count_no_batch = num_spatial_dims + 1;
+  const auto dim_count_batch = dim_count_no_batch + 1;
+  const auto is_batched = (input.dim() == dim_count_batch);
+  TORCH_CHECK(input.dim() == dim_count_no_batch || is_batched,
+      "Expected ", dim_count_no_batch, "D (unbatched) or ", dim_count_batch,
+      "D (batched) input to ", func_name, ", but got input of size: ", input.sizes());
+  return std::make_tuple(is_batched ? input : input.unsqueeze(0), is_batched);
+}
+
 static void check_input_same_type_as_parameters(
     const Tensor& input,
     const Tensor& weight,
@@ -618,36 +642,45 @@ static at::Tensor subtensor(at::Tensor& tensor, int dim, int groups, int g) {
 
 
 at::Tensor conv1d(
-    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
+    const Tensor& input_, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
     IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, int64_t groups) {
   // See [Note: hacky wrapper removal for optional tensor]
   c10::MaybeOwned<Tensor> bias_maybe_owned = at::borrow_from_optional_tensor(bias_opt);
   const Tensor& bias = *bias_maybe_owned;
 
-  return at::convolution(input, weight, bias, stride, padding, dilation,
-                         false, {0}, groups);
+  Tensor input;
+  bool is_batched;
+  std::tie(input, is_batched) = batchify(input_, /*num_spatial_dims=*/ 1, "conv1d");
+  auto output = at::convolution(input, weight, bias, stride, padding, dilation, false, {0}, groups);
+  return is_batched ? output : output.squeeze(0);
 }
 
 at::Tensor conv2d(
-    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
+    const Tensor& input_, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
     IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, int64_t groups) {
   // See [Note: hacky wrapper removal for optional tensor]
   c10::MaybeOwned<Tensor> bias_maybe_owned = at::borrow_from_optional_tensor(bias_opt);
   const Tensor& bias = *bias_maybe_owned;
 
-  return at::convolution(input, weight, bias, stride, padding, dilation,
-                         false, {{0, 0}}, groups);
+  Tensor input;
+  bool is_batched;
+  std::tie(input, is_batched) = batchify(input_, /*num_spatial_dims=*/ 2, "conv2d");
+  auto output = at::convolution(input, weight, bias, stride, padding, dilation, false, {{0, 0}}, groups);
+  return is_batched ? output : output.squeeze(0);
 }
 
 at::Tensor conv3d(
-    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
+    const Tensor& input_, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
     IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, int64_t groups) {
   // See [Note: hacky wrapper removal for optional tensor]
   c10::MaybeOwned<Tensor> bias_maybe_owned = at::borrow_from_optional_tensor(bias_opt);
   const Tensor& bias = *bias_maybe_owned;
 
-  return at::convolution(input, weight, bias, stride, padding, dilation,
-                         false, {{0, 0, 0}}, groups);
+  Tensor input;
+  bool is_batched;
+  std::tie(input, is_batched) = batchify(input_, /*num_spatial_dims=*/ 3, "conv3d");
+  auto output = at::convolution(input, weight, bias, stride, padding, dilation, false, {{0, 0, 0}}, groups);
+  return is_batched ? output : output.squeeze(0);
 }
 
 
@@ -736,60 +769,84 @@ Tensor _convolution_mode(
 }
 
 at::Tensor conv1d(
-    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias,
+    const Tensor& input_, const Tensor& weight, const c10::optional<Tensor>& bias,
     IntArrayRef stride, c10::string_view padding, IntArrayRef dilation,
     int64_t groups) {
-  return at::_convolution_mode(
+  Tensor input;
+  bool is_batched;
+  std::tie(input, is_batched) = batchify(input_, /*num_spatial_dims=*/ 1, "conv1d");
+  auto output = at::_convolution_mode(
       input, weight, bias, stride, std::move(padding), dilation, groups);
+  return is_batched ? output : output.squeeze(0);
 }
 
 at::Tensor conv2d(
-    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias,
+    const Tensor& input_, const Tensor& weight, const c10::optional<Tensor>& bias,
    IntArrayRef stride, c10::string_view padding, IntArrayRef dilation,
     int64_t groups) {
-  return at::_convolution_mode(
+  Tensor input;
+  bool is_batched;
+  std::tie(input, is_batched) = batchify(input_, /*num_spatial_dims=*/ 2, "conv2d");
+  auto output = at::_convolution_mode(
      input, weight, bias, stride, std::move(padding), dilation, groups);
+  return is_batched ? output : output.squeeze(0);
 }
 
 at::Tensor conv3d(
-    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias,
+    const Tensor& input_, const Tensor& weight, const c10::optional<Tensor>& bias,
     IntArrayRef stride, c10::string_view padding, IntArrayRef dilation,
     int64_t groups) {
-  return at::_convolution_mode(
+  Tensor input;
+  bool is_batched;
+  std::tie(input, is_batched) = batchify(input_, /*num_spatial_dims=*/ 3, "conv3d");
+  auto output = at::_convolution_mode(
      input, weight, bias, stride, std::move(padding), dilation, groups);
+  return is_batched ? output : output.squeeze(0);
 }
 
 at::Tensor conv_transpose1d(
-    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
+    const Tensor& input_, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
     IntArrayRef stride, IntArrayRef padding, IntArrayRef output_padding, int64_t groups, IntArrayRef dilation) {
   // See [Note: hacky wrapper removal for optional tensor]
   c10::MaybeOwned<Tensor> bias_maybe_owned = at::borrow_from_optional_tensor(bias_opt);
   const Tensor& bias = *bias_maybe_owned;
 
-  return at::convolution(input, weight, bias, stride, padding, dilation,
-                         true, output_padding, groups);
+  Tensor input;
+  bool is_batched;
+  std::tie(input, is_batched) = batchify(input_, /*num_spatial_dims=*/ 1, "conv_transpose1d");
+  auto output = at::convolution(
+      input, weight, bias, stride, padding, dilation, true, output_padding, groups);
+  return is_batched ? output : output.squeeze(0);
 }
 
 at::Tensor conv_transpose2d(
-    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
+    const Tensor& input_, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
     IntArrayRef stride, IntArrayRef padding, IntArrayRef output_padding, int64_t groups, IntArrayRef dilation) {
   // See [Note: hacky wrapper removal for optional tensor]
   c10::MaybeOwned<Tensor> bias_maybe_owned = at::borrow_from_optional_tensor(bias_opt);
   const Tensor& bias = *bias_maybe_owned;
 
-  return at::convolution(input, weight, bias, stride, padding, dilation,
-                         true, output_padding, groups);
+  Tensor input;
+  bool is_batched;
+  std::tie(input, is_batched) = batchify(input_, /*num_spatial_dims=*/ 2, "conv_transpose2d");
+  auto output = at::convolution(
+      input, weight, bias, stride, padding, dilation, true, output_padding, groups);
+  return is_batched ? output : output.squeeze(0);
 }
 
 at::Tensor conv_transpose3d(
-    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
+    const Tensor& input_, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
     IntArrayRef stride, IntArrayRef padding, IntArrayRef output_padding, int64_t groups, IntArrayRef dilation) {
   // See [Note: hacky wrapper removal for optional tensor]
   c10::MaybeOwned<Tensor> bias_maybe_owned = at::borrow_from_optional_tensor(bias_opt);
   const Tensor& bias = *bias_maybe_owned;
 
-  return at::convolution(input, weight, bias, stride, padding, dilation,
-                         true, output_padding, groups);
+  Tensor input;
+  bool is_batched;
+  std::tie(input, is_batched) = batchify(input_, /*num_spatial_dims=*/ 3, "conv_transpose3d");
+  auto output = at::convolution(
+      input, weight, bias, stride, padding, dilation, true, output_padding, groups);
+  return is_batched ? output : output.squeeze(0);
 }
 
 at::Tensor convolution(
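Note that batchify gates every entry point above: inputs with any other dimensionality are rejected up front with the new error message. A hedged sketch of the resulting behavior, reusing the shapes from the updated test_nn.py test further below:

import torch
import torch.nn.functional as F

w = torch.randn(6, 1, 5, 5)
try:
    # A 5D input is neither 3D (unbatched) nor 4D (batched) for conv2d.
    F.conv2d(torch.randn(1, 10, 1, 28, 28), w)
except RuntimeError as e:
    # Expected 3D (unbatched) or 4D (batched) input to conv2d, but got
    # input of size: [1, 10, 1, 28, 28]
    print(e)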

test/test_cpp_extensions_aot.py

Lines changed: 1 addition & 1 deletion
@@ -159,7 +159,7 @@ def test_conv_backend_override(self):
         bias = torch.empty(6, device='ort')
 
         # Make sure forward is overriden
-        out = torch.nn.functional.conv1d(input, weight, bias, 2, 0, 1, 1)
+        out = torch.nn.functional.conv2d(input, weight, bias, 2, 0, 1, 1)
         self.assertEqual(ort_extension.get_test_int(), 2)
         self.assertEqual(out.shape[0], input.shape[0])
         self.assertEqual(out.shape[1], weight.shape[0])

test/test_jit.py

Lines changed: 1 addition & 1 deletion
@@ -13005,7 +13005,7 @@ def forward(self, x):
                 return self.conv(x)
         foo = Foo()
         # testing that the correct error message propagates
-        with self.assertRaisesRegex(RuntimeError, "Expected 4-dimensional input for 4-dimensional weight"):
+        with self.assertRaisesRegex(RuntimeError, r"Expected 3D \(unbatched\) or 4D \(batched\) input to conv2d"):
             foo(torch.ones([123]))  # wrong size
 
     def test_builtin_error_messsage(self):

test/test_modules.py

Lines changed: 8 additions & 4 deletions
@@ -12,7 +12,7 @@
 from torch.testing._internal.common_modules import module_db, modules
 from torch.testing._internal.common_utils import (
     TestCase, run_tests, freeze_rng_state, mock_wrapper, get_tensors_from, gradcheck, gradgradcheck)
-from unittest.mock import patch
+from unittest.mock import patch, call
 
 
 class TestModule(TestCase):
@@ -122,9 +122,9 @@ def test_factory_kwargs(self, device, dtype, module_info):
             with patch.object(torch.nn.UninitializedBuffer, '__new__', uninit_buffer_new):
                 m = module_cls(*args, **kwargs)
             uninit_param_new.mock.assert_has_calls(
-                [mock.call(device=device, dtype=dtype) for _ in uninit_param_new.mock.mock_calls])
+                [call(device=device, dtype=dtype) for _ in uninit_param_new.mock.mock_calls])
             uninit_buffer_new.mock.assert_has_calls(
-                [mock.call(device=device, dtype=dtype) for _ in uninit_buffer_new.mock.mock_calls])
+                [call(device=device, dtype=dtype) for _ in uninit_buffer_new.mock.mock_calls])
         else:
             # Check device placement and dtype for created parameters and buffers.
             # Only verify floating point dtypes since that's what the kwarg applies to.
@@ -421,9 +421,13 @@ def _test_gradients_helper(self, device, dtype, module_info, check):
 
         params = tuple(m.parameters())
 
-        # === Perform gradient check on the input_args ===
+        # === Lazy modules need to see an input to initialize params before gradcheck is run. ===
         input_args, input_kwargs = module_input.forward_input.args, module_input.forward_input.kwargs
+        if issubclass(module_info.module_cls, torch.nn.modules.lazy.LazyModuleMixin):
+            with torch.no_grad():
+                m(*input_args, **input_kwargs)
 
+        # === Perform gradient check on the input_args ===
         other_kwargs = {}
         kwarg_tensors = []
         for name, obj in input_kwargs.items():
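The gradcheck change above accounts for lazy modules, whose parameters stay uninitialized until a first forward pass; gradcheck needs materialized parameters. A small sketch of the same pattern outside the test harness (module choice is illustrative):

import torch

m = torch.nn.LazyConv2d(out_channels=4, kernel_size=3)  # in_channels inferred on first forward
assert isinstance(m, torch.nn.modules.lazy.LazyModuleMixin)

x = torch.randn(2, 3, 8, 8)
with torch.no_grad():
    m(x)  # dry run materializes weight/bias; in_channels becomes 3

params = tuple(m.parameters())  # now concrete tensors, safe to hand to gradcheck
print(m.weight.shape)  # torch.Size([4, 3, 3, 3])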

test/test_nn.py

Lines changed: 6 additions & 6 deletions
@@ -895,8 +895,8 @@ def test_mismatch_shape_conv2d(self):
         w = torch.randn(6, 1, 5, 5)
 
         with self.assertRaisesRegex(RuntimeError,
-                                    r'Expected 4-dimensional input for 4-dimensional weight \[6, 1, 5, 5\],' +
-                                    r' but got 5-dimensional input of size \[1, 10, 1, 28, 28\] instead'):
+                                    r'Expected 3D \(unbatched\) or 4D \(batched\) input to conv2d, but got ' +
+                                    r'input of size: \[1, 10, 1, 28, 28\]'):
 
             F.conv2d(x, w)
 
@@ -6172,9 +6172,9 @@ def test_conv_modules_raise_error_on_incorrect_input_size(self):
                    nn.Conv2d(3, 8, 3).to(dtype), nn.ConvTranspose2d(3, 8, 3).to(dtype),
                    nn.Conv3d(3, 8, 3).to(dtype), nn.ConvTranspose3d(3, 8, 3).to(dtype)]
 
-        invalid_input_dims = [(2, 4), (2, 4),
-                              (3, 5), (3, 5),
-                              (4, 6), (4, 6)]
+        invalid_input_dims = [(1, 4), (1, 4),
+                              (2, 5), (2, 5),
+                              (3, 6), (3, 6)]
 
         for invalid_dims, module in zip(invalid_input_dims, modules):
             for dims in invalid_dims:
@@ -13402,7 +13402,7 @@ def test_conv2d_same_padding_backward(self, device):
         gx_expect, gy_expect = x.grad, y.grad
         x.grad, y.grad = None, None
 
-        z = F.conv1d(x, y, padding='same')
+        z = F.conv2d(x, y, padding='same')
         z.sum().backward()
         self.assertEqual(gx_expect, x.grad)
         self.assertEqual(gy_expect, y.grad)
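The shift in invalid_input_dims follows from the new contract: each ConvNd/ConvTransposeNd now accepts both (N+1)-dim (unbatched) and (N+2)-dim (batched) inputs, so the formerly-invalid (N+1)-dim case had to be dropped from the list. For instance (sizes are illustrative):

import torch
import torch.nn as nn

conv = nn.Conv2d(3, 8, 3)

out = conv(torch.randn(3, 8, 8))     # 3D unbatched input: valid after this commit
print(out.shape)                     # torch.Size([8, 6, 6])
out = conv(torch.randn(2, 3, 8, 8))  # 4D batched input: valid as before

try:
    conv(torch.randn(3, 8))          # 2D input: still invalid for conv2d
except RuntimeError as e:
    print(e)  # Expected 3D (unbatched) or 4D (batched) input to conv2d, ...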
