
Commit cf1f594

ezyang authored and facebook-github-bot committed
Hacky support for meta tensor serialization. (pytorch#62192)
Summary:
Pull Request resolved: pytorch#62192

This support is hacky because it doesn't preserve meta tensor storage sharing: if you serialize a model with shared storage (e.g., a tensor and a view on that tensor), the viewing relationship is broken on deserialization and you get two unrelated tensors. The hack is also durable, in the sense that we will be on the hook for supporting `_rebuild_meta_tensor_no_storage` in perpetuity, even if we change our mind about the serialization format.

This unblocks an FB production use case. I didn't add C++ support, to minimize the blast area of this patch.

Signed-off-by: Edward Z. Yang <[email protected]>

Test Plan: Imported from OSS

Reviewed By: zou3519

Differential Revision: D29910535

Pulled By: ezyang

fbshipit-source-id: d98dcdd0108dfc3ae730a071d3c583b6d0281d21
1 parent f0140a8 commit cf1f594
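
A minimal sketch of the storage-sharing limitation described in the summary. This snippet is not part of the commit; it assumes a PyTorch build that includes this patch:

    import io
    import torch

    t = torch.empty(4, 4, device='meta')
    v = t.view(16)                  # v is a view sharing t's (meta) storage

    buf = io.BytesIO()
    torch.save((t, v), buf)
    buf.seek(0)
    t2, v2 = torch.load(buf)

    # Shapes and dtypes survive the round trip, but t2 and v2 come back
    # as independent tensors: the viewing relationship is gone.
    print(t2.size(), v2.size())     # torch.Size([4, 4]) torch.Size([16])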

File tree: 3 files changed (+24, -0 lines)

test/test_serialization.py

Lines changed: 10 additions & 0 deletions

@@ -741,6 +741,16 @@ def test_pathlike_serialization(self):
         torch.save(model, path)
         torch.load(path)

+    def test_meta_serialization(self):
+        big_model = torch.nn.Conv2d(20000, 320000, kernel_size=3, device='meta')
+
+        with BytesIOContext() as f:
+            torch.save(big_model, f)
+            f.seek(0)
+            state = torch.load(f)
+
+        self.assertEqual(state.weight.size(), big_model.weight.size())
+
     def run(self, *args, **kwargs):
         with serialization_method(use_zip=True):
             return super(TestSerialization, self).run(*args, **kwargs)

torch/_tensor.py

Lines changed: 10 additions & 0 deletions

@@ -136,6 +136,16 @@ def _reduce_ex_internal(self, proto):
                 str(self.device),
                 self.requires_grad)
             return (torch._utils._rebuild_mlc_tensor, arg_mlc)
+        if self.device.type == 'meta':
+            # NB: This implementation BREAKS storage sharing.  Current
+            # hypothesis is that no one cares for meta tensors.
+            arg_meta = (
+                self.dtype,
+                tuple(self.size()),
+                self.stride(),
+                self.requires_grad,
+            )
+            return (torch._utils._rebuild_meta_tensor_no_storage, arg_meta)
         if self.is_quantized:
             # quantizer_params can be different type based on torch attribute
             quantizer_params: Union[Tuple[torch.qscheme, float, int], Tuple[Any, Tensor, Tensor, int]]
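
A rough illustration of the dispatch above; for plain `torch.Tensor` objects, `__reduce_ex__` should bottom out in `_reduce_ex_internal` (subclasses may be wrapped differently), so the return value can be inspected directly:

    import torch

    t = torch.empty(2, 3, device='meta')
    fn, args = t.__reduce_ex__(2)

    # Expected: the metadata-only rebuild function and its argument tuple,
    # roughly (torch.float32, (2, 3), (3, 1), False).
    print(fn.__name__)  # _rebuild_meta_tensor_no_storage
    print(args)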

torch/_utils.py

Lines changed: 4 additions & 0 deletions

@@ -185,6 +185,10 @@ def _rebuild_mlc_tensor(data, dtype, device, requires_grad):
     return tensor


+def _rebuild_meta_tensor_no_storage(dtype, size, stride, requires_grad):
+    return torch.empty_strided(size, stride, dtype=dtype, device='meta', requires_grad=requires_grad)
+
+
 def _rebuild_qtensor(storage, storage_offset, size, stride, quantizer_params, requires_grad, backward_hooks):
     qscheme = quantizer_params[0]
     if qscheme == torch.per_tensor_affine:
