
Commit d8dab6f

SsnL authored and facebook-github-bot committed
Add tensor.to(options) (pytorch#13146)
Summary:
ezyang on the template hack
smessmer on SFINAE of the `TensorOptions(Device)` constructor
goldsborough on the C++ API test changes
zdevito on the `jit` codegen changes

Pull Request resolved: pytorch#13146

Reviewed By: ezyang
Differential Revision: D12823809
Pulled By: SsnL
fbshipit-source-id: 98d65c401c98fda1c6fa358e4538f86c6495abdc
1 parent 3365d74 commit d8dab6f

15 files changed: +405 −101 lines

aten/src/ATen/core/Tensor.h

Lines changed: 13 additions & 1 deletion
@@ -650,9 +650,9 @@ class CAFFE2_API Tensor {
   std::vector<Tensor> unbind(int64_t dim=0) const;
   Tensor to_sparse(int64_t sparse_dim) const;
   Tensor to_sparse() const;
+  Tensor to(const TensorOptions & options, bool non_blocking=false, bool copy=false) const;
   Tensor to(Device device, ScalarType dtype, bool non_blocking=false, bool copy=false) const;
   Tensor to(ScalarType dtype, bool non_blocking=false, bool copy=false) const;
-  Tensor to(Device device, bool non_blocking=false, bool copy=false) const;
   Tensor to(const Tensor & other, bool non_blocking=false, bool copy=false) const;
   Scalar _local_scalar() const;
   int64_t storage_offset() const;
@@ -774,6 +774,18 @@ class CAFFE2_API Tensor {
   Tensor remainder(Scalar other) const;
   Tensor remainder(const Tensor & other) const;
 
+  // We changed .dtype() to return a TypeMeta in #12766. Ideally, we want
+  // at::kDouble and its friends to be TypeMetas, but that hasn't happened yet.
+  // Until it does, these methods maintain backward compatibility for C++
+  // usage like `x.to(y.dtype())`.
+  // TODO: remove the following two once at::kDouble and its friends are TypeMetas.
+  inline Tensor to(caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const {
+    return this->to(/*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy);
+  }
+  inline Tensor to(Device device, caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const {
+    return this->to(device, /*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy);
+  }
+
   template <typename F, typename... Args>
   auto m(F func, Args&&... params) const -> decltype(func(*this, std::forward<Args>(params)...)) {
     return func(*this, std::forward<Args>(params)...);
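
For context, a minimal sketch of what these BC overloads let existing C++ code keep doing (the tensors and function name here are illustrative, and the device line assumes a CUDA-enabled build):

#include <ATen/ATen.h>

void dtype_bc_sketch() {
  at::Tensor x = at::ones({2, 2}, at::kFloat);
  at::Tensor y = at::zeros({2, 2}, at::kDouble);
  // .dtype() now returns caffe2::TypeMeta; the overload above keeps this working:
  at::Tensor a = x.to(y.dtype());
  // Device + TypeMeta variant (uncomment on a CUDA build):
  // at::Tensor b = x.to(at::Device(at::kCUDA, 0), y.dtype());
}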

aten/src/ATen/core/TensorMethods.h

Lines changed: 3 additions & 3 deletions
@@ -1226,15 +1226,15 @@ inline Tensor Tensor::to_sparse(int64_t sparse_dim) const {
 inline Tensor Tensor::to_sparse() const {
   return type().to_sparse(*this);
 }
+inline Tensor Tensor::to(const TensorOptions & options, bool non_blocking, bool copy) const {
+  return type().to(*this, options, non_blocking, copy);
+}
 inline Tensor Tensor::to(Device device, ScalarType dtype, bool non_blocking, bool copy) const {
   return type().to(*this, device, dtype, non_blocking, copy);
 }
 inline Tensor Tensor::to(ScalarType dtype, bool non_blocking, bool copy) const {
   return type().to(*this, dtype, non_blocking, copy);
 }
-inline Tensor Tensor::to(Device device, bool non_blocking, bool copy) const {
-  return type().to(*this, device, non_blocking, copy);
-}
 inline Tensor Tensor::to(const Tensor & other, bool non_blocking, bool copy) const {
   return type().to(*this, other, non_blocking, copy);
 }
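
These inline wrappers are the first hop in the dispatch chain; roughly (a sketch of the flow, not the generated code):

// t.to(options, non_blocking, copy)        // inline method (TensorMethods.h)
//   -> t.type().to(t, options, ...)        // virtual dispatch via Type (Type.h)
//     -> at::native::to(t, options, ...)   // concrete kernel (TensorConversions.cpp)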

aten/src/ATen/core/TensorOptions.h

Lines changed: 64 additions & 7 deletions
@@ -8,6 +8,7 @@
 #include <ATen/core/ScalarTypeUtils.h>
 
 #include "c10/util/Optional.h"
+#include "c10/util/C++17.h"
 
 #include <cstddef>
 #include <iosfwd>
@@ -58,6 +59,51 @@ CAFFE2_API const DefaultTensorOptions& getDefaultTensorOptions();
 /// at::zeros({2,2}, at::device({at::kCUDA, 1})); // place on device 1
 /// at::zeros({2,2}, at::requires_grad());
 ///
+
+/// NOTE [ TensorOptions Constructors ]
+///
+/// TensorOptions is like a dictionary with entries from the set:
+/// {requires_grad, is_variable, device, dtype, layout}, where each entry may be
+/// unspecified (i.e., is optional). It is used to specify the properties of
+/// tensors in many places, both in the C++ internals and in the API, e.g., in
+/// tensor factory methods like `at::empty({10}, options)`, tensor conversions
+/// like `tensor.to(...)`, etc.
+///
+/// To provide a simple API that is consistent with Python, where one can do
+/// `torch.empty(sizes, X)` with `X` being a `torch.device`, `torch.dtype`, or a
+/// `torch.layout`, we want TensorOptions to be implicitly convertible from
+/// `ScalarType dtype`, `Layout layout` and `Device device`. Therefore, we have
+/// three implicit constructors, one from each of these three types.
+///
+/// This is sufficient for `ScalarType` and `Layout` as they are simple enum
+/// classes. However, `Device` is an ordinary class with implicit constructors
+/// `Device(DeviceType, DeviceIndex = -1)` and `Device(std::string)`, to be
+/// consistent with the Python API, where strings are treated as equivalent to
+/// a `torch.device` object (e.g., "cuda:1" can be passed everywhere a
+/// `torch.device("cuda:1")` is accepted). To support the syntax
+/// `at::empty({10}, {kCUDA, 1})` and `tensor.to(kCUDA)`, we need to make sure
+/// that `TensorOptions` is implicitly constructible with any arguments that a
+/// `Device` can be constructed from. So we have,
+///
+///    /* implicit */ TensorOptions(T&& device) : TensorOptions() {
+///      this->set_device(device);
+///    }
+///
+///    template <typename... Args,
+///              typename = std::enable_if_t<std::is_constructible<Device, Args&&...>::value>>
+///    /* implicit */ TensorOptions(Args&&... args)
+///      : TensorOptions(Device(std::forward<Args>(args)...)) {}
+///
+///
+/// But this will be problematic. Consider `TensorOptions({kCUDA, 1})`: the
+/// compiler will complain about ambiguity between the copy constructor and
+/// the `Device` constructor, because `{kCUDA, 1}` can be converted to both a
+/// `TensorOptions` and a `Device`.
+///
+/// To get around this, we templatize the `Device` constructor. Since overload
+/// resolution is done before template resolution, our problem is solved.
+
 struct CAFFE2_API TensorOptions {
   TensorOptions()
     : requires_grad_(false)
@@ -75,20 +121,31 @@ struct CAFFE2_API TensorOptions {
   }
 
   /// Constructs a `TensorOptions` object with the given device.
-  /* implicit */ TensorOptions(Device device) : TensorOptions() {
-    this->set_device(device);
+  /// See NOTE [ TensorOptions Constructors ] on why this is templatized.
+  template <typename T,
+            typename = c10::guts::enable_if_t<std::is_same<c10::guts::decay_t<T>, Device>::value>>
+  /* implicit */ TensorOptions(T&& device) : TensorOptions() {
+    this->set_device(std::forward<T>(device));
   }
 
+  /// Constructs a `TensorOptions` object from arguments allowed in `Device`
+  /// constructors.
+  ///
+  /// See NOTE [ TensorOptions Constructors ].
+  ///
+  /// NB: Ideally we would only allow implicit constructors here, but there
+  /// is no easy way to detect them, so this one also allows explicit
+  /// constructors.
+  template <typename... Args,
+            typename = c10::guts::enable_if_t<std::is_constructible<Device, Args&&...>::value>>
+  /* implicit */ TensorOptions(Args&&... args)
+    : TensorOptions(Device(std::forward<Args>(args)...)) {}
+
   /// Constructs a `TensorOptions` object from a backend, forwarded to the
   /// `Device` constructor.
   /* implicit */ TensorOptions(Backend backend)
     : TensorOptions(Device(backendToDeviceType(backend))) {}
 
-  /// Constructs a `TensorOptions` object from a device type, forwarded to the
-  /// `Device` constructor.
-  /* implicit */ TensorOptions(DeviceType device_type)
-    : TensorOptions(Device(device_type)) {}
-
   /// Constructs a `TensorOptions` object with the given dtype.
   /* implicit */ TensorOptions(caffe2::TypeMeta dtype) : TensorOptions() {
     this->set_dtype(dtype);
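
The constructor scheme the NOTE arrives at can be exercised in isolation. Below is a self-contained sketch with simplified stand-ins: `Device`, `Options`, `kCUDA`, and `take` here are not the real ATen names, and `std::enable_if_t` stands in for the `c10::guts` helpers.

#include <type_traits>
#include <utility>

enum class DeviceType { CPU, CUDA };
constexpr DeviceType kCUDA = DeviceType::CUDA;

struct Device {
  /* implicit */ Device(DeviceType type, int index = -1)
      : type(type), index(index) {}
  DeviceType type;
  int index;
};

struct Options {
  Options() = default;

  // Templated "Device" constructor: because template argument deduction
  // fails for a braced-init-list, this constructor never competes with the
  // copy constructor for `{kCUDA, 1}`, avoiding the ambiguity in the NOTE.
  template <typename T,
            typename = std::enable_if_t<
                std::is_same<std::decay_t<T>, Device>::value>>
  /* implicit */ Options(T&& device) : device(std::forward<T>(device)) {}

  // Accept anything a Device can be constructed from.
  template <typename... Args,
            typename = std::enable_if_t<
                std::is_constructible<Device, Args&&...>::value>>
  /* implicit */ Options(Args&&... args)
      : Options(Device(std::forward<Args>(args)...)) {}

  Device device{DeviceType::CPU, -1};
};

void take(const Options&) {}

int main() {
  take(Options{kCUDA, 1});  // variadic constructor, like at::empty({10}, {kCUDA, 1})
  take({kCUDA, 1});         // braced list converts via the variadic constructor
  take(kCUDA);              // DeviceType -> Device -> Options, like tensor.to(kCUDA)
}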

aten/src/ATen/core/Type.h

Lines changed: 1 addition & 1 deletion
@@ -604,9 +604,9 @@ struct CAFFE2_API Type {
   virtual std::vector<Tensor> unbind(const Tensor & self, int64_t dim) const = 0;
   virtual Tensor to_sparse(const Tensor & self, int64_t sparse_dim) const = 0;
   virtual Tensor to_sparse(const Tensor & self) const = 0;
+  virtual Tensor to(const Tensor & self, const TensorOptions & options, bool non_blocking, bool copy) const = 0;
   virtual Tensor to(const Tensor & self, Device device, ScalarType dtype, bool non_blocking, bool copy) const = 0;
   virtual Tensor to(const Tensor & self, ScalarType dtype, bool non_blocking, bool copy) const = 0;
-  virtual Tensor to(const Tensor & self, Device device, bool non_blocking, bool copy) const = 0;
   virtual Tensor to(const Tensor & self, const Tensor & other, bool non_blocking, bool copy) const = 0;
   virtual Scalar _local_scalar(const Tensor & self) const = 0;
   virtual int64_t storage_offset(const Tensor & self) const = 0;

aten/src/ATen/core/typeid.h

Lines changed: 1 addition & 1 deletion
@@ -351,7 +351,7 @@ class CAFFE2_API TypeMeta {
  private:
   // TypeMeta can only be created by Make, making sure that we do not
   // create incorrectly mixed up TypeMeta objects.
-  constexpr TypeMeta(const detail::TypeMetaData* data) noexcept : data_(data) {}
+  explicit constexpr TypeMeta(const detail::TypeMetaData* data) noexcept : data_(data) {}
 
  public:
  /**
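
Making this constructor `explicit` means a raw `const detail::TypeMetaData*` can no longer silently become a `TypeMeta`. The commit does not spell out its motivation, but a plausible reading, in miniature (simplified stand-in types, not the real ones):

struct Data {};

struct Meta {
  // As in the diff above: construction must be spelled out.
  explicit constexpr Meta(const Data* d) noexcept : d_(d) {}
  const Data* d_;
};

void f(Meta) {}

void caller(const Data* p) {
  // f(p);     // no longer compiles: the pointer does not implicitly convert
  f(Meta(p));  // the conversion is now visible at the call site
}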

aten/src/ATen/function_wrapper.py

Lines changed: 5 additions & 1 deletion
@@ -527,6 +527,7 @@ def __getitem__(self, x):
     ('buffers', Optional[List[str]]),
     ('returns', List[ReturnType]),
     ('inplace', bool),
+    ('is_factory_method', bool),
     ('abstract', bool),
     ('requires_tensor', bool),
     ('device_guard', bool),
@@ -924,6 +925,7 @@ def process_option(option, output_options):
         buffers=buffer_names,
         returns=option['returns'],
         inplace=option['inplace'],
+        is_factory_method=False,
         # See Note [Abstract ATen methods]
         abstract=abstract,
         requires_tensor=option.get('requires_tensor', False),
@@ -1070,7 +1072,8 @@ def find_formal(formal_name, formals):
 
     is_method = 'method' in option['variants']
     is_namespace_function = 'function' in option['variants']
-    is_factory_method = find_formal('TensorOptions', formals) and not dispatch_options
+    is_factory_method = find_formal('TensorOptions', formals) and \
+        not dispatch_options and 'method' not in option['variants']
     is_deprecated_factory_method = len(formals) > 0 and \
         formals[0]['dynamic_type'] == 'Type' and \
         option['return_type'] == 'Tensor' and option['deprecated']
@@ -1171,6 +1174,7 @@ def find_formal(formal_name, formals):
         buffers=None,
         returns=option['returns'],
         inplace=option['inplace'],
+        is_factory_method=is_factory_method,
         # See Note [Abstract ATen methods]
         abstract=abstract,
         requires_tensor=option.get('requires_tensor', False),

aten/src/ATen/native/TensorConversions.cpp

Lines changed: 44 additions & 14 deletions
@@ -1,23 +1,59 @@
 #include "ATen/ATen.h"
 #include "ATen/NativeFunctions.h"
+#include "c10/util/Optional.h"
 
 namespace at {
 namespace native {
 
-static void ensure_has_index(Device* device) {
-  if (!device->is_cuda() || device->has_index()) {
-    return;
+// Since the given Device may not have device_index set (i.e., it is -1,
+// representing the current device), we need to set the device_index before
+// comparing against the current device object in Tensor.
+// This always **copies**, but that is intended because (1) we shouldn't
+// modify the input argument, and (2) Device is small anyway.
+static inline Device ensure_has_index(const Device &device) {
+  if (!device.is_cuda() || device.has_index()) {
+    return device;
   }
-  device->set_index(at::current_device());
+  return Device(device.type(), at::current_device());
 }
 
-static Tensor to_impl(const Tensor& self, const TensorOptions& options, bool non_blocking) {
+static inline Tensor to_impl(const Tensor& self, const TensorOptions& options, bool non_blocking) {
   return self.type().toBackend(options.backend()).toScalarType(typeMetaToScalarType(options.dtype()))
                     .copy(self, non_blocking, options.device());
 }
 
+Tensor to(const Tensor& self, const TensorOptions& options, bool non_blocking, bool copy) {
+  AT_CHECK(options.requires_grad_opt() == c10::nullopt,
+           "to(options) expects unset requires_grad flag, but got "
+           "options.requires_grad set as ", options.requires_grad());
+
+  const auto & layout_opt = options.layout_opt();
+  AT_CHECK(!layout_opt || self.layout() == layout_opt.value(),
+           "to(options) doesn't support converting to a different layout, "
+           "but got self.layout being ", self.layout(),
+           " and options.layout set as ", options.layout());
+
+  auto device_opt = options.device_opt();
+  if (device_opt) {
+    device_opt = ensure_has_index(device_opt.value());
+  }
+  const auto & dtype_opt = options.dtype_opt();
+  if ((!device_opt || self.device() == device_opt.value()) &&
+      (!dtype_opt || self.dtype() == dtype_opt.value()) && !copy) {
+    return self;
+  }
+  auto specified_options = self.options();
+  if (device_opt) {
+    specified_options = specified_options.device(device_opt.value());
+  }
+  if (dtype_opt) {
+    specified_options = specified_options.dtype(dtype_opt.value());
+  }
+  return to_impl(self, specified_options, non_blocking);
+}
+
 Tensor to(const Tensor& self, Device device, ScalarType dtype, bool non_blocking, bool copy) {
-  ensure_has_index(&device);
+  device = ensure_has_index(device);
   if (self.device() == device && self.dtype() == dtype && !copy) {
     return self;
   }
@@ -31,17 +67,11 @@ Tensor to(const Tensor& self, ScalarType dtype, bool non_blocking, bool copy) {
   return to_impl(self, self.options().dtype(dtype), non_blocking);
 }
 
-Tensor to(const Tensor& self, Device device, bool non_blocking, bool copy) {
-  ensure_has_index(&device);
-  if (self.device() == device && !copy) {
-    return self;
-  }
-  return to_impl(self, self.options().device(device), non_blocking);
-}
-
 Tensor to(const Tensor& self, const Tensor& other, bool non_blocking, bool copy) {
   auto self_options = self.options();
   auto options = other.options();
+  // Tensor.options() always has everything filled in, so we don't even need
+  // to set the device index here.
   if (self_options == options && !copy) {
     return self;
   }
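
Put together, the new kernel gives `tensor.to(options)` the behavior sketched below (illustrative values; the CUDA line assumes a CUDA-enabled build):

#include <ATen/ATen.h>

void to_options_sketch() {
  at::Tensor t = at::ones({2, 2}, at::kFloat);

  // Nothing specified and copy == false: returns self without copying.
  at::Tensor same = t.to(at::TensorOptions());

  // dtype-only conversion; layout must match and requires_grad must be
  // unset in the options, or the AT_CHECKs above fire.
  at::Tensor d = t.to(at::TensorOptions().dtype(at::kDouble));

  // A device without an index gets one filled in by ensure_has_index
  // (uncomment on a CUDA build):
  // at::Tensor c = t.to(at::TensorOptions().device(at::kCUDA));
}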

aten/src/ATen/native/native_functions.yaml

Lines changed: 6 additions & 3 deletions
@@ -2211,15 +2211,18 @@
   CPU: dense_to_sparse
   CUDA: dense_to_sparse
 
-- func: to(Tensor self, Device device, ScalarType dtype, bool non_blocking=false, bool copy=false) -> Tensor
+# to(Device) must not exist because all constructors of Device also work for
+# TensorOptions. Otherwise, an ambiguity error is thrown.
+# See NOTE [ TensorOptions Constructors ].
+- func: to(Tensor self, TensorOptions options, bool non_blocking=false, bool copy=false) -> Tensor
   variants: method
   device_guard: False
 
-- func: to(Tensor self, ScalarType dtype, bool non_blocking=false, bool copy=false) -> Tensor
+- func: to(Tensor self, Device device, ScalarType dtype, bool non_blocking=false, bool copy=false) -> Tensor
   variants: method
   device_guard: False
 
-- func: to(Tensor self, Device device, bool non_blocking=false, bool copy=false) -> Tensor
+- func: to(Tensor self, ScalarType dtype, bool non_blocking=false, bool copy=false) -> Tensor
   variants: method
   device_guard: False
 

aten/src/ATen/templates/Tensor.h

Lines changed: 12 additions & 0 deletions
@@ -261,6 +261,18 @@ class CAFFE2_API Tensor {
   //Tensor * add(Tensor & b);
   ${tensor_method_declarations}
 
+  // We changed .dtype() to return a TypeMeta in #12766. Ideally, we want
+  // at::kDouble and its friends to be TypeMetas, but that hasn't happened yet.
+  // Until it does, these methods maintain backward compatibility for C++
+  // usage like `x.to(y.dtype())`.
+  // TODO: remove the following two once at::kDouble and its friends are TypeMetas.
+  inline Tensor to(caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const {
+    return this->to(/*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy);
+  }
+  inline Tensor to(Device device, caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const {
+    return this->to(device, /*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy);
+  }
+
   template <typename F, typename... Args>
   auto m(F func, Args&&... params) const -> decltype(func(*this, std::forward<Args>(params)...)) {
     return func(*this, std::forward<Args>(params)...);
