Commit 56bdd87

ezyang authored and facebook-github-bot committed
Get rid of some uses of type() (pytorch#11215)
Summary:
Pull Request resolved: pytorch#11215

I found these by deleting the implicit conversion of Type to TensorOptions and then fixing the call sites that broke. This isn't a complete refactor; I ran out of steam after fixing this many and decided to keep the implicit conversion. Still, why waste a perfectly good refactor?

Reviewed By: gchanan, cpuhrsch

Differential Revision: D9634750

fbshipit-source-id: 4d8fb778e13e6e24b888b1314a02709b2cb00b62
1 parent 9ca63c5 | commit 56bdd87

10 files changed (+59 -63 lines)
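
For context on the pattern this commit migrates: at::Type bundles a backend (CPU/CUDA) and a scalar type, while TensorOptions is the builder-style bundle of dtype, device, and layout that factory functions such as at::empty and at::zeros accept. A minimal standalone sketch of the before/after styles, assuming the ATen API of this commit's era (not part of the diff; names are illustrative):

    #include <ATen/ATen.h>

    int main() {
      // An existing tensor supplies the dtype/device to match.
      at::Tensor self = at::ones({4}, at::kFloat);

      // Old style: pass the Type; this leans on the implicit
      // Type -> TensorOptions conversion the summary mentions.
      at::Tensor a = at::empty({4}, self.type());

      // New style: pass TensorOptions directly; variations
      // chain off the options object.
      at::Tensor b = at::empty({4}, self.options());
      at::Tensor c = at::empty({4}, self.options().dtype(at::kLong));
      return 0;
    }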

aten/src/ATen/cudnn/Descriptors.h (+1 -1)

@@ -257,7 +257,7 @@ struct AT_CUDA_API DropoutDescriptor
     AT_CUDNN_CHECK(cudnnDropoutGetStatesSize(handle, &state_size));
     AT_ASSERT(type.is_cuda());
     AT_ASSERT(type.scalarType() == kByte);
-    state = at::empty({static_cast<int64_t>(state_size)}, type);
+    state = at::empty({static_cast<int64_t>(state_size)}, type.options());
     AT_CUDNN_CHECK(cudnnSetDropoutDescriptor(mut_desc(), handle, dropout, state.data_ptr(), state_size, seed));
   }
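
Here type is the at::Type& passed into the descriptor; Type::options() packages its backend and scalar type as a TensorOptions, so the allocation no longer depends on the implicit Type conversion. A hedged sketch of the same call pattern outside cuDNN, again assuming era-appropriate ATen (variable names are illustrative):

    #include <ATen/ATen.h>

    int main() {
      at::Tensor probe = at::ones({2}, at::kByte);
      at::Type& type = probe.type();  // backend + scalar type bundle
      // Allocate a byte buffer matching that Type, via explicit options:
      at::Tensor state = at::empty({16}, type.options());
      return 0;
    }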

aten/src/ATen/native/Unique.cpp (+4 -4)

@@ -21,7 +21,7 @@ std::tuple<Tensor, Tensor> _unique_cpu_template(
   const Tensor& input = self.contiguous();
   const scalar_t* input_data = input.data<scalar_t>();
   std::unordered_set<scalar_t> set(input_data, input_data + input.numel());
-  Tensor output = at::empty({static_cast<int64_t>(set.size())}, input.type());
+  Tensor output = at::empty({static_cast<int64_t>(set.size())}, input.options());
   scalar_t* output_data = output.data<scalar_t>();

   if (sorted) {
@@ -32,7 +32,7 @@ std::tuple<Tensor, Tensor> _unique_cpu_template(
     std::copy(set.begin(), set.end(), output_data);
   }

-  Tensor inverse_indices = at::empty({0}, self.type().toScalarType(kLong));
+  Tensor inverse_indices = at::empty({0}, self.options().dtype(kLong));
   if (return_inverse) {
     inverse_indices.resize_(input.sizes());
     int64_t* inverse_indices_data = inverse_indices.data<int64_t>();
@@ -103,12 +103,12 @@ std::tuple<Tensor, Tensor> _unique_dim_cpu_template(
     return false;
   });

-  Tensor input_sorted = at::empty(input_flat.sizes(), input_flat.type());
+  Tensor input_sorted = at::empty(input_flat.sizes(), input_flat.options());
   for (int i = 0; i < indices.size(); ++i) {
     input_sorted[i] = input_flat[indices[i]];
   }

-  Tensor inverse_indices = at::empty(indices.size(), self.type().toScalarType(kLong));
+  Tensor inverse_indices = at::empty(indices.size(), self.options().dtype(kLong));
   std::vector<Tensor> input_unbind = at::unbind(input_sorted, 0);
   auto last = _unique_dim_cpu_impl(
     input_unbind.begin(), input_unbind.end(), indices, inverse_indices);
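
The recurring rewrite in this file: self.type().toScalarType(kLong) manufactured a whole new Type just to change the element type, whereas self.options().dtype(kLong) overrides only the dtype field while keeping self's device and layout. A small sketch under the same API assumptions:

    #include <ATen/ATen.h>

    int main() {
      at::Tensor self = at::rand({3, 3});
      // An empty long tensor on self's device, later resized in place,
      // mirroring the inverse_indices handling in the diff above.
      at::Tensor inverse_indices = at::empty({0}, self.options().dtype(at::kLong));
      inverse_indices.resize_({3, 3});
      return 0;
    }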

aten/src/ATen/native/cuda/EmbeddingBag.cu (+5 -5)

@@ -175,15 +175,15 @@ Tensor embedding_bag_backward_cuda_sum_avg(

   Tensor &bag_size = const_cast<Tensor &>(bag_size_);

-  auto grad_weight = at::zeros({num_weights, grad.size(1)}, grad.type());
+  auto grad_weight = at::zeros({num_weights, grad.size(1)}, grad.options());

   cudaStream_t stream = at::cuda::getCurrentCUDAStream();

   ptrdiff_t numel = indices.numel();
   int64_t stride = grad_weight.stride(0);

-  auto sorted_indices = indices.type().tensor(indices.sizes());
-  auto orig_indices = indices.type().tensor(indices.sizes());
+  auto sorted_indices = at::empty_like(indices);
+  auto orig_indices = at::empty_like(indices);
   using device_ptr = thrust::device_ptr<int64_t>;

   // Sort the inputs into sorted with the corresponding indices; we
@@ -208,7 +208,7 @@ Tensor embedding_bag_backward_cuda_sum_avg(

   Tensor count;
   if (scale_grad_by_freq) {
-    count = indices.type().tensor(indices.sizes());
+    count = at::empty_like(indices);

     auto allocator = THCThrustAllocator(globalContext().lazyInitCUDA());
     auto policy = thrust::cuda::par(allocator).on(stream);
@@ -278,7 +278,7 @@ Tensor embedding_bag_backward_cuda_max(const Tensor &grad,
                                        const Tensor &max_indices,
                                        int64_t num_weights) {

-  auto grad_weight = at::zeros({num_weights, grad.size(1)}, grad.type());
+  auto grad_weight = at::zeros({num_weights, grad.size(1)}, grad.options());

   int64_t stride = grad_weight.stride(0);
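
Beyond the grad.options() change, this file drops the deprecated Type-as-factory spelling indices.type().tensor(indices.sizes()) in favor of at::empty_like(indices), which allocates an uninitialized tensor with the same sizes, dtype, and device in one call. A minimal CPU-side sketch (the diff's tensors actually live on CUDA):

    #include <ATen/ATen.h>

    int main() {
      at::Tensor indices = at::zeros({8}, at::kLong);
      // Same sizes and options as `indices`, contents uninitialized.
      auto sorted_indices = at::empty_like(indices);
      auto orig_indices = at::empty_like(indices);
      return 0;
    }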

aten/src/ATen/native/cuda/LossCTC.cu (+3 -3)

@@ -185,7 +185,7 @@ std::tuple<Tensor, Tensor> ctc_loss_gpu_template(const Tensor& log_probs, const
   int64_t tg_target_stride;

   int64_t max_target_length;
-  auto tg_batch_offsets = at::empty({batch_size}, TensorOptions(at::CPU(kLong)));
+  auto tg_batch_offsets = at::empty({batch_size}, at::device(at::kCPU).dtype(at::kLong));
   auto tg_batch_offsets_data = tg_batch_offsets.data<int64_t>();
   if (targets.dim() == 1) { // concatenated targets
     int64_t pos = 0;
@@ -219,8 +219,8 @@ std::tuple<Tensor, Tensor> ctc_loss_gpu_template(const Tensor& log_probs, const
            " (while checking arguments for ", c, ")");
   }

-  auto target_lengths_t = at::tensor(target_lengths, targets.options().device(at::Device(at::Device::Type::CPU)).dtype(kLong)).toType(targets.type().toScalarType(kLong));
-  auto input_lengths_t = at::tensor(input_lengths, targets.options().device(at::Device(at::Device::Type::CPU)).dtype(kLong)).toType(targets.type().toScalarType(kLong));
+  auto target_lengths_t = at::tensor(target_lengths, targets.options().dtype(kLong));
+  auto input_lengths_t = at::tensor(input_lengths, targets.options().dtype(kLong));
   tg_batch_offsets = tg_batch_offsets.toType(targets.type().toScalarType(kLong));

   Tensor log_alpha = at::empty({batch_size, log_probs.size(0), 2*max_target_length+1}, log_probs.options());
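
Two simplifications land here: TensorOptions(at::CPU(kLong)) becomes the builder chain at::device(at::kCPU).dtype(at::kLong), and the lengths tensors lose the build-on-CPU-then-toType round trip because at::tensor(values, options) can target the desired dtype and device directly. A sketch of the builder form with illustrative values, under the same era assumptions:

    #include <ATen/ATen.h>
    #include <vector>

    int main() {
      // CPU tensor with int64 elements, independent of any default device.
      auto tg_batch_offsets =
          at::empty({4}, at::device(at::kCPU).dtype(at::kLong));

      std::vector<int64_t> target_lengths = {3, 1, 4, 1};
      // One-step construction at the desired dtype.
      auto target_lengths_t = at::tensor(target_lengths, at::dtype(at::kLong));
      return 0;
    }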

aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu (+4 -4)

@@ -78,7 +78,7 @@ Tensor& s_addmm_out_sparse_dense_cuda(Tensor& r_, const Tensor& t, const SparseT
   LongTensor rowIndices = indices.select(0, 0);
   LongTensor colIndices = indices.select(0, 1);
   IntTensor csr = _to_csr_int(rowIndices, m, nnz);
-  IntTensor colIndicesInt = at::empty({colIndices.size(0)}, indices.type().toScalarType(kInt));
+  IntTensor colIndicesInt = at::empty({colIndices.size(0)}, indices.options().dtype(kInt));
   colIndicesInt.copy_(colIndices);

   // No half support, so we don't have to use CUDATypeConversion
@@ -153,7 +153,7 @@ Tensor s_addmm_sparse_dense_cuda(
     Scalar beta,
     Scalar alpha
 ) {
-  Tensor r = t.type().tensor();
+  Tensor r = at::empty({0}, t.options());
   s_addmm_out_sparse_dense_cuda(r, t, sparse, dense, beta, alpha);
   return r;
 }
@@ -208,7 +208,7 @@ SparseTensor& hspmm_out_sparse_cuda(SparseTensor& r_, const SparseTensor& sparse

   LongTensor indices = at::empty({1, nnz}, CUDA(kLong));
   // create values in column-major format to avoid copying in spaddmm
-  Tensor values = at::empty({n, nnz}, dense.type());
+  Tensor values = at::empty({n, nnz}, dense.options());
   values.transpose_(0, 1);

   // why does sparse need to be cloned? If this is really necessary maybe we
@@ -434,7 +434,7 @@ SparseTensor& mul_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t_, cons
   Tensor t_values_ = t._values();
   LongTensor s_indices_ = src._indices();
   Tensor s_values_ = src._values();
-  LongTensor r_indices_ = t_indices_.type().tensor({sparseDims, max_nnz});
+  LongTensor r_indices_ = at::empty({sparseDims, max_nnz}, t_indices_.options());
   Tensor r_values_ = _new_values_with_size_of(t_values_, max_nnz).zero_();
   r_.resize_as_(src);
   _get_sparse_impl(r_)->set_indices_and_values_unsafe(r_indices_, r_values_);
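
The s_addmm_sparse_dense_cuda change shows the replacement for the zero-argument factory: t.type().tensor() produced an empty (zero-element) tensor of t's type, and at::empty({0}, t.options()) is the explicit equivalent, a placeholder that the out-variant then resizes and fills. Sketched standalone, under the same API assumptions:

    #include <ATen/ATen.h>

    int main() {
      at::Tensor t = at::rand({2, 2});
      // Zero-element placeholder with t's dtype/device; a subsequent
      // out-variant call is expected to resize and populate it.
      at::Tensor r = at::empty({0}, t.options());
      r.resize_({2, 2});
      return 0;
    }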

tools/autograd/derivatives.yaml (+6 -6)

@@ -283,7 +283,7 @@
   self: grad

 - name: gather(Tensor self, int64_t dim, Tensor index)
-  self: at::zeros(self.sizes(), grad.type()).scatter_add_(dim, index, grad)
+  self: at::zeros(self.sizes(), grad.options()).scatter_add_(dim, index, grad)

 - name: ge_(Tensor self, Scalar other)
   self: zeros_like(self)
@@ -346,7 +346,7 @@
   value: grad.index_select(dim, index).sum()

 - name: index_select(Tensor self, int64_t dim, Tensor index)
-  self: at::zeros(self.sizes(), grad.type()).index_add_(dim, index, grad)
+  self: at::zeros(self.sizes(), grad.options()).index_add_(dim, index, grad)

 - name: inverse(Tensor self)
   self: -at::mm(result.t(), at::mm(grad, result.t()))
@@ -511,14 +511,14 @@
   self: zeros_like(grad)

 - name: normal(Tensor mean, double std, Generator generator)
-  mean: at::zeros(mean.sizes(), grad.type())
+  mean: at::zeros(mean.sizes(), grad.options())

 - name: normal(double mean, Tensor std, Generator generator)
-  std: at::zeros(std.sizes(), grad.type())
+  std: at::zeros(std.sizes(), grad.options())

 - name: normal(Tensor mean, Tensor std, Generator generator)
-  mean: at::zeros(mean.sizes(), grad.type())
-  std: at::zeros(std.sizes(), grad.type())
+  mean: at::zeros(mean.sizes(), grad.options())
+  std: at::zeros(std.sizes(), grad.options())

 - name: orgqr(Tensor self, Tensor input2)
   self: not_implemented("orgqr")
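
These YAML entries are C++ expressions spliced into the generated autograd functions, so the same idiom applies: a gradient buffer should inherit grad's dtype and device from its options rather than go through its Type. The gather backward from the diff, extracted as a standalone sketch with illustrative shapes:

    #include <ATen/ATen.h>

    int main() {
      at::Tensor self = at::rand({4, 3});
      at::Tensor index = at::zeros({2, 3}, at::kLong);
      at::Tensor grad = at::ones({2, 3});  // gradient of gather's output
      int64_t dim = 0;
      // Scatter the incoming gradient back into self's shape.
      at::Tensor grad_self =
          at::zeros(self.sizes(), grad.options()).scatter_add_(dim, index, grad);
      return 0;
    }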
