Commit 8c8cd95

Skylion007 authored and pytorchmergebot committed
Add missing moves to torch autograd (pytorch#92772)
Applies some additional std::move calls in torch/csrc/autograd at opportunities found via static analysis. Pull Request resolved: pytorch#92772. Approved by: https://github.com/ezyang
1 parent 2a8669c commit 8c8cd95

16 files changed: +119 −79 lines
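For context, the pattern applied throughout this commit: when a function receives a parameter by value (e.g. a Tensor, c10::SymInt, or view-callback here) and its last use only forwards the value to another call, wrapping that final use in std::move lets the callee take ownership instead of copying (for at::Tensor the saving is typically an atomic refcount increment/decrement rather than a deep copy). Below is a minimal, self-contained sketch, not PyTorch code, using a hypothetical Value type that makes the copy-vs-move difference observable:

#include <iostream>
#include <utility>
#include <vector>

// Hypothetical stand-in for a cheaply movable, non-trivially copyable type.
struct Value {
  std::vector<int> data;
  explicit Value(std::vector<int> d) : data(std::move(d)) {}
  Value(const Value& other) : data(other.data) { std::cout << "copy\n"; }
  Value(Value&& other) noexcept : data(std::move(other.data)) {
    std::cout << "move\n";
  }
};

// Callee that takes its argument by value (like norm_jvp taking `Tensor norm`).
Value sink(Value v) {
  return v;
}

// Before: `v` is copied into sink() even though this is its last use.
Value forward_copy(Value v) {
  return sink(v);
}

// After (the change this commit makes in many places): std::move on the
// final use lets sink() steal the buffer instead of duplicating it.
Value forward_move(Value v) {
  return sink(std::move(v));
}

int main() {
  forward_copy(Value{{1, 2, 3}}); // prints "copy" (plus implicit moves on return)
  forward_move(Value{{1, 2, 3}}); // prints only "move"s, no "copy"
}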

torch/csrc/autograd/FunctionsManual.cpp

+60 −43
@@ -30,6 +30,7 @@
 #include <ciso646>
 #include <functional>
 #include <numeric>
+#include <utility>
 
 // Helper functions for autogenerated code
 // These used to be inlined into the codegened Functions.cpp
@@ -361,7 +362,7 @@ Tensor norm_jvp(
     const Tensor& self_t,
     const optional<Scalar>& p_,
     Tensor norm) {
-  return norm_jvp(self_p, self_t, p_, norm, {}, true);
+  return norm_jvp(self_p, self_t, p_, std::move(norm), {}, true);
 }
 
 Tensor _nested_from_padded_backward(
@@ -389,7 +390,7 @@ Tensor linalg_vector_norm_jvp(
   // No need to handle the dtype arg as it's handled via broadcasting in the
   // function
   auto dim = opt_dim.value_or(IntArrayRef({}));
-  return norm_jvp(self_p, self_t, scalar_ord, norm, dim, keepdim);
+  return norm_jvp(self_p, self_t, scalar_ord, std::move(norm), dim, keepdim);
 }
 
 Tensor linalg_vector_norm_backward(
@@ -402,7 +403,8 @@ Tensor linalg_vector_norm_backward(
   // No need to handle the dtype arg as it's handled via broadcasting in the
   // function
   auto dim = opt_dim.value_or(IntArrayRef({}));
-  return norm_backward(grad, self, scalar_ord, norm, dim, keepdim);
+  return norm_backward(
+      std::move(grad), self, scalar_ord, std::move(norm), dim, keepdim);
 }
 
 Tensor pow_backward(Tensor grad, const Tensor& self, const Scalar& exponent) {
@@ -415,7 +417,7 @@ Tensor pow_backward(Tensor grad, const Tensor& self, const Scalar& exponent) {
     Tensor out = (exponent.isComplex())
        ? grad_lambda(exponent.toComplexDouble())
        : grad_lambda(exponent.toDouble());
-    return handle_r_to_c(self, out);
+    return handle_r_to_c(self, std::move(out));
   }
 }
 
@@ -427,7 +429,7 @@ Tensor pow_backward_self(
       exponent == 0.0,
       at::zeros({}, grad.options()),
       grad * (exponent * self.pow(exponent - 1)).conj());
-  return handle_r_to_c(self, out);
+  return handle_r_to_c(self, std::move(out));
 }
 
 // Caveats:
@@ -455,7 +457,7 @@ Tensor pow_backward_exponent(
      grad *
      at::where(
          cond, at::zeros({}, grad.options()), (result * self.log()).conj());
-  return handle_r_to_c(exponent, out);
+  return handle_r_to_c(exponent, std::move(out));
 }
 
 Tensor pow_backward_exponent(
@@ -475,11 +477,11 @@ Tensor pow_backward_exponent(
     auto out = grad *
        at::where(cond(exponent),
                  at::zeros({}, grad.options()),
-                 grad_lambda(result, base));
-    return handle_r_to_c(exponent, out);
+                 grad_lambda(std::move(result), base));
+    return handle_r_to_c(exponent, std::move(out));
   } else {
-    auto out = grad * grad_lambda(result, base);
-    return handle_r_to_c(exponent, out);
+    auto out = grad * grad_lambda(std::move(result), base);
+    return handle_r_to_c(exponent, std::move(out));
   }
 }
 
@@ -521,7 +523,7 @@ Tensor masked_fill_backward(const Tensor& grad, const Tensor& mask) {
 
 Tensor mul_tensor_backward(Tensor grad, Tensor other, ScalarType self_st) {
   auto out = grad * other.conj();
-  return handle_r_to_c(self_st, out);
+  return handle_r_to_c(self_st, std::move(out));
 }
 
 Tensor div_tensor_self_backward(
@@ -534,11 +536,12 @@ Tensor div_tensor_self_backward(
   }
 
   auto result = grad / other.conj();
-  return handle_r_to_c(self_st, result);
+  return handle_r_to_c(self_st, std::move(result));
 }
 
 Tensor div_tensor_self_backward(Tensor grad, Tensor other, ScalarType self_st) {
-  return div_tensor_self_backward(grad, other, self_st, c10::nullopt);
+  return div_tensor_self_backward(
+      std::move(grad), std::move(other), self_st, c10::nullopt);
 }
 
 Tensor div_tensor_other_backward(
@@ -551,11 +554,12 @@ Tensor div_tensor_other_backward(
   }
 
   auto result = -grad * ((self / other) / other).conj();
-  return handle_r_to_c(other, result);
+  return handle_r_to_c(std::move(other), std::move(result));
 }
 
 Tensor div_tensor_other_backward(Tensor grad, Tensor self, Tensor other) {
-  return div_tensor_other_backward(grad, self, other, c10::nullopt);
+  return div_tensor_other_backward(
+      std::move(grad), std::move(self), std::move(other), c10::nullopt);
 }
 
 Tensor permute_backwards(const Tensor& grad, IntArrayRef fwd_dims) {
@@ -649,8 +653,9 @@ Tensor mean_backward(
     c10::SymInt numel,
     bool keepdim) {
   bool is_all_reduce = !opt_dim.has_value() || opt_dim.value().size() == 0;
-  auto n = is_all_reduce ? numel : _safe_size(shape, opt_dim.value());
-  return sum_backward(grad, shape, opt_dim, keepdim) / n;
+  auto n =
+      is_all_reduce ? std::move(numel) : _safe_size(shape, opt_dim.value());
+  return sum_backward(grad, shape, opt_dim, keepdim) / std::move(n);
 }
 
 std::vector<int64_t> reverse_list(const IntArrayRef list) {
@@ -692,7 +697,8 @@ Tensor prod_safe_zeros_backward(
 
   Tensor narrow_reverse =
      reverse_dim(inp.narrow(dim, 1, inp.size(dim) - 1), dim);
-  Tensor exclusive_reverse_nocp = at::cat({ones, narrow_reverse}, dim);
+  Tensor exclusive_reverse_nocp =
+      at::cat({std::move(ones), std::move(narrow_reverse)}, dim);
   Tensor exclusive_reverse =
      reverse_dim(exclusive_reverse_nocp.cumprod(dim), dim);
 
@@ -1387,8 +1393,8 @@ Tensor renorm_backward(
   }
   grad_output =
      grad_output.sum(reduce_dims, /*keepdim=*/true, /*dtype=*/real_acc_type);
-  auto nb =
-      norm_backward(grad_output, self, p, norm, reduce_dims, /*keepdim=*/true);
+  auto nb = norm_backward(
+      std::move(grad_output), self, p, norm, reduce_dims, /*keepdim=*/true);
 
   auto invnorm = (norm + 1e-7).reciprocal();
   auto grad_norm = maxnorm * invnorm * (grad - invnorm * nb);
@@ -1571,7 +1577,7 @@ Tensor std_backward(
     c10::optional<int64_t> correction,
     bool keepdim) {
   auto grad_var = (grad / (result * 2)).masked_fill_(result == 0, 0);
-  return var_backward(grad_var, self, dim, correction, keepdim);
+  return var_backward(std::move(grad_var), self, dim, correction, keepdim);
 }
 
 Tensor var_mean_backward(
@@ -1593,7 +1599,7 @@ Tensor var_mean_backward(
        dim_opt.value_or(IntArrayRef({})),
        self.sym_numel(),
        keepdim);
-    gself = gself.defined() ? gself + aux : aux;
+    gself = gself.defined() ? gself + aux : std::move(aux);
   }
   return gself;
 }
@@ -1618,7 +1624,7 @@ Tensor std_mean_backward(
        dim_opt.value_or(IntArrayRef({})),
        self.sym_numel(),
        keepdim);
-    gself = gself.defined() ? gself + aux : aux;
+    gself = gself.defined() ? gself + aux : std::move(aux);
   }
   return gself;
 }
@@ -1637,8 +1643,9 @@ Tensor masked_scatter_backward(
    // because mask_selected returns a 1-d tensor with size of masked elements
    // that are 1, we need to fill out the rest with zeros then reshape back to
    // tensor2's size.
-    auto zeros_fillin = at::zeros_symint({diff_nelem}, grad.options());
-    mask_selected = at::cat({mask_selected, zeros_fillin}, 0);
+    auto zeros_fillin =
+        at::zeros_symint({std::move(diff_nelem)}, grad.options());
+    mask_selected = at::cat({mask_selected, std::move(zeros_fillin)}, 0);
   }
   return mask_selected.view_symint(sizes);
 }
@@ -1661,7 +1668,7 @@ Tensor cholesky_jvp(const Tensor& dA, const Tensor& L, bool upper) {
   dL = at::linalg_solve_triangular(L_.mH(), dL, /*upper=*/true, /*left=*/false);
   dL = dL.tril() - dL.diagonal(0, -2, -1).mul(0.5).diag_embed();
   dL = L_.matmul(dL);
-  return upper ? dL.mH() : dL;
+  return upper ? dL.mH() : std::move(dL);
 }
 
 Tensor cholesky_backward(const Tensor& gL, bool upper, const Tensor& L) {
@@ -1899,7 +1906,7 @@ Tensor glu_double_backward(
   auto gI_second_half =
      ggI_second_half_times_first_half * gO * second_order_sh +
      ggI_first_half * gO * sig_one_sub_sig;
-  return at::cat({gI_first_half, gI_second_half}, dim);
+  return at::cat({std::move(gI_first_half), std::move(gI_second_half)}, dim);
 }
 
 Tensor glu_double_backward_grad_output(
@@ -2919,7 +2926,8 @@ Tensor as_strided_scatter_backward(
      grad_.new_zeros_symint(input_geometry.sym_sizes())
          .as_strided_symint(
              input_geometry.sym_sizes(), input_geometry.sym_strides());
-  auto result_slice = result.as_strided_symint(sizes, strides, storage_offset);
+  auto result_slice =
+      result.as_strided_symint(sizes, strides, std::move(storage_offset));
   result_slice.copy_(grad_slice);
   return result;
 }
@@ -3014,7 +3022,12 @@ Tensor slice_backward_wrapper(
   auto end_val = end.has_value() ? end.value() : INT64_MAX;
 
   return slice_backward_symint(
-      grad, input_sizes, dim, start_val, end_val, step);
+      grad,
+      input_sizes,
+      dim,
+      std::move(start_val),
+      std::move(end_val),
+      std::move(step));
 }
 
 std::tuple<Tensor, Tensor, Tensor> linalg_svd_jvp(
@@ -3761,7 +3774,9 @@ Tensor differential_analytic_matrix_function(
   // eg. if both are BatchedTensor at different level.
   if (areAnyTensorSubclassLike({A, grad})) {
     meta_grad = at::cat(
-        {at::cat({A, grad}, -1), at::cat({at::zeros_like(A), A}, -1)}, -2);
+        {at::cat({A, grad}, -1),
+         at::cat({at::zeros_like(A), std::move(A)}, -1)},
+        -2);
   } else {
     meta_grad = at::zeros(meta_grad_sizes, grad.options());
     meta_grad.narrow(-2, 0, n).narrow(-1, 0, n).copy_(A);
@@ -4408,7 +4423,7 @@ std::tuple<Tensor, Tensor, Tensor> batchnorm_double_backward(
     ggO = ggO.defined() ? ggO.add_(ggO_G_term) : ggO_G_term;
   }
   if (ggB.defined()) {
-    auto ggO_B_term = ggB_expanded;
+    auto ggO_B_term = std::move(ggB_expanded);
     ggO = ggO.defined() ? ggO.add_(ggO_B_term) : ggO_B_term;
   }
 
@@ -4547,7 +4562,7 @@ std::tuple<Tensor, Tensor, Tensor> layer_norm_double_backward(
     ggO = ggO.defined() ? ggO.add_(ggO_G_term) : ggO_G_term;
   }
   if (ggB.defined()) {
-    auto ggO_B_term = ggB_expanded;
+    auto ggO_B_term = std::move(ggB_expanded);
     ggO = ggO.defined() ? ggO.add_(ggO_B_term) : ggO_B_term;
   }
   if (ggO.defined()) {
@@ -4589,7 +4604,7 @@ infinitely_differentiable_native_group_norm_backward(
   Tensor ds;
   Tensor db;
   if (dY.defined()) {
-    dY_tensor = dY.reshape_symint({N, G, D, HxW});
+    dY_tensor = dY.reshape_symint({N, G, D, std::move(HxW)});
     ds = (dY_tensor * X_tensor).sum(3).unsqueeze_(-1);
     db = dY_tensor.sum(3).unsqueeze_(-1);
   }
@@ -4613,12 +4628,12 @@ infinitely_differentiable_native_group_norm_backward(
     Tensor c = (isDefined(gamma) ? (db * gamma_tensor).sum(2) : db.sum(2))
                   .unsqueeze_(-2);
     b = (c * mean_tensor - b) * rstd_cube * s;
-    c = -b * mean_tensor - c * rstd_tensor * s;
+    c = -b * mean_tensor - c * rstd_tensor * std::move(s);
     dX = a * dY_tensor + b * X_tensor + c;
     if (dmean.defined() && drstd.defined()) {
       dX += var_mean_backward(
          dvar,
-          dmean.view_symint({N, G, 1, 1}),
+          dmean.view_symint({std::move(N), G, 1, 1}),
          X_tensor,
          IntArrayRef{2, 3},
          0,
@@ -4628,7 +4643,7 @@ infinitely_differentiable_native_group_norm_backward(
   } else if (dmean.defined() && drstd.defined()) {
     dX = var_mean_backward(
        dvar,
-        dmean.view_symint({N, G, 1, 1}),
+        dmean.view_symint({std::move(N), G, 1, 1}),
        X_tensor,
        IntArrayRef{2, 3},
        0,
@@ -5463,7 +5478,7 @@ Tensor linalg_lu_solve_jvp(
                /*unitriangular*/ true)
                .matmul(P.mT());
     // dX = op_2(R^H) + S
-    return (left ? R.mH() : R) + S;
+    return (left ? R.mH() : std::move(R)) + S;
   }
 }
 
@@ -5546,7 +5561,7 @@ std::tuple<Tensor, Tensor> linalg_solve_backward(
     gA_ = left ? -gB_.matmul(X_.mH()) : -X_.mH().matmul(gB_);
   }
   return std::make_tuple(
-      A_requires_grad ? gA_ : Tensor{},
+      A_requires_grad ? std::move(gA_) : Tensor{},
      B_requires_grad ? matrix_to_vector(gB_) : Tensor{});
 }
 
@@ -6116,7 +6131,7 @@ Tensor linalg_lu_backward(
        /*left=*/true,
        /*unitriangular=*/true);
 
-    return pivot ? P.matmul(std::move(A_grad)) : A_grad;
+    return pivot ? P.matmul(std::move(A_grad)) : std::move(A_grad);
   } else if (m < n) {
     // Wide case
     // A1_grad = P L^{-H} [U1_grad + (L^H L_grad o 1_L - U_grad U^H o 1_U)
@@ -6275,7 +6290,8 @@ std::tuple<Tensor, Tensor> linalg_lu_jvp(
        at::linalg_solve_triangular(
            L1, PdA2, /*upper=*/false, /*left=*/true, /*unitriangular*/ true) -
        dK.tril(-1).matmul(U2);
-    return std::make_tuple(std::move(dL1), at::cat({dU1, dU2}, /*dim=*/-1));
+    return std::make_tuple(
+        std::move(dL1), at::cat({std::move(dU1), std::move(dU2)}, /*dim=*/-1));
   } else {
     // we only need to update dL2 defined as
     // dL2 := PdA2 U^{-1} - L2 dK.triu()
@@ -6284,7 +6300,8 @@ std::tuple<Tensor, Tensor> linalg_lu_jvp(
     auto dL2 =
        at::linalg_solve_triangular(U1, PdA2, /*upper=*/true, /*left=*/false) -
        L2.matmul(dK.triu());
-    return std::make_tuple(at::cat({dL1, dL2}, /*dim=*/-2), std::move(dU1));
+    return std::make_tuple(
+        at::cat({std::move(dL1), std::move(dL2)}, /*dim=*/-2), std::move(dU1));
   }
 }
 
@@ -6471,7 +6488,7 @@ std::tuple<Tensor, Tensor> scatter_reduce_backward(
    auto node = std::make_shared<DelayedError>(
        "scatter_reduce(): Double backward is unsupported for src when >1 zeros in src are scattered to the same position in self",
        /* num inputs */ 1);
-    auto result = node->apply({grad_src1});
+    auto result = node->apply({std::move(grad_src1)});
    grad_src = result[0];
   } else {
    grad_src = grad_src1;
@@ -6565,7 +6582,7 @@ std::tuple<Tensor, Tensor> index_reduce_backward(
    auto node = std::make_shared<DelayedError>(
        "index_reduce(): Double backward is unsupported for source when >1 zeros in source are scattered to the same position in self",
        /* num inputs */ 1);
-    auto result = node->apply({grad_src1});
+    auto result = node->apply({std::move(grad_src1)});
    grad_src = result[0];
   } else {
    grad_src = grad_src1;

torch/csrc/autograd/VariableTypeManual.cpp

+5 −3
@@ -11,6 +11,8 @@
 #include <torch/csrc/utils/memory.h>
 #include <torch/library.h>
 
+#include <utility>
+
 using namespace at;
 using namespace torch::autograd::generated;
 using torch::autograd::as_view;
@@ -397,7 +399,7 @@ Tensor detach(c10::DispatchKeySet ks, const Tensor& self) {
      /* output */ out,
      /* is_bw_differentiable */ false,
      /* is_fw_differentiable */ false,
-      /* view_func */ func,
+      /* view_func */ std::move(func),
      /* creation_meta */ CreationMeta::DEFAULT,
      /*allow_tensor_metadata_change=*/false);
 
@@ -421,7 +423,7 @@ Tensor _fw_primal(c10::DispatchKeySet ks, const Tensor& self, int64_t level) {
      /* output */ tmp,
      /* is_bw_differentiable */ true,
      /* is_fw_differentiable */ false,
-      /* view_func */ func,
+      /* view_func */ std::move(func),
      /* creation_meta */ CREATION_META_DEFINITION);
 
   return result;
@@ -449,7 +451,7 @@ Tensor _make_dual(
      /* output */ tmp,
      /* is_bw_differentiable */ true,
      /* is_fw_differentiable */ false,
-      /* view_func */ func,
+      /* view_func */ std::move(func),
      /* creation_meta */ CREATION_META_DEFINITION);
 
   return result;
