Commit 583d216

Skylion007 authored and pytorchmergebot committed
Fix: [ATen] add more missing moves - part 2 (pytorch#89000)
Applies some more missing std::move calls found by static analysis. This should improve performance and reduce unnecessary copies. This PR only targets ATen for now. And before you ask about the edits: std::move is optimal in a ternary operator, since copy elision cannot happen in one. The best option would probably be rewriting it as an if/else, but ultimately this should be performant enough. Follow-up to pytorch#88512 and pytorch#88514.

Pull Request resolved: pytorch#89000
Approved by: https://github.com/ezyang
1 parent 9ef1d55 commit 583d216
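
Most of the edits in this commit follow two patterns: a parameter of a cheap-to-move type (std::string, std::function, c10::optional, a container) is taken by value and should be moved into its final destination, or a value is returned through a ternary where the implicit move on return does not apply. The following is a minimal standalone sketch of both patterns, not code from the commit; the names Widget and make_label are invented for illustration.

// Minimal sketch of the "sink parameter + std::move" and ternary-return cases.
// Widget and make_label are hypothetical; they are not ATen types.
#include <string>
#include <utility>

struct Widget {
  // Sink parameter: take by value, then move into the member.
  // The constructor itself never performs an extra copy.
  explicit Widget(std::string label) : label_(std::move(label)) {}
  std::string label_;
};

// Returning through a ternary: the conditional expression is an lvalue, so the
// implicit move that applies to a plain `return a;` does not kick in here;
// without the explicit std::move, the selected arm would be copied.
std::string make_label(bool upper, std::string a, std::string b) {
  return upper ? std::move(a) : std::move(b);
}

int main() {
  Widget w(make_label(true, "alpha", "beta"));
  return w.label_.empty() ? 1 : 0;
}

The diffs below apply exactly this: by-value std::string, std::function, c10::optional, and vector arguments that were being copied again at their last use are now moved instead.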

48 files changed: +247 -190 lines changed

aten/src/ATen/Dispatch.cpp

+1 -1
@@ -6,7 +6,7 @@ namespace at { namespace detail {
 void record_kernel_function_dtype(std::string name) {
   RECORD_FUNCTION_WITH_SCOPE(
       at::RecordScope::KERNEL_FUNCTION_DTYPE,
-      name,
+      std::move(name),
       c10::ArrayRef<const c10::IValue>{});
 }

aten/src/ATen/FunctionalInverses.cpp

+4 -2
@@ -3,6 +3,8 @@
 
 #include <ATen/ATen.h>
 #include <ATen/ExpandUtils.h>
+
+#include <utility>
 namespace at {
 namespace functionalization {
@@ -129,7 +131,7 @@ Tensor FunctionalInverses::_neg_view_copy_inverse(const Tensor& base, const Tens
 
 Tensor FunctionalInverses::as_strided_copy_inverse(const Tensor& base, const Tensor& mutated_view, bool reapply_views, at::SymIntArrayRef size, at::SymIntArrayRef stride, c10::optional<c10::SymInt> storage_offset) {
   // Pessimism: we can't reapply views for as_strided_scatter.
-  return base.as_strided_scatter_symint(mutated_view, size, stride, storage_offset);
+  return base.as_strided_scatter_symint(mutated_view, size, stride, std::move(storage_offset));
 }
 
 Tensor FunctionalInverses::diagonal_copy_inverse(const Tensor& base, const Tensor& mutated_view, bool reapply_views, int64_t offset, int64_t dim1, int64_t dim2) {
@@ -175,7 +177,7 @@ Tensor FunctionalInverses::lift_fresh_copy_inverse(const Tensor& base, const Ten
 
 Tensor FunctionalInverses::slice_copy_Tensor_inverse(const Tensor& base, const Tensor& mutated_view, bool reapply_views, int64_t dim, c10::optional<c10::SymInt> start, c10::optional<c10::SymInt> end, c10::SymInt step) {
   // Pessimism: we can't reapply views for slice_scatter.
-  return base.slice_scatter_symint(mutated_view, dim, start, end, step);
+  return base.slice_scatter_symint(mutated_view, dim, std::move(start), std::move(end), std::move(step));
 }
 
 Tensor FunctionalInverses::split_copy_Tensor_inverse(const Tensor& base, const Tensor& mutated_view, bool reapply_views, int64_t mutated_view_idx, c10::SymInt split_size, int64_t dim) {

aten/src/ATen/FunctionalStorageImpl.h

+3 -1
@@ -33,7 +33,9 @@ struct ViewMeta {
       std::function<Tensor(const Tensor&, int64_t)> forward,
       std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse,
       int64_t out_idx = 0)
-      : forward_fn(forward), reverse_fn(reverse), out_index(out_idx) {}
+      : forward_fn(std::move(forward)),
+        reverse_fn(std::move(reverse)),
+        out_index(out_idx) {}
 
   std::function<Tensor(const Tensor&, int64_t)> forward_fn;
   std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse_fn;

aten/src/ATen/FunctionalTensorWrapper.cpp

+1 -1
@@ -564,7 +564,7 @@ std::vector<Tensor> create_functional_tensor_with_view_meta(ITensorListRef view_
 void mutate_view_meta(const at::Tensor& self, functionalization::ViewMeta meta) {
   TORCH_INTERNAL_ASSERT(at::functionalization::impl::isFunctionalTensor(self));
   auto self_impl = at::functionalization::impl::unsafeGetFunctionalWrapper(self);
-  self_impl->mutate_view_meta(meta);
+  self_impl->mutate_view_meta(std::move(meta));
 }
 
 // Note [Propagating strides in the functionalization pass]

aten/src/ATen/FunctionalizeFallbackKernel.cpp

+4 -2
@@ -22,6 +22,8 @@
 #include <ATen/ops/as_strided_copy.h>
 #include <ATen/ops/empty_strided_native.h>
 #include <ATen/ops/_unsafe_view.h>
+
+#include <utility>
 #endif
 
 namespace {
@@ -169,7 +171,7 @@ const at::Tensor & resize__functionalization(c10::DispatchKeySet dispatchKeySet,
       return base.as_strided_scatter(mutated_view, size, compute_contiguous_strides(size));
     }
   );
-  at::functionalization::impl::mutate_view_meta(self, view_meta);
+  at::functionalization::impl::mutate_view_meta(self, std::move(view_meta));
   return self;
 }
 
@@ -278,7 +280,7 @@ at::Tensor _unsafe_view_functionalize(const at::Tensor & self, at::SymIntArrayRe
     }
   );
 
-  auto out = at::functionalization::impl::create_functional_tensor_with_view_meta(tmp_output, self, view_meta);
+  auto out = at::functionalization::impl::create_functional_tensor_with_view_meta(tmp_output, self, std::move(view_meta));
   // See Note [Propagating strides in the functionalization pass]
   // (for _unsafe_view, I'm just manually doing the shape inference rule here instead of calling the meta function for unsafe_view)
   auto inferred_size = at::infer_size_dv(size, self.sym_numel());

aten/src/ATen/LegacyBatchingRegistrations.cpp

+3 -1
@@ -8,6 +8,8 @@
 #include <c10/util/irange.h>
 #include <c10/core/SymIntArrayRef.h>
 
+#include <utility>
+
 namespace at {
 
 // NOTE: [What is a batching rule?]
@@ -143,7 +145,7 @@ Tensor binary_pointwise_batching_rule(
     logical_other = logical_other.to(result_type);
   }
   auto physical_args = BroadcastingVmapTransform::logicalToPhysical(
-      {logical_self, logical_other});
+      {std::move(logical_self), std::move(logical_other)});
   auto result = Func(physical_args[0].tensor(), physical_args[1].tensor(), args...);
   return physical_args[0].getPhysicalToLogicalMap().apply(result);
 }

aten/src/ATen/ParallelNative.cpp

+3 -2
@@ -12,6 +12,7 @@
 #endif // C10_MOBILE
 
 #include <atomic>
+#include <utility>
 
 #ifdef _OPENMP
 #include <omp.h>
@@ -179,7 +180,7 @@ void invoke_parallel(
     }
   };
   state.remaining = num_tasks;
-  _run_with_pool(task, num_tasks);
+  _run_with_pool(std::move(task), num_tasks);
 
   // Wait for all tasks to finish.
   {
@@ -277,7 +278,7 @@ bool in_parallel_region() {
 void intraop_launch(std::function<void()> func) {
 #ifndef C10_MOBILE
   if (!in_parallel_region() && get_num_threads() > 1) {
-    _get_intraop_pool().run(func);
+    _get_intraop_pool().run(std::move(func));
   } else {
     // execute inline if we're in parallel region
     func();
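
For the ParallelNative change above, the relevant effect is that the std::function, together with whatever its closure captures, is moved into the pool rather than copied. Below is a rough sketch with a hypothetical Pool type used only for illustration; it is not ATen's actual thread-pool API.

// Hypothetical Pool type for illustration only; not ATen's thread-pool API.
#include <functional>
#include <utility>
#include <vector>

struct Pool {
  std::vector<std::function<void()>> queue;
  // Sink parameter: the caller decides whether to copy or move the task in.
  void run(std::function<void()> task) {
    queue.push_back(std::move(task));  // move the callable into the queue
  }
};

int main() {
  Pool pool;
  std::vector<int> big(1 << 20, 1);  // expensive-to-copy captured state
  std::function<void()> func = [big] { (void)big.size(); };
  pool.run(std::move(func));  // the capture moves with the closure; no second copy
  // func is now in a valid but unspecified state and should not be reused.
  return 0;
}

In the ATen hunk above, func is not used again on the branch that hands it to the pool, so moving from it is safe; the else branch still invokes the original func().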

aten/src/ATen/core/custom_class.cpp

+1 -1
@@ -45,7 +45,7 @@ namespace torch {
 namespace detail {
 
 void record_custom_class(std::string name) {
-  RECORD_FUNCTION_WITH_SCOPE(at::RecordScope::CUSTOM_CLASS, name, c10::ArrayRef<const c10::IValue>{});
+  RECORD_FUNCTION_WITH_SCOPE(at::RecordScope::CUSTOM_CLASS, std::move(name), c10::ArrayRef<const c10::IValue>{});
 }
 
 } // namespace detail

aten/src/ATen/core/dispatch/Dispatcher.cpp

+3 -2
@@ -1,7 +1,8 @@
 #include <ATen/core/dispatch/Dispatcher.h>
+#include <chrono>
 #include <list>
 #include <sstream>
-#include <chrono>
+#include <utility>
 
 namespace c10 {
 
@@ -206,7 +207,7 @@ RegistrationHandleRAII Dispatcher::registerDef(FunctionSchema schema, std::strin
   TORCH_CHECK(op.operatorDef_->def_count == 0, "Tried to register an operator (", schema, ") with the same name and overload name multiple times.",
               " Each overload's schema should only be registered with a single call to def().",
               " Duplicate registration: ", debug, ". Original registration: ", op.operatorDef_->op.debug());
-  op.operatorDef_->op.registerSchema(std::move(schema), std::move(debug), tags);
+  op.operatorDef_->op.registerSchema(std::move(schema), std::move(debug), std::move(tags));
   listeners_->callOnOperatorRegistered(op);
 
   // NB: do not increment the counts until AFTER error checking

aten/src/ATen/core/function_schema.cpp

+2 -1
@@ -2,6 +2,7 @@
 
 #include <iostream>
 #include <stack>
+#include <utility>
 
 namespace c10 {
 
@@ -132,7 +133,7 @@ c10::optional<AliasTypeSet> FunctionSchema::mapTypeToAliasTypeSet(const TypePtr&
       if (mutable_types.size() == 0) {
         return c10::nullopt;
       }
-      return {AliasTypeSet{TupleType::create(mutable_types)}};
+      return {AliasTypeSet{TupleType::create(std::move(mutable_types))}};
     }
     default:
       return c10::nullopt;

aten/src/ATen/core/ivalue.cpp

+9 -8
@@ -1,17 +1,18 @@
-#include <ATen/core/ivalue.h>
 #include <ATen/core/Dict.h>
 #include <ATen/core/Formatting.h>
 #include <ATen/core/class_type.h>
 #include <ATen/core/enum_type.h>
 #include <ATen/core/function.h>
+#include <ATen/core/ivalue.h>
 #include <ATen/core/jit_type.h>
 #include <ATen/core/stack.h>
 #include <ATen/core/type_factory.h>
-#include <c10/util/irange.h>
 #include <c10/util/StringUtil.h>
 #include <c10/util/hash.h>
+#include <c10/util/irange.h>
 #include <cmath>
 #include <iostream>
+#include <utility>
 
 namespace c10 {
 bool _fastEqualsForContainer(const IValue& lhs, const IValue& rhs) {
@@ -492,11 +493,11 @@ std::ostream& printMaybeAnnotatedList(
   if (the_list.toListRef().size() == 0 ||
       !elementTypeCanBeInferredFromMembers(list_elem_type)) {
     out << "annotate(" << the_list.type<c10::Type>()->annotation_str() << ", ";
-    printList(out, the_list.toListRef(), "[", "]", formatter);
+    printList(out, the_list.toListRef(), "[", "]", std::move(formatter));
     out << ")";
     return out;
   } else {
-    return printList(out, the_list.toListRef(), "[", "]", formatter);
+    return printList(out, the_list.toListRef(), "[", "]", std::move(formatter));
   }
 }
 
@@ -533,9 +534,9 @@ std::ostream& printMaybeAnnotatedDict(
   if (the_dict.toGenericDict().size() == 0 ||
       !elementTypeCanBeInferredFromMembers(value_type)) {
     out << "annotate(" << the_dict.type<c10::Type>()->annotation_str() << ",";
-    printDict(out, the_dict.toGenericDict(), formatter) << ")";
+    printDict(out, the_dict.toGenericDict(), std::move(formatter)) << ")";
   } else {
-    return printDict(out, the_dict.toGenericDict(), formatter);
+    return printDict(out, the_dict.toGenericDict(), std::move(formatter));
   }
   return out;
 }
@@ -873,7 +874,7 @@ IValue IValue::deepcopy(
       for (const auto& e : toTupleRef().elements()) {
        copied_tuple.push_back(e.deepcopy(memo));
      }
-      copy = IValue(ivalue::Tuple::create(copied_tuple));
+      copy = IValue(ivalue::Tuple::create(std::move(copied_tuple)));
     }
       break;
     case IValue::Tag::GenericList: {
@@ -1033,7 +1034,7 @@ WeakTypePtr WeakOrStrongTypePtr::asWeakTypePtr() const {
   } else {
     std::weak_ptr<torch::jit::CompilationUnit> weak_cu =
         cu_.getStrongRefOrThrow();
-    return WeakTypePtr(weak_cu, type_);
+    return WeakTypePtr(std::move(weak_cu), type_);
   }
 }

aten/src/ATen/core/tensor_type.cpp

+3 -1
@@ -1,6 +1,8 @@
 #include <ATen/core/Tensor.h>
 #include <ATen/core/jit_type.h>
 
+#include <utility>
+
 namespace c10 {
 
 namespace {
@@ -416,7 +418,7 @@ VaryingShape<int64_t> TensorType::strides() const {
       ss[*s.stride_index_] = *s.stride_;
     }
   }
-  return VaryingShape<int64_t>(ss);
+  return VaryingShape<int64_t>(std::move(ss));
 }
 
 TensorType::TensorType(

aten/src/ATen/core/type.cpp

+5 -5
@@ -1,15 +1,15 @@
 #include <ATen/core/Dict.h>
 #include <ATen/core/Tensor.h>
 #include <ATen/core/dynamic_type.h>
-#include <ATen/core/function_schema.h>
 #include <ATen/core/enum_type.h>
+#include <ATen/core/function.h>
 #include <ATen/core/function_schema.h>
+#include <ATen/core/grad_mode.h>
 #include <ATen/core/jit_type.h>
 #include <c10/macros/Macros.h>
 #include <c10/util/irange.h>
-#include <ATen/core/grad_mode.h>
-#include <ATen/core/function.h>
 #include <iostream>
+#include <utility>
 
 namespace std {
 template<>
@@ -298,7 +298,7 @@ TypePtr DictType::get(std::string identifier, TypePtr key, TypePtr value) {
   auto map_key = std::make_tuple(identifier, key, value);
   std::lock_guard<std::mutex> lock(mutex);
   if (containerTypePtrs.find(map_key) == containerTypePtrs.end()) {
-    TypePtr t = DictType::create(key, value);
+    TypePtr t = DictType::create(std::move(key), std::move(value));
     containerTypePtrs.emplace(map_key, std::move(t));
   }
   return containerTypePtrs[map_key];
@@ -738,7 +738,7 @@ TupleTypePtr TupleType::createWithSpec(const c10::optional<c10::QualifiedName>&
       /*arguments=*/std::move(arguments),
       /*returns=*/std::vector<Argument>{});
   return std::shared_ptr<TupleType>(new TupleType(
-      field_types, qualName, schema)); // NOLINT(modernize-make-shared)
+      field_types, qualName, std::move(schema))); // NOLINT(modernize-make-shared)
 }
 
 c10::optional<std::vector<c10::string_view>> TupleType::names() const {

aten/src/ATen/core/union_type.cpp

+6 -5
@@ -1,13 +1,14 @@
 #include <ATen/core/Dict.h>
 #include <ATen/core/Tensor.h>
+#include <ATen/core/function.h>
 #include <ATen/core/function_schema.h>
+#include <ATen/core/grad_mode.h>
 #include <ATen/core/jit_type.h>
 #include <ATen/core/type_factory.h>
 #include <c10/macros/Macros.h>
 #include <c10/util/irange.h>
-#include <ATen/core/grad_mode.h>
-#include <ATen/core/function.h>
 #include <iostream>
+#include <utility>
 
 namespace c10 {
 
@@ -262,7 +263,7 @@ UnionTypePtr UnionType::create(std::vector<TypePtr> reference) {
     auto not_none = union_type->containedTypes()[0] != NoneType::get()
                     ? union_type->containedTypes()[0]
                     : union_type->containedTypes()[1];
-    return OptionalType::create(not_none);
+    return OptionalType::create(std::move(not_none));
   }
 }
 
@@ -396,7 +397,7 @@ std::string UnionType::unionStr(TypePrinter printer, bool is_annotation_str)
       ss << ", ";
     }
     if (is_annotation_str) {
-      ss << NumberType::get()->annotation_str(printer);
+      ss << NumberType::get()->annotation_str(std::move(printer));
    } else {
      ss << NumberType::get()->str();
    }
@@ -410,7 +411,7 @@ std::string UnionType::str() const {
 }
 
 std::string UnionType::annotation_str_impl(TypePrinter printer) const {
-  return this->unionStr(printer, /*is_annotation_str=*/true);
+  return this->unionStr(std::move(printer), /*is_annotation_str=*/true);
 }
 
 bool UnionType::canHoldType(const Type& type) const {

aten/src/ATen/native/Activation.cpp

+6 -4
@@ -73,6 +73,8 @@
 #include <ATen/ops/threshold_backward_native.h>
 #include <ATen/ops/threshold_native.h>
 #include <ATen/ops/zeros_like.h>
+
+#include <utility>
 #endif
 
 namespace at {
@@ -624,7 +626,7 @@ Tensor rrelu_with_noise_cpu(
     c10::optional<Generator> generator) {
   auto output = at::empty_like(self, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
   return at::native::rrelu_with_noise_out_cpu(
-      self, noise, lower, upper, training, generator, output);
+      self, noise, lower, upper, training, std::move(generator), output);
 }
 
 Tensor& rrelu_with_noise_cpu_(
@@ -635,7 +637,7 @@ Tensor& rrelu_with_noise_cpu_(
     bool training,
     c10::optional<Generator> generator) {
   return at::native::rrelu_with_noise_out_cpu(
-      self, noise, lower, upper, training, generator, self);
+      self, noise, lower, upper, training, std::move(generator), self);
 }
 
 Tensor rrelu_with_noise_backward(
@@ -658,12 +660,12 @@ Tensor rrelu_with_noise_backward(
 
 Tensor rrelu(const Tensor & self, const Scalar& lower, const Scalar& upper, bool training, c10::optional<Generator> generator) {
   TORCH_CHECK(lower.to<double>() <= upper.to<double>(), "Lower bound should be less than or equal to the upper bound")
-  return at::rrelu_with_noise(self, at::empty_like(self, LEGACY_CONTIGUOUS_MEMORY_FORMAT), lower, upper, training, generator);
+  return at::rrelu_with_noise(self, at::empty_like(self, LEGACY_CONTIGUOUS_MEMORY_FORMAT), lower, upper, training, std::move(generator));
 }
 
 Tensor & rrelu_(Tensor & self, const Scalar& lower, const Scalar& upper, bool training, c10::optional<Generator> generator) {
   TORCH_CHECK(lower.to<double>() <= upper.to<double>(), "Lower bound should be less than or equal to the upper bound")
-  return at::rrelu_with_noise_(self, at::empty_like(self, LEGACY_CONTIGUOUS_MEMORY_FORMAT), lower, upper, training, generator);
+  return at::rrelu_with_noise_(self, at::empty_like(self, LEGACY_CONTIGUOUS_MEMORY_FORMAT), lower, upper, training, std::move(generator));
 }
 
 TORCH_IMPL_FUNC(threshold_out)(const Tensor& self, const Scalar& threshold, const Scalar& value, const Tensor& result) {
