|
8 | 8 | #include "caffe2/core/operator.h"
|
9 | 9 | #include "caffe2/core/static_tracepoint.h"
|
10 | 10 | #include "caffe2/core/timer.h"
|
11 |
| -#include "caffe2/opt/converter.h" |
12 | 11 | #include "caffe2/proto/caffe2_pb.h"
|
13 | 12 | #include "caffe2/utils/proto_utils.h"
|
14 | 13 |
|
15 |
| -#include "nomnigraph/Graph/Algorithms.h" |
16 |
| - |
17 | 14 | namespace caffe2 {
|
18 | 15 | namespace dag_utils {
|
19 | 16 |
|
@@ -123,146 +120,6 @@ void updateOperatorNodes(
|
123 | 120 | }
|
124 | 121 | } // namespace
|
125 | 122 |
|
126 |
| -using namespace nom::repr; |
127 |
| -using DepGraph = nom::Graph<NNGraph::NodeRef>; |
128 |
| - |
129 |
| -// \brief This function prunes edges in the dependency |
130 |
| -// graph to increase the chaining opportunity. |
131 |
| -// It does not eliminate parallelism opportunity. |
132 |
| -void optimizeDependencyGraph(DepGraph* deps) { |
133 |
| - auto edges = deps->getMutableEdges(); |
134 |
| - for (const auto& edge : edges) { |
135 |
| - auto tail = edge->tail(); |
136 |
| - auto head = edge->head(); |
137 |
| - deps->deleteEdge(edge); |
138 |
| - std::unordered_set<DepGraph::NodeRef> seen; |
139 |
| - nom::algorithm::reachable<DepGraph>(tail, nullptr, &seen); |
140 |
| - // Removing that edge removes a dominator, which is invalid |
141 |
| - if (!seen.count(head)) { |
142 |
| - deps->createEdge(tail, head); |
143 |
| - } |
144 |
| - } |
145 |
| -} |
146 |
| - |
147 |
| -ExecutionChains computeChains( |
148 |
| - const caffe2::NetDef& predict_net, |
149 |
| - std::vector<OperatorNode>& orig_nodes) { |
150 |
| - // These serve as the map into predict_net.op() |
151 |
| - std::vector<NNGraph::NodeRef> nom_ops; |
152 |
| - auto nn = convertToNNModule(predict_net, false, &nom_ops); |
153 |
| - CAFFE_ENFORCE_EQ(nom_ops.size(), predict_net.op().size()); |
154 |
| - |
155 |
| - // Create a map from NodeRef to index into predict_net.op() |
156 |
| - // Now we can use pure nomnigraph functions and map back later |
157 |
| - std::unordered_map<NNGraph::NodeRef, int> nom_op_to_pos; |
158 |
| - for (auto idx = 0; idx < nom_ops.size(); ++idx) { |
159 |
| - nom_op_to_pos[nom_ops[idx]] = idx; |
160 |
| - } |
161 |
| - |
162 |
| - // The algorithm: |
163 |
| - // 1) create dependency graph of ops |
164 |
| - // 2) for all nodes thats have multiple in edges, remove all in edges |
165 |
| - // 3) for all nodes thats have multiple out edges, remove all out edges |
166 |
| - // 4) return the components as chains |
167 |
| - |
168 |
| - // Caveats that can easily be handled |
169 |
| - // 1) Cannot have a chain that crosses device options |
170 |
| - // insert extra edge at each boundary |
171 |
| - // 2) All CPU async ops have to be the last op in a chain |
172 |
| - // insert extra out edge |
173 |
| - DepGraph deps; |
174 |
| - |
175 |
| - // Map NodeRef to the node in the dependency graph |
176 |
| - std::unordered_map<NNGraph::NodeRef, DepGraph::NodeRef> dep_map; |
177 |
| - for (const auto& node : nn::filter<NeuralNetOperator>(nn)) { |
178 |
| - dep_map[node] = deps.createNode(node); |
179 |
| - } |
180 |
| - |
181 |
| - // 1) Create dependency graph |
182 |
| - for (const auto& node : nn::filter<NeuralNetOperator>(nn)) { |
183 |
| - for (const auto& output : nn::getOutputs(node)) { |
184 |
| - for (const auto& consumer : nn::getConsumers(output)) { |
185 |
| - // Record single dependencies first |
186 |
| - if (!deps.hasEdge(dep_map[node], dep_map[consumer])) { |
187 |
| - deps.createEdge(dep_map[node], dep_map[consumer]); |
188 |
| - } |
189 |
| - } |
190 |
| - } |
191 |
| - } |
192 |
| - |
193 |
| - optimizeDependencyGraph(&deps); |
194 |
| - |
195 |
| - // Fixup device boundary and async op issues |
196 |
| - for (const auto& dep : deps.getMutableNodes()) { |
197 |
| - int op_idx = nom_op_to_pos[dep->data()]; |
198 |
| - auto d1 = orig_nodes.at(op_idx).operator_->device_option(); |
199 |
| - auto outEdges = dep->getOutEdges(); |
200 |
| - for (const auto& outEdge : outEdges) { |
201 |
| - int op2_idx = nom_op_to_pos[outEdge->head()->data()]; |
202 |
| - auto d2 = orig_nodes.at(op2_idx).operator_->device_option(); |
203 |
| - if (!IsSameDevice(d1, d2)) { |
204 |
| - deps.createEdge(dep, outEdge->head()); |
205 |
| - } |
206 |
| - } |
207 |
| - if (d1.device_type() == PROTO_CUDA) { |
208 |
| - continue; |
209 |
| - } |
210 |
| - if (orig_nodes.at(op_idx).operator_->HasAsyncPart()) { |
211 |
| - outEdges = dep->getOutEdges(); |
212 |
| - for (const auto& outEdge : outEdges) { |
213 |
| - // Clone out edges |
214 |
| - deps.createEdge(outEdge->tail(), outEdge->head()); |
215 |
| - } |
216 |
| - } |
217 |
| - } |
218 |
| - |
219 |
| - // 2) Prune in edges if multiplicity > 1 |
220 |
| - // 3) Prune out edges if multiplicity > 1 |
221 |
| - for (const auto& dep : deps.getMutableNodes()) { |
222 |
| - auto inEdges = dep->getInEdges(); |
223 |
| - if (inEdges.size() > 1) { |
224 |
| - for (const auto& inEdge : inEdges) { |
225 |
| - NOM_REQUIRE_OR_CONT(inEdge); |
226 |
| - deps.deleteEdge(inEdge); |
227 |
| - } |
228 |
| - } |
229 |
| - auto outEdges = dep->getOutEdges(); |
230 |
| - if (outEdges.size() > 1) { |
231 |
| - for (const auto& outEdge : outEdges) { |
232 |
| - NOM_REQUIRE_OR_CONT(outEdge); |
233 |
| - deps.deleteEdge(outEdge); |
234 |
| - } |
235 |
| - } |
236 |
| - } |
237 |
| - |
238 |
| - // 4) Return components as chains |
239 |
| - std::vector<DepGraph::NodeRef> chain_starts; |
240 |
| - for (const auto& dep : deps.getMutableNodes()) { |
241 |
| - if (dep->getInEdges().size() == 0) { |
242 |
| - chain_starts.emplace_back(dep); |
243 |
| - } |
244 |
| - } |
245 |
| - |
246 |
| - ExecutionChains chains; |
247 |
| - for (const auto& dep : chain_starts) { |
248 |
| - DepGraph::NodeRef front = dep; |
249 |
| - std::vector<int> ops; |
250 |
| - do { |
251 |
| - ops.emplace_back(nom_op_to_pos[front->data()]); |
252 |
| - auto outEdges = front->getOutEdges(); |
253 |
| - if (outEdges.size()) { |
254 |
| - front = outEdges.at(0)->head(); |
255 |
| - } else { |
256 |
| - front = nullptr; |
257 |
| - } |
258 |
| - } while (front); |
259 |
| - chains[nom_op_to_pos[dep->data()]] = ops; |
260 |
| - } |
261 |
| - |
262 |
| - updateOperatorNodes(orig_nodes, chains); |
263 |
| - return chains; |
264 |
| -} |
265 |
| - |
266 | 123 | ExecutionChains computeChains(std::vector<OperatorNode>& orig_nodes) {
|
267 | 124 | const std::vector<OpGraphNode> nodes = pruneOpNodeGraph(orig_nodes);
|
268 | 125 | vector<int> initial_frontier;
|
|
0 commit comments