Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2020 Jijoong Moon <jijoong.moon@samsung.com>
4 : *
5 : * @file network_graph.cpp
6 : * @date 19 Oct 2020
7 : * @see https://github.com/nnstreamer/nntrainer
8 : * @author Jijoong Moon <jijoong.moon@samsung.com>
9 : * @bug No known bugs except for NYI items
10 : * @brief This is Network Graph Class for Neural Network
11 : *
12 : * @todo Support multi-input graph.
13 : */
14 :
15 : #include <activation_layer.h>
16 : #include <addition_layer.h>
17 : #include <bn_layer.h>
18 : #include <concat_layer.h>
19 : #include <connection.h>
20 : #include <cross_entropy_loss_layer.h>
21 : #include <cross_entropy_sigmoid_loss_layer.h>
22 : #include <cross_entropy_softmax_loss_layer.h>
23 : #include <engine.h>
24 : #include <flatten_layer.h>
25 : #include <grucell.h>
26 : #include <identity_layer.h>
27 : #include <input_layer.h>
28 : #include <layer_node.h>
29 : #include <layer_normalization_layer.h>
30 : #include <lstmcell.h>
31 : #include <multiout_layer.h>
32 : #include <network_graph.h>
33 : #include <nntrainer_error.h>
34 : #include <nntrainer_log.h>
35 : #include <profiler.h>
36 : #include <rnn.h>
37 : #include <rnncell.h>
38 : #include <split_layer.h>
39 : #include <tensor_layer.h>
40 : #include <time_dist.h>
41 : #include <tracer.h>
42 : #include <util_func.h>
43 : #include <weight_layer.h>
44 :
45 : #include <cmath>
46 : #include <iostream>
47 : #include <stdexcept>
48 : #include <string>
49 :
50 : #include "graph_node.h"
51 : #include "tensor.h"
52 :
53 : #define LNODE(x) std::static_pointer_cast<LayerNode>(x)
54 :
55 : namespace nntrainer {
56 703 : int NetworkGraph::compile(const std::string &loss_type) {
57 : int status = ML_ERROR_NONE;
58 :
59 703 : status = isCompilable();
60 703 : NN_RETURN_STATUS();
61 :
62 : try {
63 642 : setOutputConnections();
64 0 : } catch (std::exception &e) {
65 0 : ml_loge("setting output layer failed, reason: %s", e.what());
66 : return ML_ERROR_INVALID_PARAMETER;
67 0 : }
68 :
69 642 : graph.realizeInputOutputNode();
70 :
71 642 : if (exec_mode != ExecutionMode::INFERENCE) {
72 : try {
73 : /// @todo realize loss beforehand
74 641 : status = addLossLayer(loss_type);
75 632 : NN_RETURN_STATUS();
76 9 : } catch (const std::exception &e) {
77 9 : ml_loge("%s", e.what());
78 : status = ML_ERROR_INVALID_PARAMETER;
79 : NN_RETURN_STATUS();
80 9 : }
81 : } else {
82 1 : if (!loss_type.empty()) {
83 0 : ml_loge("Warning : Loss type is given in inference mode. Ignoring loss "
84 : "type.");
85 : }
86 : }
87 :
88 633 : graph.topologicalSort();
89 :
90 633 : setExecutionOrder();
91 633 : forward_iter_end = (*(cend() - 1)).get();
92 :
93 633 : inPlaceOptimize();
94 :
95 633 : status = checkCompiledGraph();
96 633 : NN_RETURN_STATUS();
97 :
98 627 : compiled = true;
99 :
100 627 : return status;
101 : }
102 :
103 633 : void NetworkGraph::setExecutionOrder() {
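 : // forward orders are assigned by topological index (0 .. n-1), while the
 : // calc_gradient / calc_derivative / apply_gradient orders continue upward
 : // from graph.size() in reverse topological order, so gradient application
 : // of the first node receives the largest execution order.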
104 : auto backward_order = graph.size();
105 5102 : for (auto iter = getBackwardingBeginIter(); iter != getBackwardingEndIter();
106 : iter++) {
107 : auto &node = *iter;
108 4469 : auto order_idx = getBackwardingEndIter() - iter - 1;
109 : auto forward_order = order_idx;
110 : auto calc_gradient_order = backward_order;
111 4469 : if (node->getTrainable())
112 4451 : backward_order++;
113 : auto calc_derivative_order = backward_order;
114 4469 : if (node->getTrainable())
115 4451 : backward_order++;
116 4469 : auto apply_gradient_order = backward_order++;
117 :
118 : node->setExecutionOrder({forward_order, calc_gradient_order,
119 : calc_derivative_order, apply_gradient_order});
120 : }
121 :
122 : /**
123 : * This sets the max execution order temporarily until the model is initialized.
124 : * The max execution order set here is used to extend gradient exec orders for
125 : * clipping.
126 : */
127 633 : graph_exec_end = std::get<3>((*(cbegin()))->getExecutionOrder());
128 633 : }
129 :
130 0 : void NetworkGraph::addLayerNode(std::unique_ptr<Layer> layer) {
131 0 : graph.addNode(std::make_unique<LayerNode>(std::move(layer)));
132 0 : }
133 :
134 641 : int NetworkGraph::addLossLayer(const std::string &loss_type_) {
135 1292 : for (unsigned int i = 0; i < graph.getNumOutputNodes(); ++i) {
136 : auto output_layer_node = LNODE(graph.getOutputNode(i));
137 : std::string loss_type = loss_type_;
138 :
139 660 : if (output_layer_node->requireLabel())
140 338 : continue;
141 :
142 322 : if (loss_type.empty())
143 13 : continue;
144 :
145 : auto second_to_last_layer_node = output_layer_node;
146 : bool is_cross_entropy_loss =
147 309 : istrequal(loss_type, CrossEntropyLossLayer::type);
148 309 : if (is_cross_entropy_loss) {
149 189 : auto type = output_layer_node->getType();
150 :
151 189 : if (type != ActivationLayer::type) {
152 : throw exception::not_supported(
153 : "Error: Cross Entropy need last layer to have softmax or sigmoid"
154 0 : "activation.");
155 : }
156 :
157 189 : switch (output_layer_node->getActivationType()) {
158 : case ActivationType::ACT_SIGMOID:
159 : loss_type = CrossEntropySigmoidLossLayer::type;
160 : break;
161 : case ActivationType::ACT_SOFTMAX:
162 : loss_type = CrossEntropySoftmaxLossLayer::type;
163 : break;
164 8 : default:
165 : throw exception::not_supported(
166 16 : "Error: Cross Entropy not supported without softmax or sigmoid.");
167 : }
168 :
169 : second_to_last_layer_node =
170 370 : LNODE(graph.getNode(output_layer_node->getInputConnectionName(0)));
171 : }
172 :
173 610 : std::shared_ptr<LayerNode> lnode = createLayerNode(loss_type);
174 600 : graph.ensureName(*lnode);
175 :
176 300 : if (second_to_last_layer_node->getDistribute()) {
177 1 : lnode->setProperty({"distribute=true"});
178 : }
179 :
180 : /// @todo remove this by add loss at realization
181 600 : second_to_last_layer_node->setOutputLayers({lnode->getName()});
182 600 : lnode->setProperty(
183 300 : {"input_layers=" + second_to_last_layer_node->getName()});
184 :
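 : // for cross entropy the activation node is replaced by the fused
 : // activation+loss node created above; otherwise the loss node is simply
 : // appended after the current output node.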
185 300 : if (is_cross_entropy_loss) {
186 543 : graph.replaceNode(output_layer_node, lnode);
187 : } else {
188 238 : graph.addNode(lnode, false);
189 : }
190 300 : graph.replaceOutputNode(i, lnode);
191 : }
192 :
193 632 : return ML_ERROR_NONE;
194 600 : }
195 :
196 642 : void NetworkGraph::setOutputConnections() {
197 5027 : for (auto layer_iter = cbegin(); layer_iter != cend(); layer_iter++) {
198 : const auto &node = *layer_iter;
199 13283 : for (auto i = 0u, num_inode = node->getNumInputConnections(); i < num_inode;
200 : ++i) {
201 4513 : const auto &name = node->getInputConnectionName(i);
202 4513 : const auto &idx = node->getInputConnectionIndex(i);
203 :
204 4513 : auto node_setting_output = getLayerNode(name);
205 9026 : node_setting_output->setOutputConnection(idx, node->getName(), i);
206 : }
207 : }
208 642 : }
209 :
210 703 : int NetworkGraph::isCompilable() {
211 703 : if (compiled) {
212 0 : ml_loge("Graph is already compiled");
213 0 : return ML_ERROR_NOT_SUPPORTED;
214 : }
215 :
216 703 : if (graph.empty()) {
217 61 : ml_loge("Graph is empty");
218 61 : return ML_ERROR_INVALID_PARAMETER;
219 : }
220 :
221 : return ML_ERROR_NONE;
222 : }
223 :
224 633 : int NetworkGraph::checkCompiledGraph() {
225 : /** Dimension of input layers must be known */
226 5054 : for (auto iter = cbegin(); iter != cend(); iter++) {
227 : auto lnode = (*iter);
228 4427 : if (lnode->getNumInputConnections() == 0) {
229 984 : if (!lnode->hasInputShapeProperty()) {
230 12 : ml_loge("Layer with no inbound connection need input_shape property");
231 : return ML_ERROR_INVALID_PARAMETER;
232 : }
233 : }
234 : }
235 :
236 : return ML_ERROR_NONE;
237 : }
238 :
239 619 : void NetworkGraph::markNodesForBackwarding() {
240 : /** accumulate all the nodes which must support backwarding */
241 : std::unordered_set<std::string> must_support_backwarding;
242 619 : if (exec_mode == ExecutionMode::INFERENCE) {
243 6 : for (auto iter = cbegin(); iter != cend(); iter++) {
244 : auto lnode = (*iter);
245 : lnode->needsCalcGradient(false);
246 5 : lnode->needsCalcDerivative(false);
247 : }
248 : return;
249 : }
250 :
251 : /**
252 : * if a node is trainable, then all the nodes ahead of it must support
253 : * backwarding operation
254 : */
255 5018 : for (auto iter = cbegin(); iter != cend(); iter++) {
256 : auto lnode = (*iter);
257 7363 : if (lnode->getTrainable() ||
258 7363 : must_support_backwarding.find(lnode->getName()) !=
259 : must_support_backwarding.end()) {
260 3187 : if (lnode->getTrainable()) {
261 : lnode->needsCalcGradient(true);
262 : }
263 : #ifdef ENABLE_TEST
264 3187 : if (lnode->supportBackwarding() && !optimize_memory) {
265 1406 : lnode->needsCalcDerivative(true);
266 : }
267 : #endif
268 :
269 6363 : for (auto i = 0u, num_node = lnode->getNumOutputConnections();
270 6363 : i < num_node; ++i) {
271 3176 : auto conn = lnode->getOutputConnection(i);
272 3176 : if (!conn) {
273 8 : continue;
274 : }
275 :
276 : must_support_backwarding.insert(conn->getName());
277 : }
278 : }
279 : }
280 :
281 : /** mark all the required nodes as supporting backwarding */
282 3179 : for (auto const &node_name : must_support_backwarding) {
283 2561 : auto ln = LNODE(graph.getNode(node_name)).get();
284 2561 : ln->needsCalcDerivative(true);
285 : }
286 : }
287 :
288 645 : void NetworkGraph::setBatchSize(unsigned int batch_size) {
289 645 : if (batch_size == this->batch_size)
290 : return;
291 :
292 625 : this->batch_size = batch_size;
293 815 : if (!input_list.empty() && getInputDimension()[0].batch() == batch_size)
294 : return;
295 :
296 : auto allocated = tensor_manager->isAllocated();
297 :
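 : // changing the batch size while tensors are allocated requires releasing
 : // them first, resizing every finalized layer's tensor specs, and
 : // re-allocating afterwards.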
298 435 : if (allocated)
299 : deallocateTensors();
300 :
301 3497 : for (auto iter = cbegin(); iter != cend(); iter++) {
302 3062 : if ((*iter)->isFinalized()) {
303 : /// resize tensor specs
304 : /// @todo remove below, if custom tensor needs to change dimension
305 : /// according to the tensor, it must be done explicitly, or at least have
306 : /// a property to control the behavior
307 3062 : const RunLayerContext &context = (*iter)->getRunContext();
308 5099 : for (unsigned int idx = 0; idx < context.getNumTensors(); idx++) {
309 2037 : auto const &ts = context.getTensor(idx);
310 2037 : tensor_manager->setBatchSize(ts.getName(), ts.getDim().batch());
311 2037 : if (context.tensorHasGradient(idx)) {
312 1016 : auto const &ts_grad = context.getTensorGrad(idx);
313 1016 : tensor_manager->setBatchSize(ts_grad.getName(),
314 2032 : ts_grad.getDim().batch());
315 : }
316 : }
317 : /// override setting batch as per request
318 6124 : (*iter)->setBatch(batch_size);
319 : }
320 : }
321 : /// resize input and output spec
322 435 : tensor_manager->setBatchSize(batch_size);
323 :
324 435 : if (allocated)
325 0 : allocateTensors(exec_mode);
326 :
327 : /** update input and label dimensions */
328 1005 : for (unsigned int idx = 0; idx < input_list.size(); idx++)
329 570 : input_dims_[idx] = tensor_manager->getTensor(input_list[idx])->getDim();
330 883 : for (unsigned int idx = 0; idx < label_list.size(); idx++)
331 448 : label_dims_[idx] = tensor_manager->getTensor(label_list[idx])->getDim();
332 : }
333 :
334 0 : void NetworkGraph::resetInputDimension(std::vector<TensorDim> dims) {
335 : auto allocated = tensor_manager->isAllocated();
336 :
337 0 : if (allocated)
338 : deallocateTensors();
339 :
340 0 : for (auto iter = cbegin(); iter != cend(); iter++) {
341 0 : if ((*iter)->isFinalized()) {
342 0 : (*iter)->updateTensorsByInputDimensions(dims);
343 : }
344 : }
345 :
346 0 : if (allocated)
347 0 : allocateTensors(exec_mode);
348 :
349 : /** update input and label dimensions */
350 0 : for (unsigned int idx = 0; idx < input_list.size(); idx++)
351 0 : input_dims_[idx] = tensor_manager->getTensor(input_list[idx])->getDim();
352 0 : for (unsigned int idx = 0; idx < label_list.size(); idx++)
353 0 : label_dims_[idx] = tensor_manager->getTensor(label_list[idx])->getDim();
354 0 : }
355 :
356 22292 : void NetworkGraph::applyGradients(
357 : LayerNode *node, const std::function<void(Weight &)> &apply_func) {
358 22292 : if (!node->getTrainable())
359 : return;
360 :
361 14934 : TRACE_MEMORY() << node->getName() + ": AG";
362 14934 : TRACE_TIME() << node->getName() + ": AG";
363 :
364 7467 : auto &rc = node->getRunContext();
365 7467 : auto num_weight = rc.getNumWeights();
366 23960 : for (unsigned i = 0; i < num_weight; ++i) {
367 16493 : if (!rc.weightHasGradient(i)) {
368 408 : continue;
369 : }
370 :
371 16085 : if (!rc.isGradientLastAccess(i)) {
372 : /// @note instead of checking the last access of the weight, we check
373 : /// whether weights are dependent on others to minimize overhead.
374 : /// This logic assumes that the source of a dependent weight must come
375 : /// prior to the dependent one.
376 458 : continue;
377 : }
378 :
379 15627 : if (rc.isGradientClipByGlobalNorm(i) || rc.isMixedPrecision(i)) {
380 : /**
381 : * @note the weights whose gradients are to be clipped by global norm will
382 : * be clipped at once at the end of the iteration and applied then.
383 : * For those weights where mixed precision is used, their gradient
384 : * updates might be delayed until it is confirmed that their loss scales
385 : * are appropriate.
386 : */
387 44 : continue;
388 : }
389 :
390 15583 : apply_func(rc.getWeightObject(i));
391 : }
392 : }
393 :
394 6821 : sharedConstTensors NetworkGraph::forwarding(
395 : bool training,
396 : std::function<void(std::shared_ptr<LayerNode>, bool)> forwarding_op,
397 : std::function<bool(void *userdata)> stop_cb, void *userdata) {
398 61427 : for (auto iter = cbegin(); iter != cend() && !stop_cb(userdata); iter++) {
399 : auto &ln = *iter;
400 : PROFILE_TIME_START(profile_keys.at(ln->getType()));
401 27303 : forwarding_op(*iter, training);
402 : PROFILE_TIME_END(profile_keys.at(ln->getType()));
403 : }
404 :
405 : sharedConstTensors out;
406 13666 : for (unsigned int i = 0; i < graph.getNumOutputNodes(); ++i) {
407 : auto const &output_layer_node = LNODE(graph.getOutputNode(i));
408 13690 : for (unsigned int j = 0; j < output_layer_node->getNumOutputs(); ++j) {
409 : // @todo we should determine what type to return
410 : // out.push_back(MAKE_SHARED_TENSOR(
411 : // output_layer_node->getOutput(j).clone(TensorDim::DataType::FP32)));
412 13690 : out.push_back(MAKE_SHARED_TENSOR(output_layer_node->getOutput(j)));
413 : }
414 : }
415 :
416 6821 : return out;
417 0 : }
418 :
419 0 : sharedConstTensors NetworkGraph::incremental_forwarding(
420 : unsigned int from, unsigned int to, bool training,
421 : std::function<void(std::shared_ptr<LayerNode>, bool)> forwarding_op,
422 : std::function<bool(void *userdata)> stop_cb, void *userdata) {
423 0 : for (auto iter = cbegin(); iter != cend() && !stop_cb(userdata); iter++) {
424 : auto &ln = *iter;
425 : PROFILE_TIME_START(profile_keys.at(ln->getType()));
426 0 : forwarding_op(*iter, training);
427 : PROFILE_TIME_END(profile_keys.at(ln->getType()));
428 : }
429 :
430 : sharedConstTensors out;
431 0 : for (unsigned int i = 0; i < graph.getNumOutputNodes(); ++i) {
432 : auto const &output_layer_node = LNODE(graph.getOutputNode(i));
433 0 : for (unsigned int j = 0; j < output_layer_node->getNumOutputs(); ++j) {
434 0 : out.push_back(MAKE_SHARED_TENSOR(output_layer_node->getOutput(j)));
435 : }
436 : }
437 :
438 0 : return out;
439 0 : }
440 :
441 6132 : bool NetworkGraph::backwarding(
442 : int iteration,
443 : std::function<void(std::shared_ptr<LayerNode>, bool)> &forwarding_op,
444 : std::function<bool(std::shared_ptr<LayerNode>, int)> &backwarding_op,
445 : std::function<void(Weight &, int)> &lazy_apply_grad_op,
446 : std::function<bool(void *userdata)> stop_cb, void *userdata) {
447 : /**
448 : * last layer backwarding is run out of this loop
449 : */
450 : auto iter_begin = getBackwardingBeginIter();
451 : auto iter_end = getBackwardingEndIter();
452 : bool is_valid = true;
453 :
454 : /// there is no layer to train, so backwarding is essentially noop
455 6132 : if (iter_begin == iter_end) {
456 : return true;
457 : }
458 :
459 : auto const &lptr_begin = (*iter_begin);
460 : // graph_const_reverse_iterator
461 : auto iter_ = iter_begin;
462 :
463 6132 : if (lptr_begin->requireLabel() == false)
464 : throw std::runtime_error(
465 0 : "Error: last layer does not accept label, we can't train");
466 :
467 50716 : for (iter_ = iter_begin; iter_ != iter_end && !stop_cb(userdata); iter_++) {
468 : auto &ln = *iter_;
469 : PROFILE_TIME_START(profile_keys.at(ln->getType()));
470 22292 : is_valid = backwarding_op(ln, iteration);
471 : PROFILE_TIME_END(profile_keys.at(ln->getType()));
472 :
473 22292 : if (!is_valid) {
474 : break;
475 : }
476 : }
477 :
478 6132 : if (!is_valid) {
479 : /** if a NaN was detected
480 : * 1. reset the loss scale. : @todo Backoff_factor : default --> 0.5
481 : * 2. run forwarding again from cur_iter to cend() (unless stop_cb fires);
482 : * 3. return false --> run backwarding again;
483 : */
484 0 : float scale = (*iter_)->getRunContext().getLossScale();
485 :
486 0 : NNTR_THROW_IF(scale - 1.0f < 10e-6, std::invalid_argument)
487 : << "Loss Scale Factor is 1.0f";
488 :
489 0 : float s = scale > 1.5f ? scale * 0.5f : 1.0f;
490 :
491 0 : resetLossScale(s);
492 :
493 0 : auto f_iter = cbegin() + graph.getSortedNodeIdx((*iter_)->getName());
494 :
495 0 : for (auto iter = f_iter; iter != cend() && !stop_cb(userdata); iter++) {
496 : auto &ln = *iter;
497 : ln->reStoreData(true);
498 : }
499 :
500 0 : for (auto iter = f_iter; iter != cend() && !stop_cb(userdata); iter++) {
501 : auto &ln = *iter;
502 : PROFILE_TIME_START(profile_keys.at(ln->getType()));
503 0 : forwarding_op(*iter, true);
504 : PROFILE_TIME_END(profile_keys.at(ln->getType()));
505 : }
506 :
507 : return false;
508 : }
509 :
510 : /** perform clipping of the gradients by global norm if any */
511 6132 : if (lazy_weights.empty())
512 : return true;
513 :
514 11 : if (is_clip_grad) {
515 : /** calculate the global norm */
516 : Tensor global_norm_t(
517 11 : TensorDim({1u, 1u, 1u, (unsigned int)lazy_weights.size()}));
518 : float *global_norm_data = global_norm_t.getData();
519 :
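 : // the global norm is the l2 norm over all lazy weights' gradient norms,
 : // i.e. sqrt(sum_i ||grad_i||^2); each per-weight norm is stored below and
 : // the final reduction happens via global_norm_t.l2norm().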
520 55 : for (unsigned int idx = 0; idx < lazy_weights.size(); idx++) {
521 : auto const &w = lazy_weights[idx];
522 :
523 44 : if (isMixedPrecision()) {
524 : Tensor scaled_grad =
525 0 : w->getGradientRef().clone(TensorDim::DataType::FP32);
526 0 : scaled_grad.divide_i(loss_scale);
527 0 : global_norm_data[idx] = scaled_grad.l2norm();
528 0 : } else {
529 44 : global_norm_data[idx] = w->getGradientNorm();
530 : }
531 : }
532 11 : float global_norm = global_norm_t.l2norm();
533 : /** clip the gradients by the above global norm */
534 55 : for (auto w : lazy_weights) {
535 : w->clipGradientByGlobalNorm(global_norm);
536 : }
537 11 : }
538 : /** apply the deferred gradients (clipped above when clipping is enabled) */
539 55 : for (auto w : lazy_weights) {
540 44 : lazy_apply_grad_op(*w, iteration);
541 : }
542 11 : nan_count++;
543 :
544 : /** @todo : handle as property : growth_interval : default --> 2000 */
545 11 : if (nan_count > 2000) {
546 0 : float scale = (*iter_)->getRunContext().getLossScale();
547 : /** @todo growth_factor : default --> 2.0 */
548 0 : float s = scale * 2.0f;
549 0 : resetLossScale(s);
550 0 : nan_count = 0;
551 : }
552 :
553 : return true;
554 : }
555 :
556 619 : LayerNode *NetworkGraph::computeBackwardEnd() {
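 : // pick the node with the largest backward-related execution order; its
 : // order is later used as the upper bound when allocating tensors for
 : // training (see allocateTensors).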
557 : int max_exec_order = -1;
558 : LayerNode *node = nullptr;
559 :
560 619 : if (!optimize_memory) {
561 241 : return (*cbegin()).get();
562 : }
563 :
564 2879 : for (auto iter = getBackwardingBeginIter(); iter != getBackwardingEndIter();
565 : iter++) {
566 : auto &ln = *iter;
567 : const auto &exec_order = ln->getExecutionOrder();
568 2501 : int cur_order = std::get<0>(exec_order);
569 2501 : if (ln->needsCalcDerivative() || ln->needsCalcGradient()) {
570 : #ifdef ENABLE_TEST
571 1781 : cur_order = std::get<2>(exec_order);
572 : #else
573 : cur_order = std::get<1>(exec_order);
574 : #endif
575 : }
576 :
577 2501 : NNTR_THROW_IF(max_exec_order == cur_order, std::invalid_argument)
578 0 : << "layer node: " << ln->getName()
579 : << " has duplicated max_exec_order, this should not happen, current "
580 : "execution order: "
581 : << max_exec_order;
582 :
583 2501 : if (max_exec_order < cur_order) {
584 : max_exec_order = cur_order;
585 : node = ln.get();
586 : }
587 : }
588 :
589 : return node;
590 : }
591 :
592 : /**
593 : * @brief Allocate memory for all the managed tensors
594 : */
595 683 : void NetworkGraph::allocateTensors(ExecutionMode exec_mode_) {
596 683 : exec_mode = exec_mode_;
597 683 : if (exec_mode == ExecutionMode::INFERENCE)
598 : /**
599 : * get the order of execution/usage order for the forwarding of the last
600 : * layer and pass that as the max_exec_order ensuring that all tensors
601 : * with usage less than the max_exec_order are allocated.
602 : */
603 420 : tensor_manager->allocateTensors(
604 420 : std::get<0>((*(cend() - 1))->getExecutionOrder()));
605 : else {
606 : /**
607 : * get the order of execution/usage order for the backwarding of the first
608 : * layer (as that will be the last layer to be executed in the backwarding)
609 : * and pass that as the max_exec_order ensuring that all tensors with
610 : * usage less than the max_exec_order are allocated.
611 : * @todo if model is gradient clipping, we have to add last execution order
612 : * + 1
613 : */
614 263 : tensor_manager->allocateTensors(
615 263 : std::get<3>(backward_iter_end->getExecutionOrder()));
616 : }
617 683 : }
618 :
619 1313 : std::vector<TensorDim> NetworkGraph::getInputDimension() const {
620 1313 : NNTR_THROW_IF(input_dims_.empty(), std::invalid_argument)
621 : << "[NetworkGraph] the graph has no node identified as input!";
622 1313 : return input_dims_;
623 : }
624 :
625 1563 : unsigned int NetworkGraph::getBatchSize() const { return batch_size; }
626 :
627 166 : std::vector<TensorDim> NetworkGraph::getOutputDimension() const {
628 166 : NNTR_THROW_IF(label_dims_.empty(), std::invalid_argument)
629 : << "[NetworkGraph] the graph has no node identified as output!";
630 : /// for now, outputting label_dims_ works, later label dim will be different
631 : /// from output dimension
632 166 : return label_dims_;
633 : }
634 :
635 : std::vector<std::shared_ptr<LayerNode>>
636 0 : NetworkGraph::getUnsortedLayers(const std::string &input_layer,
637 : const std::string &output_layer) const {
638 : /// @fixme: this won't work if input, output layers are not in order
639 : /// Further, this function must be removed. There should be rather
640 : /// getAllNames and getLayerByName instead of getUnsortedLayers.
641 :
642 : /** count layers after output layer */
643 : unsigned int num_layers_remove_end = 0;
644 0 : if (!output_layer.empty()) {
645 0 : for (auto iter = graph.crbegin(); iter != graph.crend(); iter++) {
646 0 : if ((*iter)->getName() != output_layer)
647 0 : num_layers_remove_end++;
648 : else
649 : break;
650 : }
651 : }
652 :
653 0 : if (num_layers_remove_end == graph.size())
654 0 : return {};
655 :
656 : /** count layers before input layer */
657 : unsigned int num_layers_remove_start = 0;
658 0 : if (!input_layer.empty()) {
659 : for (auto iter = graph.cbegin();
660 0 : iter != graph.cend() - num_layers_remove_end; iter++) {
661 0 : if ((*iter)->getName() != input_layer)
662 0 : num_layers_remove_start++;
663 : else
664 : break;
665 : }
666 : }
667 :
668 : /** copy the graph and return */
669 : std::vector<std::shared_ptr<LayerNode>> ret;
670 0 : std::transform(graph.cbegin() + num_layers_remove_start,
671 : graph.cend() - num_layers_remove_end, std::back_inserter(ret),
672 : [](auto const &elem) { return LNODE(elem); });
673 :
674 : return ret;
675 0 : }
676 :
677 15 : std::vector<std::shared_ptr<LayerNode>> NetworkGraph::getLayerNodes() const {
678 15 : return std::vector<std::shared_ptr<LayerNode>>(cbegin(), cend());
679 : }
680 :
681 8497 : void NetworkGraph::addLayer(std::shared_ptr<LayerNode> layer) {
682 8497 : if (compiled)
683 1 : throw std::runtime_error("Cannot modify graph after compile");
684 :
685 : /** Insert the layer to the graph */
686 16992 : graph.addNode(layer);
687 8496 : }
688 :
689 : InPlaceType
690 2565 : NetworkGraph::canExecuteInPlace(const std::shared_ptr<LayerNode> &lnode) {
691 2565 : InPlaceType inplace_type = lnode->initializeInPlace();
692 :
693 2565 : if (inplace_type == InPlaceType::NONE) {
694 : return inplace_type;
695 : }
696 :
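 : // an input layer cannot run in-place when the model's activation tensor
 : // type is not FP32 (presumably because the externally provided FP32 input
 : // has to be converted into the model's tensor type first).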
697 2958 : if (lnode->getType() == InputLayer::type &&
698 2819 : !istrequal(getTensorType()[2], "FP32")) {
699 : return InPlaceType::NONE;
700 : }
701 :
702 1211 : if (lnode->getType() == MultiOutLayer::type) {
703 : return InPlaceType::RESTRICTING;
704 : }
705 :
706 : /** A case where it can operate in-place even if there is a multi-out type
707 : * input connection. */
708 1017 : if (inplace_type == InPlaceType::RESTRICTING) {
709 194 : for (size_t i = 0, num_node = lnode->getNumInputConnections(); i < num_node;
710 : ++i) {
711 97 : const std::string &input_name = lnode->getInputConnectionName(i);
712 194 : if (getLayerNode(input_name)->getInPlaceType() ==
713 : InPlaceType::RESTRICTING)
714 : return inplace_type;
715 : }
716 : return InPlaceType::NON_RESTRICTING;
717 : }
718 : /** A case where it cannot operate in-place if there is a multi-out type
719 : * input connection. */
720 : else {
721 : /** condition: NON_RESTRICTING */
722 1300 : for (size_t i = 0, num_node = lnode->getNumInputConnections(); i < num_node;
723 : ++i) {
724 384 : const std::string &input_name = lnode->getInputConnectionName(i);
725 768 : if (getLayerNode(input_name)->getInPlaceType() ==
726 : InPlaceType::RESTRICTING)
727 : return InPlaceType::NONE;
728 : }
729 : return inplace_type;
730 : }
731 : }
732 :
733 633 : void NetworkGraph::inPlaceOptimize() {
734 633 : if (optimize_memory) {
735 2957 : for (unsigned int idx = 0; idx < graph.size(); ++idx) {
736 2565 : auto const &lnode = getSortedLayerNode(idx);
737 2565 : lnode->setInPlaceType(canExecuteInPlace(lnode));
738 : }
739 : }
740 633 : }
741 :
742 : /**
743 : * @brief Set the Inplace Shared Memory Config By Layer object
744 : *
745 : * @param lnode layer node object
746 : * @param shared_var if the variable should be shared
747 : * @param shared_grad if the gradient should be shared
748 : */
749 : static void
750 1196 : setInplaceSharedMemoryConfigByLayer(const std::shared_ptr<LayerNode> &lnode,
751 : bool &shared_var, bool &shared_grad) {
752 : /** for multiout layer, variables are shared but gradients are not */
753 1196 : if (lnode->getType() == MultiOutLayer::type) {
754 196 : shared_var = true;
755 196 : shared_grad = false;
756 : } else {
757 1000 : shared_var = true;
758 1000 : shared_grad = true;
759 : }
760 :
761 : /**
762 : * @todo for layers which support in-place, both variables and gradients
763 : * will be shared.
764 : */
765 1196 : }
766 :
767 : std::vector<Var_Grad *>
768 4410 : NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
769 : const std::vector<Var_Grad *> &prev_inputs) {
770 4410 : const GraphNode &gnode = *lnode.get();
771 : std::vector<TensorDim> input_dims;
772 4410 : input_dims.reserve(prev_inputs.size());
773 4410 : std::transform(prev_inputs.begin(), prev_inputs.end(),
774 : std::back_inserter(input_dims),
775 : [](const Var_Grad *vg) { return vg->getDim(); });
776 :
777 : /** finalize the layer and get the final context */
778 4410 : auto init_context = lnode->finalize(input_dims, getTensorType(), exec_mode);
779 4407 : const auto &ct_engine = nntrainer::Engine::Global();
780 :
781 : /**
782 : * Request manager for either a pre-allocated output as input or a newly
783 : * allocated output. This is necessary for manager to know when this
784 : * output node is going to be used.
785 : */
786 : std::vector<std::string> input_names;
787 4407 : input_names.reserve(prev_inputs.size());
788 4407 : std::transform(
789 : prev_inputs.begin(), prev_inputs.end(), std::back_inserter(input_names),
790 4554 : [](auto const &vg) -> const auto & { return vg->getName(); });
791 : const std::vector<Var_Grad *> &inputs = tensor_manager->requestInputs(
792 4407 : gnode, init_context.getInputDimensions(), input_names);
793 :
794 : /** In-Place optimizations */
795 : /**
796 : * Request manager for either a pre-allocated input as output or a newly
797 : * allocated output. This is necessary for manager to know when this
798 : * output node is going to be used with in-place optimizations.
799 : */
800 4407 : auto out_specs = init_context.getOutSpecs();
801 :
802 : /// @note try move inplace control to finalize
803 4407 : bool shared_var = false, shared_grad = false;
804 :
805 4407 : if (lnode->getInPlaceType() != InPlaceType::NONE && lnode->supportInPlace()) {
806 1182 : setInplaceSharedMemoryConfigByLayer(lnode, shared_var, shared_grad);
807 :
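 : // for in-place layers the loop below rewrites each output spec into a
 : // READ_ONLY_VIEW that references the corresponding input (or the weight /
 : // tensor spec for WeightLayer / TensorLayer), so the output is expected to
 : // alias existing memory rather than receive its own allocation.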
808 2662 : for (unsigned int i = 0; i < out_specs.size(); ++i) {
809 : auto &s = out_specs.at(i);
810 1480 : if (shared_var) {
811 1480 : s.variable_spec.request_type =
812 : TensorSpecV2::RequestType::READ_ONLY_VIEW;
813 1480 : if (lnode->getType() == IdentityLayer::type) {
814 50 : s.variable_spec.reference_name = inputs[i]->getName();
815 50 : s.variable_spec.dim.setFormat(inputs[i]->getDim().getFormat());
816 1430 : } else if (lnode->getInPlaceDirection() == InPlaceDirection::RIGHT) {
817 0 : s.variable_spec.reference_name = inputs[1]->getName();
818 0 : s.variable_spec.dim.setFormat(inputs[1]->getDim().getFormat());
819 1430 : } else if (lnode->getType() == WeightLayer::type) {
820 : WeightSpec w_spec = init_context.getWeightsSpec()[i];
821 0 : s.variable_spec.reference_name = std::get<8>(w_spec);
822 : s.variable_spec.dim.setFormat(std::get<0>(w_spec).getFormat());
823 1430 : } else if (lnode->getType() == TensorLayer::type) {
824 : InitLayerContext::TensorSpec t_spec =
825 : init_context.getTensorsSpec()[i];
826 0 : s.variable_spec.reference_name = std::get<3>(t_spec);
827 : s.variable_spec.dim.setFormat(std::get<0>(t_spec).getFormat());
828 : } else {
829 1430 : s.variable_spec.reference_name = inputs[0]->getName();
830 1430 : s.variable_spec.dim.setFormat(inputs[0]->getDim().getFormat());
831 : }
832 : }
833 1480 : if (shared_grad && s.gradient_spec) {
834 984 : s.gradient_spec->request_type =
835 : TensorSpecV2::RequestType::READ_ONLY_VIEW;
836 984 : if (lnode->getType() == IdentityLayer::type) {
837 50 : s.gradient_spec->reference_name = inputs[i]->getGradientName();
838 50 : s.gradient_spec->dim.setFormat(inputs[i]->getDim().getFormat());
839 934 : } else if (lnode->getInPlaceDirection() == InPlaceDirection::RIGHT) {
840 0 : s.gradient_spec->reference_name = inputs[1]->getGradientName();
841 0 : s.gradient_spec->dim.setFormat(inputs[1]->getDim().getFormat());
842 934 : } else if (lnode->getType() == WeightLayer::type) {
843 : WeightSpec w_spec = init_context.getWeightsSpec()[i];
844 : s.gradient_spec->reference_name =
845 0 : std::get<8>(w_spec) + Var_Grad::grad_suffix;
846 : s.gradient_spec->dim.setFormat(std::get<0>(w_spec).getFormat());
847 934 : } else if (lnode->getType() == TensorLayer::type) {
848 : InitLayerContext::TensorSpec t_spec =
849 : init_context.getTensorsSpec()[i];
850 : s.gradient_spec->reference_name =
851 0 : std::get<3>(t_spec) + Var_Grad::grad_suffix;
852 : s.gradient_spec->dim.setFormat(std::get<0>(t_spec).getFormat());
853 : } else {
854 934 : s.gradient_spec->reference_name = inputs[0]->getGradientName();
855 934 : s.gradient_spec->dim.setFormat(inputs[0]->getDim().getFormat());
856 : }
857 : }
858 : }
859 : }
860 4407 : if (lnode->requireLabel()) {
861 630 : NNTR_THROW_IF(out_specs.size() != 1, std::invalid_argument)
862 : << "out specification size must be 1 for label layer for now, "
863 0 : << lnode->getName() << " out spec size: " << out_specs.size();
864 630 : NNTR_THROW_IF(out_specs[0].gradient_spec == nullptr, std::invalid_argument)
865 0 : << "label space does not exist for " << lnode->getName();
866 630 : out_specs[0].gradient_spec->request_type =
867 : TensorSpecV2::RequestType::PLACEHOLDER;
868 : }
869 :
870 : /// @note below needs to be enabled only for inference mode, but we need
871 : /// to decide whether to separate inference initialization from
872 : /// train initialization. This might not be worth optimizing because in
873 : /// general the output of a neural net is very small
874 4407 : if (lnode->getOutputConnections().size() == 0u) {
875 : std::for_each(out_specs.begin(), out_specs.end(),
876 : [this](VarGradSpecV2 &spec) {
877 638 : spec.variable_spec.additional_exec_order.push_back(
878 638 : std::get<0>(forward_iter_end->getExecutionOrder()));
879 : });
880 : }
881 :
882 8814 : if (lnode->getType() == RNNCellLayer::type or
883 8814 : lnode->getType() == LSTMCellLayer::type or
884 8766 : lnode->getType() == GRUCellLayer::type) {
885 : std::for_each(out_specs.begin(), out_specs.end(), [](VarGradSpecV2 &spec) {
886 104 : spec.variable_spec.ls = TensorLifespan::FORWARD_GRAD_LIFESPAN;
887 : });
888 : }
889 :
890 : const std::vector<Var_Grad *> &outputs = tensor_manager->requestTensors(
891 4407 : out_specs, Manager::TensorGroupType::OUTPUT, lnode->getExecutionOrder(),
892 8814 : lnode->getName());
893 :
894 : /** create shared weight names if requested */
895 : std::vector<std::string> shared_weight_names;
896 : std::vector<std::string> shared_tensor_names;
897 4407 : if (auto shared_node_str = lnode->getSharedFrom(); !shared_node_str.empty()) {
898 : /// @note below is commented but kept from quick fix to be referenced
899 : /// for later(#1707)
900 : // auto shared_node = getLayerNode(shared_node_str).get();
901 : // NNTR_THROW_IF(shared_node == nullptr, std::invalid_argument)
902 : // << "shared_node requested but it is not registered in the graph,
903 : // name:
904 : // "
905 : // << shared_node_str << " requested from " << lnode->getName();
906 : // NNTR_THROW_IF(shared_node->getType() != lnode->getType(),
907 : // std::invalid_argument)
908 : // << " shared_node and lnode type mismatch, source node type: "
909 : // << shared_node->getType() << " depedent node type: " <<
910 : // lnode->getType()
911 : // << " depedent node name: " << lnode->getName();
912 : // NNTR_THROW_IF(!shared_node->isFinalized(), std::invalid_argument)
913 : // << "shared node must be prior to the dependent node and it should
914 : // be
915 : // "
916 : // "finalized beforehand, shared node name: "
917 : // << shared_node_str << " dependent node name: " << lnode->getName();
918 : // auto num_weight = shared_node->getNumWeights();
919 : // shared_weight_names.reserve(num_weight);
920 : // for (auto i = 0u; i < num_weight; ++i) {
921 : // shared_weight_names.emplace_back(shared_node->getWeightName(i));
922 : // }
923 : // auto &rc = node->getRunContext();
924 :
925 : /// @fixme tensors should only be shared if the context explicitly requested
926 : /// to do so. This has to be added to the tensor spec, otherwise
927 : /// it will break many things
928 : const auto &t_specs = init_context.getTensorsSpec();
929 800 : for (auto i = 0u; i < t_specs.size(); ++i) {
930 488 : shared_tensor_names.emplace_back(std::get<3>(t_specs.at(i)));
931 : }
932 :
933 : const auto &w_specs = init_context.getWeightsSpec();
934 1960 : for (auto i = 0u; i < w_specs.size(); ++i) {
935 1648 : shared_weight_names.emplace_back(std::get<8>(w_specs.at(i)));
936 : }
937 : }
938 : lnode->setDataType(init_context.getWeightDataType(),
939 : init_context.getActivationDataType());
940 4407 : bool trainable = lnode->getTrainable();
941 4407 : if (exec_mode == ExecutionMode::INFERENCE)
942 : trainable = false;
943 :
944 8814 : auto context = ct_engine.getRegisteredContext(lnode->getComputeEngineType());
945 :
946 : auto ct_data = context->getContextData();
947 :
948 4407 : lnode->configureRunContext(
949 : // TODO: update weights spec for trainable based on layer trainable prop
950 8814 : tensor_manager->requestWeights(gnode, init_context.getWeightsSpec(),
951 : trainable, shared_weight_names),
952 : inputs, outputs,
953 8814 : tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
954 : trainable, shared_tensor_names),
955 : init_context.getLossScale(), ct_data);
956 :
957 8814 : return outputs;
958 4410 : }
959 :
960 : std::vector<Var_Grad *>
961 28 : NetworkGraph::refinalizeContext(const std::shared_ptr<LayerNode> &lnode,
962 : const std::vector<Var_Grad *> &prev_inputs) {
963 28 : const GraphNode &gnode = *lnode.get();
964 : std::vector<TensorDim> input_dims;
965 28 : input_dims.reserve(prev_inputs.size());
966 28 : std::transform(prev_inputs.begin(), prev_inputs.end(),
967 : std::back_inserter(input_dims),
968 : [](const Var_Grad *vg) { return vg->getDim(); });
969 :
970 : /** refinalize the layer and get the final context */
971 28 : auto init_context = lnode->refinalize(input_dims);
972 28 : const auto &ct_engine = nntrainer::Engine::Global();
973 :
974 : /**
975 : * Request manager for either a pre-allocated output as input or a newly
976 : * allocated output. This is necessary for manager to know when this
977 : * output node is going to be used.
978 : */
979 : std::vector<std::string> input_names;
980 28 : input_names.reserve(prev_inputs.size());
981 28 : std::transform(
982 : prev_inputs.begin(), prev_inputs.end(), std::back_inserter(input_names),
983 29 : [](auto const &vg) -> const auto & { return vg->getName(); });
984 : const std::vector<Var_Grad *> &inputs = tensor_manager->requestInputs(
985 28 : gnode, init_context.getInputDimensions(), input_names);
986 :
987 : /** In-Place optimizations */
988 : /**
989 : * Request manager for either a pre-allocated input as output or a newly
990 : * allocated output. This is necessary for manager to know when this
991 : * output node is going to be used with in-place optimizations.
992 : */
993 28 : auto out_specs = init_context.getOutSpecs();
994 : /// @note try move inplace control to finalize
995 28 : bool shared_var = false, shared_grad = false;
996 28 : if (lnode->getInPlaceType() != InPlaceType::NONE) {
997 14 : setInplaceSharedMemoryConfigByLayer(lnode, shared_var, shared_grad);
998 30 : for (unsigned int i = 0; i < out_specs.size(); ++i) {
999 : auto &s = out_specs.at(i);
1000 16 : if (shared_var) {
1001 16 : s.variable_spec.request_type =
1002 : TensorSpecV2::RequestType::READ_ONLY_VIEW;
1003 16 : if (lnode->getType() == IdentityLayer::type) {
1004 0 : s.variable_spec.reference_name = inputs[i]->getName();
1005 16 : } else if (lnode->getInPlaceDirection() == InPlaceDirection::RIGHT) {
1006 0 : s.variable_spec.reference_name = inputs[1]->getName();
1007 : } else {
1008 16 : s.variable_spec.reference_name = inputs[0]->getName();
1009 : }
1010 : }
1011 16 : if (shared_grad && s.gradient_spec) {
1012 10 : s.gradient_spec->request_type =
1013 : TensorSpecV2::RequestType::READ_ONLY_VIEW;
1014 10 : if (lnode->getType() == IdentityLayer::type) {
1015 0 : s.gradient_spec->reference_name = inputs[i]->getGradientName();
1016 10 : } else if (lnode->getInPlaceDirection() == InPlaceDirection::RIGHT) {
1017 : // @note With binary inputs, inputs[0] represents the left input
1018 : // tensor while inputs[1] represents the right input tensor. As a
1019 : // result, if the in-place direction is set to right, the in-place
1020 : // memory is assigned to inputs[1].
1021 0 : s.gradient_spec->reference_name = inputs[1]->getGradientName();
1022 : } else {
1023 10 : s.gradient_spec->reference_name = inputs[0]->getGradientName();
1024 : }
1025 : }
1026 : }
1027 : }
1028 28 : if (lnode->requireLabel()) {
1029 1 : NNTR_THROW_IF(out_specs.size() != 1, std::invalid_argument)
1030 : << "out specification size must be 1 for label layer for now, "
1031 0 : << lnode->getName() << " out spec size: " << out_specs.size();
1032 1 : NNTR_THROW_IF(out_specs[0].gradient_spec == nullptr, std::invalid_argument)
1033 0 : << "label space does not exist for " << lnode->getName();
1034 1 : out_specs[0].gradient_spec->request_type =
1035 : TensorSpecV2::RequestType::PLACEHOLDER;
1036 : }
1037 :
1038 : /// @note below needs to be enabled only for inference mode, but need
1039 : /// decision if we are going to separate inference initialization from
1040 : /// train initialization this might not worth optimize because in general
1041 : /// output of a neuralnet is very small
1042 28 : if (lnode->getOutputConnections().size() == 0u) {
1043 : std::for_each(out_specs.begin(), out_specs.end(),
1044 : [this](VarGradSpecV2 &spec) {
1045 1 : spec.variable_spec.additional_exec_order.push_back(
1046 1 : std::get<0>(forward_iter_end->getExecutionOrder()));
1047 : });
1048 : }
1049 :
1050 56 : if (lnode->getType() == RNNCellLayer::type or
1051 56 : lnode->getType() == LSTMCellLayer::type or
1052 56 : lnode->getType() == GRUCellLayer::type) {
1053 : std::for_each(out_specs.begin(), out_specs.end(), [](VarGradSpecV2 &spec) {
1054 0 : spec.variable_spec.ls = TensorLifespan::FORWARD_GRAD_LIFESPAN;
1055 : });
1056 : }
1057 :
1058 : const std::vector<Var_Grad *> &outputs = tensor_manager->requestTensors(
1059 28 : out_specs, Manager::TensorGroupType::OUTPUT, lnode->getExecutionOrder(),
1060 56 : lnode->getName());
1061 :
1062 : /** create shared weight names if requested */
1063 : std::vector<std::string> shared_weight_names;
1064 : std::vector<std::string> shared_tensor_names;
1065 28 : if (auto shared_node_str = lnode->getSharedFrom(); !shared_node_str.empty()) {
1066 : /// @note below is commented but kept from quick fix to be referenced
1067 : /// for later(#1707)
1068 : // auto shared_node = getLayerNode(shared_node_str).get();
1069 : // NNTR_THROW_IF(shared_node == nullptr, std::invalid_argument)
1070 : // << "shared_node requested but it is not registered in the graph,
1071 : // name:
1072 : // "
1073 : // << shared_node_str << " requested from " << lnode->getName();
1074 : // NNTR_THROW_IF(shared_node->getType() != lnode->getType(),
1075 : // std::invalid_argument)
1076 : // << " shared_node and lnode type mismatch, source node type: "
1077 : // << shared_node->getType() << " depedent node type: " <<
1078 : // lnode->getType()
1079 : // << " depedent node name: " << lnode->getName();
1080 : // NNTR_THROW_IF(!shared_node->isFinalized(), std::invalid_argument)
1081 : // << "shared node must be prior to the dependent node and it should
1082 : // be
1083 : // "
1084 : // "finalized beforehand, shared node name: "
1085 : // << shared_node_str << " dependent node name: " << lnode->getName();
1086 : // auto num_weight = shared_node->getNumWeights();
1087 : // shared_weight_names.reserve(num_weight);
1088 : // for (auto i = 0u; i < num_weight; ++i) {
1089 : // shared_weight_names.emplace_back(shared_node->getWeightName(i));
1090 : // }
1091 : // auto &rc = node->getRunContext();
1092 :
1093 : /// @fixme tensors should only be shared if the context explicitly requested
1094 : /// to do so. This has to be added to the tensor spec, otherwise
1095 : /// it will break many things
1096 : const auto &t_specs = init_context.getTensorsSpec();
1097 0 : for (auto i = 0u; i < t_specs.size(); ++i) {
1098 0 : shared_tensor_names.emplace_back(std::get<3>(t_specs.at(i)));
1099 : }
1100 :
1101 : const auto &w_specs = init_context.getWeightsSpec();
1102 0 : for (auto i = 0u; i < w_specs.size(); ++i) {
1103 0 : shared_weight_names.emplace_back(std::get<8>(w_specs.at(i)));
1104 : }
1105 : }
1106 :
1107 28 : auto weights = lnode->getRunContext().getWeights();
1108 :
1109 56 : auto context = ct_engine.getRegisteredContext(lnode->getComputeEngineType());
1110 :
1111 : auto ct_data = context->getContextData();
1112 :
1113 28 : lnode->configureRunContext(
1114 : // TODO: update weights spec for trainable based on layer trainable prop
1115 : weights, inputs, outputs,
1116 56 : tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
1117 28 : lnode->getTrainable(), shared_tensor_names),
1118 : init_context.getLossScale(), ct_data);
1119 :
1120 56 : return outputs;
1121 28 : }
1122 :
1123 : #ifdef ENABLE_TEST
1124 :
1125 : std::map<std::string, std::vector<unsigned int>>
1126 32 : NetworkGraph::getLayerExecutionOrders(const std::shared_ptr<LayerNode> &lnode) {
1127 32 : const auto &init_context = lnode->getInitContext();
1128 32 : auto out_specs = init_context.getOutSpecs();
1129 32 : auto weight_specs = init_context.getWeightsSpec();
1130 32 : auto tensor_specs = init_context.getTensorsSpec();
1131 :
1132 : std::map<std::string, std::vector<unsigned int>> exec_orders;
1133 :
1134 64 : for (auto &spec : out_specs) {
1135 64 : const auto &name = lnode->getName() + ":" + spec.variable_spec.name;
1136 32 : auto orders = tensor_manager->getTensorExecutionOrders(name, false);
1137 64 : exec_orders.insert({name, orders});
1138 : try {
1139 : auto orders_grad =
1140 32 : tensor_manager->getTensorExecutionOrders(name + ":grad", false);
1141 32 : exec_orders.insert({name + ":grad", orders_grad});
1142 32 : } catch (const std::exception &e) {
1143 0 : ml_logi("Cannot find grad tensor for %s:grad", name.c_str());
1144 : continue;
1145 0 : }
1146 32 : }
1147 :
1148 56 : for (auto &spec : weight_specs) {
1149 : const auto &name = std::get<const std::string>(spec);
1150 24 : auto orders = tensor_manager->getTensorExecutionOrders(name, true);
1151 48 : exec_orders.insert({name, orders});
1152 : try {
1153 : auto orders_grad =
1154 24 : tensor_manager->getTensorExecutionOrders(name + ":grad", false);
1155 18 : exec_orders.insert({name + ":grad", orders_grad});
1156 24 : } catch (const std::exception &e) {
1157 6 : ml_logi("Cannot find grad tensor for %s:grad", name.c_str());
1158 : continue;
1159 6 : }
1160 24 : }
1161 :
1162 32 : for (auto &spec : tensor_specs) {
1163 : const auto &name = std::get<const std::string>(spec);
1164 0 : auto orders = tensor_manager->getTensorExecutionOrders(name, false);
1165 0 : exec_orders.insert({name, orders});
1166 : try {
1167 : auto orders_grad =
1168 0 : tensor_manager->getTensorExecutionOrders(name + ":grad", false);
1169 0 : exec_orders.insert({name + ":grad", orders_grad});
1170 0 : } catch (const std::exception &e) {
1171 0 : ml_logi("Cannot find grad tensor for %s:grad", name.c_str());
1172 : continue;
1173 0 : }
1174 0 : }
1175 :
1176 32 : return exec_orders;
1177 32 : }
1178 :
1179 : #endif // ENABLE_TEST
1180 :
1181 622 : int NetworkGraph::initialize(ExecutionMode mode,
1182 : const std::vector<Connection> &model_input_names,
1183 : const std::vector<Connection> &model_label_names) {
1184 622 : exec_mode = mode;
1185 : tensor_manager->setExecutionMode(mode);
1186 : /**
1187 : * this contains the map from node name to its input tensor names
1188 : * @note: these input tensors have already been allocated
1189 : */
1190 : std::unordered_map<std::string, std::vector<Var_Grad *>> input_map;
1191 :
1192 : /** check if the given node is an input node */
1193 : auto is_input_node = [](const LayerNode *node) -> bool {
1194 9321 : return node->getInputConnections().empty();
1195 : };
1196 :
1197 4408 : for (unsigned int idx = 0; idx < graph.size(); ++idx) {
1198 : std::vector<Var_Grad *> inputs = {};
1199 4408 : auto const &lnode = getSortedLayerNode(idx);
1200 8816 : if (profile_keys.find(lnode->getType()) == profile_keys.end()) {
1201 : int event_key = 0;
1202 : PROFILE_TIME_REGISTER_EVENT(event_key, lnode->getType());
1203 5870 : profile_keys[lnode->getType()] = event_key;
1204 : }
1205 :
1206 : /**
1207 : * Set input dimension for all the layers.
1208 : * For input layer, as input dimension is known, set input tensor.
1209 : */
1210 4408 : if (!is_input_node(lnode.get())) {
1211 6870 : if (input_map.find(lnode->getName()) == input_map.end())
1212 0 : throw std::runtime_error("Cannot find input buffers for the node");
1213 10308 : inputs = input_map.at(lnode->getName());
1214 : }
1215 :
1216 : /**
1217 : * Initialize all the layers, allocate output tensors for each layer
1218 : * and add optimizer related weights for the layer
1219 : */
1220 4408 : const std::vector<Var_Grad *> &outputs = finalizeContext(lnode, inputs);
1221 :
1222 : /** no need to update input_map for the last layer */
1223 4405 : if (idx == graph.size() - 1)
1224 : break;
1225 :
1226 8348 : for (auto i = 0u, num_node = lnode->getNumOutputConnections(); i < num_node;
1227 : ++i) {
1228 4562 : auto conn = lnode->getOutputConnection(i);
1229 4562 : if (!conn) {
1230 16 : ml_logi("out connection not defined for %s, %u",
1231 : lnode->getName().c_str(), i);
1232 8 : continue;
1233 : }
1234 :
1235 4554 : auto sink_node = getLayerNode(conn->getName());
1236 : [[maybe_unused]] auto [it, b] =
1237 9108 : input_map.try_emplace({sink_node->getName(), {}});
1238 :
1239 9108 : NNTR_THROW_IF(sink_node->getInputConnectionName(conn->getIndex()) !=
1240 : lnode->getName(),
1241 : std::invalid_argument)
1242 0 : << "node pair does not match between " << lnode->getName() << ' '
1243 0 : << sink_node->getName();
1244 :
1245 4554 : auto &sink_tensors = it->second;
1246 4554 : sink_tensors.resize(sink_node->getNumInputConnections());
1247 4554 : sink_tensors[conn->getIndex()] = outputs[i];
1248 : }
1249 8813 : }
1250 :
1251 5024 : for (unsigned int idx = 0; idx < graph.size(); ++idx) {
1252 4405 : auto const &lnode = getSortedLayerNode(idx);
1253 4405 : auto &rc = lnode->getRunContext();
1254 : auto first_grad_access = std::get<1>(lnode->getExecutionOrder());
1255 : auto last_grad_access = std::get<3>(lnode->getExecutionOrder());
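 : // mark where each weight's gradient is first and last accessed in the
 : // execution order; applyGradients() above relies on the last-access flag
 : // to apply a shared gradient only once.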
1256 9247 : for (unsigned i = 0; i < rc.getNumWeights(); ++i) {
1257 4842 : if (!rc.weightHasGradient(i)) {
1258 : /// @todo this is duct taping that MUST BE REMOVED. We will need to
1259 : /// have an "is weight first access" kind of concept.
1260 510 : if (tensor_manager->isFirstAccess(
1261 510 : rc.getWeight(i).getName(),
1262 : std::get<0>(lnode->getExecutionOrder()), true)) {
1263 294 : rc.getWeightObject(i).setAsGradientFirstAccess();
1264 : }
1265 510 : if (tensor_manager->isLastAccess(rc.getWeight(i).getName(),
1266 : last_grad_access, true)) {
1267 286 : rc.getWeightObject(i).setAsGradientLastAccess();
1268 : }
1269 : } else {
1270 4332 : if (tensor_manager->isFirstAccess(rc.getWeightGrad(i).getName(),
1271 : first_grad_access)) {
1272 3724 : rc.getWeightObject(i).setAsGradientFirstAccess();
1273 : }
1274 : /**
1275 : * if the gradient is to be clipped by global norm, then the last
1276 : * access is by clipping itself. However, as clipping is not a layer
1277 : * and does not contain any weights, such weights never get assigned
1278 : * gradient_last_access. This is a quick hotfix.
1279 : * TODO: make an independent clipping layer which will execute at
1280 : * the end, and will share ownership of weights which it will clip.
1281 : * This will remove this hot fix, and also remove the checks of if
1282 : * weights require clipping.
1283 : */
1284 4332 : if (tensor_manager->isLastAccess(rc.getWeightGrad(i).getName(),
1285 4956 : last_grad_access) ||
1286 1248 : ((rc.isGradientClipByGlobalNorm(i) || rc.isMixedPrecision(i)) &&
1287 16 : tensor_manager->isSecondLastAccess(rc.getWeightGrad(i).getName(),
1288 : last_grad_access))) {
1289 3724 : rc.getWeightObject(i).setAsGradientLastAccess();
1290 : }
1291 : }
1292 : }
1293 : }
1294 :
1295 : /**** identify model input / output to be set externally later ****/
1296 970 : auto identify_as_model_input = [this](LayerNode *node) {
1297 970 : auto num_input = node->getNumInputs();
1298 970 : NNTR_THROW_IF(num_input != 1, std::invalid_argument)
1299 : << "Input layer is supposed to have exactly one input, but more then "
1300 : "one input detected, num inputs: "
1301 : << num_input;
1302 :
1303 970 : input_list.push_back(node->getInput(0).getName());
1304 970 : input_dims_.push_back(node->getInputDimensions()[0]);
1305 970 : };
1306 :
1307 4489 : auto is_label_node = [](LayerNode *node) { return node->requireLabel(); };
1308 :
1309 630 : auto identify_as_model_label = [this](LayerNode *node) {
1310 : /// @todo change this as lnode->getNumLabels of sorts
1311 630 : auto num_label = node->getNumOutputs();
1312 630 : NNTR_THROW_IF(!node->getOutputConnections().empty(), std::invalid_argument)
1313 : << "label layer is supposed to be a leaf for now";
1314 630 : NNTR_THROW_IF(num_label != 1, std::invalid_argument)
1315 : << "label layer is supposed to have exactly one label, but more then "
1316 : "one label detected, num labels: "
1317 : << num_label;
1318 :
1319 : /// @todo implement and use getLabel(0) instead.
1320 630 : output_list.push_back(node->getOutput(0).getName());
1321 630 : label_list.push_back(node->getOutputGrad(0).getName());
1322 630 : label_dims_.push_back(node->getOutputDimensions()[0]);
1323 630 : };
1324 :
1325 1238 : auto identify_external_tensors = [this](const std::vector<Connection> &conns,
1326 : auto &&pred, auto &&identify) {
1327 1238 : if (conns.empty()) {
1328 6568 : for (unsigned int i = 0; i < graph.size(); ++i) {
1329 11116 : auto lnode = getSortedLayerNode(i).get();
1330 5558 : if (!pred(lnode)) {
1331 4550 : continue;
1332 : }
1333 : /// when the name is empty, we identify every matching node; all of
1334 : /// them must have identical dimensions
1335 1008 : identify(lnode);
1336 : }
1337 : } else {
1338 820 : for (auto &conn : conns) {
1339 1184 : auto lnode = getLayerNode(conn.getName()).get();
1340 592 : NNTR_THROW_IF(!pred(lnode), std::invalid_argument)
1341 : << "given node is not of that kind, name: " << conn.getName();
1342 592 : identify(lnode);
1343 : }
1344 : unsigned int num_node_of_kind = 0;
1345 3480 : for (unsigned int i = 0; i < graph.size(); ++i) {
1346 6504 : auto lnode = getSortedLayerNode(i).get();
1347 3252 : if (!pred(lnode)) {
1348 2660 : continue;
1349 : }
1350 592 : num_node_of_kind++;
1351 : }
1352 228 : NNTR_THROW_IF(num_node_of_kind != conns.size(), std::invalid_argument)
1353 : << "conns given but there are not identified node of the kind, num "
1354 : "node of kind: "
1355 : << num_node_of_kind << " identifier size: " << conns.size();
1356 : }
1357 1857 : };
1358 :
1359 619 : identify_external_tensors(model_input_names, is_input_node,
1360 : identify_as_model_input);
1361 619 : identify_external_tensors(model_label_names, is_label_node,
1362 : identify_as_model_label);
1363 : /** mark the nodes which will be backwarded during the graph operation */
1364 : try {
1365 619 : markNodesForBackwarding();
1366 619 : backward_iter_end = computeBackwardEnd();
1367 0 : } catch (std::exception &e) {
1368 0 : ml_loge("Backwarding required from layer which doesn't support "
1369 : "backwarding: %s",
1370 : e.what());
1371 : return ML_ERROR_INVALID_PARAMETER;
1372 0 : }
1373 :
1374 : /** select weights which would require clipping of the gradients by global
1375 : * norm if any */
1376 1241 : lazy_weights = tensor_manager->getWeights([](const Weight *w) {
1377 4842 : return w->hasGradient() && w->isGradientLastAccess() &&
1378 : (w->isGradientClipByGlobalNorm() || w->isMixedPrecision());
1379 619 : });
1380 :
1381 619 : is_clip_grad = false;
1382 619 : for (auto w : lazy_weights) {
1383 4 : if (w->isGradientClipByGlobalNorm()) {
1384 4 : is_clip_grad = true;
1385 4 : break;
1386 : }
1387 : }
1388 : return ML_ERROR_NONE;
1389 : }
1390 :
1391 1 : int NetworkGraph::reinitialize(
1392 : const std::vector<Connection> &model_input_names,
1393 : const std::vector<Connection> &model_label_names) {
1394 : input_dims_.clear();
1395 : label_dims_.clear();
1396 1 : tensor_manager->reinitialize();
1397 :
1398 : /**
1399 : * this contains the map from node name to its input tensor names
1400 : * @note: these input tensors have already been allocated
1401 : */
1402 : std::unordered_map<std::string, std::vector<Var_Grad *>> input_map;
1403 :
1404 : /** check if the given node is an input node */
1405 : auto is_input_node = [](const LayerNode *node) -> bool {
1406 56 : return node->getInputConnections().empty();
1407 : };
1408 :
1409 28 : for (unsigned int idx = 0; idx < graph.size(); ++idx) {
1410 : std::vector<Var_Grad *> inputs = {};
1411 28 : auto const &lnode = getSortedLayerNode(idx);
1412 :
1413 56 : if (profile_keys.find(lnode->getType()) == profile_keys.end()) {
1414 : int event_key = 0;
1415 : PROFILE_TIME_REGISTER_EVENT(event_key, lnode->getType());
1416 0 : profile_keys[lnode->getType()] = event_key;
1417 : }
1418 :
1419 : /**
1420 : * Set input dimension for all the layers.
1421 : * For input layer, as input dimension is known, set input tensor.
1422 : */
1423 28 : if (!is_input_node(lnode.get())) {
1424 54 : if (input_map.find(lnode->getName()) == input_map.end())
1425 0 : throw std::runtime_error("Cannot find input buffers for the node");
1426 81 : inputs = input_map.at(lnode->getName());
1427 : }
1428 :
1429 : /**
1430 : * Reinitialize all the layers, allocate output tensors for each layer
1431 : * and add optimizer related weights for the layer
1432 : */
1433 28 : const std::vector<Var_Grad *> &outputs = refinalizeContext(lnode, inputs);
1434 :
1435 : /** no need to update input_map for the last layer */
1436 28 : if (idx == graph.size() - 1)
1437 : break;
1438 :
1439 56 : for (auto i = 0u, num_node = lnode->getNumOutputConnections(); i < num_node;
1440 : ++i) {
1441 29 : auto conn = lnode->getOutputConnection(i);
1442 29 : if (!conn) {
1443 0 : ml_logi("out connection not defined for %s, %u",
1444 : lnode->getName().c_str(), i);
1445 0 : continue;
1446 : }
1447 :
1448 29 : auto sink_node = getLayerNode(conn->getName());
1449 : [[maybe_unused]] auto [it, b] =
1450 58 : input_map.try_emplace({sink_node->getName(), {}});
1451 :
1452 58 : NNTR_THROW_IF(sink_node->getInputConnectionName(conn->getIndex()) !=
1453 : lnode->getName(),
1454 : std::invalid_argument)
1455 0 : << "node pair does not match between " << lnode->getName() << ' '
1456 0 : << sink_node->getName();
1457 :
1458 29 : auto &sink_tensors = it->second;
1459 29 : sink_tensors.resize(sink_node->getNumInputConnections());
1460 29 : sink_tensors[conn->getIndex()] = outputs[i];
1461 : }
1462 56 : }
1463 :
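 : /**
 :  * re-mark each weight's gradient first/last access flags based on the
 :  * execution orders assigned during refinalization
 :  */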
1464 29 : for (unsigned int idx = 0; idx < graph.size(); ++idx) {
1465 28 : auto const &lnode = getSortedLayerNode(idx);
1466 28 : auto &rc = lnode->getRunContext();
1467 : auto first_grad_access = std::get<1>(lnode->getExecutionOrder());
1468 : auto last_grad_access = std::get<3>(lnode->getExecutionOrder());
1469 46 : for (unsigned i = 0; i < rc.getNumWeights(); ++i) {
1470 18 : if (!rc.weightHasGradient(i)) {
1471 : /// @todo this is duct taping that MUST BE REMOVED. We will need to
1472 : /// have an "is weight first access" kind of concept.
1473 18 : if (tensor_manager->isFirstAccess(
1474 18 : rc.getWeight(i).getName(),
1475 : std::get<0>(lnode->getExecutionOrder()), true)) {
1476 18 : rc.getWeightObject(i).setAsGradientFirstAccess();
1477 : }
1478 18 : if (tensor_manager->isLastAccess(rc.getWeight(i).getName(),
1479 : last_grad_access, true)) {
1480 18 : rc.getWeightObject(i).setAsGradientLastAccess();
1481 : }
1482 : } else {
1483 0 : if (tensor_manager->isFirstAccess(rc.getWeightGrad(i).getName(),
1484 : first_grad_access)) {
1485 0 : rc.getWeightObject(i).setAsGradientFirstAccess();
1486 : }
1487 : /**
1488 : * if the gradient is to be clipped by global norm, then the last
1489 : * access is by clipping itself. However, as clipping is not a layer
1490 : * and does not contain any weights, such weights never get assigned
1491 : * gradient_last_access. This is a quick hotfix.
1492 : * TODO: make an independent clipping layer which will execute at
1493 : * the end, and will share ownership of weights which it will clip.
1494 : * This will remove this hotfix, and also remove the checks of whether
1495 : * weights require clipping.
1496 : */
1497 0 : if (tensor_manager->isLastAccess(rc.getWeightGrad(i).getName(),
1498 0 : last_grad_access) ||
1499 0 : (rc.isGradientClipByGlobalNorm(i) &&
1500 0 : tensor_manager->isSecondLastAccess(rc.getWeightGrad(i).getName(),
1501 : last_grad_access))) {
1502 0 : rc.getWeightObject(i).setAsGradientLastAccess();
1503 : }
1504 : }
1505 : }
1506 : }
1507 : /**** identify model input / output to be set externally later ****/
1508 1 : auto identify_as_model_input = [this](LayerNode *node) {
1509 1 : auto num_input = node->getNumInputs();
1510 1 : NNTR_THROW_IF(num_input != 1, std::invalid_argument)
1511 : << "Input layer is supposed to have exactly one input, but more than "
1512 : "one input detected, num inputs: "
1513 : << num_input;
1514 :
1515 : // input_list.push_back(node->getInput(0).getName());
1516 1 : input_dims_.push_back(node->getInputDimensions()[0]);
1517 1 : };
1518 :
1519 28 : auto is_label_node = [](LayerNode *node) { return node->requireLabel(); };
1520 :
1521 1 : auto identify_as_model_label = [this](LayerNode *node) {
1522 : /// @todo change this as lnode->getNumLabels of sorts
1523 1 : auto num_label = node->getNumOutputs();
1524 1 : NNTR_THROW_IF(!node->getOutputConnections().empty(), std::invalid_argument)
1525 : << "label layer is supposed to be a leaf for now";
1526 1 : NNTR_THROW_IF(num_label != 1, std::invalid_argument)
1527 : << "label layer is supposed to have exactly one label, but more than "
1528 : "one label detected, num labels: "
1529 : << num_label;
1530 :
1531 : /// @todo implement and use getLabel(0) instead.
1532 : // output_list.push_back(node->getOutput(0).getName());
1533 : // label_list.push_back(node->getOutputGrad(0).getName());
1534 1 : label_dims_.push_back(node->getOutputDimensions()[0]);
1535 1 : };
1536 :
1537 2 : auto identify_external_tensors = [this](const std::vector<Connection> &conns,
1538 : auto &&pred, auto &&identify) {
1539 2 : if (conns.empty()) {
1540 58 : for (unsigned int i = 0; i < graph.size(); ++i) {
1541 112 : auto lnode = getSortedLayerNode(i).get();
1542 56 : if (!pred(lnode)) {
1543 54 : continue;
1544 : }
1545 : /// when no connection names are given, every node of this kind is
1546 : /// identified; all of them must have identical dimensions
1547 2 : identify(lnode);
1548 : }
1549 : } else {
1550 0 : for (auto &conn : conns) {
1551 0 : auto lnode = getLayerNode(conn.getName()).get();
1552 0 : NNTR_THROW_IF(!pred(lnode), std::invalid_argument)
1553 : << "given node is not of that kind, name: " << conn.getName();
1554 0 : identify(lnode);
1555 : }
1556 : unsigned int num_node_of_kind = 0;
1557 0 : for (unsigned int i = 0; i < graph.size(); ++i) {
1558 0 : auto lnode = getSortedLayerNode(i).get();
1559 0 : if (!pred(lnode)) {
1560 0 : continue;
1561 : }
1562 0 : num_node_of_kind++;
1563 : }
1564 0 : NNTR_THROW_IF(num_node_of_kind != conns.size(), std::invalid_argument)
1565 : << "conns given but the number of identified nodes of the kind does "
1566 : "not match, num nodes of kind: "
1567 : << num_node_of_kind << " identifier size: " << conns.size();
1568 : }
1569 3 : };
1570 :
1571 1 : identify_external_tensors(model_input_names, is_input_node,
1572 : identify_as_model_input);
1573 1 : identify_external_tensors(model_label_names, is_label_node,
1574 : identify_as_model_label);
1575 :
1576 1 : return ML_ERROR_NONE;
1577 : }
1578 :
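 : /**
 :  * fill the named placeholder tensors: clear them when @a data is empty,
 :  * broadcast a single tensor to every name, otherwise match by index
 :  */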
1579 14518 : void NetworkGraph::setExternalTensors(const std::vector<Tensor> &data,
1580 : const std::vector<std::string> names) {
1581 : /// feed or clear the given external tensors
1582 29853 : for (unsigned int idx = 0; idx < names.size(); idx++) {
1583 15335 : if (data.empty())
1584 4725 : tensor_manager->fillPlaceholder(names[idx], Tensor());
1585 13760 : else if (data.size() == 1)
1586 : tensor_manager->fillPlaceholder(names[idx], data[0]);
1587 : else
1588 : tensor_manager->fillPlaceholder(names[idx], data[idx]);
1589 : }
1590 14518 : }
1591 :
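 : /**
 :  * validate that the number of inputs/labels matches the network
 :  * requirements, then feed them into the input and label placeholders
 :  */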
1592 7259 : void NetworkGraph::setInputsLabels(const std::vector<Tensor> &inputs,
1593 : const std::vector<Tensor> &labels) {
1594 7259 : NNTR_THROW_IF(labels.size() > 1 && labels.size() != label_list.size(),
1595 : std::invalid_argument)
1596 : << "label size does not match with the network requirements"
1597 : << " label size: " << labels.size()
1598 : << " requirements size: " << label_list.size();
1599 :
1600 7259 : NNTR_THROW_IF(inputs.size() > 1 && inputs.size() != input_list.size(),
1601 : std::invalid_argument)
1602 : << "input size does not match with the network requirements"
1603 : << " input size: " << inputs.size()
1604 : << " requirements size: " << input_list.size();
1605 :
1606 7259 : setExternalTensors(inputs, input_list);
1607 7259 : setExternalTensors(labels, label_list);
1608 7259 : }
1609 :
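 : /**
 :  * convenience overload: unwrap the shared tensor handles into plain
 :  * Tensor copies and delegate to the std::vector<Tensor> overload
 :  */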
1610 1148 : void NetworkGraph::setInputsLabels(sharedConstTensors &inputs,
1611 : sharedConstTensors &labels) {
1612 : std::vector<Tensor> ins;
1613 : std::transform(
1614 : inputs.begin(), inputs.end(), std::back_inserter(ins),
1615 : [](auto const &val) -> const auto & { return *val.get(); });
1616 :
1617 : std::vector<Tensor> labs;
1618 : std::transform(
1619 : labels.begin(), labels.end(), std::back_inserter(labs),
1620 : [](auto const &val) -> const auto & { return *val.get(); });
1621 :
1622 1148 : setInputsLabels(ins, labs);
1623 1148 : }
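 : /*
 :  * A hypothetical usage sketch for the overloads above (not part of the
 :  * original source): assuming `graph` is a compiled and initialized
 :  * NetworkGraph and `input_batch` / `label_batch` are Tensors whose
 :  * dimensions match the network's input and label dimensions, one
 :  * iteration could be fed as:
 :  *
 :  *   std::vector<Tensor> in = {input_batch};
 :  *   std::vector<Tensor> label = {label_batch};
 :  *   graph.setInputsLabels(in, label);
 :  */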
1624 :
1625 23 : std::vector<Tensor> NetworkGraph::getOutputTensors() const {
1626 : std::vector<Tensor> output_tensors;
1627 23 : output_tensors.reserve(output_list.size());
1628 :
1629 46 : for (auto const &name : output_list)
1630 23 : output_tensors.push_back(*tensor_manager->getTensor(name));
1631 :
1632 23 : return output_tensors;
1633 0 : }
1634 :
1635 5400 : void NetworkGraph::flushCache() { tensor_manager->flushCache(); }
1636 :
1637 94179 : void NetworkGraph::flushCacheExcept(unsigned int order) {
1638 94179 : tensor_manager->flushCacheExcept(order);
1639 94179 : }
1640 :
1641 0 : void NetworkGraph::LoadTensors(unsigned int order, unsigned int lookahead) {
1642 0 : tensor_manager->LoadTensors(order, lookahead);
1643 0 : }
1644 :
1645 0 : bool NetworkGraph::checkLoadComplete(unsigned int order) {
1646 0 : return tensor_manager->checkLoadComplete(order);
1647 : }
1648 :
1649 0 : bool NetworkGraph::inActive(unsigned int order) {
1650 0 : return tensor_manager->inActive(order);
1651 : }
1652 :
1653 0 : bool NetworkGraph::checkUnloadComplete(unsigned int order) {
1654 0 : return tensor_manager->checkUnloadComplete(order);
1655 : }
1656 :
1657 0 : void NetworkGraph::UnloadTensors(unsigned int order) {
1658 0 : tensor_manager->UnloadTensors(order);
1659 0 : }
1660 :
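 : /**
 :  * for every weight that has a gradient and is at its gradient's last
 :  * access, request optimizer variables (e.g. momentum buffers) from the
 :  * tensor manager; their dimensions are produced by the given callback
 :  */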
1661 616 : void NetworkGraph::requestOptimizerVariable(
1662 : std::function<std::vector<TensorDim>(const TensorDim &)> cb,
1663 : bool request_only_trainable) {
1664 6064 : for (auto const &w : tensor_manager->getWeights()) {
1665 4832 : if (w->isGradientLastAccess() && w->hasGradient()) {
1666 7444 : const TensorDim &dim = w->getDim();
1667 : std::vector<TensorDim> dims = cb(dim);
1668 11166 : w->setOptimizerVariables(tensor_manager->requestWeightOptimizerVariables(
1669 3722 : dims, w->getName(), ":opt", TensorLifespan::MAX_LIFESPAN,
1670 : w->isGradientClipByGlobalNorm(), w->isMixedPrecision(),
1671 : Initializer::ZEROS));
1672 3722 : }
1673 616 : }
1674 616 : }
1675 :
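 : /**
 :  * update the stored loss scale and propagate it to the run context of
 :  * every layer in the graph
 :  */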
1676 0 : void NetworkGraph::resetLossScale(float scale) {
1677 0 : loss_scale = scale;
1678 0 : for (auto iter = cbegin(); iter != cend(); iter++) {
1679 : auto &ln = *iter;
1680 0 : ln->getRunContext().setLossScale(scale);
1681 : }
1682 0 : }
1683 :
1684 : } /* namespace nntrainer */
|