// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
 *
 * @file layer_context.cpp
 * @date 26 July 2021
 * @see https://github.com/nnstreamer/nntrainer
 * @author Parichay Kapoor <pk.kapoor@samsung.com>
 * @author Debadri Samaddar <s.debadri@samsung.com>
 * @author Niket Agarwal <niket.a@samsung.com>
 * @bug No known bugs except for NYI items
 * @brief This is the layer context for each layer
 */

#include "nntrainer_error.h"
#include <functional>
#include <memory>
#include <tensor_wrap_specs.h>

#include <algorithm>
#include <fstream>
#include <iterator>
#include <layer_context.h>
#include <nntrainer_log.h>
#include <sstream>
#include <stdexcept>
#include <var_grad.h>

namespace nntrainer {

/**
 * @brief rename specification
 *
 * @param spec spec to rename
 * @param idx idx to suffix
 */
static void suffixSpec(VarGradSpecV2 &spec, unsigned int idx) {
  spec.variable_spec.name += std::to_string(idx);
  if (spec.gradient_spec) {
    spec.gradient_spec->name += std::to_string(idx) + Var_Grad::grad_suffix;
  }
}

InitLayerContext::InitLayerContext(
  const std::vector<TensorDim> &dim, const std::vector<bool> &req_out_connected,
  bool is_inplace_, const std::string &n, const std::string &prefix_,
  const float max_norm, std::array<std::string, 3> tensor_type_,
  const float loss_scale_, ml::train::ExecutionMode mode_,
  ml::train::LayerComputeEngine engine_) :
  input_dim(dim),
  is_inplace(is_inplace_),
  clip_by_global_norm(max_norm),
  output_specs(),
  req_out_is_connected(req_out_connected),
  name(n),
  prefix(prefix_),
  tensor_type(tensor_type_),
  loss_scale(loss_scale_),
  mode(mode_),
  engine(engine_) {
  NNTR_THROW_IF(!validate(), std::invalid_argument)
    << "Invalid init context name: " << name
    << " num inputs: " << getNumInputs();
  if (prefix.empty())
    prefix = name; // default prefix is the name
}

unsigned int InitLayerContext::getNumRequestedOutputs() const {
  return req_out_is_connected.size();
}

void InitLayerContext::setOutputDimensions(
  const std::vector<TensorDim> &out_dim) {
  std::vector<VarGradSpecV2> specs;
  specs.reserve(out_dim.size());

  for (unsigned i = 0u, sz = out_dim.size(); i < sz; ++i) {
    auto spec = outSpec(out_dim.at(i));

    specs.push_back(std::move(spec));
  }

  requestOutputs(std::move(specs));
}
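
/**
 * Illustrative usage sketch (an assumption about a typical layer, not code
 * taken from this file): a layer whose output shape mirrors its input shape
 * could set its output dimensions in finalize() roughly as follows. MyLayer
 * and the call to getInputDimensions() are illustrative.
 *
 *   void MyLayer::finalize(InitLayerContext &context) {
 *     // one dimension per requested output slot
 *     context.setOutputDimensions(context.getInputDimensions());
 *   }
 */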

VarGradSpecV2 InitLayerContext::outSpec(const TensorDim &dim,
                                        const std::string &name,
                                        TensorLifespan ls,
                                        TensorLifespan grad_ls) {
  VarGradSpecV2 spec;
  spec.variable_spec.dim = dim;
  spec.variable_spec.name = name;
  spec.variable_spec.ls = ls;
  spec.gradient_spec = std::make_unique<TensorSpecV2>(spec.variable_spec);
  spec.gradient_spec->ls = grad_ls;

  return spec;
}
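
/**
 * Illustrative sketch (assumed usage, not code taken from this file): a layer
 * that wants non-default lifespans for an output can build the spec itself
 * and hand it to requestOutputs() instead of calling setOutputDimensions().
 * Here out_dim is a placeholder for the computed output dimension and
 * ls / grad_ls stand for TensorLifespan values chosen by the layer.
 *
 *   std::vector<VarGradSpecV2> specs;
 *   VarGradSpecV2 spec = context.outSpec(out_dim, "out", ls, grad_ls);
 *   specs.push_back(std::move(spec));
 *   context.requestOutputs(std::move(specs));
 */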

void InitLayerContext::requestOutputs(std::vector<VarGradSpecV2> &&out_specs) {
  NNTR_THROW_IF(out_specs.size() < getNumRequestedOutputs(),
                std::invalid_argument)
    << "number of output specifications set is smaller than the number of "
       "output tensor slots requested, num output specifications: "
    << out_specs.size() << " slots to fill: " << getNumRequestedOutputs()
    << " context name: " << name;
  NNTR_THROW_IF(output_specs.size(), std::invalid_argument)
    << "output specification already set, cannot set twice. Check if output "
       "is already requested elsewhere";
  output_specs.reserve(out_specs.size());

  auto is_dangled = [this](unsigned int idx) {
    return req_out_is_connected.size() <= idx || !req_out_is_connected[idx];
  };

  for (unsigned i = 0u, sz = out_specs.size(); i < sz; ++i) {
    auto &spec = out_specs.at(i);
    suffixSpec(spec, i);
    if (is_dangled(i)) {
      ml_logw("given output is being dangled: %s in context: %s",
              spec.variable_spec.name.c_str(), name.c_str());
      spec.gradient_spec = nullptr;
    }
    output_specs.push_back(std::move(spec));
  }
}

const std::vector<VarGradSpecV2> &InitLayerContext::getOutSpecs() const {
  return output_specs;
}

RunLayerContext::RunLayerContext(const std::string &name, bool trainable,
                                 float l, bool is_inplace_, float loss_scale_,
                                 std::shared_ptr<ContextData> ct_data_,
                                 bool restore_, const std::vector<Weight *> &w,
                                 const std::vector<Var_Grad *> &in,
                                 const std::vector<Var_Grad *> &out,
                                 const std::vector<Var_Grad *> &t) :
  ct_data(ct_data_),
  loss(l),
  is_inplace(is_inplace_),
  loss_scale(loss_scale_),
  restoreData(restore_),
  weights(w),
  inputs(in),
  outputs(out),
  tensors(t) {
  std::get<props::Name>(props).set(name);
  std::get<props::Trainable>(props).set(trainable);
  NNTR_THROW_IF(!readyToUse(), std::invalid_argument)
    << "run context is not ready to use upon creation";

  if (!validate())
    throw std::invalid_argument("Creating invalid run context");
}

/**
 * @brief Get the Weight tensor object
 *
 * @param idx Identifier of the weight
 * @return Tensor& Reference to the weight tensor
 */
Tensor &RunLayerContext::getWeight(unsigned int idx) const {
  return weights[idx]->getVariableRef();
}

/**
 * @brief Get the Weight Gradient tensor object
 *
 * @param idx Identifier of the weight
 * @return Tensor& Reference to the weight grad tensor
 */
Tensor &RunLayerContext::getWeightGrad(unsigned int idx) const {
  if (!weights[idx]->hasGradient())
    throw std::invalid_argument(
      "Requesting gradient for a non-trainable weight.");
  return weights[idx]->getGradientRef();
}

/**
 * @brief Get the FP32 copy of the Weight tensor object
 *
 * @param idx Identifier of the weight
 * @return Tensor& Reference to the FP32 weight tensor
 */
Tensor &RunLayerContext::getWeightFP32(unsigned int idx) const {
  return weights[idx]->getVariableFP32Ref();
}

/**
 * @brief Get the Weight Optimizer Variable tensor object
 *
 * @param idx Identifier of the weight
 * @param jdx Identifier of the optimizer variable
 * @return Tensor& Reference to the weight optimizer variable tensor
 */
Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
                                         unsigned int jdx) const {
  return weights[idx]->getOptimizerVariableRef(jdx);
}

/**
 * @brief Get the number of Weight Optimizer Variable tensor objects
 *
 * @param idx Identifier of the weight
 * @return unsigned int Number of the weight optimizer variables
 */
unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
  return weights[idx]->getNumOptVariable();
}
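
/**
 * Illustrative sketch (assumed optimizer-side usage, not code from this
 * file): walking the optimizer variables attached to the idx-th weight, e.g.
 * the momentum and variance tensors of an Adam-like optimizer.
 *
 *   for (unsigned int jdx = 0; jdx < context.getNumWeightOptVar(idx); ++jdx) {
 *     Tensor &opt_var = context.getWeightOptVar(idx, jdx);
 *     // update opt_var alongside the weight gradient ...
 *   }
 */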

/**
 * @brief Get regularization loss for the weight
 *
 * @param idx Identifier of the weight
 * @return float Value of the loss
 */
float RunLayerContext::getWeightRegularizationLoss(unsigned int idx) const {
  return weights[idx]->getRegularizationLoss();
}

/**
 * @brief Get the Weight name
 *
 * @param idx Identifier of the weight
 * @return name of the weight
 */
const std::string &RunLayerContext::getWeightName(unsigned int idx) const {
  return weights[idx]->getName();
}

/**
 * @brief check if the weight has gradient
 *
 * @param idx Identifier of the weight
 * @return true if weight has gradient, else false
 */
bool RunLayerContext::weightHasGradient(unsigned int idx) const {
  return weights[idx]->hasGradient();
}
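
/**
 * Illustrative sketch (assumed usage in a layer's gradient pass, not code
 * from this file): gradient computation typically skips weights that have no
 * gradient slot, e.g. non-trainable weights.
 *
 *   for (unsigned int i = 0; i < context.getNumWeights(); ++i) {
 *     if (!context.weightHasGradient(i))
 *       continue;
 *     const Tensor &w = context.getWeight(i);
 *     Tensor &djdw = context.getWeightGrad(i);
 *     // accumulate the derivative with respect to w into djdw ...
 *   }
 */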

/**
 * @brief Get the Output tensor object
 *
 * @param idx Identifier of the output
 * @return Tensor& Reference to the output tensor
 */
Tensor &RunLayerContext::getOutput(unsigned int idx) {
  return outputs[idx]->getVariableRef();
}

const Tensor &RunLayerContext::getOutput(unsigned int idx) const {
  return outputs[idx]->getVariableRef();
}

/**
 * @brief Get the Output Grad tensor object
 *
 * @param idx Identifier of the output
 * @return Tensor Read-only output grad tensor
 */
const Tensor RunLayerContext::getOutputGrad(unsigned int idx) const {
  if (!outputs[idx]->hasGradient()) {
    return Tensor(outputs[idx]->getDim(), true, Initializer::ZEROS);
  }
  return const_cast<RunLayerContext *>(this)->getOutputGradUnsafe(idx);
}

/**
 * @brief check if the output has gradient
 *
 * @param idx Identifier of the output
 * @return true if output has gradient, else false
 */
bool RunLayerContext::outputHasGradient(unsigned int idx) const {
  return outputs[idx]->hasGradient();
}

/**
 * @brief Get the Output Grad tensor object
 *
 * @param idx Identifier of the output
 * @return Tensor& Reference to the output grad tensor
 *
 * @note recommended to NOT use this function as a layer developer but rather
 * use getOutputGrad().
 */
Tensor &RunLayerContext::getOutputGradUnsafe(unsigned int idx) {
  return outputs[idx]->getGradientRef();
}

/**
 * @brief Get the incoming Derivative tensor object
 *
 * @param idx Identifier of the output
 * @return Tensor the incoming derivative; a zero-initialized tensor if the
 * output has no gradient
 */
const Tensor RunLayerContext::getIncomingDerivative(unsigned int idx) const {
  return getOutputGrad(idx);
}

/**
 * @brief Get the Input tensor object
 *
 * @param idx Identifier of the input
 * @return Tensor& Reference to the input tensor
 */
Tensor &RunLayerContext::getInput(unsigned int idx) {
  return inputs[idx]->getVariableRef();
}

const Tensor &RunLayerContext::getInput(unsigned int idx) const {
  return inputs[idx]->getVariableRef();
}

/**
 * @brief Get the Input Grad tensor object
 *
 * @param idx Identifier of the input
 * @return Tensor& Reference to the input grad tensor
 */
Tensor &RunLayerContext::getInputGrad(unsigned int idx) {
  if (!inputs[idx]->hasGradient()) {
    throw std::invalid_argument(
      "Requesting gradient for a non-trainable tensor.");
  }

  return inputs[idx]->getGradientRef();
}

/**
 * @brief check if the input has gradient
 *
 * @param idx Identifier of the input
 * @return true if input has gradient, else false
 */
bool RunLayerContext::inputHasGradient(unsigned int idx) const {
  return inputs[idx]->hasGradient();
}

/**
 * @brief Get the outgoing Derivative tensor object
 *
 * @param idx Identifier of the input
 * @return Tensor& Reference to the input derivative tensor
 */
Tensor &RunLayerContext::getOutgoingDerivative(unsigned int idx) {
  return getInputGrad(idx);
}
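
/**
 * Illustrative sketch (assumed layer-side usage, not code from this file):
 * the typical data flow for a single-input, single-output layer. MyLayer is
 * a hypothetical layer implementation.
 *
 *   void MyLayer::forwarding(RunLayerContext &context, bool training) {
 *     const Tensor &in = context.getInput(0);
 *     Tensor &out = context.getOutput(0);
 *     // compute out from in ...
 *   }
 *
 *   void MyLayer::calcDerivative(RunLayerContext &context) {
 *     const Tensor deriv = context.getIncomingDerivative(0);
 *     Tensor &ret = context.getOutgoingDerivative(0);
 *     // compute ret from deriv ...
 *   }
 */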

/**
 * @brief Get the Tensor object
 *
 * @param idx Identifier of the tensor
 * @return Tensor& Reference to the tensor
 */
Tensor &RunLayerContext::getTensor(unsigned int idx) {
  return tensors[idx]->getVariableRef();
}

/**
 * @brief Get the Tensor object
 *
 * @param idx Identifier of the tensor
 * @return Tensor& Reference to the tensor
 */
const Tensor &RunLayerContext::getTensor(unsigned int idx) const {
  return tensors[idx]->getVariableRef();
}

/**
 * @brief Get the Tensor Grad object
 *
 * @param idx Identifier of the tensor
 * @return Tensor& Reference to the tensor grad tensor
 */
Tensor &RunLayerContext::getTensorGrad(unsigned int idx) {
  if (!tensors[idx]->hasGradient())
    throw std::invalid_argument(
      "Requesting gradient for a non-trainable tensor.");
  return tensors[idx]->getGradientRef();
}

/**
 * @brief Get the Tensor Grad object
 *
 * @param idx Identifier of the tensor
 * @return Tensor& Reference to the tensor grad tensor
 */
const Tensor &RunLayerContext::getTensorGrad(unsigned int idx) const {
  if (!tensors[idx]->hasGradient())
    throw std::invalid_argument(
      "Requesting gradient for a non-trainable tensor.");
  return tensors[idx]->getGradientRef();
}

/**
 * @brief check if the tensor has gradient
 *
 * @param idx Identifier of the tensor
 * @return true if tensor has gradient, else false
 */
bool RunLayerContext::tensorHasGradient(unsigned int idx) const {
  return tensors[idx]->hasGradient();
}

bool RunLayerContext::isWeightDependent(unsigned int idx) const {
  return weights[idx]->isDependent();
}

bool RunLayerContext::isGradientFirstAccess(unsigned int idx) const {
  return weights[idx]->isGradientFirstAccess();
}

bool RunLayerContext::isGradientLastAccess(unsigned int idx) const {
  return weights[idx]->isGradientLastAccess();
}

bool RunLayerContext::isGradientClipByGlobalNorm(unsigned int idx) const {
  return weights[idx]->isGradientClipByGlobalNorm();
}

bool RunLayerContext::isMixedPrecision(unsigned int idx) const {
  return weights[idx]->isMixedPrecision();
}

/**
 * @brief Get the tensor name
 *
 * @param idx Identifier of the tensor
 * @return name of the tensor
 */
const std::string &RunLayerContext::getTensorName(unsigned int idx) const {
  return tensors[idx]->getName();
}

/**
 * @brief Get the number of output tensor objects
 *
 * @return unsigned int number of output tensors
 */
unsigned int RunLayerContext::getNumOutputs() const { return outputs.size(); }

/**
 * @brief Get the number of input tensor objects
 *
 * @return unsigned int number of input tensors
 */
unsigned int RunLayerContext::getNumInputs() const { return inputs.size(); }

/**
 * @brief Get the number of weight tensor objects
 *
 * @return unsigned int number of weight tensors
 */
unsigned int RunLayerContext::getNumWeights() const { return weights.size(); }

/**
 * @brief Get the number of requested tensor objects
 *
 * @return unsigned int number of requested tensors
 */
unsigned int RunLayerContext::getNumTensors() const { return tensors.size(); }

/**
 * @brief Set the batch for the run context
 *
 * @param batch Updated batch size
 */
void RunLayerContext::setBatch(unsigned int batch) {
  for (auto &vg : inputs)
    vg->setBatchSize(batch);
  for (auto &vg : outputs)
    vg->setBatchSize(batch);
}

/**
 * @brief Update the batch size of a requested tensor
 *
 * @param idx index of the tensor (identifier)
 * @param batch Updated batch size
 */
void RunLayerContext::updateTensor(unsigned int idx, unsigned int batch) {
  tensors[idx]->setBatchSize(batch);
}

void RunLayerContext::updateInput(unsigned int idx, TensorDim dimension) {
  inputs[idx]->updateDimension(dimension);
}

void RunLayerContext::updateOutput(unsigned int idx, TensorDim dimension) {
  outputs[idx]->updateDimension(dimension);
}

void RunLayerContext::updateTensor(unsigned int idx, TensorDim dimension) {
  tensors[idx]->updateDimension(dimension);
}

/**
 * @brief Get weight object for the weights
 *
 * @param idx index of the weight (identifier)
 * @return weight object
 */
Weight &RunLayerContext::getWeightObject(unsigned int idx) {
  return *weights[idx];
}

/**
 * @brief check if the label is available
 *
 * @param idx Identifier of the output
 * @return true if label is available else false
 */
bool RunLayerContext::isLabelAvailable(unsigned int idx) const {
  return outputs[idx]->getGradientRef().isAllocated();
}

/**
 * @brief Get label tensor
 *
 * @param idx Identifier of the output
 * @return Tensor& Reference to the label tensor
 */
Tensor &RunLayerContext::getLabel(unsigned int idx) {
  if (isLabelAvailable(idx))
    return outputs[idx]->getGradientRef();
  else {
    std::stringstream ss;
    ss << "Requesting label of index: " << idx << " for " << getName()
       << " does not exist";
    throw std::invalid_argument(ss.str().c_str());
  }
}
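
/**
 * Illustrative sketch (assumed loss-layer usage, not code from this file):
 * labels live in the gradient slot of the corresponding output, so access is
 * usually guarded by isLabelAvailable().
 *
 *   if (context.isLabelAvailable(0)) {
 *     const Tensor &label = context.getLabel(0);
 *     // compute the loss of getOutput(0) against label ...
 *   }
 */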

/**
 * @brief check if run context is set and is ready to use
 *
 * @return true if ready, else false
 */
bool RunLayerContext::readyToUse() const {
  /**
   * assumption:
   * 1. there must be at least 1 input
   * 2. the setter set everything at once
   */
  if (inputs.empty())
    return false;
  return !inputs[0]->getVariable().empty();
}

/**
 * @brief validates the run context after run
 *
 * @return true if valid, else false
 */
bool RunLayerContext::validate(bool skip_input, bool skip_label) {
  /**
   * @note a common mistake when using run_context is re-assigning the tensor
   * references which leads to nasty bugs. This validation ensures that the
   * tensors are not set mistakenly by verifying their unique names
   */
  bool ret = true;
#ifdef DEBUG
  std::function<bool(const Var_Grad *, bool)> matcher;

  if (tensor_map.empty() || !tensor_map[inputs[0]->getName()]) {
    auto filler = [this](const auto &vec) {
      for (auto const &val : vec) {
        if (val->getVariableRef().getTensorType().data_type ==
            TensorDim::DataType::FP32) {
          tensor_map[val->getName()] = val->getVariableRef().getData();
          tensor_map[val->getGradientName()] = val->getGradientRef().getData();
        } else if (val->getVariableRef().getTensorType().data_type ==
                   TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
          tensor_map[val->getName()] =
            val->getVariableRef().template getData<_FP16>();
          tensor_map[val->getGradientName()] =
            val->getGradientRef().template getData<_FP16>();
#else
          throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
        }
      }
    };

    /** fill the tensor map for the first validation */
    filler(weights);
    filler(inputs);
    filler(outputs);
    filler(tensors);
  }

  matcher = [this](const Var_Grad *val, bool skip_grad) -> bool {
    if (val->getName().empty() ||
        (val->hasGradient() && val->getGradientName().empty()))
      return false;

    if (tensor_map.find(val->getName()) == tensor_map.end())
      /**
       * Disabled because of in-place input layer. Enable this later.
       * tensor_map[val->getName()] != val->getVariableRef().getData())
       */
      return false;

    if (skip_grad &&
        (tensor_map.find(val->getGradientName()) == tensor_map.end()))
      return false;

    return true;
  };

  auto matcher_w = [this, &matcher](const std::vector<Weight *> &vec) {
    return std::all_of(vec.begin(), vec.end(),
                       std::bind(matcher, std::placeholders::_1, false));
  };

  auto matcher_vw = [this, &matcher](const std::vector<Var_Grad *> &vec,
                                     bool skip_grad = false) {
    return std::all_of(vec.begin(), vec.end(),
                       std::bind(matcher, std::placeholders::_1, skip_grad));
  };

  /** match the tensor map from the next validations */
  ret =
    matcher_w(weights) & matcher_vw(tensors) & matcher_vw(outputs, skip_label);
  if (!skip_input)
    ret &= matcher_vw(inputs);
#endif

  return ret;
}

} // namespace nntrainer