Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
4 : *
5 : * @file layer_context.h
6 : * @date 10 June 2021
7 : * @see https://github.com/nnstreamer/nntrainer
8 : * @author Parichay Kapoor <pk.kapoor@samsung.com>
9 : * @author Debadri Samaddar <s.debadri@samsung.com>
10 : * @author Niket Agarwal <niket.a@samsung.com>
11 : * @bug No known bugs except for NYI items
12 : * @brief This is the layer context for each layer
13 : */
14 :
15 : #ifndef __LAYER_CONTEXT_H__
16 : #define __LAYER_CONTEXT_H__
17 :
18 : #include <memory>
19 : #include <vector>
20 :
21 : #include <common_properties.h>
22 : #include <layer.h>
23 : #include <tensor.h>
24 : #include <tensor_dim.h>
25 : #include <tensor_wrap_specs.h>
26 : #include <weight.h>
27 :
28 : namespace nntrainer {
29 :
30 : class Var_Grad;
31 : class ContextData;
32 :
33 : /**
34 : * @class Layer Context class for all layers
35 : * @brief Class for Layer context
36 : *
37 : * @details This provides for the layer initialization. This context does not
38 : * contain any structures that allow memory allocation or support allocating
39 : * any new memory, but rather only supports storing specifications based on
40 : * which memory will be allocated later.
41 : */
42 : class InitLayerContext {
43 : public:
44 : /**
45 : * @brief Construct a new Init Layer Context object
46 : *
47 : * @param dim Input dimensions for the layer
48 : * @param req_out_connected bool vector to tell if each requested output is
49 : * connected to other layers or not
50 : * @param is_inplace_ true if the context can run in-place
51 : * @param n name of the layer
52 : * @param prefix_ prefix of the layer
53 : * @param max_norm max norm value for clipping gradients by global norm
54 : * @param tensor_type_ array of tensor format, weight data type, and
55 : * activation data type
56 : * @param loss_scale loss scale value for mixed precision training
57 : * @param mode execution mode.
58 : */
59 : InitLayerContext(
60 : const std::vector<TensorDim> &dim,
61 : const std::vector<bool> &req_out_connected, bool is_inplace_,
62 : const std::string &n = "", const std::string &prefix_ = "",
63 : const float max_norm = 0.0,
64 : std::array<std::string, 3> tensor_type_ = {"NCHW", "FP32", "FP32"},
65 : const float loss_scale = 1.0,
66 : ml::train::ExecutionMode mode = ml::train::ExecutionMode::TRAIN,
67 : ml::train::LayerComputeEngine engine = ml::train::LayerComputeEngine::CPU);
68 : /**
69 : * @brief get Tensor Format of Layer
70 : *
71 : * @return Tensor Format of the layer
72 : */
73 : TensorDim::Format getFormat() {
74 : return str_converter<enum_class_prop_tag, nntrainer::TensorFormatInfo>::
75 3412 : from_string(tensor_type[0]);
76 : };
77 :
78 : /**
79 : * @brief get Tensor DataType of the Weight
80 : *
81 : * @return Tensor DataType of the Weight
82 : */
83 : TensorDim::DataType getWeightDataType() {
84 : return str_converter<enum_class_prop_tag, nntrainer::TensorDataTypeInfo>::
85 11806 : from_string(tensor_type[1]);
86 : };
87 :
88 : /**
89 : * @brief get Tensor DataType of the Activation
90 : *
91 : * @return Tensor DataType of the Activation
92 : */
93 : TensorDim::DataType getActivationDataType() {
94 : return str_converter<enum_class_prop_tag, nntrainer::TensorDataTypeInfo>::
95 14719 : from_string(tensor_type[2]);
96 : };
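  /*
   * Usage sketch (illustrative only): inside a hypothetical Layer::finalize(),
   * these getters describe how the layer should type and lay out its tensors.
   * The local variable names below are assumptions, not part of this header.
   *
   *   TensorDim::Format fmt = context.getFormat();                  // e.g. NCHW
   *   TensorDim::DataType w_type = context.getWeightDataType();     // e.g. FP32
   *   TensorDim::DataType a_type = context.getActivationDataType(); // e.g. FP16
   */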
97 :
98 : /**
99 : * @brief get Layer Compute Engine Type
100 : *
101 : * @return compute engine type of the layer
102 : */
103 0 : ml::train::LayerComputeEngine getComputeEngineType() { return engine; };
104 :
105 : /**
106 : * @brief get the name of the layer
107 : *
108 : * @return name of the layer
109 : */
110 2 : const std::string &getName() const { return name; }
111 :
112 : /**
113 : * @brief get Execution Mode
114 : *
115 : * @return Mode Execution Mode : ml::train::ExecutionMode::INFERENCE |
116 : * ml::train::ExecutionMode::TRAIN
117 : */
118 : const ml::train::ExecutionMode &getExecutionMode() const { return mode; }
119 :
120 : /**
121 : * @brief Get the number of inputs for the layer
122 : *
123 : * @return unsigned int number of inputs
124 : */
125 2326 : unsigned int getNumInputs() const { return input_dim.size(); }
126 :
127 : /**
128 : * @brief Get the number of requested outputs for the layer
129 : *
130 : * @return unsigned int number of requested outputs
131 : */
132 : unsigned int getNumRequestedOutputs() const;
133 :
134 : /**
135 : * @brief Get the Input Dimensions object
136 : *
137 : * @return const std::vector<TensorDim>& Input dimensions
138 : */
139 1869 : const std::vector<TensorDim> &getInputDimensions() const { return input_dim; }
140 :
141 : /**
142 : * @brief Retrieves the data type of input tensor at the given index
143 : *
144 : * @return The data type of the input tensor
145 : */
146 : const TensorDim::DataType getInputDataType(int idx) const {
147 : return input_dim[idx].getDataType();
148 : }
149 :
150 : /**
151 : * @brief Get the Mutable Input Dimensions object
152 : *
153 : * @return std::vector<TensorDim>& Input dimensions
154 : */
155 : std::vector<TensorDim> &getMutableInputDimensions() { return input_dim; }
156 :
157 : /**
158 : * @brief Set Data Type for Input Dimensions
159 : *
160 : * @param ty data type to set
161 : */
162 : void setInputDataType(TensorDim::DataType ty) {
163 : for (auto &d : input_dim)
164 : d.setDataType(ty);
165 : }
166 :
167 : /**
168 : * @brief Set the Dim Flag to retrieve effective dimension
169 : *
170 : * @param dim_flag_ dimension bit to calculate, rightmost is width
171 : */
172 : void
173 : setEffDimFlagInputDimension(unsigned int idx,
174 : const std::bitset<TensorDim::MAXDIM> &dim_flag_) {
175 654 : input_dim[idx].setEffDimFlag(dim_flag_);
176 652 : }
177 :
178 : /**
179 : * @brief Set the dynamic Dim Flag to retrieve dynamic dimension (that can
180 : * change during running)
181 : *
182 : * @param dim_flag_ dimension bit to calculate, rightmost is width
183 : */
184 : void
185 : setDynDimFlagInputDimension(unsigned int idx,
186 : const std::bitset<TensorDim::MAXDIM> &dim_flag_) {
187 654 : input_dim[idx].setDynDimFlag(dim_flag_);
188 652 : }
189 :
190 : /**
191 : * @brief Set the Output Dimensions object
192 : *
193 : * @param out_dim the output dimension to set to
194 : */
195 : void setOutputDimensions(const std::vector<TensorDim> &out_dim);
196 :
197 : /**
198 : * @brief Request a new weight for the layer
199 : *
200 : * @param dim dimension of Variable of the weight
201 : * @param init initializer for the weight
202 : * @param reg regularizer for the weight
203 : * @param reg_const regularization constant for the weight
204 : * @param name name of the weight
205 : * @param trainable if the weight is trainable (require gradient or not)
206 : * @param is_virtual if the weight is virtual (not allocate)
207 : * @return unsigned int index of the weight for its getter
208 : *
209 : * @todo Consider providing a guarantee that the returned indices will always
210 : * start from 0 and will always be incremental.
211 : */
212 5232 : unsigned int requestWeight(const TensorDim &dim, const Initializer init,
213 : const WeightRegularizer reg, const float reg_const,
214 : const float decay, const std::string &name,
215 : bool trainable = true, bool is_virtual = false,
216 : unsigned int out_axis = 3) {
217 :
218 : /** @note : We assume the gradient type is the same as the activation data
219 : * type. */
220 5232 : TensorDim dim_g(dim);
221 :
222 : dim_g.setDataType(getActivationDataType());
223 :
224 5232 : weights_spec.emplace_back(
225 5232 : dim, dim_g, init, reg, reg_const, decay, clip_by_global_norm, trainable,
226 5232 : prefix + ":" + name, out_axis, loss_scale,
227 10464 : (getWeightDataType() != ml::train::TensorDim::DataType::FP32),
228 : is_virtual);
229 5232 : return weights_spec.size() - 1;
230 : }
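  /*
   * Usage sketch (illustrative only): a fully connected layer might request its
   * weight like the following in finalize(); the dimension, the initializer,
   * and the "weight" name are hypothetical layer-side choices.
   *
   *   TensorDim w_dim(1, 1, in_width, unit);
   *   w_dim.setDataType(context.getWeightDataType());
   *   unsigned int weight_idx = context.requestWeight(
   *     w_dim, Initializer::XAVIER_UNIFORM, WeightRegularizer::NONE,
   *     1.0f, 0.0f, "weight", true);
   */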
231 :
232 : /**
233 : * @brief Request a new weight for the layer
234 : *
235 : * @param dim dimension of Variable of the weight
236 : * @param dim_g dimension of Gradient of the weight
237 : * @param init initializer for the weight
238 : * @param reg regularizer for the weight
239 : * @param reg_const regularization constant for the weight
240 : * @param name name of the weight
241 : * @param trainable if the weight is trainable (require gradient or not)
242 : * @return unsigned int index of the weight for its getter
243 : *
244 : * @todo Consider providing a guarantee that the returned indices will always
245 : * start from 0 and will always be incremental.
246 : */
247 108 : unsigned int requestWeight(const TensorDim &dim, const TensorDim &dim_g,
248 : const Initializer init,
249 : const WeightRegularizer reg, const float reg_const,
250 : const float decay, const std::string &name,
251 : bool trainable = true, unsigned int out_axis = 3,
252 : bool is_virtual = false) {
253 :
254 : /** @note : We assume the gradient type is the same as the activation data
255 : * type. */
256 108 : weights_spec.emplace_back(
257 108 : dim, dim_g, init, reg, reg_const, decay, clip_by_global_norm, trainable,
258 108 : prefix + ":" + name, out_axis, loss_scale,
259 216 : (getWeightDataType() != ml::train::TensorDim::DataType::FP32),
260 : is_virtual);
261 108 : return weights_spec.size() - 1;
262 : }
263 :
264 : /**
265 : * @brief Request a new weight for the layer
266 : *
267 : * @param spec tensor spec
268 : * @return unsigned int index of the weight for its getter
269 : *
270 : * @todo Consider providing a guarantee that the returned indices will always
271 : * start from 0 and will always be incremental.
272 : */
273 : unsigned int requestWeight(const WeightSpec &spec) {
274 2 : weights_spec.emplace_back(spec);
275 : return weights_spec.size() - 1;
276 : }
277 :
278 : /**
279 : * @brief Request a new tensor for the layer
280 : *
281 : * @param dim dimension of the tensor
282 : * @param trainable if the tensor is trainable (require gradient or not)
283 : * @param name name of the tensor
284 : * @param lifespan lifespan of the tensor
285 : * @param private_ if the custom tensor should not be shared and is only for sole use
286 : * @return unsigned int index of the tensor for its getter
287 : *
288 : * @todo Consider providing a guarantee that the returned indices will always
289 : * start from 0 and will always be incremental.
290 : */
291 3013 : unsigned int requestTensor(
292 : const TensorDim &dim, const std::string &name,
293 : const Initializer init = Initializer::NONE, bool trainable = false,
294 : TensorLifespan lifespan = TensorLifespan::ITERATION_LIFESPAN,
295 : bool private_ = true,
296 : ml::train::LayerComputeEngine engine = ml::train::LayerComputeEngine::CPU) {
297 3013 : const auto &prefix_ = private_ ? this->name : this->prefix;
298 6026 : tensors_spec.emplace_back(dim, init, trainable, prefix_ + ":" + name,
299 : lifespan, engine);
300 3013 : return tensors_spec.size() - 1;
301 : }
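  /*
   * Usage sketch (illustrative only): requesting a forward-only scratch tensor;
   * the shape and the "hidden" name are assumptions.
   *
   *   TensorDim tmp_dim(1, 1, 4, 8);
   *   tmp_dim.setDataType(context.getActivationDataType());
   *   unsigned int tmp_idx = context.requestTensor(
   *     tmp_dim, "hidden", Initializer::NONE, false,
   *     TensorLifespan::FORWARD_FUNC_LIFESPAN);
   */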
302 :
303 : /**
304 : * @brief Specification of the tensors
305 : *
306 : */
307 : typedef VarGradSpec TensorSpec;
308 :
309 : /**
310 : * @brief Request a new tensor for the layer
311 : *
312 : * @param spec tensor spec
313 : * @return unsigned int index of the tensor for its getter
314 : *
315 : * @todo Consider providing a guarantee that the returned indices will always
316 : * start from 0 and will always be incremental.
317 : */
318 : unsigned int requestTensor(const TensorSpec &spec) {
319 0 : tensors_spec.emplace_back(spec);
320 : return tensors_spec.size() - 1;
321 : }
322 :
323 : /**
324 : * @brief Get the current weights spec
325 : *
326 : * @return The current weights spec
327 : */
328 136 : const std::vector<WeightSpec> &getWeightsSpec() const { return weights_spec; }
329 :
330 : /**
331 : * @brief Get the number of requested weights
332 : *
333 : * @return The current number of requested weights
334 : */
335 : unsigned int getNumWeights() const { return weights_spec.size(); }
336 :
337 : /**
338 : * @brief Get the current tensors spec
339 : *
340 : * @return The current tensors spec
341 : */
342 136 : const std::vector<TensorSpec> &getTensorsSpec() const { return tensors_spec; }
343 :
344 : /**
345 : * @brief Get the number of requested tensors objects
346 : *
347 : * @return unsigned int number of requested tensors
348 : */
349 : unsigned int getNumTensors() const { return tensors_spec.size(); }
350 :
351 : /**
352 : * @brief create var grad specification with output default
353 : *
354 : * @param dim dimension
355 : * @param name name
356 : * @param ls variable lifespan
357 : * @param grad_ls gradient lifespan
358 : * @return VarGradSpecV2 var grad specification
359 : */
360 : static VarGradSpecV2
361 : outSpec(const TensorDim &dim, const std::string &name = "out",
362 : TensorLifespan ls = TensorLifespan::FORWARD_FUNC_LIFESPAN,
363 : TensorLifespan grad_ls = TensorLifespan::CALC_GRAD_DERIV_LIFESPAN);
364 :
365 : /**
366 : * @brief request outputs
367 : *
368 : * @param out_specs pack of out specification, name will be automatically
369 : * indexed to prevent name clash
370 : */
371 : void requestOutputs(std::vector<VarGradSpecV2> &&out_specs);
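  /*
   * Usage sketch (illustrative only): a layer producing a single output of
   * dimension out_dim (hypothetical) could build and register its output
   * specification as follows.
   *
   *   std::vector<VarGradSpecV2> out_specs;
   *   out_specs.push_back(InitLayerContext::outSpec(out_dim));
   *   context.requestOutputs(std::move(out_specs));
   */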
372 :
373 : /**
374 : * @brief Get the Out Specs object
375 : *
376 : * @return std::vector<VarGradSpecV2> out specification
377 : */
378 : const std::vector<VarGradSpecV2> &getOutSpecs() const;
379 :
380 : /**
381 : * @brief Validate the context
382 : *
383 : * @return true if validated, else false
384 : * @note this must be called before passing a context to a layer for finalize
385 : */
386 5050 : bool validate() {
387 5050 : if (input_dim.empty()) {
388 : return false;
389 : }
390 :
391 11412 : for (auto const &dim : input_dim) {
392 6362 : if (dim.getDataLen() == 0) {
393 : return false;
394 : }
395 : }
396 :
397 5050 : if (name.empty()) {
398 : return false;
399 : }
400 :
401 : return true;
402 : }
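  /*
   * Usage sketch (illustrative only): a caller building the context is expected
   * to validate it before handing it to Layer::finalize(); the error handling
   * below is an assumption, not mandated by this header.
   *
   *   if (!context.validate())
   *     throw std::invalid_argument("init context is not complete");
   *   layer->finalize(context);
   */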
403 :
404 : /**
405 : * @brief check if the layer is expected to run in-place
406 : *
407 : * @return true if in-place, else false
408 : */
409 434 : bool getInPlace() const { return is_inplace; }
410 :
411 : /**
412 : * @brief get the initial value of the loss scale. This is passed to
413 : * RunLayerContext and updated there
414 : *
415 : * @return loss_scale
416 : */
417 4435 : float getLossScale() const { return loss_scale; }
418 :
419 : /**
420 : * @brief get whether mixed precision training is enabled. If the weight type
421 : * is not FP32, it is mixed precision training.
422 : *
423 : * @return true if it is mixed training
424 : */
425 : bool isMixedTraining() { return !istrequal(tensor_type[1], "FP32"); }
426 :
427 : private:
428 : std::vector<TensorDim> input_dim; /**< Input dimensions for the layer */
429 : bool is_inplace; /**< if the layer is expected to run in-place */
430 : float clip_by_global_norm; /**< max norm value for clip by norm */
431 :
432 : std::vector<VarGradSpecV2> output_specs; /**< Specification for the output */
433 : std::vector<WeightSpec> weights_spec; /**< Specification for the weights */
434 : std::vector<TensorSpec>
435 : tensors_spec; /**< Specification for the var_grad (trainable/non-trainable
436 : variables) */
437 :
438 : std::vector<bool> req_out_is_connected;
439 : /**< a bool vector to tell if requested out is actually connected to others */
440 : std::string name; /**< name of the layer */
441 : std::string prefix; /**< prefix of the layer */
442 : std::array<std::string, 3> tensor_type;
443 : float loss_scale; /**< loss_scale value */
444 : ml::train::ExecutionMode mode;
445 : ml::train::LayerComputeEngine engine;
446 : };
447 :
448 : /**
449 : * @class Layer Context class for all layers
450 : * @brief Class for Layer context
451 : *
452 : * @details This provides for the layer execution. Unlike InitLayerContext,
453 : * this context contains structures with memory already allocated, or the
454 : * support to allocate new memory, rather than only storing specifications
455 : * based on which memory will be allocated later.
456 : *
457 : * @todo Check the caller of the getTensor() and set restrictions on the tensors
458 : * to be accessed based on which function is requesting it.
459 : */
460 : class RunLayerContext {
461 : public:
462 : /**
463 : * @brief Construct a new Run Layer Context object
464 : *
465 : */
466 : RunLayerContext() :
467 : loss(0.0), is_inplace(false), loss_scale(1.0), restoreData(false) {}
468 :
469 : /**
470 : * @brief Construct a new Run Layer Context object
471 : *
472 : */
473 : RunLayerContext(const std::string &name, bool is_inplace_) :
474 : RunLayerContext() {
475 : is_inplace = is_inplace_;
476 : std::get<props::Name>(props).set(name);
477 : }
478 :
479 : /**
480 : * @brief Construct a new Run Layer Context object
481 : *
482 : */
483 : RunLayerContext(const std::string &name, bool is_inplace_,
484 : float loss_scale_) :
485 : RunLayerContext() {
486 : is_inplace = is_inplace_;
487 : std::get<props::Name>(props).set(name);
488 : loss_scale = loss_scale_;
489 : }
490 :
491 : /**
492 : * @brief Construct a new Run Layer Context object
493 : *
494 : * @param name name of the layer
495 : * @param trainable if the layer is trainable
496 : * @param l loss of the layer
497 : * @param is_inplace_ execution in-place of the layer
498 : * @param loss_scale_ loss scale of the layer
499 : * @param w weights of the layer
500 : * @param in inputs of the layer
501 : * @param out outputs of the layer
502 : * @param t extra tensors of the layer
503 : */
504 : RunLayerContext(const std::string &name, bool trainable, float l,
505 : bool is_inplace_, float loss_scale_,
506 : std::shared_ptr<ContextData> ct_data, bool restoreData_,
507 : const std::vector<Weight *> &w,
508 : const std::vector<Var_Grad *> &in,
509 : const std::vector<Var_Grad *> &out,
510 : const std::vector<Var_Grad *> &t);
511 :
512 : /**
513 : * @brief Get the Weight tensor object
514 : *
515 : * @param w output tensor set from the weight variable
516 : * @param idx Identifier of the weight
518 : */
519 : void getWeight(Tensor &w, unsigned int idx) {
520 : Tensor &t_w = weights[idx]->getVariableRef();
521 :
522 : if (t_w.getDataType() == Tdatatype::FP32 ||
523 : t_w.getDataType() == Tdatatype::FP16 ||
524 : t_w.getDataType() == Tdatatype::BCQ ||
525 : t_w.getDataType() == Tdatatype::Q4_K) {
526 : w = t_w;
527 : return;
528 : }
529 :
530 : unsigned int base_idx = 0;
531 : Tdatatype o_t = getOutput(base_idx).getDataType();
532 :
533 : if (w.empty()) {
534 : TensorDim d = t_w.getDim();
535 : d.setDataType(o_t);
536 : w = Tensor(d, true);
537 : }
538 :
539 : return;
540 : }
541 :
542 : /**
543 : * @brief Get the Weight tensor object
544 : *
545 : * @param idx Identifier of the weight
546 : * @return Tensor& Reference to the weight tensor
547 : */
548 : Tensor &getWeight(unsigned int idx) const;
549 :
550 : /**
551 : * @brief Get the Weight Gradient tensor object
552 : *
553 : * @note this method returns the fresh gradient to be filled
554 : * @param idx Identifier of the weight
555 : * @return Tensor& Reference to the weight grad tensor
556 : */
557 : Tensor &getWeightGrad(unsigned int idx) const;
558 :
559 : /**
560 : * @brief Get the FP32 copy of the Weight tensor object
561 : *
562 : * @param idx Identifier of the weight
563 : * @return Tensor& Reference to the FP32 weight tensor
564 : */
565 : Tensor &getWeightFP32(unsigned int idx) const;
566 :
567 : /**
569 : * @brief Get the Weight Optimizer Variable tensor object
570 : *
571 : * @param idx Identifier of the weight
572 : * @param jdx Identifier of the weight optimizer variable
573 : * @return Tensor& Reference to the weight optimizer variable tensor
574 : */
575 : Tensor &getWeightOptVar(unsigned int idx, unsigned int jdx) const;
576 :
577 : /**
578 : * @brief Get the Weight name
579 : *
580 : * @param idx Identifier of the weight
581 : * @return name of the weight
582 : */
583 : const std::string &getWeightName(unsigned int idx) const;
584 :
585 : /**
586 : * @brief check if the weight has gradient
587 : *
588 : * @param idx Identifier of the weight
589 : * @return true if weight has gradient, else false
590 : */
591 : bool weightHasGradient(unsigned int idx) const;
592 :
593 : /**
594 : * @brief Get the Output tensor object
595 : *
596 : * @param idx Identifier of the output
597 : * @return Tensor& Reference to the output tensor
598 : */
599 : Tensor &getOutput(unsigned int idx);
600 :
601 : /**
602 : * @brief Get the Output tensor object
603 : *
604 : * @param idx Identifier of the output
605 : * @return Tensor& Reference to the output tensor
606 : */
607 : const Tensor &getOutput(unsigned int idx) const;
608 :
609 : /**
610 : * @brief Get the Output Grad tensor object
611 : *
612 : * @param idx Identifier of the output
613 : * @return Read-only output grad tensor; if the output does not have a
614 : * gradient, a temporary tensor initialized to zero is returned
615 : */
616 : const Tensor getOutputGrad(unsigned int idx) const;
617 :
618 : /**
619 : * @brief Get the Output Grad tensor object
620 : *
621 : * @param idx Identifier of the output
622 : * @return Tensor& Reference to the output grad tensor, this is valid only if
623 : * the given output is trainable
624 : *
625 : * @note recommended to NOT use this function as a layer developer but rather
626 : * use getOutputGrad().
627 : */
628 : Tensor &getOutputGradUnsafe(unsigned int idx);
629 :
630 : /**
631 : * @brief check if the output has gradient
632 : *
633 : * @param idx Identifier of the output
634 : * @return true if the output has gradient, else false
635 : */
636 : bool outputHasGradient(unsigned int idx) const;
637 :
638 : /**
639 : * @brief Get the incoming Derivative tensor object
640 : *
641 : * @param idx Identifier of the output
642 : * @return Tensor output derivative tensor; if the derivative does not have a
643 : * gradient, a temporary tensor initialized to zero is returned
644 : */
645 : const Tensor getIncomingDerivative(unsigned int idx) const;
646 :
647 : /**
648 : * @brief Get the Input tensor object
649 : *
650 : * @param idx Identifier of the input
651 : * @return Tensor& Reference to the input tensor
652 : */
653 : Tensor &getInput(unsigned int idx);
654 :
655 : /**
656 : * @brief Get the Input tensor object
657 : *
658 : * @param idx Identifier of the input
659 : * @return Tensor& Reference to the input tensor
660 : */
661 : const Tensor &getInput(unsigned int idx) const;
662 :
663 : /**
664 : * @brief Get the Input Grad tensor object
665 : *
666 : * @param idx Identifier of the input
667 : * @return Tensor& Reference to the input grad tensor
668 : */
669 : Tensor &getInputGrad(unsigned int idx);
670 :
671 : /**
672 : * @brief check if the input has gradient
673 : *
674 : * @param idx Identifier of the input
675 : * @return true if the input has gradient, else false
676 : */
677 : bool inputHasGradient(unsigned int idx) const;
678 :
679 : /**
680 : * @brief Get the outgoing Derivative tensor object
681 : *
682 : * @param idx Identifier of the input
683 : * @return Tensor& Reference to the input derivative tensor
684 : */
685 : Tensor &getOutgoingDerivative(unsigned int idx);
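  /*
   * Usage sketch (illustrative only): a single-input, single-output layer
   * typically touches its tensors through this context; the index 0 and the
   * local names are assumptions.
   *
   *   // in forwarding()
   *   const Tensor &in = context.getInput(0);
   *   Tensor &out = context.getOutput(0);
   *
   *   // in calcDerivative()
   *   const Tensor incoming = context.getIncomingDerivative(0);
   *   Tensor &outgoing = context.getOutgoingDerivative(0);
   */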
686 :
687 : /**
688 : * @brief Get the Tensor object
689 : *
690 : * @param idx Identifier of the tensor
691 : * @return Tensor& Reference to the tensor
692 : */
693 : Tensor &getTensor(unsigned int idx);
694 :
695 : /**
696 : * @brief Get the Tensor object
697 : *
698 : * @param idx Identifier of the tensor
699 : * @return Tensor& Reference to the tensor
700 : */
701 : const Tensor &getTensor(unsigned int idx) const;
702 :
703 : /**
704 : * @brief Get the Tensor Grad object
705 : *
706 : * @param idx Identifier of the tensor
707 : * @return Tensor& Reference to the tensor grad tensor
708 : */
709 : Tensor &getTensorGrad(unsigned int idx);
710 :
711 : /**
712 : * @brief Get the Tensor Grad object
713 : *
714 : * @param idx Identifier of the tensor
715 : * @return Tensor& Reference to the tensor grad tensor
716 : */
717 : const Tensor &getTensorGrad(unsigned int idx) const;
718 :
719 : /**
720 : * @brief check if the tensor has gradient
721 : *
722 : * @param idx Identifier of the tensor
723 : * @return true if tensor has gradient, else false
724 : */
725 : bool tensorHasGradient(unsigned int idx) const;
726 :
727 : /**
728 : * @brief check if the weight is borrowed from others so it is dependent
729 : *
730 : * @param idx index
731 : * @return bool true if the weight is borrowed from outside
732 : */
733 : bool isWeightDependent(unsigned int idx) const;
734 :
735 : /**
736 : * @brief check current gradient is first access
737 : * @note for now, it is equivalent to weight first access, so this value is
738 : * accessible for non-trainable weights as well. This is in terms of execution
739 : * order.
740 : *
741 : * @param idx index
742 : * @return bool true if first access
743 : */
744 : bool isGradientFirstAccess(unsigned int idx) const;
745 :
746 : /**
747 : * @brief check current gradient is last access
748 : * @note for now, it is equivalent to weight last access, so this value is
749 : * accessible for non-trainable weights as well. This is in terms of execution
750 : * order.
751 : *
752 : * @param idx index
753 : * @return bool true if last access
754 : */
755 : bool isGradientLastAccess(unsigned int idx) const;
756 :
757 : /**
758 : * @brief check if the gradient is to be clipped by global norm
759 : *
760 : * @param idx index
761 : * @return bool true if it is to be clipped else false
762 : */
763 : bool isGradientClipByGlobalNorm(unsigned int idx) const;
764 :
765 : /**
766 : * @brief check if the weight is mixed precision
767 : *
768 : * @param idx index
769 : * @return bool true if it is mixed precision
770 : */
771 : bool isMixedPrecision(unsigned int idx) const;
772 :
773 : /**
774 : * @brief Get the tensor name
775 : *
776 : * @param idx Identifier of the tensor
777 : * @return name of the tensor
778 : */
779 : const std::string &getTensorName(unsigned int idx) const;
780 :
781 : /**
782 : * @brief Get the number of Outputs tensor objects
783 : *
784 : * @return unsigned int number of output tensors
785 : */
786 : unsigned int getNumOutputs() const;
787 :
788 : /**
789 : * @brief Get the number of inputs tensor objects
790 : *
791 : * @return unsigned int number of input tensors
792 : */
793 : unsigned int getNumInputs() const;
794 :
795 : /**
796 : * @brief Get the number of weights tensor objects
797 : *
798 : * @return unsigned int number of weight tensors
799 : */
800 : unsigned int getNumWeights() const;
801 :
802 : /**
803 : * @brief Get the Number of Weight Optimizer Variable tensor object
804 : *
805 : * @param idx Identifier of the weight
806 : * @return unsigned int Number of the weight optimizer variable
807 : */
808 : unsigned int getNumWeightOptVar(unsigned int idx) const;
809 :
810 : /**
811 : * @brief Get the number of requested tensors objects
812 : *
813 : * @return unsigned int number of requested tensors
814 : */
815 : unsigned int getNumTensors() const;
816 : /**
817 : * @brief Set the batch for the run context
818 : *
819 : * @param batch Update batch size
820 : */
821 : void setBatch(unsigned int batch);
822 :
823 : /**
824 : * @brief Update the batch size for a requested tensor
825 : *
826 : * @param idx index of the tensor (identifier)
827 : * @param batch Updated batch size
828 : */
829 : void updateTensor(unsigned int idx, unsigned int batch);
830 :
831 : /**
832 : * @brief Update the dimensions for a requested input
833 : *
834 : * @param idx index of the input (identifier)
835 : * @param dim dimension to be updated
836 : */
837 : void updateInput(unsigned int idx, TensorDim dim);
838 :
839 : /**
840 : * @brief Update the dimensions for a requested output
841 : *
842 : * @param idx index of the output (identifier)
843 : * @param dim dimension to be updated
844 : */
845 : void updateOutput(unsigned int idx, TensorDim dim);
846 :
847 : /**
848 : * @brief Update the dimensions for a requested tensor
849 : *
850 : * @param idx index of the tensor (identifier)
851 : * @param dim dimension to be updated
852 : */
853 : void updateTensor(unsigned int idx, TensorDim dim);
854 :
855 : /**
856 : * @brief Get weight object for the weights
857 : *
858 : * @param idx index of the weight (identifier)
859 : * @return weight object
860 : */
861 : Weight &getWeightObject(unsigned int idx);
862 :
863 : /**
864 : * @brief check if the label is available
865 : *
866 : * @param idx Identifier of the input
867 : * @return true if label is available else false
868 : */
869 : bool isLabelAvailable(unsigned int idx) const;
870 :
871 : /**
872 : * @brief Get label tensor
873 : *
874 : * @param idx Identifier of the input
875 : * @return Tensor& Reference to the label tensor
876 : */
877 : Tensor &getLabel(unsigned int idx);
878 :
879 : /**
880 : * @brief update loss by the layer
881 : *
882 : * @param val updated loss value
883 : * @note loss value is only used for loss layers. For non-loss layers, setting
884 : * this value will not change the behavior of the model.
885 : */
886 6141 : void setLoss(float val) { loss = val; }
887 :
888 : /**
889 : * @brief get loss of the layer
890 : *
891 : * @return loss of the layer
892 : * @note does not include the regularization loss.
893 : */
894 6844 : float getLoss() const { return loss; }
895 :
896 : /**
897 : * @brief get regularization loss of the layer
898 : *
899 : * @return regularization loss of the layer
900 : */
901 27303 : float getRegularizationLoss() const {
902 : float loss_ = 0;
903 48848 : for (unsigned int idx = 0; idx < getNumWeights(); idx++) {
904 21545 : loss_ += getWeightRegularizationLoss(idx);
905 : }
906 27303 : return loss_;
907 : }
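  /*
   * Usage sketch (illustrative only): a caller could combine the layer loss
   * with its regularization loss; whether and where this is done is up to the
   * caller, not this header.
   *
   *   float total_loss = context.getLoss() + context.getRegularizationLoss();
   */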
908 :
909 : std::shared_ptr<ContextData> getContextData() { return ct_data; }
910 :
911 : /**
912 : * @brief get name by the layer
913 : *
914 : * @return name of the layer
915 : */
916 0 : const std::string &getName() const { return std::get<props::Name>(props); }
917 :
918 : /**
919 : * @brief get trainable by the layer
920 : *
921 : * @return trainable of the layer
922 : */
923 250 : bool getTrainable() const { return std::get<props::Trainable>(props); }
924 :
925 : /**
926 : * @brief check if run context is set and is ready to use
927 : *
928 : * @return true if ready, else false
929 : */
930 : bool readyToUse() const;
931 :
932 : /**
933 : * @brief validates the run context after run
934 : *
935 : * @param skip_input skip verifying the input
936 : * @param skip_label skip verifying the label
937 : *
938 : * @return true if ready, else false
939 : */
940 : bool validate(bool skip_input = false, bool skip_label = false);
941 :
942 : /**
943 : * @brief check if the layer is expected to run in-place
944 : *
945 : * @return true if in-place, else false
946 : */
947 8797 : bool getInPlace() const { return is_inplace; }
948 :
949 : /**
950 : * @brief get layer weights
951 : *
952 : * @return weights
953 : */
954 28 : std::vector<Weight *> getWeights() { return weights; }
955 :
956 : /**
957 : * @brief get loss scale
958 : * @return loss scale
959 : */
960 599 : float getLossScale() { return loss_scale; }
961 :
962 : /**
963 : * @brief set loss scale value
964 : *
965 : * @param scale loss scale value to set
966 : */
967 : void setLossScale(float scale) {
968 0 : loss_scale = scale;
969 0 : for (auto w : weights) {
970 : w->setLossScale(scale);
971 : }
972 : }
973 :
974 : /**
975 : * @brief set Output Zero Flag.
976 : *
977 : */
978 27303 : void reStoreData(bool nb) { restoreData = nb; }
979 :
980 : /**
981 : * @brief get Output Zero Flag.
982 : *
983 : */
984 53 : bool reStoreData() { return restoreData; }
985 :
986 : private:
987 : std::tuple<props::Name, props::Trainable> props; /**< props of the layer */
988 : std::shared_ptr<ContextData> ct_data;
989 : float loss; /**< loss of the layer */
990 : bool is_inplace; /**< if the layer is expected to run in-place */
991 : float loss_scale; /**< loss_scale of the layer */
992 : bool restoreData; /**< reset output for mixed precision */
993 :
994 : std::vector<Weight *> weights; /**< weights of the layer */
995 : std::vector<Var_Grad *> inputs; /**< inputs of the layer */
996 : std::vector<Var_Grad *> outputs; /**< outputs of the layer */
997 : std::vector<Var_Grad *> tensors; /**< tensors of the layer */
998 :
999 : #ifdef DEBUG
1000 : std::map<std::string, const void *>
1001 : tensor_map; /**< map of tensor name to tensor address */
1002 : #endif
1003 :
1004 : /**
1005 : * @brief Get regularization loss for the weight
1006 : *
1007 : * @param idx Identifier of the weight
1008 : * @return float Value of the loss
1009 : */
1010 : float getWeightRegularizationLoss(unsigned int idx) const;
1011 : };
1012 :
1013 : } // namespace nntrainer
1014 : #endif // __LAYER_CONTEXT_H__