LCOV - code coverage report
Current view: top level - nntrainer/layers - layer_context.h (source / functions) Coverage Total Hit
Test: coverage_filtered.info Lines: 90.7 % 54 49
Test Date: 2025-12-14 20:38:17 Functions: 100.0 % 5 5

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
       4              :  *
       5              :  * @file   layer_context.h
       6              :  * @date   10 June 2021
       7              :  * @see    https://github.com/nnstreamer/nntrainer
       8              :  * @author Parichay Kapoor <pk.kapoor@samsung.com>
       9              :  * @author Debadri Samaddar <s.debadri@samsung.com>
      10              :  * @author Niket Agarwal <niket.a@samsung.com>
      11              :  * @bug    No known bugs except for NYI items
      12              :  * @brief  This is the layer context for each layer
      13              :  */
      14              : 
      15              : #ifndef __LAYER_CONTEXT_H__
      16              : #define __LAYER_CONTEXT_H__
      17              : 
      18              : #include <memory>
      19              : #include <vector>
      20              : 
      21              : #include <common_properties.h>
      22              : #include <layer.h>
      23              : #include <tensor.h>
      24              : #include <tensor_dim.h>
      25              : #include <tensor_wrap_specs.h>
      26              : #include <weight.h>
      27              : 
      28              : namespace nntrainer {
      29              : 
      30              : class Var_Grad;
      31              : class ContextData;
      32              : 
      33              : /**
      34              :  * @class   InitLayerContext
      35              :  * @brief   Layer context for layer initialization
      36              :  *
      37              :  * @details This provides for layer initialization. This context does not
      38              :  * contain any structures that allocate memory or support allocating new
      39              :  * memory; it only stores the specifications based on which memory will be
      40              :  * allocated later.
      41              :  */
      42              : class InitLayerContext {
      43              : public:
      44              :   /**
      45              :    * @brief Construct a new Init Layer Context object
      46              :    *
      47              :    * @param dim Input dimensions for the layer
      48              :    * @param req_out_connected bool vector to tell if each requested output is
      49              :    * connected to other layers or not
      50              :    * @param is_inplace_ true if the layer can run in-place
      51              :    * @param n name of the layer
      52              :    * @param prefix_ prefix of the layer
      53              :    * @param max_norm max norm value for clipping gradients by global norm
      54              :    * @param tensor_type_ array holding the tensor format and the weight and
      55              :    * activation data types
      56              :    * @param loss_scale loss scale value for mixed precision training
      57              :    * @param mode execution mode
                      :    * @param engine compute engine to run the layer on
      58              :    */
      59              :   InitLayerContext(
      60              :     const std::vector<TensorDim> &dim,
      61              :     const std::vector<bool> &req_out_connected, bool is_inplace_,
      62              :     const std::string &n = "", const std::string &prefix_ = "",
      63              :     const float max_norm = 0.0,
      64              :     std::array<std::string, 3> tensor_type_ = {"NCHW", "FP32", "FP32"},
      65              :     const float loss_scale = 1.0,
      66              :     ml::train::ExecutionMode mode = ml::train::ExecutionMode::TRAIN,
      67              :     ml::train::LayerComputeEngine engine = ml::train::LayerComputeEngine::CPU);
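                      : 
                      :   /*
                      :    * Editorial example (not part of the original header): a minimal
                      :    * sketch of constructing an InitLayerContext for a single 1x1x4x4
                      :    * NCHW/FP32 input, relying on the default arguments above; the layer
                      :    * name "fc0" and the dimensions are illustrative assumptions.
                      :    *
                      :    *   std::vector<nntrainer::TensorDim> in_dims = {{1, 1, 4, 4}};
                      :    *   nntrainer::InitLayerContext ctx(in_dims, {true}, false, "fc0");
                      :    *   // ctx.getNumInputs() == 1; format and data types default to
                      :    *   // NCHW/FP32 as declared by tensor_type_ above
                      :    */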
      68              :   /**
      69              :    * @brief   get Tensor Format of Layer
      70              :    *
      71              :    * @return Tensor Format of the layer
      72              :    */
      73              :   TensorDim::Format getFormat() {
      74              :     return str_converter<enum_class_prop_tag, nntrainer::TensorFormatInfo>::
      75         3412 :       from_string(tensor_type[0]);
      76              :   };
      77              : 
      78              :   /**
      79              :    * @brief   get Tensor DataType of the Weight
      80              :    *
      81              :    * @return Tensor DataType of the Weight
      82              :    */
      83              :   TensorDim::DataType getWeightDataType() {
      84              :     return str_converter<enum_class_prop_tag, nntrainer::TensorDataTypeInfo>::
      85        11806 :       from_string(tensor_type[1]);
      86              :   };
      87              : 
      88              :   /**
      89              :    * @brief   get Tensor DataType of the Activation
      90              :    *
      91              :    * @return Tensor DataType of the Activation
      92              :    */
      93              :   TensorDim::DataType getActivationDataType() {
      94              :     return str_converter<enum_class_prop_tag, nntrainer::TensorDataTypeInfo>::
      95        14719 :       from_string(tensor_type[2]);
      96              :   };
      97              : 
      98              :   /**
      99              :    * @brief   get Layer Compute Engine Type
     100              :    *
     101              :    * @return Engine Engine Type
     102              :    */
     103            0 :   ml::train::LayerComputeEngine getComputeEngineType() { return engine; };
     104              : 
     105              :   /**
     106              :    * @brief   get name by the layer
     107              :    *
     108              :    * @return name of the layer
     109              :    */
     110            2 :   const std::string &getName() const { return name; }
     111              : 
     112              :   /**
     113              :    * @brief   get Execution Mode
     114              :    *
     115              :    * @return Mode Execution Mode : ml::train::ExecutionMode::INFERENCE |
     116              :    * ml::train::ExecutionMode::TRAIN
     117              :    */
     118              :   const ml::train::ExecutionMode &getExecutionMode() const { return mode; }
     119              : 
     120              :   /**
     121              :    * @brief Get the number of inputs for the layer
     122              :    *
     123              :    * @return unsigned int number of inputs
     124              :    */
     125         2326 :   unsigned int getNumInputs() const { return input_dim.size(); }
     126              : 
     127              :   /**
     128              :    * @brief Get the number of requested outputs for the layer
     129              :    *
     130              :    * @return unsigned int number of requested outputs
     131              :    */
     132              :   unsigned int getNumRequestedOutputs() const;
     133              : 
     134              :   /**
     135              :    * @brief Get the Input Dimensions object
     136              :    *
     137              :    * @return const std::vector<TensorDim>& Input dimensions
     138              :    */
     139         1869 :   const std::vector<TensorDim> &getInputDimensions() const { return input_dim; }
     140              : 
     141              :   /**
     142              :    * @brief Retrieves the data type of input tensor at the given index
     143              :    *
     144              :    * @param idx Identifier of the input
                      :    * @return The data type of the input tensor
     145              :    */
     146              :   const TensorDim::DataType getInputDataType(int idx) const {
     147              :     return input_dim[idx].getDataType();
     148              :   }
     149              : 
     150              :   /**
     151              :    * @brief Get the Mutable Input Dimensions object
     152              :    *
     153              :    * @return std::vector<TensorDim>& Input dimensions
     154              :    */
     155              :   std::vector<TensorDim> &getMutableInputDimensions() { return input_dim; }
     156              : 
     157              :   /**
     158              :    * @brief Set Data Type for Input Dimensions
     159              :    *
     160              :    * @param ty data type to set
     161              :    */
     162              :   void setInputDataType(TensorDim::DataType ty) {
     163              :     for (auto &d : input_dim)
     164              :       d.setDataType(ty);
     165              :   }
     166              : 
     167              :   /**
     168              :    * @brief Set the Dim Flag to retrieve effective dimension
     169              :    *
     170              :    * @param idx index of the input dimension to set the flag for
                      :    * @param dim_flag_ dimension bit to calculate, rightmost is width
     171              :    */
     172              :   void
     173              :   setEffDimFlagInputDimension(unsigned int idx,
     174              :                               const std::bitset<TensorDim::MAXDIM> &dim_flag_) {
     175          654 :     input_dim[idx].setEffDimFlag(dim_flag_);
     176          652 :   }
     177              : 
     178              :   /**
     179              :    * @brief Set the dynamic Dim Flag to retrieve dynamic dimension (that can
     180              :    * change during running)
     181              :    *
     182              :    * @param idx index of the input dimension to set the flag for
                      :    * @param dim_flag_ dimension bit to calculate, rightmost is width
     183              :    */
     184              :   void
     185              :   setDynDimFlagInputDimension(unsigned int idx,
     186              :                               const std::bitset<TensorDim::MAXDIM> &dim_flag_) {
     187          654 :     input_dim[idx].setDynDimFlag(dim_flag_);
     188          652 :   }
     189              : 
     190              :   /**
     191              :    * @brief Set the Output Dimensions object
     192              :    *
     193              :    * @param out_dim the output dimension to set to
     194              :    */
     195              :   void setOutputDimensions(const std::vector<TensorDim> &out_dim);
     196              : 
     197              :   /**
     198              :    * @brief Request a new weight for the layer
     199              :    *
     200              :    * @param dim dimension of Variable of the weight
     201              :    * @param init initializer for the weight
     202              :    * @param reg regularizer for the weight
     203              :    * @param reg_const regularization constant for the weight
                      :    * @param decay decay constant for the weight
     204              :    * @param name name of the weight
     205              :    * @param trainable if the weight is trainable (requires gradient or not)
     206              :    * @param is_virtual if the weight is virtual (not allocated)
                      :    * @param out_axis output axis of the weight
     207              :    * @return unsigned int index of the weight for its getter
     208              :    *
     209              :    * @todo Consider providing a guarantee that the returned indices will always
     210              :    * start from 0 and will always be incremental.
     211              :    */
     212         5232 :   unsigned int requestWeight(const TensorDim &dim, const Initializer init,
     213              :                              const WeightRegularizer reg, const float reg_const,
     214              :                              const float decay, const std::string &name,
     215              :                              bool trainable = true, bool is_virtual = false,
     216              :                              unsigned int out_axis = 3) {
     217              : 
     218              :     /** @note : We assume the gradient type is the same as the Activation
     219              :      * data type. */
     220         5232 :     TensorDim dim_g(dim);
     221              : 
     222              :     dim_g.setDataType(getActivationDataType());
     223              : 
     224         5232 :     weights_spec.emplace_back(
     225         5232 :       dim, dim_g, init, reg, reg_const, decay, clip_by_global_norm, trainable,
     226         5232 :       prefix + ":" + name, out_axis, loss_scale,
     227        10464 :       (getWeightDataType() != ml::train::TensorDim::DataType::FP32),
     228              :       is_virtual);
     229         5232 :     return weights_spec.size() - 1;
     230              :   }
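                      : 
                      :   /*
                      :    * Editorial example (not part of the original header): how a layer's
                      :    * finalize() might request a weight through the API above; the
                      :    * dimension, initializer and name are illustrative assumptions.
                      :    *
                      :    *   TensorDim w_dim(1, 1, 4, 8);
                      :    *   unsigned int w_idx = context.requestWeight(
                      :    *     w_dim, Initializer::XAVIER_UNIFORM, WeightRegularizer::NONE,
                      :    *     1.0f, 0.0f, "weight", true);
                      :    *   // w_idx is later passed to RunLayerContext::getWeight(w_idx)
                      :    */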
     231              : 
     232              :   /**
     233              :    * @brief Request a new weight for the layer
     234              :    *
     235              :    * @param dim dimension of Variable of the weight
     236              :    * @param dim_g dimension of Gradient of the weight
     237              :    * @param init initializer for the weight
     238              :    * @param reg regularizer for the weight
     239              :    * @param reg_const regularization constant for the weight
                      :    * @param decay decay constant for the weight
     240              :    * @param name name of the weight
     241              :    * @param trainable if the weight is trainable (requires gradient or not)
                      :    * @param out_axis output axis of the weight
                      :    * @param is_virtual if the weight is virtual (not allocated)
     242              :    * @return unsigned int index of the weight for its getter
     243              :    *
     244              :    * @todo Consider providing a guarantee that the returned indices will always
     245              :    * start from 0 and will always be incremental.
     246              :    */
     247          108 :   unsigned int requestWeight(const TensorDim &dim, const TensorDim &dim_g,
     248              :                              const Initializer init,
     249              :                              const WeightRegularizer reg, const float reg_const,
     250              :                              const float decay, const std::string &name,
     251              :                              bool trainable = true, unsigned int out_axis = 3,
     252              :                              bool is_virtual = false) {
     253              : 
     254              :     /** @note : We assume the gradient type is the same as the Activation
     255              :      * data type. */
     256          108 :     weights_spec.emplace_back(
     257          108 :       dim, dim_g, init, reg, reg_const, decay, clip_by_global_norm, trainable,
     258          108 :       prefix + ":" + name, out_axis, loss_scale,
     259          216 :       (getWeightDataType() != ml::train::TensorDim::DataType::FP32),
     260              :       is_virtual);
     261          108 :     return weights_spec.size() - 1;
     262              :   }
     263              : 
     264              :   /**
     265              :    * @brief Request a new weight for the layer
     266              :    *
     267              :    * @param spec tensor spec
     268              :    * @return unsigned int index of the weight for its getter
     269              :    *
     270              :    * @todo Consider providing a guarantee that the returned indices will always
     271              :    * start from 0 and will always be incremental.
     272              :    */
     273              :   unsigned int requestWeight(const WeightSpec &spec) {
     274            2 :     weights_spec.emplace_back(spec);
     275              :     return weights_spec.size() - 1;
     276              :   }
     277              : 
     278              :   /**
     279              :    * @brief Request a new tensor for the layer
     280              :    *
     281              :    * @param dim dimension of the tensor
     282              :    * @param name name of the tensor
     283              :    * @param init initializer for the tensor
     284              :    * @param trainable if the tensor is trainable (requires gradient or not)
     285              :    * @param lifespan lifespan of the tensor
     286              :    * @param private_ if the custom tensor should not be shared and is only for
                      :    * sole use
                      :    * @param engine compute engine for the tensor
                      :    * @return unsigned int index of the tensor for its getter
     287              :    *
     288              :    * @todo Consider providing a guarantee that the returned indices will always
     289              :    * start from 0 and will always be incremental.
     290              :    */
     291         3013 :   unsigned int requestTensor(
     292              :     const TensorDim &dim, const std::string &name,
     293              :     const Initializer init = Initializer::NONE, bool trainable = false,
     294              :     TensorLifespan lifespan = TensorLifespan::ITERATION_LIFESPAN,
     295              :     bool private_ = true,
     296              :     ml::train::LayerComputeEngine engine = ml::train::LayerComputeEngine::CPU) {
     297         3013 :     const auto &prefix_ = private_ ? this->name : this->prefix;
     298         6026 :     tensors_spec.emplace_back(dim, init, trainable, prefix_ + ":" + name,
     299              :                               lifespan, engine);
     300         3013 :     return tensors_spec.size() - 1;
     301              :   }
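                      : 
                      :   /*
                      :    * Editorial example (not part of the original header): requesting a
                      :    * scratch tensor that only lives through the forward call; the name
                      :    * and dimension are illustrative assumptions.
                      :    *
                      :    *   unsigned int tmp_idx = context.requestTensor(
                      :    *     TensorDim(1, 1, 4, 8), "tmp", Initializer::NONE, false,
                      :    *     TensorLifespan::FORWARD_FUNC_LIFESPAN);
                      :    */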
     302              : 
     303              :   /**
     304              :    * @brief Specification of the tensors
     305              :    *
     306              :    */
     307              :   typedef VarGradSpec TensorSpec;
     308              : 
     309              :   /**
     310              :    * @brief Request a new tensor for the layer
     311              :    *
     312              :    * @param spec tensor spec
     313              :    * @return unsigned int index of the tensor for its getter
     314              :    *
     315              :    * @todo Consider providing a guarantee that the returned indices will always
     316              :    * start from 0 and will always be incremental.
     317              :    */
     318              :   unsigned int requestTensor(const TensorSpec &spec) {
     319            0 :     tensors_spec.emplace_back(spec);
     320              :     return tensors_spec.size() - 1;
     321              :   }
     322              : 
     323              :   /**
     324              :    * @brief Get the current weights spec
     325              :    *
     326              :    * @return The current weights spec
     327              :    */
     328          136 :   const std::vector<WeightSpec> &getWeightsSpec() const { return weights_spec; }
     329              : 
     330              :   /**
     331              :    * @brief Get the number of requested weights
     332              :    *
     333              :    * @return The current number of requested weights
     334              :    */
     335              :   unsigned int getNumWeights() const { return weights_spec.size(); }
     336              : 
     337              :   /**
     338              :    * @brief Get the current tensors spec
     339              :    *
     340              :    * @return The current tensors spec
     341              :    */
     342          136 :   const std::vector<TensorSpec> &getTensorsSpec() const { return tensors_spec; }
     343              : 
     344              :   /**
     345              :    * @brief Get the number of requested tensors objects
     346              :    *
     347              :    * @return unsigned int number of requested tensors
     348              :    */
     349              :   unsigned int getNumTensors() const { return tensors_spec.size(); }
     350              : 
     351              :   /**
     352              :    * @brief create var grad specification with output default
     353              :    *
     354              :    * @param dim dimension
     355              :    * @param name name
     356              :    * @param ls variable lifespan
     357              :    * @param grad_ls gradient lifespan
     358              :    * @return VarGradSpecV2 var grad specification
     359              :    */
     360              :   static VarGradSpecV2
     361              :   outSpec(const TensorDim &dim, const std::string &name = "out",
     362              :           TensorLifespan ls = TensorLifespan::FORWARD_FUNC_LIFESPAN,
     363              :           TensorLifespan grad_ls = TensorLifespan::CALC_GRAD_DERIV_LIFESPAN);
     364              : 
     365              :   /**
     366              :    * @brief request outputs
     367              :    *
     368              :    * @param out_specs pack of out specification, name will be automatically
     369              :    * indexed to prevent name clash
     370              :    */
     371              :   void requestOutputs(std::vector<VarGradSpecV2> &&out_specs);
     372              : 
     373              :   /**
     374              :    * @brief Get the Out Specs object
     375              :    *
     376              :    * @return std::vector<VarGradSpecV2> out specification
     377              :    */
     378              :   const std::vector<VarGradSpecV2> &getOutSpecs() const;
     379              : 
     380              :   /**
     381              :    * @brief Validate the context
     382              :    *
     383              :    * @return true if validated, else false
     384              :    * @note this must be called before passing a context to a layer for finalize
     385              :    */
     386         5050 :   bool validate() {
     387         5050 :     if (input_dim.empty()) {
     388              :       return false;
     389              :     }
     390              : 
     391        11412 :     for (auto const &dim : input_dim) {
     392         6362 :       if (dim.getDataLen() == 0) {
     393              :         return false;
     394              :       }
     395              :     }
     396              : 
     397         5050 :     if (name.empty()) {
     398              :       return false;
     399              :     }
     400              : 
     401              :     return true;
     402              :   }
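                      : 
                      :   /*
                      :    * Editorial example (not part of the original header): a caller-side
                      :    * sketch of the check requested by the note above, before handing the
                      :    * context to a layer for finalize.
                      :    *
                      :    *   if (!ctx.validate())
                      :    *     throw std::invalid_argument("layer context is not valid");
                      :    *   layer->finalize(ctx);
                      :    */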
     403              : 
     404              :   /**
     405              :    * @brief   check if the layer is expected to run in-place
     406              :    *
     407              :    * @return true if in-place, else false
     408              :    */
     409          434 :   bool getInPlace() const { return is_inplace; }
     410              : 
     411              :   /**
     412              :    * @brief   get Initial value of Loss_Scale. This is set to RunLayerContext
     413              :    * and updated
     414              :    *
     415              :    * @return loss_scale
     416              :    */
     417         4435 :   float getLossScale() const { return loss_scale; }
     418              : 
     419              :   /**
     420              :    * @brief   get Mixed Precision Training. If the weight type is not FP32,
     421              :    * then it is mixed precision training.
     422              :    *
     423              :    * @return true if it is mixed precision training
     424              :    */
     425              :   bool isMixedTraining() { return !istrequal(tensor_type[1], "FP32"); }
     426              : 
     427              : private:
     428              :   std::vector<TensorDim> input_dim; /**< Input dimensions for the layer */
     429              :   bool is_inplace;           /**< if the layer is expected to run in-place */
     430              :   float clip_by_global_norm; /**< max norm value for clip by norm */
     431              : 
     432              :   std::vector<VarGradSpecV2> output_specs; /**< Specification for the output */
     433              :   std::vector<WeightSpec> weights_spec;    /**< Specification for the weights */
     434              :   std::vector<TensorSpec>
     435              :     tensors_spec; /**< Specification for the var_grad (trainable/non-trainable
     436              :                      variables) */
     437              : 
     438              :   std::vector<bool> req_out_is_connected;
     439              :   /**< a bool vector to tell if requested out is actually connected to others */
     440              :   std::string name;   /**< name of the layer */
     441              :   std::string prefix; /**< prefix of the layer */
     442              :   std::array<std::string, 3> tensor_type;
     443              :   float loss_scale; /**< loss_scale value */
     444              :   ml::train::ExecutionMode mode;
     445              :   ml::train::LayerComputeEngine engine;
     446              : };
     447              : 
     448              : /**
     449              :  * @class   RunLayerContext
     450              :  * @brief   Layer context for layer execution
     451              :  *
     452              :  * @details This provides for layer execution. Unlike InitLayerContext, this
     453              :  * context contains structures whose memory is already allocated and which
     454              :  * are ready to be used while running the layer.
     456              :  *
     457              :  * @todo Check the caller of the getTensor() and set restrictions on the tensors
     458              :  * to be accessed based on which function is requesting it.
     459              :  */
     460              : class RunLayerContext {
     461              : public:
     462              :   /**
     463              :    * @brief Construct a new Run Layer Context object
     464              :    *
     465              :    */
     466              :   RunLayerContext() :
     467              :     loss(0.0), is_inplace(false), loss_scale(1.0), restoreData(false) {}
     468              : 
     469              :   /**
     470              :    * @brief Construct a new Run Layer Context object
     471              :    *
     472              :    */
     473              :   RunLayerContext(const std::string &name, bool is_inplace_) :
     474              :     RunLayerContext() {
     475              :     is_inplace = is_inplace_;
     476              :     std::get<props::Name>(props).set(name);
     477              :   }
     478              : 
     479              :   /**
     480              :    * @brief Construct a new Run Layer Context object
     481              :    *
     482              :    */
     483              :   RunLayerContext(const std::string &name, bool is_inplace_,
     484              :                   float loss_scale_) :
     485              :     RunLayerContext() {
     486              :     is_inplace = is_inplace_;
     487              :     std::get<props::Name>(props).set(name);
     488              :     loss_scale = loss_scale_;
     489              :   }
     490              : 
     491              :   /**
     492              :    * @brief Construct a new Run Layer Context object
     493              :    *
     494              :    * @param name name of the layer
     495              :    * @param trainable if the layer is trainable
     496              :    * @param l loss of the layer
     497              :    * @param is_inplace_ whether the layer executes in-place
     498              :    * @param loss_scale_ loss_scale of the layer
                      :    * @param ct_data shared context data for the layer
                      :    * @param restoreData_ initial restore-data flag (see reStoreData())
     499              :    * @param w weights of the layer
     500              :    * @param in inputs of the layer
     501              :    * @param out outputs of the layer
     502              :    * @param t extra tensors of the layer
     503              :    */
     504              :   RunLayerContext(const std::string &name, bool trainable, float l,
     505              :                   bool is_inplace_, float loss_scale_,
     506              :                   std::shared_ptr<ContextData> ct_data, bool restoreData_,
     507              :                   const std::vector<Weight *> &w,
     508              :                   const std::vector<Var_Grad *> &in,
     509              :                   const std::vector<Var_Grad *> &out,
     510              :                   const std::vector<Var_Grad *> &t);
     511              : 
     512              :   /**
     513              :    * @brief Get the Weight tensor object
     514              :    *
     515              :    * @param w out tensor to receive (or be shaped after) the weight
     516              :    * @param idx Identifier of the weight
     518              :    */
     519              :   void getWeight(Tensor &w, unsigned int idx) {
     520              :     Tensor &t_w = weights[idx]->getVariableRef();
     521              : 
     522              :     if (t_w.getDataType() == Tdatatype::FP32 ||
     523              :         t_w.getDataType() == Tdatatype::FP16 ||
     524              :         t_w.getDataType() == Tdatatype::BCQ ||
     525              :         t_w.getDataType() == Tdatatype::Q4_K) {
     526              :       w = t_w;
     527              :       return;
     528              :     }
     529              : 
     530              :     unsigned int base_idx = 0;
     531              :     Tdatatype o_t = getOutput(base_idx).getDataType();
     532              : 
     533              :     if (w.empty()) {
     534              :       TensorDim d = t_w.getDim();
     535              :       d.setDataType(o_t);
     536              :       w = Tensor(d, true);
     537              :     }
     538              : 
     539              :     return;
     540              :   }
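                      : 
                      :   /*
                      :    * Editorial note (not part of the original header): as implemented
                      :    * above, FP32/FP16/BCQ/Q4_K weights are assigned to the out parameter
                      :    * directly; for other (quantized) weight types, an empty out tensor
                      :    * is allocated with the weight's shape and the data type of output 0.
                      :    * A minimal usage sketch:
                      :    *
                      :    *   Tensor w;                // empty on entry
                      :    *   context.getWeight(w, 0); // assigned or allocated as above
                      :    */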
     541              : 
     542              :   /**
     543              :    * @brief Get the Weight tensor object
     544              :    *
     545              :    * @param idx Identifier of the weight
     546              :    * @return Tensor& Reference to the weight tensor
     547              :    */
     548              :   Tensor &getWeight(unsigned int idx) const;
     549              : 
     550              :   /**
     551              :    * @brief Get the Weight Gradient tensor object
     552              :    *
     553              :    * @note this method returns the fresh gradient to be filled
     554              :    * @param idx Identifier of the weight
     555              :    * @return Tensor& Reference to the weight grad tensor
     556              :    */
     557              :   Tensor &getWeightGrad(unsigned int idx) const;
     558              : 
     559              :   /**
     560              :    * @brief Get the FP32 Weight tensor object (master copy used for mixed
     561              :    * precision)
     562              :    *
     563              :    * @param idx Identifier of the weight
                      :    * @return Tensor& Reference to the FP32 weight tensor
     564              :    */
     565              :   Tensor &getWeightFP32(unsigned int idx) const;
     566              : 
     567              :   /**
     569              :    * @brief Get the Weight Optimizer Variable tensor object
     570              :    *
     571              :    * @param idx Identifier of the weight
     572              :    * @param jdx Identifier of the weight optimizer variable
     573              :    * @return Tensor& Reference to the weight optimizer variable tensor
     574              :    */
     575              :   Tensor &getWeightOptVar(unsigned int idx, unsigned int jdx) const;
     576              : 
     577              :   /**
     578              :    * @brief Get the Weight name
     579              :    *
     580              :    * @param idx Identifier of the weight
     581              :    * @return name of the weight
     582              :    */
     583              :   const std::string &getWeightName(unsigned int idx) const;
     584              : 
     585              :   /**
     586              :    * @brief check if the weight has gradient
     587              :    *
     588              :    * @param idx Identifier of the weight
     589              :    * @return true if weight has gradient, else false
     590              :    */
     591              :   bool weightHasGradient(unsigned int idx) const;
     592              : 
     593              :   /**
     594              :    * @brief Get the Output tensor object
     595              :    *
     596              :    * @param idx Identifier of the output
     597              :    * @return Tensor& Reference to the output tensor
     598              :    */
     599              :   Tensor &getOutput(unsigned int idx);
     600              : 
     601              :   /**
     602              :    * @brief Get the Output tensor object
     603              :    *
     604              :    * @param idx Identifier of the output
     605              :    * @return Tensor& Reference to the output tensor
     606              :    */
     607              :   const Tensor &getOutput(unsigned int idx) const;
     608              : 
     609              :   /**
     610              :    * @brief Get the Output Grad tensor object
     611              :    *
     612              :    * @param idx Identifier of the output
     613              :    * @return Read-only output grad tensor, if derivative does not have
     614              :    * gradient, return a temporary, initialized to zero
     615              :    */
     616              :   const Tensor getOutputGrad(unsigned int idx) const;
     617              : 
     618              :   /**
     619              :    * @brief Get the Output Grad tensor object
     620              :    *
     621              :    * @param idx Identifier of the output
     622              :    * @return Tensor& Reference to the output grad tensor, this is valid only if
     623              :    * the given output is trainable
     624              :    *
     625              :    * @note recommended to NOT use this function as a layer developer but rather
     626              :    * use getOutputGrad().
     627              :    */
     628              :   Tensor &getOutputGradUnsafe(unsigned int idx);
     629              : 
     630              :   /**
     631              :    * @brief check if the output has gradient
     632              :    *
     633              :    * @param idx Identifier of the output
     634              :    * @return true if the output has gradient, else false
     635              :    */
     636              :   bool outputHasGradient(unsigned int idx) const;
     637              : 
     638              :   /**
     639              :    * @brief Get the incoming Derivative tensor object
     640              :    *
     641              :    * @param idx Identifier of the output
     642              :    * @return Tensor output derivative tensor, if derivative does not have
     643              :    * gradient, return a temporary, initialized to zero
     644              :    */
     645              :   const Tensor getIncomingDerivative(unsigned int idx) const;
     646              : 
     647              :   /**
     648              :    * @brief Get the Input tensor object
     649              :    *
     650              :    * @param idx Identifier of the input
     651              :    * @return Tensor& Reference to the input tensor
     652              :    */
     653              :   Tensor &getInput(unsigned int idx);
     654              : 
     655              :   /**
     656              :    * @brief Get the Input tensor object
     657              :    *
     658              :    * @param idx Identifier of the input
     659              :    * @return Tensor& Reference to the input tensor
     660              :    */
     661              :   const Tensor &getInput(unsigned int idx) const;
     662              : 
     663              :   /**
     664              :    * @brief Get the Input Grad tensor object
     665              :    *
     666              :    * @param idx Identifier of the input
     667              :    * @return Tensor& Reference to the input grad tensor
     668              :    */
     669              :   Tensor &getInputGrad(unsigned int idx);
     670              : 
     671              :   /**
     672              :    * @brief check if the input has gradient
     673              :    *
     674              :    * @param idx Identifier of the input
     675              :    * @return true if the input has gradient, else false
     676              :    */
     677              :   bool inputHasGradient(unsigned int idx) const;
     678              : 
     679              :   /**
     680              :    * @brief Get the outgoing Derivative tensor object
     681              :    *
     682              :    * @param idx Identifier of the input
     683              :    * @return Tensor& Reference to the input derivative tensor
     684              :    */
     685              :   Tensor &getOutgoingDerivative(unsigned int idx);
     686              : 
     687              :   /**
     688              :    * @brief Get the Tensor object
     689              :    *
     690              :    * @param idx Identifier of the tensor
     691              :    * @return Tensor& Reference to the tensor
     692              :    */
     693              :   Tensor &getTensor(unsigned int idx);
     694              : 
     695              :   /**
     696              :    * @brief Get the Tensor object
     697              :    *
     698              :    * @param idx Identifier of the tensor
     699              :    * @return Tensor& Reference to the tensor
     700              :    */
     701              :   const Tensor &getTensor(unsigned int idx) const;
     702              : 
     703              :   /**
     704              :    * @brief Get the Tensor Grad object
     705              :    *
     706              :    * @param idx Identifier of the tensor
     707              :    * @return Tensor& Reference to the tensor grad tensor
     708              :    */
     709              :   Tensor &getTensorGrad(unsigned int idx);
     710              : 
     711              :   /**
     712              :    * @brief Get the Tensor Grad object
     713              :    *
     714              :    * @param idx Identifier of the tensor
     715              :    * @return Tensor& Reference to the tensor grad tensor
     716              :    */
     717              :   const Tensor &getTensorGrad(unsigned int idx) const;
     718              : 
     719              :   /**
     720              :    * @brief check if the tensor has gradient
     721              :    *
     722              :    * @param idx Identifier of the tensor
     723              :    * @return true if tensor has gradient, else false
     724              :    */
     725              :   bool tensorHasGradient(unsigned int idx) const;
     726              : 
     727              :   /**
     728              :    * @brief check if the weight is borrowed from others so it is dependent
     729              :    *
     730              :    * @param idx index
     731              :    * @return bool true if weight is borrowed from outside
     732              :    */
     733              :   bool isWeightDependent(unsigned int idx) const;
     734              : 
     735              :   /**
     736              :    * @brief check current gradient is first access
     737              :    * @note for now, it is equivalent to weight first access, so this value is
     738              :    * accessible for non-trainable weights as well. This is in terms of execution
     739              :    * order.
     740              :    *
     741              :    * @param idx index
     742              :    * @return bool true if first access
     743              :    */
     744              :   bool isGradientFirstAccess(unsigned int idx) const;
     745              : 
     746              :   /**
     747              :    * @brief check current gradient is last access
     748              :    * @note for now, it is equivalent to weight last access, so this value is
     749              :    * accessible for non-trainable weights as well. This is in terms of execution
     750              :    * order.
     751              :    *
     752              :    * @param idx index
     753              :    * @return bool true if last access
     754              :    */
     755              :   bool isGradientLastAccess(unsigned int idx) const;
     756              : 
     757              :   /**
     758              :    * @brief check if the gradient is to be clipped by global norm
     759              :    *
     760              :    * @param idx index
     761              :    * @return bool true if it is to be clipped else false
     762              :    */
     763              :   bool isGradientClipByGlobalNorm(unsigned int idx) const;
     764              : 
     765              :   /**
     766              :    * @brief check if the weight is mixed precision
     767              :    *
     768              :    * @param idx index
     769              :    * @return bool true if it is mixed precision
     770              :    */
     771              :   bool isMixedPrecision(unsigned int idx) const;
     772              : 
     773              :   /**
     774              :    * @brief Get the tensor name
     775              :    *
     776              :    * @param idx Identifier of the tensor
     777              :    * @return name of the tensor
     778              :    */
     779              :   const std::string &getTensorName(unsigned int idx) const;
     780              : 
     781              :   /**
     782              :    * @brief Get the number of Outputs tensor objects
     783              :    *
     784              :    * @return unsigned int number of output tensors
     785              :    */
     786              :   unsigned int getNumOutputs() const;
     787              : 
     788              :   /**
     789              :    * @brief Get the number of inputs tensor objects
     790              :    *
     791              :    * @return unsigned int number of input tensors
     792              :    */
     793              :   unsigned int getNumInputs() const;
     794              : 
     795              :   /**
     796              :    * @brief Get the number of weights tensor objects
     797              :    *
     798              :    * @return unsigned int number of weight tensors
     799              :    */
     800              :   unsigned int getNumWeights() const;
     801              : 
     802              :   /**
     803              :    * @brief Get the Number of Weight Optimizer Variable tensor object
     804              :    *
     805              :    * @param idx Identifier of the weight
     806              :    * @return unsigned int Number of the weight optimizer variable
     807              :    */
     808              :   unsigned int getNumWeightOptVar(unsigned int idx) const;
     809              : 
     810              :   /**
     811              :    * @brief Get the number of requested tensors objects
     812              :    *
     813              :    * @return unsigned int number of requested tensors
     814              :    */
     815              :   unsigned int getNumTensors() const;
     816              :   /**
     817              :    * @brief Set the batch for the run context
     818              :    *
     819              :    * @param batch Update batch size
     820              :    */
     821              :   void setBatch(unsigned int batch);
     822              : 
     823              :   /**
     824              :    * @brief Update the dimensions for a requested tensor
     825              :    *
     826              :    * @param idx index of the tensor (identifier)
     827              :    * @param batch Updated batch size
     828              :    */
     829              :   void updateTensor(unsigned int idx, unsigned int batch);
     830              : 
     831              :   /**
     832              :    * @brief Update the dimensions for a requested input
     833              :    *
     834              :    * @param idx index of the input (identifier)
     835              :    * @param dim dimension to be updated
     836              :    */
     837              :   void updateInput(unsigned int idx, TensorDim dim);
     838              : 
     839              :   /**
     840              :    * @brief Update the dimensions for a requested output
     841              :    *
     842              :    * @param idx index of the output (identifier)
     843              :    * @param dim dimension to be updated
     844              :    */
     845              :   void updateOutput(unsigned int idx, TensorDim dim);
     846              : 
     847              :   /**
     848              :    * @brief Update the dimensions for a requested tensor
     849              :    *
     850              :    * @param idx index of the tensor (identifier)
     851              :    * @param dim dimension to be updated
     852              :    */
     853              :   void updateTensor(unsigned int idx, TensorDim dim);
     854              : 
     855              :   /**
     856              :    * @brief   Get weight object for the weights
     857              :    *
     858              :    * @param idx index of the weight (identifier)
     859              :    * @return weight object
     860              :    */
     861              :   Weight &getWeightObject(unsigned int idx);
     862              : 
     863              :   /**
     864              :    * @brief   check if the label is available
     865              :    *
     866              :    * @param idx Identifier of the input
     867              :    * @return true if label is available else false
     868              :    */
     869              :   bool isLabelAvailable(unsigned int idx) const;
     870              : 
     871              :   /**
     872              :    * @brief   Get label tensor
     873              :    *
     874              :    * @param idx Identifier of the input
     875              :    * @return Tensor& Reference to the label tensor
     876              :    */
     877              :   Tensor &getLabel(unsigned int idx);
     878              : 
     879              :   /**
     880              :    * @brief   update loss by the layer
     881              :    *
     882              :    * @param val updated loss value
     883              :    * @note loss value is only used for loss layers. For non-loss layers, setting
     884              :    * this value will have no effect on the behavior of the model.
     885              :    */
     886         6141 :   void setLoss(float val) { loss = val; }
     887              : 
     888              :   /**
     889              :    * @brief   get loss of the layer
     890              :    *
     891              :    * @return loss of the layer
     892              :    * @note does not include the regularization loss.
     893              :    */
     894         6844 :   float getLoss() const { return loss; }
     895              : 
     896              :   /**
     897              :    * @brief   get regularization loss of the layer
     898              :    *
     899              :    * @return regularization loss of the layer
     900              :    */
     901        27303 :   float getRegularizationLoss() const {
     902              :     float loss_ = 0;
     903        48848 :     for (unsigned int idx = 0; idx < getNumWeights(); idx++) {
     904        21545 :       loss_ += getWeightRegularizationLoss(idx);
     905              :     }
     906        27303 :     return loss_;
     907              :   }
     908              : 
                      :   /**
                      :    * @brief   get the context data of the layer
                      :    *
                      :    * @return shared pointer to the context data
                      :    */
     909              :   std::shared_ptr<ContextData> getContextData() { return ct_data; }
     910              : 
     911              :   /**
     912              :    * @brief   get name by the layer
     913              :    *
     914              :    * @return name of the layer
     915              :    */
     916            0 :   const std::string &getName() const { return std::get<props::Name>(props); }
     917              : 
     918              :   /**
     919              :    * @brief   get trainable by the layer
     920              :    *
     921              :    * @return trainable of the layer
     922              :    */
     923          250 :   bool getTrainable() const { return std::get<props::Trainable>(props); }
     924              : 
     925              :   /**
     926              :    * @brief   check if run context is set and is ready to use
     927              :    *
     928              :    * @return true if ready, else false
     929              :    */
     930              :   bool readyToUse() const;
     931              : 
     932              :   /**
     933              :    * @brief   validates the run context after run
     934              :    *
     935              :    * @param skip_input  skip verifying the input
     936              :    * @param skip_label  skip verifying the label
     937              :    *
     938              :    * @return true if ready, else false
     939              :    */
     940              :   bool validate(bool skip_input = false, bool skip_label = false);
     941              : 
     942              :   /**
     943              :    * @brief   check if the layer is expected to run in-place
     944              :    *
     945              :    * @return true if in-place, else false
     946              :    */
     947         8797 :   bool getInPlace() const { return is_inplace; }
     948              : 
     949              :   /**
     950              :    * @brief   get layer weights
     951              :    *
     952              :    * @return weights
     953              :    */
     954           28 :   std::vector<Weight *> getWeights() { return weights; }
     955              : 
     956              :   /**
     957              :    * @brief get loss scale
     958              :    * @return loss scale
     959              :    */
     960          599 :   float getLossScale() { return loss_scale; }
     961              : 
     962              :   /**
     963              :    * @brief   set Loss_Scale and propagate it to all weights
     964              :    *
     965              :    * @param scale loss scale value to set
     966              :    */
     967              :   void setLossScale(float scale) {
     968            0 :     loss_scale = scale;
     969            0 :     for (auto w : weights) {
     970              :       w->setLossScale(scale);
     971              :     }
     972              :   }
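                      : 
                      :   /*
                      :    * Editorial example (not part of the original header): a loss scaler
                      :    * in mixed precision training might halve the scale when an overflow
                      :    * is detected; overflow_detected is a hypothetical caller-side flag.
                      :    *
                      :    *   if (overflow_detected)
                      :    *     context.setLossScale(context.getLossScale() / 2.0f);
                      :    */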
     973              : 
     974              :   /**
     975              :    * @brief   set the restore-data flag (reset outputs for mixed precision)
     976              :    *
                      :    * @param nb new flag value
     977              :    */
     978        27303 :   void reStoreData(bool nb) { restoreData = nb; }
     979              : 
     980              :   /**
     981              :    * @brief   get the restore-data flag (reset outputs for mixed precision)
     982              :    *
     983              :    */
     984           53 :   bool reStoreData() { return restoreData; }
     985              : 
     986              : private:
     987              :   std::tuple<props::Name, props::Trainable> props; /**< props of the layer */
     988              :   std::shared_ptr<ContextData> ct_data;
     989              :   float loss;       /**< loss of the layer */
     990              :   bool is_inplace;  /**< if the layer is expected to run in-place */
     991              :   float loss_scale; /**< loss_scale of the layer */
     992              :   bool restoreData; /**< reset output for mixed precision */
     993              : 
     994              :   std::vector<Weight *> weights;   /**< weights of the layer */
     995              :   std::vector<Var_Grad *> inputs;  /**< inputs of the layer */
     996              :   std::vector<Var_Grad *> outputs; /**< outputs of the layer */
     997              :   std::vector<Var_Grad *> tensors; /**< tensors of the layer */
     998              : 
     999              : #ifdef DEBUG
    1000              :   std::map<std::string, const void *>
    1001              :     tensor_map; /**< map of tensor name to tensor address */
    1002              : #endif
    1003              : 
    1004              :   /**
    1005              :    * @brief Get regularization loss for the weight
    1006              :    *
    1007              :    * @param idx Identifier of the weight
    1008              :    * @return float Value of the loss
    1009              :    */
    1010              :   float getWeightRegularizationLoss(unsigned int idx) const;
    1011              : };
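                      : 
                      : /*
                      :  * Editorial example (not part of the original header): a typical
                      :  * forwarding() body reads tensors through the run context; the layer
                      :  * class name and the stored w_idx are illustrative assumptions.
                      :  *
                      :  *   void MyFcLayer::forwarding(RunLayerContext &context, bool training) {
                      :  *     Tensor &in  = context.getInput(0);
                      :  *     Tensor &w   = context.getWeight(w_idx);
                      :  *     Tensor &out = context.getOutput(0);
                      :  *     in.dot(w, out); // out = in x w
                      :  *   }
                      :  */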
    1012              : 
    1013              : } // namespace nntrainer
    1014              : #endif // __LAYER_CONTEXT_H__
        

Generated by: LCOV version 2.0-1