// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2021 hyeonseok lee <hs89.lee@samsung.com>
 *
 * @file rnncell.cpp
 * @date 29 Oct 2021
 * @brief This is Recurrent Cell Layer Class of Neural Network
 * @see https://github.com/nnstreamer/nntrainer
 * @author hyeonseok lee <hs89.lee@samsung.com>
 * @bug No known bugs except for NYI items
 *
 */

#include <cmath>
#include <common_properties.h>

#include <layer_context.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
#include <rnncell.h>
#include <util_func.h>

namespace nntrainer {

static constexpr size_t SINGLE_INOUT_IDX = 0;

// - weight_ih ( weights of input to hidden )
// - weight_hh ( weights of hidden to hidden )
// - bias_h ( integrated bias of input and hidden, used when integrate_bias )
// - bias_ih ( input bias, used when integrate_bias is disabled )
// - bias_hh ( hidden bias, used when integrate_bias is disabled )
enum RNNCellParams {
  weight_ih,
  weight_hh,
  bias_h,
  bias_ih,
  bias_hh,
  dropout_mask
};

RNNCellLayer::RNNCellLayer() :
  LayerImpl(),
  rnncell_props(props::Unit(), props::IntegrateBias(),
                props::HiddenStateActivation() = ActivationType::ACT_TANH,
                props::DropOutRate()),
  acti_func(ActivationType::ACT_NONE, true),
  epsilon(1e-3f) {
  wt_idx.fill(std::numeric_limits<unsigned>::max());
}

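// finalize validates the two input dimensions, sets the output dimension to
// [ batch, 1, 1, unit ], and requests the weights (plus the dropout mask
// tensor when dropout is enabled).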
void RNNCellLayer::finalize(InitLayerContext &context) {
  const nntrainer::WeightRegularizer weight_regularizer =
    std::get<props::WeightRegularizer>(*layer_impl_props);
  const float weight_regularizer_constant =
    std::get<props::WeightRegularizerConstant>(*layer_impl_props);
  const Initializer weight_initializer =
    std::get<props::WeightInitializer>(*layer_impl_props);
  const Initializer bias_initializer =
    std::get<props::BiasInitializer>(*layer_impl_props);
  auto &weight_decay = std::get<props::WeightDecay>(*layer_impl_props);
  auto &bias_decay = std::get<props::BiasDecay>(*layer_impl_props);
  const bool disable_bias =
    std::get<props::DisableBias>(*layer_impl_props).get();

  NNTR_THROW_IF(std::get<props::Unit>(rnncell_props).empty(),
                std::invalid_argument)
    << "unit property missing for rnncell layer";
  const unsigned int unit = std::get<props::Unit>(rnncell_props).get();
  const bool integrate_bias =
    std::get<props::IntegrateBias>(rnncell_props).get();
  const nntrainer::ActivationType hidden_state_activation_type =
    std::get<props::HiddenStateActivation>(rnncell_props).get();
  const float dropout_rate = std::get<props::DropOutRate>(rnncell_props).get();

  NNTR_THROW_IF(context.getNumInputs() != 2, std::invalid_argument)
    << "RNNCell layer expects 2 inputs (one for the input and the other for "
       "the hidden state) but got " +
         std::to_string(context.getNumInputs()) + " input(s)";

  // input_dim = [ batch, 1, 1, feature_size ]
  const TensorDim &input_dim = context.getInputDimensions()[INOUT_INDEX::INPUT];
  NNTR_THROW_IF(input_dim.channel() != 1 || input_dim.height() != 1,
                std::invalid_argument)
    << "Input must be single time dimension for RNNCell (shape should be "
       "[batch_size, 1, 1, feature_size])";
  // input_hidden_state_dim = [ batch, 1, 1, unit ]
  const TensorDim &input_hidden_state_dim =
    context.getInputDimensions()[INOUT_INDEX::INPUT_HIDDEN_STATE];
  NNTR_THROW_IF(input_hidden_state_dim.channel() != 1 ||
                  input_hidden_state_dim.height() != 1,
                std::invalid_argument)
    << "Input hidden state's dimension should be [batch, 1, 1, unit] for "
       "RNNCell";

  const unsigned int batch_size = input_dim.batch();
  const unsigned int feature_size = input_dim.width();

  // output_hidden_state_dim = [ batch, 1, 1, unit ]
  TensorDim output_hidden_state_dim(batch_size, 1, 1, unit);
  context.setOutputDimensions({output_hidden_state_dim});

  // weight_initializer could be set separately for weight_ih and weight_hh
  // (kernel_initializer and recurrent_initializer in Keras); for now, both
  // are initialized the same way.

  // weight_ih_dim : [ 1, 1, feature_size, unit ]
  const TensorDim weight_ih_dim({feature_size, unit});
  wt_idx[RNNCellParams::weight_ih] = context.requestWeight(
    weight_ih_dim, weight_initializer, weight_regularizer,
    weight_regularizer_constant, weight_decay, "weight_ih", true);
  // weight_hh_dim : [ 1, 1, unit, unit ]
  const TensorDim weight_hh_dim({unit, unit});
  wt_idx[RNNCellParams::weight_hh] = context.requestWeight(
    weight_hh_dim, weight_initializer, weight_regularizer,
    weight_regularizer_constant, weight_decay, "weight_hh", true);
  if (!disable_bias) {
    if (integrate_bias) {
      // bias_h_dim : [ 1, 1, 1, unit ]
      const TensorDim bias_h_dim({unit});
      wt_idx[RNNCellParams::bias_h] = context.requestWeight(
        bias_h_dim, bias_initializer, WeightRegularizer::NONE, 1.0f, bias_decay,
        "bias_h", true);
    } else {
      // bias_ih_dim : [ 1, 1, 1, unit ]
      const TensorDim bias_ih_dim({unit});
      wt_idx[RNNCellParams::bias_ih] = context.requestWeight(
        bias_ih_dim, bias_initializer, WeightRegularizer::NONE, 1.0f,
        bias_decay, "bias_ih", true);
      // bias_hh_dim : [ 1, 1, 1, unit ]
      const TensorDim bias_hh_dim({unit});
      wt_idx[RNNCellParams::bias_hh] = context.requestWeight(
        bias_hh_dim, bias_initializer, WeightRegularizer::NONE, 1.0f,
        bias_decay, "bias_hh", true);
    }
  }

  if (dropout_rate > epsilon) {
    // dropout_mask_dim = [ batch, 1, 1, unit ]
    const TensorDim dropout_mask_dim(batch_size, 1, 1, unit);
    wt_idx[RNNCellParams::dropout_mask] =
      context.requestTensor(dropout_mask_dim, "dropout_mask", Initializer::NONE,
                            false, TensorLifespan::ITERATION_LIFESPAN);
  }

  acti_func.setActiFunc(hidden_state_activation_type);

  if (!acti_func.supportInPlace()) {
    throw exception::not_supported(
      "Out of place activation functions not supported");
  }
}

void RNNCellLayer::setProperty(const std::vector<std::string> &values) {
  const std::vector<std::string> &remain_props =
    loadProperties(values, rnncell_props);
  LayerImpl::setProperty(remain_props);
}

void RNNCellLayer::exportTo(Exporter &exporter,
                            const ml::train::ExportMethods &method) const {
  LayerImpl::exportTo(exporter, method);
  exporter.saveResult(rnncell_props, method, this);
}

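// forwarding computes the rnncell recurrence
//   h_t = act(x_t * weight_ih + h_{t-1} * weight_hh + bias),
// where bias is bias_h (integrated) or bias_ih + bias_hh (separate), and
// applies the dropout mask to h_t during training when dropout is enabled.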
void RNNCellLayer::forwarding(RunLayerContext &context, bool training) {
  const bool disable_bias =
    std::get<props::DisableBias>(*layer_impl_props).get();

  const bool integrate_bias =
    std::get<props::IntegrateBias>(rnncell_props).get();
  const float dropout_rate = std::get<props::DropOutRate>(rnncell_props).get();

  const Tensor &input = context.getInput(INOUT_INDEX::INPUT);
  const Tensor &prev_hidden_state =
    context.getInput(INOUT_INDEX::INPUT_HIDDEN_STATE);
  Tensor &hidden_state = context.getOutput(INOUT_INDEX::OUTPUT_HIDDEN_STATE);

  const Tensor &weight_ih = context.getWeight(wt_idx[RNNCellParams::weight_ih]);
  const Tensor &weight_hh = context.getWeight(wt_idx[RNNCellParams::weight_hh]);
  Tensor empty;
  const Tensor &bias_h = !disable_bias && integrate_bias
                           ? context.getWeight(wt_idx[RNNCellParams::bias_h])
                           : empty;
  const Tensor &bias_ih = !disable_bias && !integrate_bias
                            ? context.getWeight(wt_idx[RNNCellParams::bias_ih])
                            : empty;
  const Tensor &bias_hh = !disable_bias && !integrate_bias
                            ? context.getWeight(wt_idx[RNNCellParams::bias_hh])
                            : empty;

  input.dot(weight_ih, hidden_state);
  prev_hidden_state.dot(weight_hh, hidden_state, false, false, 1.0f);
  if (!disable_bias) {
    if (integrate_bias) {
      hidden_state.add_i(bias_h);
    } else {
      hidden_state.add_i(bias_ih);
      hidden_state.add_i(bias_hh);
    }
  }

  acti_func.run_fn(hidden_state, hidden_state);

  if (dropout_rate > epsilon && training) {
    Tensor &dropout_mask =
      context.getTensor(wt_idx[RNNCellParams::dropout_mask]);
    dropout_mask.dropout_mask(dropout_rate);
    hidden_state.multiply_i(dropout_mask);
  }
}

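// calcDerivative propagates the incoming derivative to both inputs:
//   d_x = d_h' * weight_ih^T, d_h_{t-1} = d_h' * weight_hh^T,
// where d_h' = act'(h_t) (*) d_h, masked by dropout when enabled.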
void RNNCellLayer::calcDerivative(RunLayerContext &context) {
  const float dropout_rate = std::get<props::DropOutRate>(rnncell_props).get();

  Tensor &outgoing_derivative =
    context.getOutgoingDerivative(INOUT_INDEX::INPUT);
  Tensor &d_prev_hidden_state =
    context.getOutgoingDerivative(INOUT_INDEX::INPUT_HIDDEN_STATE);
  const Tensor &hidden_state =
    context.getOutput(INOUT_INDEX::OUTPUT_HIDDEN_STATE);
  const Tensor &d_hidden_state =
    context.getIncomingDerivative(INOUT_INDEX::OUTPUT_HIDDEN_STATE);
  const Tensor &weight_ih = context.getWeight(wt_idx[RNNCellParams::weight_ih]);
  const Tensor &weight_hh = context.getWeight(wt_idx[RNNCellParams::weight_hh]);

  /// @note the calculation of d_hidden_state_ is duplicated in calcGradient.
  /// Needs optimization.
  Tensor d_hidden_state_;
  if (dropout_rate > epsilon) {
    const Tensor &dropout_mask =
      context.getTensor(wt_idx[RNNCellParams::dropout_mask]);
    d_hidden_state.multiply(dropout_mask, d_hidden_state_);
  } else {
    d_hidden_state_.copy(d_hidden_state);
  }

  Tensor hidden_state_;
  hidden_state_.copy(hidden_state);
  acti_func.run_prime_fn(hidden_state_, d_hidden_state_, d_hidden_state_);

  d_hidden_state_.dot(weight_ih, outgoing_derivative, false, true);
  d_hidden_state_.dot(weight_hh, d_prev_hidden_state, false, true);
}

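// calcGradient accumulates the weight and bias gradients:
//   d_weight_ih += x_t^T * d_h', d_weight_hh += h_{t-1}^T * d_h',
// and each bias gradient accumulates d_h' summed over the batch axis.
// Gradients are zeroed on first access since they accumulate across steps.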
void RNNCellLayer::calcGradient(RunLayerContext &context) {
  const bool disable_bias =
    std::get<props::DisableBias>(*layer_impl_props).get();

  const bool integrate_bias =
    std::get<props::IntegrateBias>(rnncell_props).get();
  const float dropout_rate = std::get<props::DropOutRate>(rnncell_props).get();

  const Tensor &input = context.getInput(INOUT_INDEX::INPUT);
  const Tensor &prev_hidden_state =
    context.getInput(INOUT_INDEX::INPUT_HIDDEN_STATE);
  const Tensor &hidden_state =
    context.getOutput(INOUT_INDEX::OUTPUT_HIDDEN_STATE);
  const Tensor &d_hidden_state =
    context.getIncomingDerivative(INOUT_INDEX::OUTPUT_HIDDEN_STATE);

  Tensor &d_weight_ih = context.getWeightGrad(wt_idx[RNNCellParams::weight_ih]);
  Tensor &d_weight_hh = context.getWeightGrad(wt_idx[RNNCellParams::weight_hh]);
  Tensor empty;
  Tensor &d_bias_h = !disable_bias && integrate_bias
                       ? context.getWeightGrad(wt_idx[RNNCellParams::bias_h])
                       : empty;
  Tensor &d_bias_ih = !disable_bias && !integrate_bias
                        ? context.getWeightGrad(wt_idx[RNNCellParams::bias_ih])
                        : empty;
  Tensor &d_bias_hh = !disable_bias && !integrate_bias
                        ? context.getWeightGrad(wt_idx[RNNCellParams::bias_hh])
                        : empty;

  if (context.isGradientFirstAccess(wt_idx[RNNCellParams::weight_ih])) {
    d_weight_ih.setZero();
  }
  if (context.isGradientFirstAccess(wt_idx[RNNCellParams::weight_hh])) {
    d_weight_hh.setZero();
  }
  if (!disable_bias) {
    if (integrate_bias) {
      if (context.isGradientFirstAccess(wt_idx[RNNCellParams::bias_h])) {
        d_bias_h.setZero();
      }
    } else {
      if (context.isGradientFirstAccess(wt_idx[RNNCellParams::bias_ih])) {
        d_bias_ih.setZero();
      }
      if (context.isGradientFirstAccess(wt_idx[RNNCellParams::bias_hh])) {
        d_bias_hh.setZero();
      }
    }
  }

  Tensor d_hidden_state_;
  if (dropout_rate > epsilon) {
    const Tensor &dropout_mask =
      context.getTensor(wt_idx[RNNCellParams::dropout_mask]);
    d_hidden_state.multiply(dropout_mask, d_hidden_state_);
  } else {
    d_hidden_state_.copy(d_hidden_state);
  }

  Tensor hidden_state_;
  hidden_state_.copy(hidden_state);
  acti_func.run_prime_fn(hidden_state_, d_hidden_state_, d_hidden_state_);

  input.dot(d_hidden_state_, d_weight_ih, true, false, 1.0);
  prev_hidden_state.dot(d_hidden_state_, d_weight_hh, true, false, 1.0);
  if (!disable_bias) {
    if (integrate_bias) {
      d_hidden_state_.sum(0, d_bias_h, 1.0, 1.0);
    } else {
      d_hidden_state_.sum(0, d_bias_ih, 1.0, 1.0);
      d_hidden_state_.sum(0, d_bias_hh, 1.0, 1.0);
    }
  }
}

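// Only the dropout mask tensor depends on the batch size; the weights are
// batch-independent, so nothing else needs updating here.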
void RNNCellLayer::setBatch(RunLayerContext &context, unsigned int batch) {
  const float dropout_rate = std::get<props::DropOutRate>(rnncell_props).get();
  if (dropout_rate > epsilon) {
    context.updateTensor(wt_idx[RNNCellParams::dropout_mask], batch);
  }
}

} // namespace nntrainer