LCOV - code coverage report
Current view: top level - nntrainer/layers - time_dist.cpp (source / functions)
Test:         coverage_filtered.info
Test Date:    2025-12-14 20:38:17

                 Coverage    Total    Hit
  Lines:            9.2 %      217     20
  Functions:       14.3 %       14      2

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * Copyright (C) 2020 Jijoong Moon <jijoong.moon@samsung.com>
       4              :  *
       5              :  * @file   time_dist.cpp
       6              :  * @date   01 April 2021
       7              :  * @brief  This is the Time Distributed Layer class of the neural network
       8              :  * @see    https://github.com/nnstreamer/nntrainer
       9              :  * @author Jijoong Moon <jijoong.moon@samsung.com>
      10              :  * @bug    No known bugs except for NYI items
      11              :  *
      12              :  */
      13              : 
      14              : #include <layer_context.h>
      15              : #include <nntrainer_error.h>
      16              : #include <nntrainer_log.h>
      17              : #include <time_dist.h>
      18              : #include <util_func.h>
      19              : #include <weight.h>
      20              : 
      21              : namespace nntrainer {
      22              : 
      23              : static constexpr size_t SINGLE_INOUT_IDX = 0;
      24              : 
      25            0 : static void reshape(Tensor &m) {
      26            0 :   TensorDim d = m.getDim();
      27            0 :   m.reshape({d[2], d[1], d[0], d[3]});
      28            0 : }
      29              : 
      30            0 : void TimeDistLayer::setPosition(RunLayerContext &context) {
      31            0 :   positions[0] = context.getInput(SINGLE_INOUT_IDX).getData();
      32            0 :   positions[2] = context.getOutput(SINGLE_INOUT_IDX).getData();
      33              :   /** TODO: use mode of execution here */
      34              :   try {
      35            0 :     positions[1] = context.getOutgoingDerivative(SINGLE_INOUT_IDX).getData();
      36            0 :     positions[3] =
      37            0 :       (float *)context.getIncomingDerivative(SINGLE_INOUT_IDX).getData();
      38            0 :   } catch (...) {
      39              :     /** in case of inference, these tensors will not exist */
      40            0 :   }
      41            0 : }
      42              : 
      43            0 : void TimeDistLayer::transposeInOut(RunLayerContext &context) {
      44              :   // Position[0] : net_input.variable
      45            0 :   Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
      46            0 :   input_.copy(transposeTensor(input_));
      47              : 
      48              :   // Position[1] : net_input.gradient
      49            0 :   Tensor &ret_ = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
      50            0 :   if (ret_.getData() != positions[0]) {
      51            0 :     ret_.copy(transposeTensor(ret_));
      52              :   } else {
      53            0 :     reshape(ret_);
      54              :   }
      55              : 
      56              :   // Position[2] : net_hidden.variable
      57            0 :   Tensor &hval_ = context.getOutput(SINGLE_INOUT_IDX);
      58            0 :   if (hval_.getData() != positions[0] && hval_.getData() != positions[1]) {
      59            0 :     hval_.copy(transposeTensor(hval_));
      60              :   } else {
      61            0 :     reshape(hval_);
      62              :   }
      63              : 
      64              :   // Position[3] : net_hidden.gradient
      65              :   bool trans = true;
      66              : 
      67              :   /// @fixme: the below is probably wrong as it changes the incoming derivative;
      68              :   /// other layers referring to it will have wrong output grad information.
      69            0 :   Tensor &derivative_ = context.getOutputGradUnsafe(SINGLE_INOUT_IDX);
      70            0 :   for (unsigned int i = 0; i < 3; ++i) {
      71            0 :     if (derivative_.getData() == positions[i]) {
      72              :       trans = false;
      73              :       break;
      74              :     }
      75              :   }
      76            0 :   if (trans)
      77            0 :     derivative_.copy(transposeTensor(derivative_));
      78              :   else
      79            0 :     reshape(derivative_);
      80            0 : }
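
The pointer bookkeeping in setPosition()/transposeInOut() above exists because
layers may execute in place, so the input, output, and derivative tensors can
share a single buffer; a buffer that was already transposed must only be
reshaped when it is encountered again. A minimal sketch of that aliasing check,
using plain pointers rather than nntrainer tensors (all names illustrative):

    #include <array>
    #include <vector>

    // Returns true if `data` aliases one of the buffers handled earlier,
    // mirroring the positions[] comparisons in transposeInOut().
    static bool aliasesAny(const float *data,
                           const std::array<const float *, 3> &seen) {
      for (const float *p : seen)
        if (p == data)
          return true;
      return false;
    }

    int main() {
      std::vector<float> input(8), output(8);
      // Pretend the input buffer has already been transposed in place.
      std::array<const float *, 3> positions = {input.data(), nullptr, nullptr};

      bool transpose_output = !aliasesAny(output.data(), positions); // distinct: copy-transpose
      bool reshape_only = aliasesAny(input.data(), positions);       // aliased: reshape only
      return (transpose_output && reshape_only) ? 0 : 1;
    }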
      81              : 
      82            0 : Tensor TimeDistLayer::transposeTensor(Tensor &m) {
      83            0 :   TensorDim dim = m.getDim();
      84              :   // Assume the channel is 1 and the time dimension is h. This transposes
      85              :   // [b, 1, h, w] to [h, 1, b, w], but nntrainer only supports transposing the
      86              :   // 1,2,3 axes, so we reshape to [1, b, h, w] first.
      87              :   // TODO:
      88              :   // If we reshape to {1, dim[0]*dim[1], dim[2], dim[3]}, transpose to {1, dim[2],
      89              :   // dim[0]*dim[1], dim[3]}, and then reshape to {dim[2], dim[0], dim[1], dim[3]},
      90              :   // we could support the case where dim[1] is not 1. But some other places in
      91              :   // the code would need to change to support this.
      92              :   //
      93            0 :   if (dim[1] != 1)
      94              :     throw std::invalid_argument(
      95            0 :       "Channel of Time distributed layer must be 1 for now");
      96              : 
      97            0 :   m.reshape({dim[1], dim[0], dim[2], dim[3]});
      98            0 :   Tensor in = m.transpose("1:0:2");
      99            0 :   in.reshape({dim[2], dim[1], dim[0], dim[3]});
     100            0 :   m.reshape(dim);
     101            0 :   in.setName(m.getName() + "_trans");
     102              : 
     103            0 :   return in;
     104            0 : }
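
transposeTensor() swaps the batch and time (height) axes: a [b, 1, h, w] tensor
becomes [h, 1, b, w] via a reshape to [1, b, h, w], a "1:0:2" transpose, and a
final reshape. A rough, self-contained illustration of the same index mapping
on a flat buffer (plain C++, not the nntrainer API):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Maps src laid out as [b, 1, h, w] to dst laid out as [h, 1, b, w].
    std::vector<float> swapBatchAndTime(const std::vector<float> &src,
                                        std::size_t b, std::size_t h, std::size_t w) {
      std::vector<float> dst(src.size());
      for (std::size_t bi = 0; bi < b; ++bi)
        for (std::size_t hi = 0; hi < h; ++hi)
          for (std::size_t wi = 0; wi < w; ++wi)
            dst[(hi * b + bi) * w + wi] = src[(bi * h + hi) * w + wi];
      return dst;
    }

    int main() {
      const std::size_t b = 2, h = 3, w = 4;
      std::vector<float> x(b * h * w);
      for (std::size_t i = 0; i < x.size(); ++i)
        x[i] = static_cast<float>(i);

      std::vector<float> y = swapBatchAndTime(x, b, h, w);
      // Element (bi, hi, wi) of the input lands at (hi, bi, wi) of the output.
      assert(y[(1 * b + 0) * w + 2] == x[(0 * h + 1) * w + 2]);
      return 0;
    }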
     105              : 
     106            2 : void TimeDistLayer::finalize(InitLayerContext &context) {
     107            2 :   NNTR_THROW_IF(context.getNumInputs() != 1, std::invalid_argument)
     108              :     << "Time distributed layer takes only one input";
     109              : 
     110            2 :   if (!dist_layer) {
     111            0 :     throw std::invalid_argument("distributed layer is not set properly");
     112              :   }
     113              : 
     114              :   const TensorDim &input_dim = context.getInputDimensions()[0];
     115            2 :   if (input_dim.channel() != 1) {
     116              :     throw std::invalid_argument(
     117            0 :       "only 1 channel is allowed for time distributed layer");
     118              :   }
     119              : 
     120              :   /**
     121              :    * simulate an InitLayerContext, and then replicate its effect onto the
     122              :    * actual context
     123              :    */
     124            2 :   TensorDim dist_dim = input_dim;
     125            2 :   dist_dim.height(1);
     126              :   InitLayerContext dist_context({dist_dim}, {}, context.getInPlace(),
     127            2 :                                 context.getName());
     128              : 
     129              :   // During forwarding and backwarding, this layer sets the input and output
     130              :   // buffers of dist_layer properly.
     131              :   // dist_layer will use forwarding_with_val and backwarding_with_val.
     132            2 :   dist_layer->finalize(dist_context);
     133              : 
     134            2 :   TensorDim output_dim = dist_context.getOutSpecs()[0].variable_spec.dim;
     135              :   // input_dim.height is the number of time iterations
     136            2 :   output_dim.height(input_dim.height());
     137            2 :   context.setOutputDimensions({output_dim});
     138              : 
     139              :   /** real setting of context */
     140            2 :   fillLayerInitContext(context, dist_context);
     141            6 : }
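
Put differently, finalize() shows the wrapped layer a single time step: the
input height is forced to 1, dist_layer is finalized on that shape, and the
time dimension is restored on the resulting output shape. A small sketch of
this dimension bookkeeping, with a plain struct standing in for TensorDim and a
hypothetical fully-connected wrapped layer (both are assumptions, not the
nntrainer API):

    #include <cassert>

    struct Dim { unsigned batch, channel, height, width; };

    // Hypothetical wrapped layer: fully connected with `units` output features.
    Dim distLayerOutput(Dim in, unsigned units) {
      return {in.batch, in.channel, in.height, units};
    }

    int main() {
      Dim input = {4, 1, 7, 32};   // [batch, 1, time steps, features]

      Dim dist_in = input;
      dist_in.height = 1;          // the wrapped layer sees one time step

      Dim out = distLayerOutput(dist_in, 16);
      out.height = input.height;   // restore the time dimension on the output

      assert(out.batch == 4 && out.height == 7 && out.width == 16);
      return 0;
    }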
     142              : 
     143            0 : void TimeDistLayer::fillWeightsFromContext(RunLayerContext &context) {
     144            0 :   weights_wrapper.resize(context.getNumWeights());
     145              : 
     146              :   /** create weights */
     147            0 :   for (unsigned int idx = 0; idx < context.getNumWeights(); idx++) {
     148            0 :     if (context.weightHasGradient(idx)) {
     149            0 :       weights_wrapper[idx] =
     150            0 :         Weight(context.getWeight(idx), context.getWeightGrad(idx),
     151            0 :                context.getWeightName(idx));
     152              :     } else {
     153            0 :       weights_wrapper[idx] =
     154            0 :         Weight(context.getWeight(idx), Tensor(), context.getWeightName(idx));
     155              :     }
     156              :   }
     157            0 : }
     158              : 
     159            0 : void TimeDistLayer::fillTensorsFromContext(RunLayerContext &context) {
     160            0 :   tensors_wrapper.resize(context.getNumTensors());
     161              : 
     162              :   /** create tensors */
     163            0 :   for (unsigned int idx = 0; idx < context.getNumTensors(); idx++) {
     164            0 :     if (context.tensorHasGradient(idx)) {
     165            0 :       tensors_wrapper[idx] =
     166            0 :         Var_Grad(context.getTensor(idx), context.getTensorGrad(idx),
     167            0 :                  context.getTensorName(idx));
     168              :     } else {
     169            0 :       tensors_wrapper[idx] =
     170            0 :         Var_Grad(context.getTensor(idx), Tensor(), context.getTensorName(idx));
     171              :     }
     172              :   }
     173            0 : }
     174              : 
     175            0 : std::vector<Weight *> TimeDistLayer::getWeightsForContext() {
     176              :   /** create weights for context */
     177              :   std::vector<Weight *> weights_for_context;
     178            0 :   for (auto &w : weights_wrapper)
     179            0 :     weights_for_context.push_back(&w);
     180              : 
     181            0 :   return weights_for_context;
     182            0 : }
     183              : 
     184            0 : std::vector<Var_Grad *> TimeDistLayer::getTensorsForContext() {
     185              :   /** create tensors for context */
     186              :   std::vector<Var_Grad *> tensors_for_context;
     187            0 :   for (auto &t : tensors_wrapper)
     188            0 :     tensors_for_context.push_back(&t);
     189              : 
     190            0 :   return tensors_for_context;
     191            0 : }
     192              : 
     193            0 : void TimeDistLayer::forwarding(RunLayerContext &context, bool training) {
     194            0 :   setPosition(context);
     195              : 
     196            0 :   Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
     197            0 :   Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
     198              :   // input_.dim = [ b, 1, h, w ]
     199              : 
     200            0 :   Tensor h_g;
     201              : 
     202            0 :   const TensorDim &ho_dim = hidden_.getDim();
     203            0 :   const TensorDim &in_dim = input_.getDim();
     204              : 
     205              :   // TODO: This transposed input tensor could be reused for backwarding
     206            0 :   Tensor in = transposeTensor(input_);
     207              : 
     208            0 :   Tensor out = Tensor({ho_dim[2], 1, ho_dim[0], ho_dim[3]}, true,
     209            0 :                       Initializer::NONE, context.getName() + ":inter_output");
     210              : 
     211            0 :   TensorDim i_dim = in_dim;
     212            0 :   i_dim.channel(1);
     213            0 :   i_dim.height(1);
     214              : 
     215            0 :   TensorDim h_dim = ho_dim;
     216            0 :   h_dim.channel(1);
     217            0 :   h_dim.height(1);
     218              : 
     219            0 :   if (dist_layer->requireLabel() &&
     220            0 :       context.isLabelAvailable(SINGLE_INOUT_IDX)) {
     221            0 :     Tensor &hidden_g = context.getLabel(SINGLE_INOUT_IDX);
     222            0 :     h_g = transposeTensor(hidden_g);
     223              :   }
     224              : 
     225            0 :   Var_Grad in_var(i_dim, Initializer::NONE, false, false, "input");
     226              :   Var_Grad out_var(h_dim, Initializer::NONE,
     227            0 :                    dist_layer->requireLabel() &&
     228            0 :                      context.isLabelAvailable(SINGLE_INOUT_IDX),
     229            0 :                    false, "output");
     230              : 
     231            0 :   fillWeightsFromContext(context);
     232            0 :   fillTensorsFromContext(context);
     233              : 
     234            0 :   for (unsigned int i = 0; i < in_dim.height(); ++i) {
     235              :     //
     236              :     // Iterate along the height (time) direction. Each slice of in has the
     237              :     // dimension [ b, 1, 1, width ], and each slice of out has [ b, 1, 1, width ].
     238              :     //
     239            0 :     Tensor label_iter;
     240              : 
     241              :     Tensor in_iter = in.getSharedDataTensor(
     242            0 :       i_dim, i * in_dim.batch() * in_dim.width(), true, in.getName());
     243              :     Tensor out_iter = out.getSharedDataTensor(
     244            0 :       h_dim, i * ho_dim.batch() * ho_dim.width(), true, out.getName());
     245              : 
     246            0 :     in_var.initializeVariable(in_iter);
     247            0 :     out_var.initializeVariable(out_iter);
     248              : 
     249            0 :     if (dist_layer->requireLabel() &&
     250            0 :         context.isLabelAvailable(SINGLE_INOUT_IDX)) {
     251            0 :       label_iter = h_g.getSharedDataTensor(
     252            0 :         h_dim, i * ho_dim.batch() * ho_dim.width(), true, h_g.getName());
     253            0 :       out_var.initializeGradient(label_iter);
     254              :     }
     255              : 
     256              :     RunLayerContext dist_context(
     257              :       context.getName(), context.getTrainable(), context.getLoss(),
     258            0 :       context.getInPlace(), context.getLossScale(), context.getContextData(),
     259            0 :       false, getWeightsForContext(), {&in_var}, {&out_var},
     260            0 :       getTensorsForContext());
     261              : 
     262            0 :     dist_layer->forwarding(dist_context, training);
     263            0 :   }
     264              : 
     265            0 :   hidden_.copy(transposeTensor(out));
     266            0 :   clearFromContext();
     267            0 : }
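
After the batch/time swap, each time step occupies a contiguous slab of
batch * width elements, which is what the getSharedDataTensor() calls above
expose at offset i * batch * width. A plain-C++ sketch of that slicing, with an
elementwise doubling standing in for dist_layer->forwarding():

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
      const std::size_t b = 2, h = 3, w = 4;
      std::vector<float> in(h * b * w, 1.0f);   // already transposed to [h, 1, b, w]
      std::vector<float> out(h * b * w, 0.0f);

      for (std::size_t i = 0; i < h; ++i) {     // iterate over time steps
        const float *in_step = in.data() + i * b * w;  // shared input view
        float *out_step = out.data() + i * b * w;      // shared output view
        for (std::size_t j = 0; j < b * w; ++j)
          out_step[j] = 2.0f * in_step[j];      // stand-in for the wrapped layer
      }

      assert(out.front() == 2.0f && out.back() == 2.0f);
      return 0;
    }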
     268              : 
     269            0 : void TimeDistLayer::calcDerivative(RunLayerContext &context) {
     270              :   /// @fixme: this is probably wrong as it mutates the incoming derivative;
     271              :   /// the layer will need to copy instead of transposing and overwriting in place
     272            0 :   Tensor &derivative_ = context.getOutputGradUnsafe(SINGLE_INOUT_IDX);
     273            0 :   Tensor &hval_ = context.getOutput(SINGLE_INOUT_IDX);
     274            0 :   Tensor &ret_ = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
     275            0 :   Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
     276              : 
     277            0 :   TensorDim der_dim = derivative_.getDim();
     278            0 :   TensorDim ret_dim = ret_.getDim();
     279              : 
     280            0 :   TensorDim r_dim = {ret_dim[2], 1, 1, ret_dim[3]};
     281            0 :   TensorDim d_dim = {der_dim[2], 1, 1, der_dim[3]};
     282              : 
     283            0 :   Var_Grad in_var(r_dim, Initializer::NONE, true, false, "input");
     284            0 :   Var_Grad out_var(d_dim, Initializer::NONE, true, false, "output");
     285              : 
     286            0 :   fillWeightsFromContext(context);
     287            0 :   fillTensorsFromContext(context);
     288              : 
     289            0 :   for (unsigned int i = 0; i < der_dim[0]; ++i) {
     290              :     Tensor ret_iter = ret_.getSharedDataTensor(
     291            0 :       r_dim, i * r_dim.batch() * r_dim.width(), true, ret_.getName());
     292              :     Tensor in_iter = input_.getSharedDataTensor(
     293            0 :       r_dim, i * r_dim.batch() * r_dim.width(), true, input_.getName());
     294              :     Tensor d_iter = derivative_.getSharedDataTensor(
     295            0 :       d_dim, i * d_dim.batch() * d_dim.width(), true, derivative_.getName());
     296              :     Tensor hval_iter = hval_.getSharedDataTensor(
     297            0 :       d_dim, i * d_dim.batch() * d_dim.width(), true, hval_.getName());
     298              : 
     299            0 :     in_var.initializeGradient(ret_iter);
     300            0 :     in_var.initializeVariable(in_iter);
     301            0 :     out_var.initializeGradient(d_iter);
     302            0 :     out_var.initializeVariable(hval_iter);
     303              : 
     304              :     RunLayerContext dist_context(
     305              :       context.getName(), context.getTrainable(), context.getLoss(),
     306            0 :       context.getInPlace(), context.getLossScale(), context.getContextData(),
     307            0 :       false, getWeightsForContext(), {&in_var}, {&out_var},
     308            0 :       getTensorsForContext());
     309              : 
     310            0 :     dist_layer->calcDerivative(dist_context);
     311            0 :   }
     312              : 
     313            0 :   ret_.copy(transposeTensor(ret_));
     314              :   // We are not going to transpose the data. The data is not used anymore;
     315              :   // it will be overwritten at the next iteration.
     316              :   // Just reshape the tensors.
     317            0 :   hval_.reshape({der_dim[2], 1, der_dim[0], der_dim[3]});
     318            0 :   derivative_.reshape({der_dim[2], 1, der_dim[0], der_dim[3]});
     319            0 :   input_.reshape({ret_dim[2], 1, ret_dim[0], ret_dim[3]});
     320            0 :   clearFromContext();
     321            0 : }
     322              : 
     323            0 : void TimeDistLayer::calcGradient(RunLayerContext &context) {
     324              :   // Even if dist_layer->getNumWeights() == 0, we transpose here for the
     325              :   // calculation of derivatives and overwrite the original tensors, so that
     326              :   // calcDerivative() can use them later without transposing again.
     327            0 :   transposeInOut(context);
     328              : 
     329            0 :   if (context.getNumWeights() == 0)
     330            0 :     return;
     331              : 
     332            0 :   Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
     333            0 :   const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
     334              : 
     335            0 :   TensorDim der_dim = derivative_.getDim();
     336            0 :   TensorDim in_dim = input_.getDim();
     337              : 
     338            0 :   TensorDim i_dim = {in_dim[2], 1, 1, in_dim[3]};
     339            0 :   TensorDim d_dim = {der_dim[2], 1, 1, der_dim[3]};
     340              : 
     341            0 :   fillWeightsFromContext(context);
     342            0 :   fillTensorsFromContext(context);
     343              : 
     344            0 :   for (unsigned int i = 0; i < der_dim[0]; ++i) {
     345              :     Tensor in_iter = input_.getSharedDataTensor(
     346            0 :       i_dim, i * i_dim.batch() * i_dim.width(), true, input_.getName());
     347              :     Tensor d_iter = derivative_.getSharedDataTensor(
     348            0 :       d_dim, i * d_dim.batch() * d_dim.width(), true, derivative_.getName());
     349              : 
     350            0 :     Var_Grad in_var(i_dim, Initializer::NONE, true, false, "input");
     351            0 :     Var_Grad out_var(d_dim, Initializer::NONE, true, false, "output");
     352              : 
     353            0 :     in_var.initializeVariable(in_iter);
     354            0 :     out_var.initializeGradient(d_iter);
     355              : 
     356              :     RunLayerContext dist_context(
     357              :       context.getName(), context.getTrainable(), context.getLoss(),
     358            0 :       context.getInPlace(), context.getLossScale(), context.getContextData(),
     359            0 :       false, getWeightsForContext(), {&in_var}, {&out_var},
     360            0 :       getTensorsForContext());
     361              : 
     362            0 :     dist_layer->calcGradient(dist_context);
     363            0 :   }
     364            0 :   clearFromContext();
     365            0 : }
     366              : 
     367            2 : void TimeDistLayer::fillLayerInitContext(InitLayerContext &context,
     368              :                                          const InitLayerContext &dist_context) {
     369              :   /** real setting of the input flags */
     370              :   auto const &input_dims = context.getInputDimensions();
     371            4 :   for (unsigned int idx = 0; idx < dist_context.getNumInputs(); idx++) {
     372            2 :     context.setDynDimFlagInputDimension(idx, input_dims[idx].getDynDimFlag());
     373            2 :     context.setEffDimFlagInputDimension(idx, input_dims[idx].getEffDimFlag());
     374              :   }
     375              : 
     376              :   /** real request of tensors */
     377            2 :   for (auto const &ts : dist_context.getTensorsSpec())
     378              :     context.requestTensor(ts);
     379              : 
     380              :   /** real request of weights */
     381            4 :   for (auto const &ws : dist_context.getWeightsSpec())
     382              :     context.requestWeight(ws);
     383            2 : }
     384              : 
     385            0 : void TimeDistLayer::setBatch(RunLayerContext &context, unsigned int batch) {
     386            0 :   if (context.getNumTensors() > 0) {
     387            0 :     const TensorDim &out_dim = context.getOutput(SINGLE_INOUT_IDX).getDim();
     388            0 :     const TensorDim &in_dim = context.getInput(SINGLE_INOUT_IDX).getDim();
     389              : 
     390            0 :     TensorDim i_dim = {in_dim[2], 1, 1, in_dim[3]};
     391            0 :     TensorDim o_dim = {out_dim[2], 1, 1, out_dim[3]};
     392              : 
     393            0 :     Var_Grad in_var(i_dim, Initializer::NONE, true, false, "input");
     394            0 :     Var_Grad out_var(o_dim, Initializer::NONE, true, false, "output");
     395              : 
     396            0 :     fillWeightsFromContext(context);
     397            0 :     fillTensorsFromContext(context);
     398              : 
     399              :     RunLayerContext dist_context(
     400              :       context.getName(), context.getTrainable(), context.getLoss(),
     401            0 :       context.getInPlace(), context.getLossScale(), context.getContextData(),
     402            0 :       false, getWeightsForContext(), {&in_var}, {&out_var},
     403            0 :       getTensorsForContext());
     404              : 
     405            0 :     dist_layer->setBatch(dist_context, batch);
     406              : 
     407            0 :     for (unsigned int idx = 0; idx < dist_context.getNumTensors(); idx++) {
     408            0 :       context.updateTensor(idx, dist_context.getTensor(idx).getDim().batch());
     409              :     }
     410              : 
     411            0 :     clearFromContext();
     412            0 :   }
     413            0 : }
     414              : 
     415              : } /* namespace nntrainer */
        

Generated by: LCOV version 2.0-1