LCOV - code coverage report
Current view: top level - nntrainer/layers - concat_layer.cpp (source / functions) Coverage Total Hit
Test: coverage_filtered.info Lines: 77.3 % 110 85
Test Date: 2025-12-14 20:38:17 Functions: 85.7 % 7 6

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * Copyright (C) 2020 Jijoong Moon <jijoong.moon@samsung.com>
       4              :  *
       5              :  * @file   concat_layer.cpp
       6              :  * @date   27 Oct 2020
       7              :  * @see    https://github.com/nnstreamer/nntrainer
       8              :  * @author Jijoong Moon <jijoong.moon@samsung.com>
       9              :  * @author Donghyeon Jeong <dhyeon.jeong@samsung.com>
      10              :  * @bug    No known bugs except for NYI items
      11              :  * @brief  This is Concat Layer Class for Neural Network
      12              :  *
      13              :  * @todo merge concat and split layer to a common implementation
      14              :  */
      15              : 
      16              : #include <cstring>
      17              : #include <vector>
      18              : 
      19              : #include <concat_layer.h>
      20              : #include <layer_context.h>
      21              : #include <nntr_threads.h>
      22              : #include <nntrainer_error.h>
      23              : #include <nntrainer_log.h>
      24              : #include <node_exporter.h>
      25              : #include <tensor_dim.h>
      26              : #include <util_func.h>
      27              : 
      28              : namespace nntrainer {
/**
 * @brief Default-construct a ConcatLayer.
 *        leading_helper_dim starts at 1 (identity product); it is recomputed
 *        in finalize() once the concat dimension is known.
 */
ConcatLayer::ConcatLayer() : Layer(), leading_helper_dim(1) {}

/// Index used when the layer logically has a single input/output tensor
static constexpr size_t SINGLE_INOUT_IDX = 0;
      32              : 
/**
 * @brief Validate input shapes, compute the output dimension, and precompute
 *        the reshape helpers used by forwarding/calcDerivative.
 *
 * @param context init context providing input dimensions; the single output
 *        dimension is set on it.
 * @throws std::runtime_error if the inputs disagree on any non-concat axis.
 */
void ConcatLayer::finalize(InitLayerContext &context) {
  auto &concat_dimension_prop = std::get<props::ConcatDimension>(concat_props);
  /** for backward compatibility, default concat dimension will be channel */
  /// @todo this is hacky way to force concat dimension to width if channel
  /// dimension is taken, this is because recurrent realizer, return sequence
  /// exploits concat layer but have no control over where to stack/axis
  unsigned int concat_dimension =
    context.getInputDimensions().front().channel() > 1 ? 3 : 1;
  if (!concat_dimension_prop.empty())
    concat_dimension = concat_dimension_prop.get();

  /**
   * The concat is only done along the axis dimension.
   * For example, consider 2 inputs a, b with dimensions [b,c,h,w] each
   * 1. concat_dimension = 1, output_dim = [b,c_a+c_b,h,w]
   * 2. concat_dimension = 2, output_dim = [b,c,h_a+h_b,w]
   * 3. concat_dimension = 3, output_dim = [b,c,h,w_a+w_b]
   */
  auto const &input_dims = context.getInputDimensions();
  const TensorDim &input_dim_0 = input_dims[SINGLE_INOUT_IDX];
  unsigned int concat_dim_val = input_dim_0.getTensorDim(concat_dimension);

  /// Validate that every input matches input 0 on all non-concat axes while
  /// accumulating the total extent along the concat axis.
  for (unsigned int idx = 1; idx < input_dims.size(); ++idx) {
    const TensorDim &dim = input_dims[idx];

    for (unsigned int i = 0; i < ml::train::TensorDim::getNumDim(); ++i) {
      if (i == concat_dimension)
        continue;
      NNTR_THROW_IF(input_dim_0[i] != dim[i], std::runtime_error)
        << "Error: concat layer requires same shape from all input layers "
           "along non-concat dimension";
    }
    concat_dim_val += dim[concat_dimension];
  }

  TensorDim output_dim = input_dim_0;
  output_dim.setTensorDim(concat_dimension, concat_dim_val);

  context.setOutputDimensions({output_dim});

  /**
   * The following helper shapes facilitate efficient concatenation and split of
   * the data.
   *
   * The helper shapes are created by consolidating all the dimensions before
   * the concat dimension to the first axis and all the remaining dimensions to
   * the last axis.
   *
   * @note This is possible since the data starting from the concat dimension to
   * the end is always continuous.
   *
   * @example the following shows how the helper dimension will look with given
   * inputs and concat dimension.
   *
   *          | cat_dim 1 | cat_dim 2 | cat_dim 3
   *  --------|-----------|-----------|-----------
   *  input0  |  2:1:2:3  |  1:2:1:3  |  1:2:2:3
   *  input1  |  2:3:2:3  |  1:2:3:3  |  1:2:2:1
   *  --------|-----------|-----------|-----------
   *  helper0 |  2:1:1:6  |  2:1:1:3  |  4:1:1:3
   *  helper1 |  2:1:1:18 |  2:1:1:9  |  4:1:1:1
   *
   */
  /// Setup output_reshape_helper (how output should be reshaped):
  /// collapse the concat axis and everything after it into the width axis.
  output_reshape_helper.channel(1);
  output_reshape_helper.height(1);
  output_reshape_helper.width(1);
  for (unsigned int axis = concat_dimension;
       axis < ml::train::TensorDim::getNumDim(); ++axis) {
    output_reshape_helper.width(output_reshape_helper.width() *
                                output_dim.getTensorDim(axis));
  }

  /// Setup input_reshape_helper (how inputs should be reshaped), one per input
  input_reshape_helper.resize(input_dims.size());

  for (unsigned int idx = 0; idx < input_reshape_helper.size(); idx++) {
    input_reshape_helper[idx].channel(1);
    input_reshape_helper[idx].height(1);
    input_reshape_helper[idx].width(1);

    for (unsigned int axis = concat_dimension;
         axis < ml::train::TensorDim::getNumDim(); ++axis) {

      input_reshape_helper[idx].width(input_reshape_helper[idx].width() *
                                      input_dims[idx].getTensorDim(axis));
    }
  }

  /// Product of the axes strictly between batch and the concat axis (batch is
  /// handled separately, hence idx starts at 1).
  leading_helper_dim = 1;
  for (unsigned int idx = 1; idx < concat_dimension; ++idx) {
    leading_helper_dim *= output_dim.getTensorDim(idx);
  }

  setBatch(input_dims[SINGLE_INOUT_IDX].batch());
}
     129              : 
     130          365 : void ConcatLayer::forwarding(RunLayerContext &context, bool training) {
     131              :   /**
     132              :    * Forwarding in ConcatLayer works as follows
     133              :    *
     134              :    *    in1        in2       in3                  output
     135              :    * |---0---| |----3----| |--6--|      |---0---||----3----||--6--|
     136              :    * |---1---| |----4----| |--7--|  =>  |---1---||----4----||--7--|
     137              :    * |---2---| |----5----| |--8--|      |---2---||----5----||--8--|
     138              :    *
     139              :    * @note For each input tensor, it iterates batches and copies the entire
     140              :    * width size to the corresponding output position. In the diagram above, the
     141              :    * row would be a batch, and the column would be a width. the number of each
     142              :    * block in the diagram indicates the order of copy to output.
     143              :    *
     144              :    * @todo avoid copy by creating input here as a shared_tensor of the output
     145              :    * here and then this layer can be in_place as well
     146              :    */
     147          365 :   Tensor &output = context.getOutput(SINGLE_INOUT_IDX);
     148              : 
     149          365 :   const TensorDim out_dim = output.getDim();
     150          365 :   output.reshape(output_reshape_helper);
     151              :   unsigned int output_width_offset = 0;
     152          365 :   TensorDim::TensorType tensor_type = output.getTensorType();
     153              : 
     154         1095 :   for (unsigned int idx = 0; idx < context.getNumInputs(); idx++) {
     155          730 :     Tensor &input = context.getInput(idx);
     156          730 :     const TensorDim in_dim = input.getDim();
     157          730 :     auto const &irh = input_reshape_helper[idx];
     158          730 :     input.reshape(irh);
     159          730 :     unsigned int data_copy_size = irh.width();
     160              : 
     161              :     /** loop over the dimensions before the concat dimension */
     162          730 :     if (in_dim.getDataType() == TensorDim::DataType::FP32) {
     163              :       /** copy continous tensor data (reshaped width) */
     164         9458 :       for (unsigned int batch = 0; batch < output.batch(); batch++) {
     165              :         Tensor dest_tensor = Tensor::Map<float>(
     166              :           output.getAddress<float>(batch, 0, 0, output_width_offset),
     167              :           data_copy_size * sizeof(float),
     168         8728 :           {1, 1, 1, data_copy_size, tensor_type});
     169              :         const Tensor source_tensor =
     170              :           Tensor::Map<float>(input.getAddress<float>(batch, 0, 0, 0),
     171              :                              data_copy_size * sizeof(float),
     172         8728 :                              {1, 1, 1, data_copy_size, tensor_type});
     173         8728 :         dest_tensor.copy(source_tensor);
     174         8728 :       }
     175            0 :     } else if (in_dim.getDataType() == TensorDim::DataType::FP16) {
     176              : #ifdef ENABLE_FP16
     177              :       /** copy continous tensor data (reshaped width) */
     178              :       for (unsigned int batch = 0; batch < output.batch(); batch++) {
     179              :         Tensor dest_tensor = Tensor::Map<_FP16>(
     180              :           output.getAddress<_FP16>(batch, 0, 0, output_width_offset),
     181              :           data_copy_size * sizeof(_FP16),
     182              :           {1, 1, 1, data_copy_size, tensor_type});
     183              :         const Tensor source_tensor =
     184              :           Tensor::Map<_FP16>(input.getAddress<_FP16>(batch, 0, 0, 0),
     185              :                              data_copy_size * sizeof(_FP16),
     186              :                              {1, 1, 1, data_copy_size, tensor_type});
     187              :         dest_tensor.copy(source_tensor);
     188              :       }
     189              : #else
     190            0 :       throw std::invalid_argument("Error: enable-fp16 is not enabled");
     191              : #endif
     192              :     }
     193              : 
     194          730 :     output_width_offset += irh.width();
     195          730 :     input.reshape(in_dim);
     196              :   }
     197              : 
     198          365 :   output.reshape(out_dim);
     199          365 : }
     200              : 
     201            0 : void ConcatLayer::incremental_forwarding(RunLayerContext &context,
     202              :                                          unsigned int from, unsigned int to,
     203              :                                          bool training) {
     204              :   /**
     205              :    * @todo avoid copy by creating input here as a shared_tensor of the output
     206              :    * here and then this layer can be in_place as well
     207              :    */
     208            0 :   Tensor &output = context.getOutput(SINGLE_INOUT_IDX);
     209              : 
     210            0 :   const TensorDim out_dim = output.getDim();
     211            0 :   output.reshape(output_reshape_helper);
     212              :   unsigned int output_height_offset = 0;
     213            0 :   unsigned int data_copy_size = output_reshape_helper.width();
     214              : 
     215              :   // @todo: this implementation is only works when axis is 3(width). Consider
     216              :   // for other axes
     217            0 :   unsigned int batch_channel = out_dim.batch() * out_dim.channel();
     218              : 
     219            0 :   for (unsigned int idx = 0; idx < context.getNumInputs(); idx++) {
     220            0 :     Tensor &input = context.getInput(idx);
     221            0 :     const TensorDim in_dim = input.getDim();
     222            0 :     auto const &irh = input_reshape_helper[idx];
     223            0 :     input.reshape(irh);
     224              : 
     225              :     /** loop over the dimensions before the concat dimension */
     226            0 :     for (unsigned int batch = batch_channel * from; batch < batch_channel * to;
     227              :          batch++) {
     228              :       /** loop over the concat dimension itself */
     229            0 :       for (unsigned int count = 0; count < irh.height(); count++) {
     230              :         Tensor dest_tensor = Tensor::Map(
     231              :           output.getAddress(batch, 0, output_height_offset + count, 0),
     232            0 :           data_copy_size * sizeof(float), {1, 1, 1, data_copy_size});
     233              :         const Tensor source_tensor = Tensor::Map(
     234              :           input.getAddress(batch, 0, count, 0), data_copy_size * sizeof(float),
     235            0 :           {1, 1, 1, data_copy_size});
     236            0 :         dest_tensor.copy(source_tensor);
     237            0 :       }
     238              :     }
     239              : 
     240            0 :     input.reshape(in_dim);
     241            0 :     output_height_offset += irh.height();
     242              :   }
     243              : 
     244            0 :   output.reshape(out_dim);
     245            0 : }
     246              : 
     247          178 : void ConcatLayer::calcDerivative(RunLayerContext &context) {
     248              :   /**
     249              :    * calcDerivative in ConcatLayer works as follows
     250              :    *
     251              :    *           output                    in1        in2       in3
     252              :    * |---0---||----3----||--6--|      |---0---| |----3----| |--6--|
     253              :    * |---1---||----4----||--7--|  =>  |---1---| |----4----| |--7--|
     254              :    * |---2---||----5----||--8--|      |---2---| |----5----| |--8--|
     255              :    *
     256              :    * @note For each input tensor, it iterates batches and copies the entire
     257              :    * input width size from the output tensor to the corresponding input. In the
     258              :    * diagram above, the row would be a batch, and the column would be a width.
     259              :    * The number of each block in the diagram indicates the order of copy to
     260              :    * inputs.
     261              :    *
     262              :    * @todo avoid copy by creating input here as a shared_tensor of the output
     263              :    * here and then this layer can be in_place as well
     264              :    */
     265          178 :   Tensor output = context.getIncomingDerivative(SINGLE_INOUT_IDX);
     266              : 
     267          178 :   output.reshape(output_reshape_helper);
     268              :   unsigned int output_width_offset = 0;
     269          178 :   TensorDim::TensorType tensor_type = output.getTensorType();
     270              : 
     271          534 :   for (unsigned int idx = 0; idx < context.getNumInputs(); idx++) {
     272          356 :     Tensor &input = context.getOutgoingDerivative(idx);
     273          356 :     const TensorDim in_dim = input.getDim();
     274          356 :     auto const &irh = input_reshape_helper[idx];
     275          356 :     input.reshape(irh);
     276          356 :     unsigned int data_copy_size = irh.width();
     277              : 
     278          356 :     if (in_dim.getDataType() == TensorDim::DataType::FP32) {
     279              :       /** loop over the dimensions before the concat dimension */
     280         4642 :       for (unsigned int batch = 0; batch < output.batch(); batch++) {
     281              :         /** copy continous data (reshaped width size) in a tensor */
     282              :         const Tensor source_tensor = Tensor::Map<float>(
     283              :           output.getAddress<float>(batch, 0, 0, output_width_offset),
     284              :           data_copy_size * sizeof(float),
     285         4286 :           {1, 1, 1, data_copy_size, tensor_type});
     286              :         Tensor dest_tensor =
     287              :           Tensor::Map<float>(input.getAddress<float>(batch, 0, 0, 0),
     288              :                              data_copy_size * sizeof(float),
     289         4286 :                              {1, 1, 1, data_copy_size, tensor_type});
     290         4286 :         dest_tensor.copy(source_tensor);
     291         4286 :       }
     292            0 :     } else if (in_dim.getDataType() == TensorDim::DataType::FP16) {
     293              : #ifdef ENABLE_FP16
     294              :       /** loop over the dimensions before the concat dimension */
     295              :       for (unsigned int batch = 0; batch < output.batch(); batch++) {
     296              :         /** copy continous data (reshaped width size) in a tensor */
     297              :         const Tensor source_tensor = Tensor::Map<_FP16>(
     298              :           output.getAddress<_FP16>(batch, 0, 0, output_width_offset),
     299              :           data_copy_size * sizeof(_FP16),
     300              :           {1, 1, 1, data_copy_size, tensor_type});
     301              :         Tensor dest_tensor =
     302              :           Tensor::Map<_FP16>(input.getAddress<_FP16>(batch, 0, 0, 0),
     303              :                              data_copy_size * sizeof(_FP16),
     304              :                              {1, 1, 1, data_copy_size, tensor_type});
     305              :         dest_tensor.copy(source_tensor);
     306              :       }
     307              : #else
     308            0 :       throw std::invalid_argument("Error: enable-fp16 is not enabled");
     309              : #endif
     310              :     }
     311              : 
     312          356 :     input.reshape(in_dim);
     313          356 :     output_width_offset += irh.width();
     314              :   }
     315          178 : }
     316              : 
     317          612 : void ConcatLayer::setProperty(const std::vector<std::string> &values) {
     318          612 :   auto remain_props = loadProperties(values, concat_props);
     319          611 :   NNTR_THROW_IF(!remain_props.empty(), std::invalid_argument)
     320            2 :     << "[ConcatLayer] Unknown Layer Properties count " +
     321            4 :          std::to_string(values.size());
     322          611 : }
     323              : 
/**
 * @brief Export this layer's properties via the given exporter.
 *
 * @param exporter exporter that collects the results
 * @param method export format/method to use
 */
void ConcatLayer::exportTo(Exporter &exporter,
                           const ml::train::ExportMethods &method) const {
  // export common Layer properties first, then concat-specific ones
  Layer::exportTo(exporter, method);
  exporter.saveResult(concat_props, method, this);
}
     329              : 
     330              : } /* namespace nntrainer */
        

Generated by: LCOV version 2.0-1