LCOV - code coverage report
Current view: top level - nntrainer/layers - fc_layer.cpp (source / functions)
Test: coverage_filtered.info
Test Date: 2025-12-14 20:38:17
Coverage summary:   Lines: 44.2 % (76 of 172 hit)   Functions: 80.0 % (8 of 10 hit)

            Line data    Source code
       1              : /**
       2              :  * Copyright (C) 2020 Samsung Electronics Co., Ltd. All Rights Reserved.
       3              :  *
       4              :  * Licensed under the Apache License, Version 2.0 (the "License");
       5              :  * you may not use this file except in compliance with the License.
       6              :  * You may obtain a copy of the License at
       7              :  *   http://www.apache.org/licenses/LICENSE-2.0
       8              :  * Unless required by applicable law or agreed to in writing, software
       9              :  * distributed under the License is distributed on an "AS IS" BASIS,
      10              :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      11              :  * See the License for the specific language governing permissions and
      12              :  * limitations under the License.
      13              :  *
      14              :  *
      15              :  * @file        fc_layer.cpp
      16              :  * @date        14 May 2020
      17              :  * @brief       This is the Fully Connected Layer class for neural networks
      18              :  * @see         https://github.com/nnstreamer/nntrainer
      19              :  * @author      Jijoong Moon <jijoong.moon@samsung.com>
      20              :  * @bug         No known bugs except for NYI items
      21              :  *
      22              :  */
      23              : 
      24              : #include <common_properties.h>
      25              : #include <fc_layer.h>
      26              : #include <layer_context.h>
      27              : #include <lazy_tensor.h>
      28              : #include <nntrainer_error.h>
      29              : #include <nntrainer_log.h>
      30              : #include <node_exporter.h>
      31              : #include <util_func.h>
      32              : 
      33              : #include <iostream>
      34              : 
      35              : namespace nntrainer {
      36              : 
      37              : static constexpr size_t SINGLE_INOUT_IDX = 0;
      38              : 
      39              : enum FCParams { weight, bias };
      40              : enum LORAParams { loraA, loraB, loraTmp, loraOut };
      41              : 
      42          943 : FullyConnectedLayer::FullyConnectedLayer() :
      43              :   LayerImpl(),
      44          943 :   lora_scaling(1.0f),
      45          943 :   fc_props(props::Unit(), props::LoraRank(), props::LoraAlpha()),
      46          943 :   quantizer(nullptr) {
      47              :   weight_idx.fill(std::numeric_limits<unsigned>::max());
      48              :   lora_idx.fill(std::numeric_limits<unsigned>::max());
      49          943 : }
      50              : 
      51          652 : void FullyConnectedLayer::finalize(InitLayerContext &context) {
      52              :   auto &weight_regularizer =
      53              :     std::get<props::WeightRegularizer>(*layer_impl_props);
      54              :   auto &weight_regularizer_constant =
      55              :     std::get<props::WeightRegularizerConstant>(*layer_impl_props);
      56              :   auto &weight_initializer =
      57              :     std::get<props::WeightInitializer>(*layer_impl_props);
      58              :   auto &weight_decay = std::get<props::WeightDecay>(*layer_impl_props);
      59              :   auto &bias_decay = std::get<props::BiasDecay>(*layer_impl_props);
      60              :   auto &bias_initializer = std::get<props::BiasInitializer>(*layer_impl_props);
      61              :   auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
      62              : 
      63          652 :   const auto &unit = std::get<props::Unit>(fc_props).get();
      64              :   const auto &lora_rank = (std::get<props::LoraRank>(fc_props).empty())
      65          652 :                             ? 0
      66            0 :                             : std::get<props::LoraRank>(fc_props).get();
      67          652 :   lora_scaling = (lora_rank && !std::get<props::LoraAlpha>(fc_props).empty())
      68            0 :                    ? (float)std::get<props::LoraAlpha>(fc_props) / lora_rank
      69              :                    : 1;
      70              : 
      71          652 :   NNTR_THROW_IF(context.getNumInputs() != 1, std::invalid_argument)
      72              :     << "Fully connected layer takes only one input";
      73              : 
      74          652 :   std::vector<TensorDim> output_dims(1);
      75              : 
      76              :   /// @todo fc actually supports multi-dimensional input. EffDimFlag shouldn't
      77              :   /// be fixed like this.
      78          652 :   context.setEffDimFlagInputDimension(0, 0b1001);
      79          652 :   context.setDynDimFlagInputDimension(0, 0b1000);
      80              : 
      81              :   bool is_nchw = (context.getFormat() == Tformat::NCHW);
      82              :   /** set output dimensions */
      83              :   auto const &in_dim = context.getInputDimensions()[0];
      84          652 :   output_dims[0] = in_dim;
      85          652 :   is_nchw ? output_dims[0].width(unit) : output_dims[0].channel(unit);
      86              : 
      87              :   output_dims[0].setTensorType(
      88              :     {context.getFormat(), context.getActivationDataType()});
      89              : 
      90          652 :   context.setOutputDimensions(output_dims);
      91              : 
      92              :   /** set weight specifications */
      93              :   // @todo : This NCHW format setting is just temporary; it needs to be set by
      94              :   // the global configuration
      95              : 
      96              :   /** Bias Dimension : (1, 1, 1, unit) */
      97              :   /// @note the bias is added directly to the activation;
      98              :   /// since we have no dequantizer for the add operation,
      99              :   /// its data type has to be the same as the activation's.
     100              :   /// This should be updated when the dequantizer is supported.
     101              :   TensorDim bias_dim(
     102          652 :     1, is_nchw ? 1 : unit, 1, is_nchw ? unit : 1,
     103              :     TensorDim::TensorType(context.getFormat(), context.getActivationDataType()),
     104         1304 :     is_nchw ? 0b0001 : 0b0100);
     105              : 
     106              :   /** Weight Dimension : (1, 1, in_dim.width(), unit)*/
     107              :   TensorDim weight_dim(
     108          649 :     1, is_nchw ? 1 : unit, is_nchw ? in_dim.width() : 1,
     109            3 :     is_nchw ? unit : in_dim.channel(),
     110              :     TensorDim::TensorType(context.getFormat(), context.getWeightDataType()),
     111         1956 :     is_nchw ? 0b0011 : 0b0101);
     112              : 
     113          652 :   weight_idx[FCParams::weight] = context.requestWeight(
     114              :     weight_dim, weight_initializer, weight_regularizer,
     115              :     weight_regularizer_constant, weight_decay, "weight", true);
     116              : 
     117          652 :   if (disable_bias.empty() || disable_bias.get() == false) {
     118          652 :     weight_idx[FCParams::bias] =
     119         1304 :       context.requestWeight(bias_dim, bias_initializer, WeightRegularizer::NONE,
     120              :                             1.0f, bias_decay, "bias", true);
     121              :   }
     122              : 
     123              :   /** create weights for LoRA */
     124          652 :   if (lora_rank) {
     125              : 
     126              :     /** loraA Dimension : (1, 1, in_dim.width(), lora_rank) */
     127              :     TensorDim loraA_dim(
     128            0 :       1, is_nchw ? 1 : lora_rank, is_nchw ? in_dim.width() : 1,
     129            0 :       is_nchw ? lora_rank : in_dim.channel(),
     130              :       TensorDim::TensorType(context.getFormat(), context.getWeightDataType()),
     131            0 :       is_nchw ? 0b0011 : 0b0101);
     132              : 
     133              :     /** loraB Dimension : (1, 1, lora_rank, unit) */
     134              :     TensorDim loraB_dim(
     135            0 :       1, is_nchw ? 1 : unit, is_nchw ? lora_rank : 1,
     136            0 :       is_nchw ? unit : lora_rank,
     137              :       TensorDim::TensorType(context.getFormat(), context.getWeightDataType()),
     138            0 :       is_nchw ? 0b0011 : 0b0101);
     139              : 
     140              :     /** loraTmp Dimension : (B, 1, in_dim.height(), lora_rank) */
     141              :     TensorDim loraTmp_dim(
     142            0 :       in_dim.batch(), is_nchw ? 1 : lora_rank, is_nchw ? in_dim.height() : 1,
     143            0 :       is_nchw ? lora_rank : in_dim.width(),
     144              :       TensorDim::TensorType(context.getFormat(),
     145              :                             context.getActivationDataType()),
     146            0 :       is_nchw ? 0b1011 : 0b1101);
     147              : 
     148              :     /** loraOut Dimension : (B, 1, in_dim.height(), unit) */
     149              :     TensorDim loraOut_dim(
     150            0 :       in_dim.batch(), is_nchw ? 1 : unit, is_nchw ? in_dim.height() : 1,
     151            0 :       is_nchw ? unit : in_dim.width(),
     152              :       TensorDim::TensorType(context.getFormat(),
     153              :                             context.getActivationDataType()),
     154            0 :       is_nchw ? 0b1011 : 0b1101);
     155              : 
     156            0 :     lora_idx[LORAParams::loraA] = context.requestWeight(
     157              :       loraA_dim, Initializer::ZEROS, weight_regularizer,
     158              :       weight_regularizer_constant, weight_decay, "loraA", true);
     159              : 
     160            0 :     lora_idx[LORAParams::loraB] = context.requestWeight(
     161              :       loraB_dim, Initializer::LECUN_NORMAL, weight_regularizer,
     162              :       weight_regularizer_constant, weight_decay, "loraB", true);
     163              : 
     164            0 :     lora_idx[LORAParams::loraTmp] =
     165            0 :       context.requestTensor(loraTmp_dim, "hidden_tmp_lora", Initializer::NONE,
     166              :                             true, TensorLifespan::FORWARD_GRAD_LIFESPAN);
     167              : 
     168            0 :     lora_idx[LORAParams::loraOut] =
     169            0 :       context.requestTensor(loraOut_dim, "hidden_lora", Initializer::NONE, true,
     170              :                             TensorLifespan::FORWARD_FUNC_LIFESPAN);
     171              :   }
     172              : 
     173              :   /// @todo this quantizer should be moved to tensor, not layer!
     174          652 :   switch (context.getWeightDataType()) {
     175            0 :   case ml::train::TensorDim::DataType::QINT4:
     176              :   case ml::train::TensorDim::DataType::QINT8:
     177              :   case ml::train::TensorDim::DataType::QINT16:
     178              :     quantizer =
     179            0 :       Quantization::createQuantizer(nntrainer::QScheme::PER_TENSOR_AFFINE);
     180            0 :     break;
     181          652 :   default:
     182              :     quantizer = nullptr;
     183              :     break;
     184              :   }
     185          652 : }
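
The LoRA bookkeeping set up in finalize() reduces to a scaling factor of LoraAlpha / LoraRank (1.0 when either property is unset) and two low-rank factors: loraA of shape in_features x lora_rank and loraB of shape lora_rank x unit in the NCHW case. A minimal standalone sketch of those relations with hypothetical sizes, not using the nntrainer Tensor API:

    #include <cstdio>

    int main() {
      // Hypothetical values standing in for the layer properties above.
      const unsigned in_features = 64; // in_dim.width() for NCHW input
      const unsigned unit = 32;        // props::Unit
      const unsigned lora_rank = 4;    // props::LoraRank
      const float lora_alpha = 16.0f;  // props::LoraAlpha

      // lora_scaling stays 1.0 unless both rank and alpha are given.
      const float lora_scaling = lora_rank ? lora_alpha / lora_rank : 1.0f;

      // loraA: (1, 1, in_features, lora_rank), loraB: (1, 1, lora_rank, unit)
      std::printf("loraA %ux%u, loraB %ux%u, scaling %.2f\n", in_features,
                  lora_rank, lora_rank, unit, lora_scaling);
      return 0;
    }
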
     186              : 
     187          437 : void FullyConnectedLayer::exportTo(
     188              :   Exporter &exporter, const ml::train::ExportMethods &method) const {
     189          437 :   LayerImpl::exportTo(exporter, method);
     190          437 :   exporter.saveResult(fc_props, method, this);
     191          437 : }
     192              : 
     193         4510 : void FullyConnectedLayer::setProperty(const std::vector<std::string> &values) {
     194         4510 :   auto remain_props = loadProperties(values, fc_props);
     195         4509 :   LayerImpl::setProperty(remain_props);
     196         4509 : }
     197              : 
     198          523 : void FullyConnectedLayer::setBatch(nntrainer::RunLayerContext &context,
     199              :                                    unsigned int batch) {
     200          523 :   if (!std::get<props::LoraRank>(fc_props).empty()) {
     201              :     // update the LoRA tensors' batch info.
     202            0 :     context.updateTensor(lora_idx[LORAParams::loraTmp], batch);
     203            0 :     context.updateTensor(lora_idx[LORAParams::loraOut], batch);
     204              :   }
     205          523 : }
     206              : 
     207         7177 : void FullyConnectedLayer::forwarding(RunLayerContext &context, bool training) {
     208         7177 :   Tensor &weight = context.getWeight(weight_idx[FCParams::weight]);
     209         7177 :   Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
     210         7177 :   Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
     211              : 
     212              :   ///@todo This dequantization action should be moved to tensor.dot()
     213         7177 :   if (quantizer != nullptr) {
     214            0 :     Tensor weight_ = quantizer->dequantize(weight, input_.getDataType());
     215            0 :     input_.dot(weight_, hidden_, false, false);
     216            0 :   } else {
     217         7177 :     input_.dot(weight, hidden_, false, false);
     218              :   }
     219              : 
     220         7177 :   if (!std::get<props::LoraRank>(fc_props).empty()) {
     221            0 :     Tensor &loraA = context.getWeight(lora_idx[LORAParams::loraA]);
     222            0 :     Tensor &loraB = context.getWeight(lora_idx[LORAParams::loraB]);
     223            0 :     Tensor &hidden_tmp_lora = context.getTensor(lora_idx[LORAParams::loraTmp]);
     224            0 :     Tensor &hidden_out_lora = context.getTensor(lora_idx[LORAParams::loraOut]);
     225              : 
     226            0 :     input_.dot(loraA, hidden_tmp_lora, false, false);
     227            0 :     hidden_tmp_lora.dot(loraB, hidden_out_lora, false, false);
     228            0 :     hidden_out_lora.multiply_i(lora_scaling);
     229            0 :     hidden_.add_i(hidden_out_lora);
     230              :   }
     231              : 
     232              :   if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
     233         7177 :       disable_bias.empty() || disable_bias.get() == false) {
     234         7177 :     Tensor &bias = context.getWeight(weight_idx[FCParams::bias]);
     235         7177 :     hidden_.add_i(bias);
     236              :   }
     237         7177 : }
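
Put together, forwarding() computes hidden = input . weight (dequantizing the weight first when a quantizer is set), adds lora_scaling * (input . loraA) . loraB when a LoRA rank is configured, and adds the bias unless it is disabled. A compact dense sketch of that arithmetic on row-major buffers, assuming plain float matrices rather than the nntrainer Tensor API:

    #include <cstddef>
    #include <vector>

    using Mat = std::vector<float>; // row-major storage

    // y (rows x n) = x (rows x k) * w (k x n)
    static void matmul(const Mat &x, const Mat &w, Mat &y, std::size_t rows,
                       std::size_t k, std::size_t n) {
      y.assign(rows * n, 0.0f);
      for (std::size_t r = 0; r < rows; ++r)
        for (std::size_t i = 0; i < k; ++i)
          for (std::size_t c = 0; c < n; ++c)
            y[r * n + c] += x[r * k + i] * w[i * n + c];
    }

    // hidden = x * w + bias, plus the optional LoRA term
    // lora_scaling * (x * loraA) * loraB when rank > 0.
    void fc_forward(const Mat &x, const Mat &w, const Mat &bias,
                    const Mat &loraA, const Mat &loraB, float lora_scaling,
                    Mat &hidden, std::size_t rows, std::size_t in,
                    std::size_t unit, std::size_t rank) {
      matmul(x, w, hidden, rows, in, unit);
      for (std::size_t r = 0; r < rows; ++r)
        for (std::size_t c = 0; c < unit; ++c)
          hidden[r * unit + c] += bias[c];
      if (rank > 0) {
        Mat tmp, lora_out;
        matmul(x, loraA, tmp, rows, in, rank);          // hidden_tmp_lora
        matmul(tmp, loraB, lora_out, rows, rank, unit); // hidden_out_lora
        for (std::size_t i = 0; i < hidden.size(); ++i)
          hidden[i] += lora_scaling * lora_out[i];
      }
    }
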
     238              : 
     239            0 : void FullyConnectedLayer::incremental_forwarding(RunLayerContext &context,
     240              :                                                  unsigned int from,
     241              :                                                  unsigned int to,
     242              :                                                  bool training) {
     243            0 :   Tensor &weight = context.getWeight(weight_idx[FCParams::weight]);
     244            0 :   Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
     245            0 :   Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
     246            0 :   Tensor loraA, loraB, hidden_tmp_lora, hidden_out_lora;
     247              : 
     248            0 :   if (!std::get<props::LoraRank>(fc_props).empty()) {
     249            0 :     loraA = context.getWeight(lora_idx[LORAParams::loraA]);
     250            0 :     loraB = context.getWeight(lora_idx[LORAParams::loraB]);
     251            0 :     hidden_tmp_lora = context.getTensor(lora_idx[LORAParams::loraTmp]);
     252            0 :     hidden_out_lora = context.getTensor(lora_idx[LORAParams::loraOut]);
     253              :   }
     254              : 
     255            0 :   TensorDim input_dim = input_.getDim();
     256            0 :   TensorDim hidden_dim = hidden_.getDim();
     257              : 
     258            0 :   TensorDim input_step_dim = input_dim;
     259            0 :   TensorDim hidden_step_dim = hidden_dim;
     260              : 
     261            0 :   input_step_dim.batch(1);
     262            0 :   input_step_dim.height(to - from);
     263            0 :   hidden_step_dim.batch(1);
     264            0 :   hidden_step_dim.height(to - from);
     265              : 
     266              :   // @todo make it parallelized with batch axis
     267            0 :   for (unsigned int b = 0; b < hidden_.batch(); ++b) {
     268              :     Tensor input_step = input_.getSharedDataTensor(
     269            0 :       input_step_dim, b * hidden_dim.getFeatureLen(), true);
     270              :     Tensor hidden_step = hidden_.getSharedDataTensor(
     271            0 :       hidden_step_dim, b * hidden_dim.getFeatureLen(), true);
     272              : 
     273            0 :     input_step.dot(weight, hidden_step, false, false);
     274              : 
     275            0 :     if (!std::get<props::LoraRank>(fc_props).empty()) {
     276            0 :       nntrainer::TensorDim hidden_tmp_lora_step_dim = hidden_tmp_lora.getDim();
     277            0 :       hidden_tmp_lora_step_dim.batch(1);
     278            0 :       hidden_tmp_lora_step_dim.height(to - from);
     279            0 :       nntrainer::TensorDim hidden_out_lora_step_dim = hidden_out_lora.getDim();
     280            0 :       hidden_out_lora_step_dim.batch(1);
     281            0 :       hidden_out_lora_step_dim.height(to - from);
     282              : 
     283              :       nntrainer::Tensor hidden_tmp_lora_step =
     284              :         hidden_tmp_lora.getSharedDataTensor(
     285              :           hidden_tmp_lora_step_dim,
     286            0 :           b * hidden_tmp_lora.height() * hidden_tmp_lora.width(), true);
     287              :       nntrainer::Tensor hidden_out_lora_step =
     288              :         hidden_out_lora.getSharedDataTensor(
     289              :           hidden_out_lora_step_dim,
     290            0 :           b * hidden_out_lora.height() * hidden_out_lora.width(), true);
     291              : 
     292            0 :       input_step.dot(loraA, hidden_tmp_lora_step, false, false);
     293            0 :       hidden_tmp_lora_step.dot(loraB, hidden_out_lora_step, false, false);
     294            0 :       hidden_out_lora_step.multiply_i(lora_scaling);
     295            0 :       hidden_step.add_i(hidden_out_lora_step);
     296            0 :     }
     297              : 
     298              :     if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
     299            0 :         disable_bias.empty() || disable_bias.get() == false) {
     300            0 :       Tensor &bias = context.getWeight(weight_idx[FCParams::bias]);
     301            0 :       hidden_step.add_i(bias);
     302              :     }
     303            0 :   }
     304            0 : }
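
incremental_forwarding() performs the same computation as forwarding(), but only over a step of (to - from) rows per batch element, using getSharedDataTensor() to view that slice without copying. A rough illustration of such a zero-copy row slice, assuming a plain row-major float buffer rather than the nntrainer Tensor API:

    #include <cstddef>

    // A "step" view over rows [from, to) of a row-major (height x width)
    // buffer; data aliases the parent buffer, nothing is copied.
    struct StepView {
      float *data;
      std::size_t rows;
      std::size_t cols;
    };

    StepView make_step_view(float *parent, std::size_t width, std::size_t from,
                            std::size_t to) {
      return {parent + from * width, to - from, width};
    }
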
     305              : 
     306         1117 : void FullyConnectedLayer::calcDerivative(RunLayerContext &context) {
     307         1117 :   Tensor &weight = context.getWeight(weight_idx[FCParams::weight]);
     308              : 
     309         1117 :   const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
     310         1117 :   Tensor &ret_ = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
     311              : 
     312         1117 :   if (!std::get<props::LoraRank>(fc_props).empty()) {
     313            0 :     Tensor &lora_A = context.getWeight(lora_idx[LORAParams::loraA]);
     314            0 :     Tensor &lora_B = context.getWeight(lora_idx[LORAParams::loraB]);
     315            0 :     ret_.dot_deriv_wrt_1(weight.add(lora_A.dot(lora_B).multiply(lora_scaling)),
     316              :                          derivative_, false, false);
     317              :   } else {
     318         1117 :     ret_.dot_deriv_wrt_1(weight, derivative_, false, false);
     319              :   }
     320         1117 : }
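
In matrix form, calcDerivative() propagates the incoming derivative through the effective weight: with LoRA enabled the weight seen by the input is W + lora_scaling * (loraA . loraB), otherwise just W. As a sketch of the math (not the dot_deriv_wrt_1 signature), with A = loraA, B = loraB, and alpha / r the lora_scaling computed in finalize():

    \frac{\partial L}{\partial x} = \frac{\partial L}{\partial y}\, W_{\mathrm{eff}}^{\top},
    \qquad
    W_{\mathrm{eff}} = W + \frac{\alpha}{r}\, A B
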
     321              : 
     322         6296 : void FullyConnectedLayer::calcGradient(RunLayerContext &context) {
     323              : 
     324              :   /** (default) calcGradient - compute gradient of weight and bias */
     325         6296 :   if (std::get<props::LoraRank>(fc_props).empty()) {
     326         6296 :     Tensor &djdw = context.getWeightGrad(weight_idx[FCParams::weight]);
     327         6296 :     djdw.setZero();
     328              : 
     329         6296 :     const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
     330         6296 :     Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
     331              : 
     332              :     if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
     333         6296 :         disable_bias.empty() || disable_bias.get() == false) {
     334         6296 :       Tensor &djdb = context.getWeightGrad(weight_idx[FCParams::bias]);
     335         6296 :       djdb.setZero();
     336              : 
     337         6296 :       if (context.isGradientFirstAccess(weight_idx[FCParams::bias])) {
     338         6286 :         derivative_.sum({0, 1, 2}, djdb);
     339              :       } else {
     340              :         /// @todo optimize below by adding beta to Tensor::sum
     341           10 :         Tensor t = derivative_.sum({0, 1, 2});
     342           10 :         djdb.add_i(t);
     343           10 :       }
     344              :     }
     345              : 
     346         6296 :     input_.dot_deriv_wrt_2(
     347              :       djdw, derivative_, false, false,
     348         6296 :       !context.isGradientFirstAccess(weight_idx[FCParams::weight]));
     349         6296 :   } else {
     350              :     /** (lora) calcGradient - compute gradients of LoRA params only */
     351            0 :     Tensor &djdla = context.getWeightGrad(lora_idx[LORAParams::loraA]);
     352            0 :     Tensor &djdlb = context.getWeightGrad(lora_idx[LORAParams::loraB]);
     353            0 :     Tensor &djdtmp = context.getTensorGrad(lora_idx[LORAParams::loraTmp]);
     354              : 
     355            0 :     const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
     356            0 :     Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
     357            0 :     Tensor &loraA = context.getWeight(lora_idx[LORAParams::loraA]);
     358            0 :     Tensor &loraB = context.getWeight(lora_idx[LORAParams::loraB]);
     359            0 :     Tensor &loraTmp = context.getTensor(lora_idx[LORAParams::loraTmp]);
     360            0 :     const auto &lora_derivative_ = derivative_.multiply(lora_scaling);
     361              : 
     362            0 :     loraTmp.dot_deriv_wrt_2(
     363              :       djdlb, lora_derivative_, false, false,
     364            0 :       !context.isGradientFirstAccess(lora_idx[LORAParams::loraB]));
     365            0 :     djdtmp.dot_deriv_wrt_1(
     366              :       loraB, lora_derivative_, false, false,
     367            0 :       !context.isGradientFirstAccess(lora_idx[LORAParams::loraTmp]));
     368            0 :     input_.dot_deriv_wrt_2(
     369              :       djdla, djdtmp, false, false,
     370            0 :       !context.isGradientFirstAccess(lora_idx[LORAParams::loraA]));
     371            0 :   }
     372         6296 : }
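
The two calcGradient() branches correspond to the usual dense-layer gradients. Without LoRA, writing x for the input and delta for the incoming derivative (a sketch of the math behind dot_deriv_wrt_2 and the axis-{0,1,2} sum above):

    \frac{\partial L}{\partial W} = x^{\top}\,\delta, \qquad
    \frac{\partial L}{\partial b} = \sum_{\text{batch, channel, height}} \delta

With LoRA, only loraA and loraB receive gradients; writing t = x A for the temporary activation and s for lora_scaling:

    \frac{\partial L}{\partial B} = t^{\top} (s\,\delta), \qquad
    \frac{\partial L}{\partial t} = (s\,\delta)\, B^{\top}, \qquad
    \frac{\partial L}{\partial A} = x^{\top}\, \frac{\partial L}{\partial t}
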
     373              : 
     374              : } /* namespace nntrainer */
        

Generated by: LCOV version 2.0-1