LCOV - code coverage report
Current view: top level - nntrainer/layers - multi_head_attention_layer.h (source / functions)
Test: coverage_filtered.info
Test Date: 2025-12-14 20:38:17

               Coverage   Total   Hit
Lines:          100.0 %       3     3
Functions:      100.0 %       2     2

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * Copyright (C) 2022 hyeonseok Lee <hs89.lee@samsung.com>
       4              :  *
       5              :  * @file   multi_head_attention_layer.h
       6              :  * @date   08 July 2022
       7              :  * @see    https://github.com/nnstreamer/nntrainer
       8              :  *         https://arxiv.org/abs/1706.03762
       9              :  * @author hyeonseok Lee <hs89.lee@samsung.com>
      10              :  * @bug    No known bugs except for NYI items
      11              :  * @brief  This is MultiHeadAttention Layer Class for Neural Network
      12              :  *
      13              :  */
      14              : 
      15              : #ifndef __MULTI_HEAD_ATTENTION_LAYER_H__
      16              : #define __MULTI_HEAD_ATTENTION_LAYER_H__
      17              : #ifdef __cplusplus
      18              : 
      19              : #include <acti_func.h>
      20              : #include <layer_impl.h>
      21              : 
      22              : namespace nntrainer {
      23              : 
      24              : /**
      25              :  * @class   Multi Head Attention Layer
      26              :  * @brief   Implementation of multi head attention which is described in paper
      27              :  * "Attention is all you need"
      28              :  */
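
For reference, the cited paper (arXiv:1706.03762) defines the operation this
class implements as scaled dot-product attention and its multi-head
extension:

    Attention(Q, K, V) = softmax(Q K^T / sqrt(d_k)) V
    MultiHead(Q, K, V) = Concat(head_1, ..., head_h) W^O,
      where head_i = Attention(Q W_i^Q, K W_i^K, V W_i^V)
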
      29              : class MultiHeadAttentionLayer : public LayerImpl {
      30              : public:
      31              :   /**
      32              :    * @brief     Constructor of MultiHeadAttention Layer
      33              :    */
      34              :   MultiHeadAttentionLayer();
      35              : 
      36              :   /**
      37              :    * @brief     Destructor of MultiHeadAttention Layer
      38              :    */
      39              :   ~MultiHeadAttentionLayer();
      40              : 
      41              :   /**
       42              :    * @brief  Move constructor of MultiHeadAttentionLayer.
       43              :    * @param[in] rhs MultiHeadAttentionLayer to be moved.
      44              :    */
      45              :   MultiHeadAttentionLayer(MultiHeadAttentionLayer &&rhs) noexcept = default;
      46              : 
      47              :   /**
      48              :    * @brief  Move assignment operator.
       49              :    * @param[in] rhs MultiHeadAttentionLayer to be moved.
      50              :    */
      51              :   MultiHeadAttentionLayer &operator=(MultiHeadAttentionLayer &&rhs) = default;
      52              : 
      53              :   /**
      54              :    * @copydoc Layer::finalize(InitLayerContext &context)
      55              :    */
      56              :   void finalize(InitLayerContext &context) override;
      57              : 
      58              :   /**
      59              :    * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
      60              :    */
      61              :   void forwarding(RunLayerContext &context, bool training) override;
      62              : 
      63              :   /**
      64              :    * @copydoc Layer::incremental_forwarding(RunLayerContext &context, unsigned
      65              :    * int from, unsigned int to, bool training)
      66              :    */
      67              :   void incremental_forwarding(RunLayerContext &context, unsigned int from,
      68              :                               unsigned int to, bool training) override;
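
Conceptually, incremental_forwarding restricts the computation to the token
range [from, to), which is what autoregressive decoding needs: each newly
generated query attends to every key/value produced so far. Below is a
minimal single-head sketch of that computation; it is illustrative only, and
the names and layout are hypothetical rather than nntrainer's internals.

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    using Mat = std::vector<std::vector<float>>; // [tokens][dim], row-major

    // Attention output for queries in [from, to) against keys/values [0, to).
    Mat incremental_attention(const Mat &q, const Mat &k, const Mat &v,
                              unsigned from, unsigned to) {
      const float scale = 1.0f / std::sqrt(static_cast<float>(k[0].size()));
      Mat out;
      for (unsigned i = from; i < to; ++i) {
        // score_j = q_i . k_j / sqrt(d_k), for every cached key j < to
        std::vector<float> score(to, 0.0f);
        float max_s = -1e30f;
        for (unsigned j = 0; j < to; ++j) {
          for (std::size_t d = 0; d < q[i].size(); ++d)
            score[j] += q[i][d] * k[j][d];
          score[j] *= scale;
          max_s = std::max(max_s, score[j]);
        }
        // softmax over the scores (max-subtracted for numerical stability)
        float denom = 0.0f;
        for (unsigned j = 0; j < to; ++j) {
          score[j] = std::exp(score[j] - max_s);
          denom += score[j];
        }
        // output row = attention-weighted sum of the values
        std::vector<float> row(v[0].size(), 0.0f);
        for (unsigned j = 0; j < to; ++j)
          for (std::size_t d = 0; d < row.size(); ++d)
            row[d] += (score[j] / denom) * v[j][d];
        out.push_back(row);
      }
      return out;
    }

Full forwarding is then the special case from = 0, to = sequence length.
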
      69              : 
      70              :   /**
      71              :    * @copydoc Layer::calcDerivative(RunLayerContext &context)
      72              :    */
      73              :   void calcDerivative(RunLayerContext &context) override;
      74              : 
      75              :   /**
      76              :    * @copydoc Layer::calcGradient(RunLayerContext &context)
      77              :    */
      78              :   void calcGradient(RunLayerContext &context) override;
      79              : 
      80              :   /**
      81              :    * @copydoc bool supportBackwarding() const
      82              :    */
      83          210 :   bool supportBackwarding() const override { return true; };
      84              : 
      85              :   /**
      86              :    * @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
      87              :    * method)
      88              :    */
      89              :   void exportTo(Exporter &exporter,
      90              :                 const ml::train::ExportMethods &method) const override;
      91              : 
      92              :   /**
      93              :    * @copydoc Layer::setProperty(const std::vector<std::string> &values)
      94              :    */
      95              :   void setProperty(const std::vector<std::string> &values) override;
      96              : 
      97              :   /**
      98              :    * @copydoc Layer::getType()
      99              :    */
     100         2372 :   const std::string getType() const override {
     101         2372 :     return MultiHeadAttentionLayer::type;
     102              :   };
     103              : 
     104              :   /**
     105              :    * @copydoc Layer::setBatch(RunLayerContext &context, unsigned int batch)
     106              :    */
     107              :   void setBatch(RunLayerContext &context, unsigned int batch) override;
     108              : 
     109              :   static constexpr const char *type = "multi_head_attention";
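
Given the registered type string above, the layer can be constructed through
nntrainer's ccapi factory. A hedged usage sketch: createLayer itself is part
of the ccapi, but the property keys are assumed here to mirror the props
declared below (num_heads, projected_key_dim, ...) and should be checked
against the release documentation.

    #include <layer.h> // ccapi: ml::train::createLayer

    // Assumed property keys; verify against the nntrainer documentation.
    auto mha = ml::train::createLayer(
      "multi_head_attention", // MultiHeadAttentionLayer::type
      {"name=attn0", "num_heads=8", "projected_key_dim=64",
       "projected_value_dim=64", "dropout_rate=0.1"});
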
     110              : 
     111              : private:
     112              :   std::tuple<props::NumHeads, props::ProjectedKeyDim, props::ProjectedValueDim,
     113              :              props::OutputShape, props::DropOutRate,
     114              :              props::ReturnAttentionWeight, props::AverageAttentionWeight>
     115              :     multi_head_attention_props; /**< multi_head_attention layer properties */
     116              : 
      117              :   ActiFunc sm; /**< softmax activation operation */
     118              :   std::array<unsigned int, 16>
     119              :     weight_idx; /**< indices of the weights and tensors */
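
The paper's formulation gives each head its own query/key/value projections
plus one shared output projection, which suggests a quick back-of-envelope
weight count; biases and any intermediate tensors this array may also track
are deliberately ignored, so the helper below is purely illustrative.

    #include <cstddef>

    std::size_t mha_weight_count(std::size_t d_model, std::size_t h,
                                 std::size_t d_k, std::size_t d_v) {
      std::size_t q = d_model * h * d_k; // W^Q projections
      std::size_t k = d_model * h * d_k; // W^K projections
      std::size_t v = d_model * h * d_v; // W^V projections
      std::size_t o = h * d_v * d_model; // W^O output projection
      return q + k + v + o; // e.g. d_model=512, h=8, d_k=d_v=64 -> 1,048,576
    }
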
     120              : 
     121              :   /**
     122              :    * @brief     to protect overflow
     123              :    */
     124              :   float epsilon;
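
This header does not show how epsilon is applied, but a common pattern is to
guard the softmax denominator so that a row of extreme scores cannot
underflow into a division by zero. A minimal sketch under that assumption
(not necessarily how ActiFunc uses it internally):

    #include <algorithm>
    #include <cmath>
    #include <vector>

    std::vector<float> stable_softmax(std::vector<float> x,
                                      float epsilon = 1e-7f) {
      const float m = *std::max_element(x.begin(), x.end());
      float denom = epsilon; // epsilon keeps the division well-defined
      for (float &v : x) {
        v = std::exp(v - m); // subtract the max to avoid overflow in exp
        denom += v;
      }
      for (float &v : x)
        v /= denom;
      return x;
    }
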
     125              : 
     126              :   /**
     127              :    * @brief calculate common derivative
     128              :    * @param context Context of the layer
     129              :    */
     130              :   void calcCommonDerivative(RunLayerContext &context);
     131              : };
     132              : 
     133              : } // namespace nntrainer
     134              : 
     135              : #endif /* __cplusplus */
     136              : #endif /* __MULTI_HEAD_ATTENTION_LAYER_H__ */
        

Generated by: LCOV version 2.0-1