LCOV - code coverage report
Current view: top level - nntrainer/tensor - tensor_base.h (source / functions)
Test:       coverage_filtered.info
Test Date:  2026-01-12 20:43:37
                             Coverage    Total    Hit
              Lines:         66.7 %      72       48
              Functions:     45.0 %      20       9

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * @file        tensor_base.h
       4              :  * @date        01 December 2023
       5              :  * @brief       This is Tensor base class
       6              :  * @see         https://github.com/nntrainer/nntrainer
       7              :  * @author      Jijoong Moon <jijoong.moon@samsung.com>
       8              :  * @author      Donghyeon Jeong <dhyeon.jeong@samsung.com>
       9              :  * @bug         No known bugs except for NYI items
      10              :  */
      11              : 
      12              : #ifndef __TENSOR_BASE_H__
      13              : #define __TENSOR_BASE_H__
      14              : #ifdef __cplusplus
      15              : 
      16              : #include <memory>
      17              : #include <stdexcept>
      18              : 
      19              : #include <memory_data.h>
      20              : #include <nntrainer_error.h>
      21              : #include <quantizer.h>
      22              : #include <tensor_dim.h>
      23              : #include <util_func.h>
      24              : 
      25              : #define transposeloop(cl, ci, cj, ck, sl, si, sj, sk)                          \
      26              :   do {                                                                         \
      27              :     unsigned int i, j, k, l;                                                   \
      28              :     int inidx = 0, outidx = 0;                                                 \
      29              :     for (cl = 0; cl < sl; cl++)                                                \
      30              :       for (ci = 0; ci < si; ci++)                                              \
      31              :         for (cj = 0; cj < sj; cj++)                                            \
      32              :           for (ck = 0; ck < sk; ck++) {                                        \
      33              :             outidx = si * sj * sk * cl + sj * sk * ci + sk * cj + ck;          \
      34              :             inidx = l * SI * SJ * SK + i * SJ * SK + j * SK + k;               \
      35              :             outptr[outidx] = inptr[inidx];                                     \
      36              :           }                                                                    \
      37              :   } while (0);
      38              : 
      39              : #define transposeloop_nhwc(cl, ci, cj, ck, sl, si, sj, sk)                     \
      40              :   do {                                                                         \
      41              :     unsigned int i, j, k, l;                                                   \
      42              :     int inidx = 0, outidx = 0;                                                 \
      43              :     for (cl = 0; cl < sl; cl++)                                                \
      44              :       for (ci = 0; ci < si; ci++)                                              \
      45              :         for (cj = 0; cj < sj; cj++)                                            \
      46              :           for (ck = 0; ck < sk; ck++) {                                        \
      47              :             outidx = si * sj * sk * cl + sj * sk * ci + sk * cj + ck;          \
      48              :             inidx = l * SJ * SK * SI + j * SK * SI + k * SI + i;               \
      49              :             outptr[outidx] = inptr[inidx];                                     \
      50              :           }                                                                    \
      51              :   } while (0);
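
A usage sketch for the two macros above (illustrative, not code from this file): the inidx expression hard-codes the loop-variable names l, i, j, k and the input extents SI, SJ, SK, so callers are expected to pass a permutation of l, i, j, k as the first four arguments and to have inptr, outptr, SL, SI, SJ, SK in scope under exactly those names. For example, a hypothetical NCHW height/width swap could look like:

    // input extents (N, C, H, W); the names are fixed by the macro body
    unsigned int SL = 2, SI = 3, SJ = 4, SK = 5;
    std::vector<float> in(SL * SI * SJ * SK), out(in.size());
    const float *inptr = in.data();
    float *outptr = out.data();
    // output is iterated as (l, i, k, j), i.e. the last two axes are swapped
    transposeloop(l, i, k, j, SL, SI, SK, SJ);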
      52              : 
      53              : namespace nntrainer {
      54              : 
      55              : using TensorDim = ml::train::TensorDim;
      56              : using Tformat = ml::train::TensorDim::Format;
      57              : using Tdatatype = ml::train::TensorDim::DataType;
      58              : using TStorageOrder = ml::train::TensorDim::StorageOrder;
      59              : 
      60              : /**
      61              :  * @brief     Enumeration of Weight Initialization Type
       62              :  * @todo      support initialization from file
      63              :  */
      64              : enum class Initializer {
      65              :   ZEROS,          /** Zero initialization */
      66              :   ONES,           /** One initialization */
      67              :   LECUN_NORMAL,   /** LeCun normal initialization */
       68              :   LECUN_UNIFORM,  /** LeCun uniform initialization */
      69              :   XAVIER_NORMAL,  /** Xavier normal initialization */
      70              :   XAVIER_UNIFORM, /** Xavier uniform initialization */
      71              :   HE_NORMAL,      /** He normal initialization */
      72              :   HE_UNIFORM,     /** He uniform initialization */
      73              :   NONE            /** No initialization */
      74              : };
      75              : 
      76              : class Tensor;
      77              : class SrcSharedTensorBase;
      78              : 
      79              : /**
       80              :  * @class TensorBase
      81              :  * @brief TensorBase is an abstract class that provides a base for various
      82              :  * tensor classes with different data types such as FloatTensor to extend and
      83              :  * implement abstract methods.
      84              :  *
      85              :  * @note Basic functions required for tensor memory allocation and data
      86              :  * modification, such as allocate(), getData(), and setValue(), are necessary
      87              :  * when creating subclasses (new tensor class).
      88              :  *
      89              :  * The remaining operations that are used for mathematical operations are not
      90              :  * essential to create a new tensor class but later should be implemented in a
      91              :  * child class in order to utilize its tensor operations fully.
      92              :  */
      93              : class TensorBase {
      94              : public:
      95              :   /**
      96              :    * @brief     Basic Constructor of Tensor
      97              :    */
      98       658329 :   TensorBase(std::string name_ = "", Tformat fm = Tformat::NCHW,
      99       658329 :              Tdatatype d_type = Tdatatype::FP32) :
     100      1316658 :     dim(TensorDim(fm, d_type)),
     101       658329 :     strides(dim.computeStrides()),
     102       658329 :     contiguous(true),
     103       658329 :     initializer(Initializer::NONE),
     104       658329 :     name(name_),
     105              :     data(nullptr),
     106       658329 :     offset(0),
     107       658329 :     file_offset(0),
     108       658329 :     src_tensor() {}
     109              : 
     110              :   /**
      111              :    * @brief     Constructor of Tensor with dimension, possibly allocated lazily
     112              :    * @param d Tensor dim for this tensor
     113              :    * @param alloc_now If the memory of the tensor must be allocated
     114              :    * @param init Initializer for the tensor
     115              :    * @param name Name of the tensor
     116              :    */
     117              :   TensorBase(const TensorDim &d, bool alloc_now,
     118              :              Initializer init = Initializer::NONE, std::string name = "");
     119              : 
     120              :   /**
     121              :    * @brief     Constructor of Tensor with dimension/buf
     122              :    * @param d Tensor dim for this tensor
     123              :    * @param buf buffer
      124              :    * @note Memory for this tensor is immediately allocated
     125              :    */
     126              :   TensorBase(const TensorDim &d, const void *buf = nullptr) :
     127              :     TensorBase(d, true) {}
     128              : 
     129              :   /**
     130              :    *  @brief  Copy constructor of TensorBase.
      131              :    *  @param[in] rhs TensorBase to be copied
     132              :    */
     133       540310 :   TensorBase(const TensorBase &rhs) {
     134       540310 :     dim = rhs.dim;
     135       540310 :     strides = rhs.strides;
     136       540310 :     contiguous = rhs.contiguous;
     137       540310 :     initializer = rhs.initializer;
     138       540310 :     name = rhs.name;
     139              :     data = rhs.data;
     140       540310 :     offset = rhs.offset;
     141       540310 :     file_offset = rhs.file_offset;
     142              :     src_tensor = rhs.src_tensor;
     143       540310 :   }
     144              : 
     145              :   /**
     146              :    * @brief     Comparison operator overload
     147              :    * @param[in] rhs Tensor to be compared with
     148              :    * @note      Only compares Tensor information
     149              :    */
     150              :   bool operator==(const TensorBase &rhs) const;
     151              : 
     152              :   /**
     153              :    * @brief     Comparison operator overload
     154              :    * @param[in] rhs Tensor to be compared with
     155              :    * @note      Only compares Tensor information
     156              :    */
     157              :   bool operator!=(const TensorBase &rhs) const { return !(*this == rhs); }
     158              : 
     159              :   /**
     160              :    * @copydoc Tensor::setTensorVar(TensorDim d, void *buf, size_t offset)
     161              :    */
     162              :   void setTensorVar(TensorDim d, void *buf, size_t offset);
     163              : 
     164              :   /**
     165              :    * @brief Basic Destructor
     166              :    */
     167      2397078 :   virtual ~TensorBase() {}
     168              : 
     169              :   /**
     170              :    * @copydoc Tensor::allocate()
     171              :    */
     172              :   virtual void allocate() = 0;
     173              : 
     174              :   /**
     175              :    * @copydoc Tensor::deallocate()
     176              :    */
     177              :   virtual void deallocate() = 0;
     178              : 
     179              :   /**
     180              :    * @copydoc Tensor::isAllocated()
     181              :    */
     182              :   bool isAllocated() { return data != nullptr; }
     183              : 
     184              :   /**
     185              :    * @brief activate function with the given pointer address
      186              :    * @note This should be called for virtual tensors only.
     187              :    */
     188            0 :   void activate(void *addr) {
     189            0 :     data = std::shared_ptr<MemoryData>(new MemoryData((void *)addr));
     190            0 :   }
     191              : 
     192              :   /**
     193              :    * @brief deactivate
      194              :    * @note This should be called for virtual tensors only.
     195              :    */
     196            0 :   void deactivate() {
     197              :     data.reset();
     198              :     data = nullptr;
     199            0 :   }
     200              : 
     201              :   /**
     202              :    * @copydoc Tensor::getData()
     203              :    */
     204              :   virtual void *getData() const = 0;
     205              : 
     206              :   /**
     207              :    * @copydoc Tensor::getData(size_t idx)
     208              :    */
     209              :   virtual void *getData(size_t idx) const = 0;
     210              : 
     211              :   /**
     212              :    * @copydoc Tensor::getScale()
     213              :    */
     214            1 :   virtual void *getScale() const {
     215              :     throw std::invalid_argument(
     216            1 :       "Tensor::getScale() is not supported in tensor data type " +
     217            3 :       getStringDataType());
     218              :   }
     219              : 
     220              :   /**
     221              :    * @copydoc Tensor::getScale(size_t idx)
     222              :    */
     223            0 :   virtual void *getScale(size_t idx) const {
     224              :     throw std::invalid_argument(
     225            0 :       "Tensor::getScale() is not supported in tensor data type " +
     226            0 :       getStringDataType());
     227              :   }
     228              : 
     229              :   /**
     230              :    * @copydoc Tensor::getZeroPoint()
     231              :    */
     232            0 :   virtual unsigned int *getZeroPoint() const {
     233              :     throw std::invalid_argument(
     234            0 :       "Tensor::getZeroPoint() is not supported in tensor data type " +
     235            0 :       getStringDataType());
     236              :   }
     237              : 
     238              :   /**
     239              :    * @copydoc Tensor::getZeroPoint(size_t idx)
     240              :    */
     241            0 :   virtual unsigned int *getZeroPoint(size_t idx) const {
     242              :     throw std::invalid_argument(
     243            0 :       "Tensor::getZeroPoint() is not supported in tensor data type " +
     244            0 :       getStringDataType());
     245              :   }
     246              : 
     247              :   /**
      248              :    * @brief     get the address of the i-th data element
      249              :    * @retval    address of the i-th data
     250              :    */
     251              :   virtual void *getAddress(unsigned int i) = 0;
     252              : 
     253              :   /**
      254              :    * @brief     get the address of the i-th data element
      255              :    * @retval    address of the i-th data
     256              :    */
     257              :   virtual const void *getAddress(unsigned int i) const = 0;
     258              : 
     259              :   /**
     260              :    * @copydoc Tensor::setValue(float value)
     261              :    */
     262              :   virtual void setValue(float value) = 0;
     263              : 
     264              :   /**
     265              :    * @copydoc Tensor::setValue(b, c, h, w, value)
     266              :    */
     267              :   virtual void setValue(unsigned int b, unsigned int c, unsigned int h,
     268              :                         unsigned int w, float value) = 0;
     269              : 
     270              :   /**
     271              :    * @copydoc Tensor::addValue()
     272              :    */
     273              :   virtual void addValue(unsigned int b, unsigned int c, unsigned int h,
     274              :                         unsigned int w, float value, float beta) = 0;
     275              : 
     276              :   /**
     277              :    * @copydoc Tensor::setZero()
     278              :    */
     279              :   virtual void setZero() = 0;
     280              : 
     281              :   /**
     282              :    * @copydoc Tensor::setRandNormal()
     283              :    */
     284              :   virtual void setRandNormal(float mean, float stddev);
     285              : 
     286              :   /**
      287              :    * @copydoc Tensor::setRandUniform()
     288              :    */
     289              :   virtual void setRandUniform(float min, float max);
     290              : 
     291              :   /**
     292              :    * @copydoc Tensor::setRandBernoulli()
     293              :    */
     294              :   virtual void setRandBernoulli(float probability);
     295              : 
     296              :   /**
     297              :    * @copydoc Tensor::initialize()
     298              :    */
     299              :   virtual void initialize() = 0;
     300              : 
     301              :   /**
     302              :    * @copydoc Tensor::initialize(Initializer init)
     303              :    */
     304              :   virtual void initialize(Initializer init) = 0;
     305              : 
     306              :   /**
     307              :    * @copydoc Tensor::multiply_strided(Tensor const &m, Tensor &output,
     308              :    * const float beta)
     309              :    */
     310              :   virtual Tensor multiply_strided(Tensor const &m, Tensor &output,
     311              :                                   const float beta) const;
     312              : 
     313              :   /**
     314              :    * @copydoc Tensor::multiply_i(float const &value)
     315              :    */
     316              :   virtual int multiply_i(float const &value);
     317              : 
     318              :   /**
     319              :    * @copydoc Tensor::multiply(float const &value, Tensor &output)
     320              :    */
     321              :   virtual Tensor &multiply(float const &value, Tensor &output) const;
     322              : 
     323              :   /**
     324              :    * @copydoc Tensor::multiply(Tensor const &m, Tensor &output, const
     325              :    * float beta = 0.0)
     326              :    */
     327              :   virtual Tensor &multiply(Tensor const &m, Tensor &output,
     328              :                            const float beta = 0.0) const;
     329              : 
     330              :   /**
     331              :    * @copydoc Tensor::divide(float const &value, Tensor &output)
     332              :    */
     333              :   virtual Tensor &divide(float const &value, Tensor &output) const;
     334              : 
     335              :   /**
     336              :    * @copydoc Tensor::divide(Tensor const &m, Tensor &output)
     337              :    */
     338              :   virtual Tensor &divide(Tensor const &m, Tensor &output) const;
     339              : 
     340              :   /**
     341              :    * @copydoc Tensor::add_strided(Tensor const &input, Tensor &output,
     342              :    * const float beta)
     343              :    */
     344              :   virtual Tensor &add_strided(Tensor const &input, Tensor &output,
     345              :                               const float beta) const;
     346              : 
     347              :   /**
     348              :    * @copydoc Tensor::add_i_partial()
     349              :    */
     350              :   virtual int add_i_partial(unsigned int len, unsigned int addr_idx, Tensor &m,
     351              :                             unsigned int incX, unsigned int incY,
     352              :                             const Tensor alphas, unsigned int alpha_idx);
     353              : 
     354              :   /**
     355              :    * @copydoc Tensor::add(float const &value, Tensor &output)
     356              :    */
     357              :   virtual Tensor &add(float const &value, Tensor &output) const;
     358              : 
     359              :   /**
     360              :    * @copydoc Tensor::add(Tensor const &m, Tensor &output, float const
     361              :    * alpha)
     362              :    */
     363              :   virtual Tensor &add(Tensor const &m, Tensor &output, float const alpha) const;
     364              : 
     365              :   /**
     366              :    * @copydoc Tensor::subtract(float const &value, Tensor &output)
     367              :    */
     368              :   virtual Tensor &subtract(float const &value, Tensor &output) const;
     369              : 
     370              :   /**
     371              :    * @brief      Sum all the Tensor elements according to the batch
     372              :    * @param[out] output Tensor(batch, 1, 1, 1)
     373              :    */
     374              :   virtual void sum_by_batch(Tensor &output) const;
     375              : 
     376              :   /**
     377              :    * @copydoc Tensor::sum(unsigned int axis, Tensor &output, float alpha,
     378              :    * float beta) const
     379              :    */
     380              :   virtual Tensor &sum(unsigned int axis, Tensor &output, float alpha,
     381              :                       float beta) const;
     382              : 
     383              :   /**
     384              :    * @copydoc Tensor::abs()
     385              :    */
     386              :   virtual Tensor &abs(Tensor &output) const;
     387              : 
     388              :   /**
     389              :    * @copydoc Tensor::l2norm
     390              :    */
     391              :   virtual float l2norm() const;
     392              : 
     393              :   /**
     394              :    * @copydoc Tensor::normalization_i(unsigned int dim, float p, float epsilon)
     395              :    */
     396              :   virtual void normalization_i(unsigned int dim, float p = 2.0,
     397              :                                float epsilon = 1e-12);
     398              : 
     399              :   /**
     400              :    * @copydoc Tensor::pow(float exponent, Tensor &output)
     401              :    */
     402              :   virtual Tensor &pow(float exponent, Tensor &output) const;
     403              : 
     404              :   /**
     405              :    * @copydoc Tensor::sqrt(Tensor &output)
     406              :    */
     407              :   virtual Tensor &sqrt(Tensor &output) const;
     408              : 
     409              :   /**
     410              :    * @copydoc Tensor::erf(Tensor &output)
     411              :    */
     412              :   virtual Tensor &erf(Tensor &output) const;
     413              : 
     414              :   /**
     415              :    * @brief    sin transform function
      416              :    * @param[out] out output tensor to store the result
     417              :    */
     418              :   virtual void sin(Tensor &out, float alpha = 1.0);
     419              : 
     420              :   /**
     421              :    * @brief    cos transform function
      422              :    * @param[out] out output tensor to store the result
     423              :    */
     424              :   virtual void cos(Tensor &out, float alpha = 1.0);
     425              : 
     426              :   /**
     427              :    * @brief    tangent transform function
      428              :    * @param[out] output tensor to store the result
     429              :    */
     430              :   virtual void tan(Tensor &output, float alpha = 1.0);
     431              : 
     432              :   /**
      433              :    * @brief      inverse square root function
      434              :    * @param[out] out output tensor to store the result
     435              :    */
     436              :   virtual void inv_sqrt(Tensor &out);
     437              : 
     438              :   /**
      439              :    * @brief     Dot Product of Tensor (matrix-matrix multiplication)
      440              :    * @details   This applies dot of the last dimension of this tensor and
      441              :    * the second-last dimension of the passed input tensor.
     442              :    * @param[in] input Tensor
     443              :    * @param[in] output output Tensor
     444              :    * @param[in] trans Transpose
     445              :    * @param[in] trans_in Transpose input
     446              :    * @param[in] beta beta
     447              :    * @retval    Calculated Tensor
     448              :    */
     449              :   virtual Tensor &dot(Tensor const &input, Tensor &output, bool trans,
     450              :                       bool trans_in, float beta) const;
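
As a shape illustration of the @details note above (a sketch of the non-transposed case; the constructor forms are assumptions about the public Tensor API, which this header only forward-declares): this tensor is read as an M x K matrix with M = batch() * channel() * height() and K = width(), and input as K x N with N = input.width().

    Tensor a(TensorDim(1, 1, 4, 8));    // M = 4, K = 8
    Tensor b(TensorDim(1, 1, 8, 3));    // K = 8, N = 3
    Tensor out(TensorDim(1, 1, 4, 3));
    a.dot(b, out, false, false, 0.0f);  // out holds the 4 x 3 product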
     451              : 
     452              :   /**
      453              :    * @brief     Dot Product of Tensors (multiple matrix-matrix multiplications)
      454              :    * @details   This applies dot of the last dimension of this tensor and
      455              :    * the second-last dimension of each passed input tensor.
     456              :    * @param[in] input Tensor
     457              :    * @param[in] output output Tensors
     458              :    * @param[in] trans Transpose
     459              :    * @param[in] trans_in Transpose input
     460              :    * @param[in] beta beta
     461              :    * @retval    Calculated Tensor
     462              :    */
     463              :   virtual void dot(std::vector<Tensor *> input, std::vector<Tensor *> output,
     464              :                    bool trans, bool trans_in, float beta) const;
     465              : 
     466              :   /**
     467              :    * @copydoc Tensor::dropout_mask(float dropout)
     468              :    */
     469              :   virtual void dropout_mask(float dropout);
     470              : 
     471              :   /**
     472              :    * @copydoc Tensor::filter_mask(const Tensor &mask_len, bool reverse)
     473              :    */
     474              :   virtual void filter_mask(const Tensor &mask_len, bool reverse);
     475              : 
     476              :   /**
     477              :    * @copydoc Tensor::zoneout_mask(Tensor &opposite, float zoneout)
     478              :    */
     479              :   virtual void zoneout_mask(Tensor &opposite, float zoneout);
     480              : 
     481              :   /**
     482              :    * @copydoc Tensor::split(std::vector<size_t> sizes, int axis)
     483              :    */
     484              :   virtual std::vector<Tensor> split(std::vector<size_t> sizes, int axis);
     485              : 
     486              :   /**
     487              :    * @copydoc Tensor::concat()
     488              :    */
     489              :   virtual Tensor concat(const std::vector<Tensor> &tensors, int axis,
     490              :                         Tensor &output);
     491              : 
     492              :   /**
     493              :    * @copydoc Tensor::print(std::ostream &out)
     494              :    */
     495              :   virtual void print(std::ostream &out) const = 0;
     496              : 
     497              :   /**
     498              :    * @copydoc Tensor::apply(std::function<T(T)> f, Tensor &output)
     499              :    * @note    This will be only used in FloatTensor.
     500              :    */
     501              :   virtual Tensor &apply(std::function<float(float)> f, Tensor &output) const;
     502              : 
     503              : #ifdef ENABLE_FP16
     504              :   /**
     505              :    * @copydoc Tensor::apply(std::function<T(T)> f, Tensor &output)
     506              :    * @note    This will be only used in HalfTensor.
     507              :    */
     508              :   virtual Tensor &apply(std::function<_FP16(_FP16)> f, Tensor &output) const;
     509              : #endif
     510              : 
     511              :   /**
     512              :    * @brief     Copy the Tensor
     513              :    * @param[in] from Tensor to be copied
     514              :    *
     515              :    * @note copy can reshape the tensor to match the shape
     516              :    */
     517              :   virtual void copy(const Tensor &from) = 0;
     518              : 
     519              :   /**
     520              :    * @brief     Copy the Tensor
     521              :    * @param[in] from Tensor to be copied
     522              :    */
     523              :   virtual void copyData(const Tensor &from) = 0;
     524              : 
     525              :   /**
     526              :    * @brief      Copy the Tensor
     527              :    * @param[in]  input Tensor to be copied
     528              :    * @param[out] output output Tensor
     529              :    */
     530              :   virtual void copy_with_stride(const Tensor &input, Tensor &output) = 0;
     531              : 
     532              :   /**
     533              :    * @brief     Save the Tensor into file
     534              :    * @param[in] file input file stream
     535              :    */
     536              :   virtual void save(std::ostream &file);
     537              : 
     538              :   /**
     539              :    * @brief     Read the Tensor from file
     540              :    * @param[in] file input file stream
     541              :    */
     542              :   virtual void read(std::ifstream &file, size_t start_offset = 0,
     543              :                     bool read_from_offset = false);
     544              : 
     545              :   /**
     546              :    * @brief     Read the Tensor from file
     547              :    * @param[in] file input file stream
     548              :    */
     549              :   virtual void read(ReadSource src, size_t start_offset = 0,
     550              :                     bool read_from_offset = false);
     551              : 
     552              :   /**
     553              :    * @copydoc Tensor::readFSU()
     554              :    */
     555              :   virtual void readFSU();
     556              : 
     557              :   /**
     558              :    * @copydoc Tensor::argmax()
     559              :    */
     560              :   virtual std::vector<unsigned int> argmax() const;
     561              : 
     562              :   /**
     563              :    * @copydoc Tensor::argmin()
     564              :    */
     565              :   virtual std::vector<unsigned int> argmin() const;
     566              : 
     567              :   /**
     568              :    * @brief Compute top-K maximum values along the width dimension
     569              :    *
     570              :    * @details This function computes the top-K maximum values and their
     571              :    * corresponding indices along the **width** dimension for each batch,
     572              :    * channel, and height slice. The operation preserves the original tensor
     573              :    * format (NCHW/NHWC) while reducing the width dimension to size K. The
     574              :    * indices are stored in the provided `indices` array, and the top-K values
     575              :    *          are stored in the provided `output_data` buffer.
      576              :    * are stored in the provided `output_data` buffer.
     577              :    * @param[in] k Number of largest elements to select (1 <= k <= width_size)
     578              :    * @param[out] output_data Buffer to store top-K values (must be
     579              :    * pre-allocated)
     580              :    * @param[out] indices Array to store corresponding indices (must be
     581              :    * pre-allocated)
     582              :    *
     583              :    * @throw std::invalid_argument If:
     584              :    *         - k is 0 or exceeds width dimension size
     585              :    *         - Called on non-floating point tensor (UINT8/UINT16/etc)
     586              :    */
     587              :   virtual void topK(unsigned int k, void *output_data, uint32_t *indices);
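
A hedged usage sketch (the buffer sizes follow from the description above: the width axis shrinks to k, so one value and one index are produced per (batch, channel, height, k) position; the tensor t and its float element type are assumptions):

    const unsigned int k = 3;
    // t: an allocated float tensor with width() >= k
    std::vector<float> values(t.batch() * t.channel() * t.height() * k);
    std::vector<uint32_t> indices(values.size());
    t.topK(k, values.data(), indices.data());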
     588              : 
     589              :   /**
     590              :    * @copydoc Tensor::max_abs()
     591              :    */
     592              :   virtual float max_abs() const = 0;
     593              : 
     594              :   /**
     595              :    * @copydoc Tensor::maxValue()
     596              :    */
     597              :   virtual float maxValue() const = 0;
     598              : 
     599              :   /**
     600              :    * @copydoc Tensor::minValue()
     601              :    */
     602              :   virtual float minValue() const = 0;
     603              : 
     604              :   /**
     605              :    * @copydoc Tensor::transpose(const std::string &direction, Tensor &out)
     606              :    */
     607              :   virtual Tensor &transpose(const std::string &direction, Tensor &out) const;
     608              : 
     609              :   /**
     610              :    * @brief     put data of Tensor
      611              :    * @note      It is only effective when FSU is used
     612              :    */
     613              :   void putData() const;
     614              : 
     615              :   /**
     616              :    * @brief Set the memory buffer for the tensor
     617              :    * @param buf the memory buffer
     618              :    * @param off offset
     619              :    */
     620              :   void setMemoryData(const std::shared_ptr<MemoryData> buf, size_t off);
     621              : 
     622              :   /**
     623              :    * @brief     return Data pointer of Tensor
     624              :    * @retval    template T pointer (float pointer as default)
     625              :    */
     626              :   const std::shared_ptr<MemoryData> getMemoryData() const;
     627              : 
     628              :   /**
     629              :    * @brief     return offset
     630              :    */
     631              :   size_t getOffset() const;
     632              : 
     633              :   /**
     634              :    * @brief     get FileOffset of Tensor
     635              :    * @return    size_t fileOffset
     636              :    */
     637              :   size_t getFileOffset() const;
     638              : 
     639              :   /**
     640              :    * @brief     set FileOffset to Tensor
     641              :    * @param     off FileOffset
     642              :    */
     643              :   void setFileOffset(size_t off);
     644              : 
     645              :   /**
     646              :    * @brief     set Tensor Dim
     647              :    * @param[in] d TensorDim
      648              :    * @note      Throws std::invalid_argument on size mismatch
     649              :    */
     650              :   void reshape(const TensorDim &d);
     651              : 
     652              :   /**
     653              :    * @brief     return a copy of the Tensor Dim
     654              :    * @retval    TensorDim
     655              :    */
     656        25046 :   TensorDim getDim() const { return TensorDim(dim); }
     657              : 
     658              :   /**
     659              :    * @brief     return Tensor Type
     660              :    */
     661              :   TensorDim::TensorType getTensorType() const { return dim.getTensorType(); }
     662              : 
     663              :   /**
     664              :    * @brief Get initializer for the tensor
     665              :    * @retval initializer of the tensor
     666              :    */
     667         1434 :   Initializer getInitializer() const { return initializer; }
     668              : 
     669              :   /**
     670              :    * @brief Get format for the tensor
     671              :    * @retval format of the tensor
     672              :    */
     673              :   TensorDim::Format getFormat() const { return dim.getFormat(); }
     674              : 
     675              :   /**
     676              :    * @brief Get data type for the tensor
     677              :    * @retval data type of the tensor
     678              :    */
     679              :   Tdatatype getDataType() const { return dim.getDataType(); }
     680              : 
     681              :   /**
     682              :    * @brief     update batch size for this tensor
      683              :    * @param     batch new batch size
     684              :    */
     685              :   void updateBatch(unsigned int batch);
     686              : 
     687              :   /**
     688              :    * @brief     update the dimension for this tensor
     689              :    * @param     dimension dimension to be updated
     690              :    */
     691              :   void updateDimension(TensorDim dimension);
     692              : 
     693              :   /**
     694              :    * @brief     return whether tensor is contiguous or not.
     695              :    * @retval    bool contiguous
     696              :    */
     697       110616 :   const bool getContiguous() const noexcept { return contiguous; }
     698              : 
     699              :   /**
     700              :    * @brief     return current stride of tensor.
     701              :    * @retval    int[MAXDIM] strides
     702              :    */
     703              :   const std::array<size_t, TensorDim::MAXDIM> getStrides() const noexcept {
     704        81265 :     return strides;
     705              :   }
     706              : 
     707              :   /**
     708              :    * @brief     Set name of the tensor
     709              :    */
     710         1199 :   void setName(const std::string &name_) { name = name_; }
     711              : 
     712              :   /**
     713              :    * @brief     Get name of the tensor
     714              :    * @retval    string name
     715              :    */
     716        85744 :   const std::string &getName() const { return name; }
     717              : 
     718              :   /**
     719              :    * @brief Get linear index given the n-d index
     720              :    */
     721              :   size_t getIndex(unsigned int b, unsigned int c, unsigned int h,
     722              :                   unsigned int w) const noexcept;
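
For a contiguous NCHW tensor this is expected to reduce to the usual strided formula (a sketch; the strides come from dim.computeStrides()):

    // idx = b * strides[0] + c * strides[1] + h * strides[2] + w * strides[3]
    //     = ((b * channel() + c) * height() + h) * width() + w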
     723              : 
     724              :   /**
     725              :    * @brief     Save quantization information
     726              :    */
     727            0 :   virtual void save_quantization_info(std::ostream &file) {}
     728              : 
     729              :   /**
     730              :    * @brief     Read quantization information
     731              :    */
     732            0 :   virtual void read_quantization_info(std::ifstream &file,
     733              :                                       size_t start_offset = 0,
     734            0 :                                       bool read_from_offset = false) {}
     735              : 
     736              :   /**
     737              :    * @brief     Read quantization information
     738              :    */
     739            0 :   virtual void read_quantization_info(ReadSource src, size_t start_offset = 0,
     740            0 :                                       bool read_from_offset = false) {}
     741              : 
     742              :   /**
     743              :    * @brief     Get size of current tensor
      744              :    * @retval    size_t size of the current tensor
     745              :    */
     746     25131303 :   virtual size_t size() const { return dim.getDataLen(); }
     747              : 
     748              :   /**
     749              :    * @brief     Get if the tensor is empty
     750              :    * @retval    true if the tensor is empty
     751              :    */
     752      1649189 :   bool empty() const { return size() == 0; }
     753              : 
     754              :   /**
     755              :    * @brief     Get size of the data in bytes
     756              :    * @retval    size_t Size in bytes
     757              :    */
     758        25465 :   size_t bytes() const { return size() * dim.getDataTypeSize(); }
     759              : 
     760              :   /**
     761              :    * @brief     Get a total size of the memory data in bytes
     762              :    * @retval    size_t Size in bytes
     763              :    */
     764        38180 :   virtual size_t getMemoryBytes() const {
     765        38180 :     return size() * dim.getDataTypeSize();
     766              :   }
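
A small worked example for the three size queries above, assuming a contiguous (2, 3, 4, 5) FP32 tensor:

    // size()           == 2 * 3 * 4 * 5       == 120 elements
    // bytes()          == 120 * sizeof(float) == 480 bytes
    // getMemoryBytes() == bytes() at this base level, since a non-quantized
    //                     tensor carries no extra scale/zero-point storage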
     767              : 
     768              :   /**
     769              :    * @brief     return Tensor batch size
     770              :    * @retval    batch size
     771              :    */
     772      5779517 :   size_t batch() const { return dim.batch(); }
     773              : 
     774              :   /**
     775              :    * @brief     return Tensor channel size
     776              :    * @retval    channel size
     777              :    */
     778      5734476 :   size_t channel() const { return dim.channel(); }
     779              : 
     780              :   /**
     781              :    * @brief     return Tensor height size
     782              :    * @retval    height size
     783              :    */
     784      5987775 :   size_t height() const { return dim.height(); }
     785              : 
     786              :   /**
     787              :    * @brief     return Tensor width size
     788              :    * @retval    width size
     789              :    */
     790      6113976 :   size_t width() const { return dim.width(); }
     791              : 
     792              :   /**
      793              :    * @brief     return Tensor scale factor size if it exists
      794              :    * @retval    scale factor size
      795              :    * @note      Override for quantized tensor
     796              :    */
     797       169008 :   virtual size_t scale_size() const { return 0; }
     798              : 
     799              :   /**
     800              :    * @brief     return Tensor quantization scheme
     801              :    * @retval    Qscheme qscheme
      802              :    * @note      Override for quantized tensor
     803              :    */
     804            0 :   virtual QScheme q_scheme() const {
     805              :     throw std::invalid_argument(
     806            0 :       "Tensor::q_scheme() is not supported in tensor data type " +
     807            0 :       getStringDataType());
     808              :   }
     809              : 
     810              :   /**
      811              :    * @brief Merge the given two axes of the tensor into the second axis in place
     812              :    *
     813              :    * @param axis1 first axis to merge
     814              :    * @param axis2 second axis to merge
     815              :    */
     816              :   void mergeAxis(unsigned int axis1, unsigned int axis2);
     817              : 
     818              :   /**
     819              :    * @brief Allocate data based on the source tensor
     820              :    * @note As this memory is shared, do NOT initialize
     821              :    */
     822              :   void allocateSrcTensor();
     823              : 
     824              :   /**
     825              :    * @brief Update destination tensor to share memory with source tensor
     826              :    *
     827              :    * @param src src tensor containing the memory
     828              :    * @param dest destination tensor which will share the memory
     829              :    * @param offset offset to be used from the start of the data in bytes
      830              :    * @note The new tensor will share the same data as the current tensor but
      831              :    * can have a different size.
      832              :    * @note The new size plus the offset must be less than the size of the
      833              :    * original tensor.
     834              :    */
     835              :   void createSharedDataTensor(const TensorBase *src, TensorBase *dest,
     836              :                               size_t offset) const;
     837              : 
     838              :   /**
      839              :    * @brief Get new tensor which shares memory with current tensor but has a
      840              :    * different shape
     841              :    *
     842              :    * @param[in] dim new dimension to be set for this tensor
     843              :    * @param[in] offset offset to be used from the start of the data in elements
     844              :    * @param[in] reset_stride reset stride
     845              :    * @param[in] name_ name of the Tensor
     846              :    * @param[out] ret output TensorBase pointer
      847              :    * @note The new tensor will share the same data as the current tensor but
      848              :    * can have a different size.
      849              :    * @note The new size plus the offset must be less than the size of the
      850              :    * original tensor.
     851              :    */
     852              :   void getSharedDataTensor(const TensorDim dim_, size_t offset,
     853              :                            bool reset_stride, const std::string &name_,
     854              :                            TensorBase *ret);
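
A numeric illustration of the size note above (hypothetical shapes):

    // source: dim (1, 1, 4, 4) -> 16 elements
    // view:   dim (1, 1, 2, 4) ->  8 elements
    //   at element offset  4: 8 +  4 = 12, fits within the source, valid
    //   at element offset 12: 8 + 12 = 20, exceeds the source, rejected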
     855              : 
     856              :   /**
     857              :    * @copydoc Tensor::isValid()
     858              :    */
     859              :   virtual bool isValid() const = 0;
     860              : 
     861              :   static constexpr float epsilon = 1e-5f;
     862              : 
     863              : protected:
     864              :   TensorDim dim;
     865              :   std::array<size_t, TensorDim::MAXDIM> strides;
     866              :   bool contiguous;
     867              :   Initializer initializer;
     868              :   std::string name; /**< name of the tensor */
     869              :   std::shared_ptr<MemoryData> data;
     870              :   size_t offset;
     871              :   size_t file_offset; /**< offset of the tensor in the file */
     872              : 
     873              :   /**<
     874              :    * When using shared_data with tensor, this stores the ptr of the source
     875              :    * tensor which handles the full memory. If tensor data is already allocated,
     876              :    * this does not affect the tensor. If the tensor data is not allocated, and
     877              :    * src_ptr is valid, this tensor will use the memory allocated by the src_ptr
     878              :    */
     879              :   std::shared_ptr<SrcSharedTensorBase> src_tensor;
     880              : 
     881              :   /**
      882              :    * @struct BroadcastInfo
      883              :    * @brief External loop info for broadcasted iteration. Please refer to
     884              :    * DISABLED_private_external_loop_n in unittest_nntrainer_tensor.
     885              :    * @note This should better be implemented in iterator fashion before used
     886              :    * extensively.
     887              :    */
     888              :   struct BroadcastInfo {
     889              : 
     890              :     /**
     891              :      * @brief Construct a new External Loop Info object
     892              :      */
     893              :     BroadcastInfo() :
     894        76768 :       buffer_size(0),
     895        76768 :       buffer_axis(-1),
     896        76768 :       strides{0, 0, 0, 0},
     897              :       tensor_type({Tformat::NCHW, Tdatatype::FP32}) {}
     898              : 
     899              :     unsigned int buffer_size; /**< virtual size of the buffer */
     900              :     int buffer_axis;          /**< the smallest axis that should be looped.
     901              :                                    -1 means no loop needed*/
     902              :     std::array<unsigned int, TensorDim::MAXDIM>
     903              :       strides; /**< modified strides for the loop */
     904              :     nntrainer::TensorDim::TensorType tensor_type;
     905              :   };
     906              : 
     907              :   /**
     908              :    * @brief compute Loop info for broadcasting and vectorization
     909              :    *
     910              :    * @param m target tensor to be calculated against.
     911              :    * @return BroadcastInfo Loopinfo needed to run external loop
     912              :    */
     913              :   BroadcastInfo computeBroadcastInfo(const Tensor &m) const;
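
A hedged sketch of how the fields above would typically drive a broadcast loop (an interpretation of the field comments, not code from this file):

    // walk the axes of this tensor down to buffer_axis, using the modified
    // strides to locate the matching position in m; at the innermost level the
    // element-wise kernel processes buffer_size contiguous elements per call
    // (buffer_axis == -1 means the whole tensor is handled in a single call).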
     914              : 
     915              :   /**
      916              :    * @brief Calculates variables needed to perform tensor flatten dot product
     917              :    *
     918              :    * @param[in]  input Tensor
     919              :    * @param[in]  output output Tensor
     920              :    * @param[in]  trans Transpose
     921              :    * @param[in]  trans_in Transpose input
      922              :    * @param[out] first_three_flat product of the first three axes, flattened
      923              :    * @param[out] last_axis last axis
      924              :    * @param[out] input_first_three_flat input's first three axes, flattened
      925              :    * @param[out] input_last_axis input's last axis
      926              :    * @param[out] M number of rows of op(this) and of output
      927              :    * @param[out] N number of columns of op(input) and of output
      928              :    * @param[out] K number of columns of op(this) and rows of op(input)
     929              :    * @param[out] lda leading dimension of this
     930              :    * @param[out] ldb leading dimension of input
     931              :    * @param[out] ldc leading dimension of output
     932              :    *
     933              :    * @note op(X) is one of X or X**T
     934              :    */
     935              :   void calculateFlattenDot(Tensor const &input, Tensor &output, bool trans,
     936              :                            bool trans_in, unsigned int &first_three_flat,
     937              :                            unsigned int &last_axis,
     938              :                            unsigned int &input_first_three_flat,
     939              :                            unsigned int &input_last_axis, unsigned int &M,
     940              :                            unsigned int &N, unsigned int &K, unsigned int &lda,
     941              :                            unsigned int &ldb, unsigned int &ldc) const;
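
For the plain case (trans == trans_in == false) the mapping implied by the parameter list above is expected to be (a sketch; leading dimensions omitted):

    // M = first_three_flat  = batch() * channel() * height() of *this
    // K = last_axis         = width() of *this
    //                         (must match input_first_three_flat)
    // N = input_last_axis   = input.width()
    // op(this) is M x K, op(input) is K x N, and output is M x N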
     942              : 
     943              :   /**
     944              :    * @brief  Get the Data Type String object
     945              :    * @return std::string of tensor data type
      946              :    * @note   TensorBase::getStringDataType() should not be called. Override
      947              :    * this function in the derived class to return the corresponding data type.
     948              :    */
     949            0 :   virtual std::string getStringDataType() const { return "Undefined type"; }
     950              : };
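
As the class comment notes, a new tensor type only needs the allocation and element-access primitives before the rest of the interface becomes useful. A heavily abridged sketch of such a subclass (hypothetical: only three overrides are shown, so this class would itself remain abstract, and MemoryData::getAddr<T>() is assumed to behave as it does for the existing float tensor implementation):

    class SketchTensor : public TensorBase {
    public:
      explicit SketchTensor(const TensorDim &d) :
        TensorBase(d, /*alloc_now=*/false) {}

      void allocate() override {
        if (empty() || data)
          return;
        float *buf = new float[size()](); // zero-initialized backing store
        data = std::shared_ptr<MemoryData>(
          new MemoryData((void *)buf), [](MemoryData *m) {
            delete[] m->getAddr<float>();
            delete m;
          });
        offset = 0;
      }

      void *getData() const override {
        return data ? data->getAddr<float>() + offset : nullptr;
      }

      void setValue(float value) override {
        float *ptr = (float *)getData();
        if (ptr == nullptr)
          return;
        for (size_t i = 0; i < size(); ++i)
          ptr[i] = value;
      }

      // deallocate(), getAddress(), setZero(), initialize(), print(), copy(),
      // copyData(), copy_with_stride(), max_abs(), maxValue(), minValue(),
      // isValid(), ... would still have to be overridden in a real subclass.
    };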
     951              : 
     952              : /**
     953              :  * @class SrcSharedTensorBase
     954              :  * @brief Source of the shared tensor
     955              :  */
     956              : class SrcSharedTensorBase {
     957              : public:
     958              :   /**
     959              :    * @brief   Constructor for the class
     960              :    */
     961              :   SrcSharedTensorBase() : src(nullptr), off(0) {}
     962              : 
     963              :   /**
     964              :    * @brief   Constructor for the class
     965              :    */
     966       301854 :   SrcSharedTensorBase(const TensorBase *tensor, size_t offset) :
     967       301854 :     src(tensor), off(offset) {}
     968              : 
     969              :   /**
     970              :    * @brief   Get the allocated src tensor
     971              :    */
     972       603708 :   const TensorBase *tensor() const {
     973       603708 :     if (!src)
     974            0 :       throw std::runtime_error("Accessing empty src tensor");
     975              : 
     976       603708 :     return src;
     977              :   }
     978              : 
     979              :   /**
     980              :    * @brief   Get the offset from the source tensor
     981              :    */
     982       301854 :   size_t offset() const { return off; }
     983              : 
     984              : private:
     985              :   const TensorBase *src; /**< Tensor of the source */
     986              :   size_t off;            /**< offset from the source data ptr */
     987              : };
     988              : 
     989              : } // namespace nntrainer
     990              : 
     991              : #endif /* __cplusplus */
     992              : #endif /* __TENSOR_BASE_H__ */
        

Generated by: LCOV version 2.0-1