LCOV - coverage_filtered.info - nntrainer/tensor/q4_0

LCOV - code coverage report

Current view:	top level - nntrainer/tensor - q4_0_tensor.cpp (source / functions)		Coverage	Total	Hit
Test:	coverage_filtered.info	Lines:	66.7 %	48	32
Test Date:	2025-12-14 20:38:17	Functions:	63.6 %	11	7

            Line data    Source code

       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * @file        q4_0_tensor.cpp
       4              :  * @date        23 April 2025
       5              :  * @brief       This is Q4_0_Tensor class for Q4_0 quantized tensor.
       6              :  * @see         https://github.com/nnstreamer/nntrainer
       7              :  * @author      Donghyeon Jeong <dhyeon.jeong@samsung.com>
       8              :  * @bug         No known bugs except for NYI items
       9              :  */
      10              : 
      11              : #include <cpu_backend.h>
      12              : #include <q4_0_tensor.h>
      13              : #include <tensor.h>
      14              : 
      15              : namespace nntrainer {
      16              : 
      17            0 : Q4_0_Tensor::Q4_0_Tensor(std::string name_, Tformat fm) :
      18            0 :   TensorBase(name_, fm) {
                          // Name/format-only constructor: both arguments are forwarded to
                          // TensorBase and the data offset starts at 0. This body performs no
                          // allocation and no shape validation.
      19            0 :   offset = 0;
      20            0 : }
      21              : 
      22            4 : Q4_0_Tensor::Q4_0_Tensor(const TensorDim &d, bool alloc_now, Initializer init,
      23            4 :                          std::string name) :
      24            4 :   TensorBase(d, false, init, name) {
                          // Dimension-based constructor.
                          // TensorBase always receives `false` as its second argument, whatever
                          // alloc_now is; allocation is done further down in this body instead
                          // (presumably that argument is the base class's allocate-now flag --
                          // TODO confirm against TensorBase).
                          //
                          // Throws std::invalid_argument unless batch == 1, channel == 1 and
                          // width is a multiple of QK4_0 (the message text states 32).
      25            6 :   NNTR_THROW_IF(d.batch() != 1 || d.channel() != 1 || d.width() % QK4_0 != 0,
      26              :                 std::invalid_argument)
      27              :     << "Q4_0_Tensor must be 2 dimensional tensor with batch size 1 and "
      28              :        "width must be divisible by 32";
      29              : 
                          // Storage is requested only after the shape check above has passed.
      30            2 :   if (alloc_now)
      31            2 :     allocate();
      32            2 :   offset = 0;
      33            4 : }
      34              : 
      35            0 : Q4_0_Tensor::Q4_0_Tensor(const TensorDim &d, const void *buf) :
      36            0 :   Q4_0_Tensor(d, true, Initializer::NONE, "") {
                          // Buffer constructor: delegates to the dimension constructor with
                          // alloc_now = true, Initializer::NONE and an empty name, then copies
                          // the contents of buf in through copy_q40().
                          // The copy is skipped when d.getDataLen() is 0 or buf is null.
                          // NOTE(review): buf presumably has to provide at least size() bytes
                          // of Q4_0 data -- nothing here checks that; confirm with callers.
      37            0 :   if (d.getDataLen() != 0) {
      38            0 :     if (buf != nullptr)
      39            0 :       copy_q40(buf);
      40              :   }
      41            0 : }
      42              : 
      43            2 : void Q4_0_Tensor::allocate() {
                          // Provides backing storage for this tensor.
                          // Does nothing when the tensor is empty() or `data` is already set.
      44            2 :   if (empty() || data)
      45              :     return;
      46              : 
      47            2 :   if (src_tensor) {
      48              :     /// allocate data based on the source tensor
      49            0 :     allocateSrcTensor();
      50              :     /** as this memory is shared, do NOT initialize */
      51              :   } else {
      52              :     /// allocate new memory for the tensor data
      53              :     MemoryData *mem_data;
      54              : 
                            // size() bytes are allocated; the trailing `{}` value-initializes
                            // the array, so the buffer starts out as all zeros.
                            // NOTE(review): if `new MemoryData` throws after the array was
                            // allocated, the array is not released.
      55       222338 :     mem_data = new MemoryData((void *)(new uint8_t[size()]{}));
                            // The custom deleter frees both the byte array and the MemoryData
                            // wrapper once the last shared_ptr owner goes away.
      56            2 :     data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
      57            2 :       delete[] mem_data->template getAddr<uint8_t>();
      58            2 :       delete mem_data;
      59              :     });
      60              : 
                            // Freshly owned memory: start at the beginning of the buffer and
                            // run initialize() (this is skipped on the src_tensor path above).
      61            2 :     offset = 0;
      62            2 :     initialize();
      63              :   }
      64              : }
      65              : 
      66            5 : void *Q4_0_Tensor::getData() const {
                          // Returns the address of this tensor's data, or nullptr when no
                          // MemoryData is attached.
      67            5 :   if (!data)
      68              :     return nullptr;
      69              : 
      70              :   data->validate();
                          // offset is added to a uint8_t pointer, i.e. it is counted in bytes.
      71            5 :   return data->getAddr<uint8_t>() + offset;
      72              : }
      73              : 
      74            9 : size_t Q4_0_Tensor::size() const {
                          // Returns Q4_0_SIZE * (height * width / QK4_0). allocate() uses this
                          // value as the number of uint8_t elements to reserve, so it is the
                          // buffer size in bytes (Q4_0_SIZE is presumably the byte size of one
                          // Q4_0 block of QK4_0 values -- TODO confirm against its definition).
                          // The division is an integer division; the dimension constructor
                          // rejects widths that are not a multiple of QK4_0.
      75            9 :   size_t num_blocks = height() * width() / QK4_0;
      76            9 :   return Q4_0_SIZE * num_blocks;
      77              : }
      78              : 
      79            0 : size_t Q4_0_Tensor::getMemoryBytes() const { return size() * sizeof(uint8_t); } // sizeof(uint8_t) is 1, so this equals size()
      80              : 
      81            0 : void Q4_0_Tensor::copy_q40(const void *buf) {
                          // Copies externally provided data from buf into this tensor's buffer.
                          // Throws std::invalid_argument when this tensor is not contiguous.
      82            0 :   NNTR_THROW_IF(!contiguous, std::invalid_argument)
      83              :     << getName() << " is not contiguous, cannot copy.";
      84              : 
                          // Source and destination are the same address: nothing to do.
      85            0 :   if (buf == getData()) {
      86              :     return;
      87              :   }
      88              :   // copy tensor data
                          // Presumably scopy copies size() uint8_t elements with stride 1 on
                          // both sides -- TODO confirm against the scopy declaration.
                          // NOTE(review): getData() is not checked for nullptr before the copy.
      89            0 :   scopy(size(), (uint8_t *)buf, 1, (uint8_t *)getData(), 1);
      90              : }
      91              : 
      92            2 : void Q4_0_Tensor::setZero() {
                          // Overwrites size() bytes starting at getData() with 0.
                          // The local `data` below shadows the `data` member that the other
                          // methods of this class use.
                          // NOTE(review): getData() can return nullptr; that case is not
                          // guarded here.
      93            2 :   uint8_t *data = (uint8_t *)getData();
      94            2 :   std::fill(data, data + size(), 0);
      95            2 : }
      96              : 
      97            2 : void Q4_0_Tensor::initialize() {
                          // Initializes the tensor contents. Does nothing when the tensor is
                          // empty() or not yet allocated.
      98            2 :   if (empty() || !isAllocated())
      99              :     return;
     100              : 
                          // The only initialization done here is zero-filling, followed by
                          // putData(); no Initializer value is consulted in this method.
     101            2 :   setZero();
     102            2 :   putData();
     103              : }
     104              : 
     105            1 : QScheme Q4_0_Tensor::q_scheme() const { return QScheme::Q4_0; } // always reports the Q4_0 quantization scheme
     106              : 
     107              : } // namespace nntrainer

Generated by: LCOV version 2.0-1