LCOV - code coverage report
Current view: top level - nntrainer/tensor - q4_k_tensor.cpp (source / functions) Coverage Total Hit
Test: coverage_filtered.info Lines: 60.0 % 40 24
Test Date: 2025-12-14 20:38:17 Functions: 62.5 % 8 5

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * @file        q4_k_tensor.cpp
       4              :  * @date        23 April 2025
       5              :  * @brief       This is Q4_K_Tensor class for Q4_K quantized tensor.
       6              :  * @see         https://github.com/nnstreamer/nntrainer
       7              :  * @author      Donghyeon Jeong <dhyeon.jeong@samsung.com>
       8              :  * @bug         No known bugs except for NYI items
       9              :  */
      10              : 
      11              : #include <cpu_backend.h>
      12              : #include <q4_k_tensor.h>
      13              : #include <tensor.h>
      14              : 
      15              : namespace nntrainer {
      16              : 
      17            0 : Q4_K_Tensor::Q4_K_Tensor(std::string name_, Tformat fm, QScheme qscheme_) :
      18            0 :   Uint4QTensor(name_, fm, QScheme::Q4_Kx8) {}
      19              : 
      20            8 : Q4_K_Tensor::Q4_K_Tensor(const TensorDim &d, bool alloc_now, Initializer init,
      21            8 :                          std::string name, QScheme qscheme_) :
      22            8 :   Uint4QTensor(d, false, init, name, qscheme_) {
      23            9 :   NNTR_THROW_IF(d.batch() != 1 || d.channel() != 1 ||
      24              :                   (d.height() % 256 != 0 && d.width() % 256 != 0),
      25              :                 std::invalid_argument)
      26              :     << "Q4_K_Tensor must be 2 dimensional tensor with batch size 1 and "
      27              :        "height or width must be divisible by 256";
      28              : 
      29            7 :   if (qscheme_ == QScheme::Q4_Kx8) {
      30            9 :     NNTR_THROW_IF(d.height() % 8 != 0 || d.width() % 8 != 0,
      31              :                   std::invalid_argument)
      32              :       << "Q4_Kx8 Tensor must have height or width must be divisible by 8";
      33              :   }
      34              : 
      35            6 :   if (alloc_now)
      36            5 :     allocate();
      37            6 : }
      38              : 
      39            0 : Q4_K_Tensor::Q4_K_Tensor(const TensorDim &d, const void *buf,
      40            0 :                          QScheme qscheme_) :
      41            0 :   Q4_K_Tensor(d, true, Initializer::NONE, "", qscheme_) {
      42            0 :   if (d.getDataLen() != 0) {
      43            0 :     if (buf != nullptr)
      44            0 :       copy_q4k(buf);
      45              :   }
      46            0 : }
      47              : 
      48            6 : void Q4_K_Tensor::allocate() {
      49            6 :   if (empty() || data)
      50              :     return;
      51              : 
      52            6 :   if (src_tensor) {
      53              :     /// allocate data based on the source tensor
      54            0 :     allocateSrcTensor();
      55              :     /** as this memory is shared, do NOT initialize */
      56              :   } else {
      57              :     /// allocate new memory for the tensor data
      58              :     MemoryData *mem_data;
      59              : 
      60      1844358 :     mem_data = new MemoryData((void *)(new uint8_t[size()]{}));
      61            6 :     data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
      62            6 :       delete[] mem_data->template getAddr<uint8_t>();
      63            6 :       delete mem_data;
      64              :     });
      65              : 
      66            6 :     offset = 0;
      67            6 :     initialize();
      68              :   }
      69              : }
      70              : 
      71       110621 : size_t Q4_K_Tensor::size() const {
      72       110621 :   if (qscheme == QScheme::Q4_Kx8) {
      73       110621 :     size_t num_blocks = height() * width() / (256 * 8);
      74       110621 :     return Q4_Kx8_SIZE * num_blocks;
      75              :   } else {
      76            0 :     size_t num_blocks = height() * width() / 256;
      77            0 :     return Q4_K_SIZE * num_blocks;
      78              :   }
      79              : }
      80              : 
      81            4 : size_t Q4_K_Tensor::getMemoryBytes() const { return size() * sizeof(uint8_t); }
      82              : 
      83            1 : size_t Q4_K_Tensor::scale_size() const { return 0; }
      84              : 
      85            0 : void Q4_K_Tensor::copy_q4k(const void *buf) {
      86            0 :   NNTR_THROW_IF(!contiguous, std::invalid_argument)
      87              :     << getName() << " is not contiguous, cannot copy.";
      88              : 
      89            0 :   if (buf == getData()) {
      90              :     return;
      91              :   }
      92              :   // copy tensor data
      93            0 :   scopy(size(), (uint8_t *)buf, 1, (uint8_t *)getData(), 1);
      94              : }
      95              : 
      96              : } // namespace nntrainer
        

Generated by: LCOV version 2.0-1