LCOV - code coverage report
Current view: top level - nntrainer/tensor - uint4_tensor.cpp (source / functions) Coverage Total Hit
Test: coverage_filtered.info Lines: 14.6 % 323 47
Test Date: 2025-12-14 20:38:17 Functions: 22.7 % 44 10

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * @file        uint4_tensor.cpp
       4              :  * @date        20 March 2025
       5              :  * @brief       This is Uint4QTensor class for quantized 4-bit unsigned integer
       6              :  * calculation
       7              :  * @see         https://github.com/nnstreamer/nntrainer
       8              :  * @author      Donghyeon Jeong <dhyeon.jeong@samsung.com>
       9              :  * @bug         No known bugs except for NYI items
      10              :  */
      11              : 
      12              : #include <iomanip>
      13              : #include <iostream>
      14              : 
      15              : #include <cpu_backend.h>
      16              : #include <tensor.h>
      17              : #include <uint4_tensor.h>
      18              : 
      19              : namespace nntrainer {
      20              : 
      21            0 : Uint4QTensor::Uint4QTensor(std::string name_, Tformat fm, QScheme qscheme_) :
      22            0 :   TensorBase(name_, fm, Tdatatype::UINT4), qscheme(qscheme_) {}
      23              : 
      24            8 : Uint4QTensor::Uint4QTensor(const TensorDim &d, bool alloc_now, Initializer init,
      25            8 :                            std::string name, QScheme qscheme_) :
      26            8 :   TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
      27            8 :   if (alloc_now)
      28            0 :     allocate();
      29            8 : }
      30              : 
      31            0 : Uint4QTensor::Uint4QTensor(const TensorDim &d, const void *buf,
      32            0 :                            QScheme qscheme_) :
      33            0 :   Uint4QTensor(d, true, Initializer::NONE, "", qscheme_) {
      34            0 :   if (d.getDataLen() != 0) {
      35            0 :     if (buf != nullptr)
      36            0 :       copy(buf);
      37              :   }
      38            0 : }
      39              : 
      40            0 : Uint4QTensor::Uint4QTensor(
      41              :   std::vector<std::vector<std::vector<std::vector<uint8_t>>>> const &d,
      42              :   std::vector<float> const &scales,
      43            0 :   std::vector<unsigned int> const &zero_points, Tformat fm, QScheme qscheme_) :
      44            0 :   qscheme(qscheme_) {
      45            0 :   if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
      46              :     throw std::out_of_range(
      47            0 :       "[Tensor] trying to initialize Uint4QTensor from empty vector");
      48              :   }
      49              : 
      50            0 :   NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
      51              :     << "invalid scale factor size " << scales.size();
      52              : 
      53            0 :   dim.setTensorDim(0, d.size());
      54            0 :   if (fm == Tformat::NCHW) {
      55            0 :     dim.setTensorDim(1, d[0].size());
      56            0 :     dim.setTensorDim(2, d[0][0].size());
      57            0 :     dim.setTensorDim(3, d[0][0][0].size());
      58              :   } else {
      59            0 :     dim.setTensorDim(2, d[0].size());
      60            0 :     dim.setTensorDim(3, d[0][0].size());
      61            0 :     dim.setTensorDim(1, d[0][0][0].size());
      62              :   }
      63              : 
      64              :   dim.setTensorType({fm, Tdatatype::UINT4});
      65              : 
      66            0 :   strides = dim.computeStrides();
      67            0 :   contiguous = true;
      68            0 :   initializer = Initializer::NONE;
      69            0 :   qscheme = qscheme_;
      70              : 
      71              :   /// @note sizeof(float) * scale_size() assumes scale factors are in
      72              :   /// full-precision fp.
      73              :   MemoryData *mem_data = new MemoryData((
      74              :     void
      75            0 :       *)(new uint8_t[(dim.getDataLen() + 1) / 2 + sizeof(float) * scale_size() +
      76            0 :                      sizeof(unsigned int) * scale_size()]()));
      77            0 :   data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *ptr) {
      78            0 :     delete[] ptr->getAddr<uint8_t>();
      79            0 :     delete ptr;
      80              :   });
      81              : 
      82            0 :   offset = 0;
      83              : 
      84            0 :   if (fm == Tformat::NCHW) {
      85            0 :     for (unsigned int i = 0; i < batch(); ++i)
      86            0 :       for (unsigned int j = 0; j < channel(); ++j)
      87            0 :         for (unsigned int k = 0; k < height(); ++k)
      88            0 :           for (unsigned int l = 0; l < width(); ++l)
      89            0 :             this->setValue(i, j, k, l, d[i][j][k][l]);
      90              :   } else {
      91            0 :     for (unsigned int i = 0; i < batch(); ++i)
      92            0 :       for (unsigned int j = 0; j < height(); ++j)
      93            0 :         for (unsigned int k = 0; k < width(); ++k)
      94            0 :           for (unsigned int l = 0; l < channel(); ++l)
      95            0 :             this->setValue(i, l, j, k, d[i][j][k][l]);
      96              :   }
      97              : 
      98              :   // copy scale factors
      99            0 :   scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1);
     100              : 
     101            0 :   unsigned int *zps = getZeroPoint();
     102              : 
     103              :   // copy zero points
     104            0 :   for (size_t i = 0; i < zero_points.size(); ++i) {
     105            0 :     zps[i] = zero_points[i];
     106              :   }
     107            0 : }
     108              : 
     109            1 : bool Uint4QTensor::operator==(const Uint4QTensor &rhs) const {
     110            1 :   if (qscheme != rhs.qscheme)
     111              :     return false;
     112              : 
     113              :   // compare quantized data
     114            1 :   const uint8_t *_data = (uint8_t *)getData();
     115            1 :   const uint8_t *_rdata = (uint8_t *)rhs.getData();
     116       110593 :   for (size_t i = 0; i < (size() + 1) / 2; ++i) {
     117       110592 :     if (_data[i] != _rdata[i])
     118              :       return false;
     119              :   }
     120              : 
     121              :   // compare scale factors
     122            1 :   const float *_scales = (float *)getScale();
     123            1 :   const float *_rscales = (float *)rhs.getScale();
     124            1 :   for (size_t i = 0; i < scale_size(); ++i) {
     125            0 :     if (std::fabs(_scales[i] - _rscales[i]) > 1e-5)
     126              :       return false;
     127              :   }
     128              : 
     129              :   return true;
     130              : }
     131              : 
     132            0 : void Uint4QTensor::allocate() {
     133            0 :   if (empty() || data)
     134              :     return;
     135              : 
     136            0 :   if (src_tensor) {
     137              :     /// allocate data based on the source tensor
     138            0 :     allocateSrcTensor();
     139              :     /** as this memory is shared, do NOT initialize */
     140              :   } else {
     141              :     /// allocate new memory for the tensor data
     142              :     MemoryData *mem_data;
     143              : 
     144              :     /// quantized 4-bit is stored as a 8-bit signed integer (uint4x2)
     145              :     mem_data = new MemoryData(
     146            0 :       (void *)(new uint8_t[(dim.getDataLen() + 1) / 2 +
     147            0 :                            sizeof(float) * scale_size() +
     148            0 :                            sizeof(unsigned int) * scale_size()]{}));
     149            0 :     data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
     150            0 :       delete[] mem_data->template getAddr<uint8_t>();
     151            0 :       delete mem_data;
     152              :     });
     153              : 
     154            0 :     offset = 0;
     155            0 :     initialize();
     156              :   }
     157              : }
     158              : 
     159            0 : void Uint4QTensor::deallocate() {
     160              :   data = nullptr;
     161            0 :   offset = 0;
     162            0 : }
     163              : 
     164           16 : void *Uint4QTensor::getData() const {
     165           16 :   if (!data)
     166              :     return nullptr;
     167              : 
     168              :   data->validate();
     169           15 :   return data->getAddr<uint8_t>() + offset;
     170              : }
     171              : 
     172            0 : void *Uint4QTensor::getData(size_t idx) const {
     173            0 :   if (!data)
     174              :     return nullptr;
     175              : 
     176              :   data->validate();
     177            0 :   return data->getAddr<uint8_t>() + offset + (idx / 2);
     178              : }
     179              : 
     180            2 : void *Uint4QTensor::getScale() const {
     181            2 :   if (!data)
     182              :     return nullptr;
     183              : 
     184              :   data->validate();
     185            2 :   return ((uint8_t *)getData()) + (size() + 1) / 2;
     186              : }
     187              : 
     188            0 : void *Uint4QTensor::getScale(size_t idx) const {
     189            0 :   NNTR_THROW_IF(idx > scale_size(), std::invalid_argument)
     190              :     << "Tensor::getScale() index is not valid";
     191              : 
     192            0 :   if (!data)
     193              :     return nullptr;
     194              : 
     195              :   data->validate();
     196            0 :   return ((float *)getScale()) + idx;
     197              : }
     198              : 
     199            0 : unsigned int *Uint4QTensor::getZeroPoint() const {
     200            0 :   if (!data)
     201              :     return nullptr;
     202              : 
     203              :   data->validate();
     204            0 :   return ((unsigned int *)((float *)((uint8_t *)getData() +
     205            0 :                                      ((size() + 1) / 2)))) +
     206            0 :          scale_size();
     207              : }
     208              : 
     209            0 : unsigned int *Uint4QTensor::getZeroPoint(size_t idx) const {
     210            0 :   NNTR_THROW_IF(idx > scale_size(), std::invalid_argument)
     211              :     << "Uint4QTensor::getZeroPoint() index is not valid";
     212              : 
     213            0 :   if (!data)
     214              :     return nullptr;
     215              : 
     216              :   data->validate();
     217            0 :   return (((unsigned int *)((float *)((uint8_t *)getData() +
     218            0 :                                       ((size() + 1) / 2)))) +
     219            0 :           scale_size()) +
     220            0 :          idx;
     221              : }
     222              : 
     223            0 : void *Uint4QTensor::getAddress(unsigned int i) {
     224            0 :   size_t index = getIndex(batch(), channel(), height(), width());
     225            0 :   if (i > index) {
     226              :     return nullptr;
     227              :   }
     228            0 :   return &((uint8_t *)getData())[i / 2];
     229              : }
     230              : 
     231            0 : const void *Uint4QTensor::getAddress(unsigned int i) const {
     232            0 :   size_t index = getIndex(batch(), channel(), height(), width());
     233            0 :   if (i > index) {
     234              :     return nullptr;
     235              :   }
     236            0 :   return &((uint8_t *)getData())[i / 2];
     237              : }
     238              : 
     239            0 : const uint8_t Uint4QTensor::getValue(unsigned int i) const {
     240            0 :   uint8_t value = ((uint8_t *)getData())[i / 2];
     241            0 :   return (i % 2 == 0) ? ((value >> 4) & 0xF) : (value & 0x0F);
     242              : }
     243              : 
     244            0 : uint8_t Uint4QTensor::getValue(unsigned int i) {
     245            0 :   uint8_t value = ((uint8_t *)getData())[i / 2];
     246            0 :   return (i % 2 == 0) ? ((value >> 4) & 0xF) : (value & 0x0F);
     247              : }
     248              : 
     249            0 : const uint8_t Uint4QTensor::getValue(unsigned int b, unsigned int c,
     250              :                                      unsigned int h, unsigned int w) const {
     251            0 :   return getValue(getIndex(b, c, h, w));
     252              : }
     253              : 
     254            0 : uint8_t Uint4QTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
     255              :                                unsigned int w) {
     256            0 :   return getValue(getIndex(b, c, h, w));
     257              : }
     258              : 
     259              : /// @todo this func should be template function
     260            0 : void Uint4QTensor::setValue(float value) {
     261            0 :   NNTR_THROW_IF(value > 15 || value < 0, std::out_of_range)
     262              :     << "Value must be in range [0, 15]. Input value: " << value;
     263              : 
     264            0 :   uint8_t val = static_cast<uint8_t>(value);
     265            0 :   uint8_t *data = (uint8_t *)getData();
     266            0 :   std::fill(data, data + (size() + 1) / 2, (val << 4) | (val & 0x0f));
     267            0 : }
     268              : 
     269              : /// @todo this func should be template function
     270            0 : void Uint4QTensor::addValue(unsigned int b, unsigned int c, unsigned int h,
     271              :                             unsigned int w, float value, float beta) {
     272            0 :   auto const &idx = getIndex(b, c, h, w);
     273            0 :   float output = getValue(idx);
     274            0 :   output *= beta;
     275            0 :   output += value;
     276              : 
     277              :   // if result value is out of range, clamp to max/min value
     278              :   uint8_t val =
     279            0 :     static_cast<uint8_t>(std::trunc(std::clamp((int)output, 0, 15)));
     280              : 
     281              :   // encode result value to uint8 data
     282            0 :   ((uint8_t *)getData())[idx / 2] =
     283            0 :     (idx % 2 == 0) ? (val << 4) | (((uint8_t *)getData())[idx / 2] & 0x0f)
     284            0 :                    : (((uint8_t *)getData())[idx / 2] & 0xf0) | (val & 0x0f);
     285            0 : }
     286              : 
     287              : /// @todo this func should be template function
     288            0 : void Uint4QTensor::setValue(unsigned int b, unsigned int c, unsigned int h,
     289              :                             unsigned int w, float value) {
     290            0 :   NNTR_THROW_IF(value > 15 || value < 0, std::out_of_range)
     291              :     << "Value must be in range [0, 15]. Input value: " << value;
     292              : 
     293            0 :   auto const &idx = getIndex(b, c, h, w);
     294            0 :   uint8_t val = static_cast<uint8_t>(value);
     295              : 
     296            0 :   ((uint8_t *)getData())[idx / 2] =
     297            0 :     (idx % 2 == 0) ? (val << 4) | (((uint8_t *)getData())[idx / 2] & 0x0f)
     298            0 :                    : (((uint8_t *)getData())[idx / 2] & 0xf0) | (val & 0x0f);
     299            0 : }
     300              : 
     301            0 : void Uint4QTensor::setZero() {
     302              :   /// @todo accelerate with SIMD
     303            0 :   setValue(0);
     304            0 : }
     305              : 
     306            6 : void Uint4QTensor::initialize() {
     307            6 :   if (empty() || !isAllocated())
     308              :     return;
     309              : 
     310              :   /// @note Sampling from the normal/uniform distribution is invalid
     311            6 :   switch (initializer) {
     312            0 :   case Initializer::ZEROS:
     313            0 :     setZero();
     314            0 :     break;
     315            0 :   case Initializer::ONES:
     316            0 :     setValue(1.0f);
     317            0 :     break;
     318              :   case Initializer::NONE:
     319              :     break;
     320            0 :   default:
     321              :     throw std::invalid_argument(
     322            0 :       "Initializer other than zero and one is not valid for " +
     323            0 :       getStringDataType());
     324              :     break;
     325              :   }
     326              : 
     327            6 :   putData();
     328              : }
     329              : 
     330            0 : void Uint4QTensor::initialize(Initializer init) {
     331            0 :   initializer = init;
     332            0 :   initialize();
     333            0 : }
     334              : 
     335            0 : void Uint4QTensor::copy(const Tensor &from) {
     336            0 :   reshape(from.getDim());
     337            0 :   copy(from.getData());
     338            0 : }
     339              : 
     340            0 : void Uint4QTensor::copyData(const Tensor &from) {
     341            0 :   NNTR_THROW_IF(!contiguous, std::invalid_argument)
     342              :     << getName() << " is not contiguous, cannot copy.";
     343              : 
     344            0 :   NNTR_THROW_IF(size() != from.size(), std::invalid_argument)
     345              :     << "Size of the tensor to copy must match.";
     346              : 
     347              :   /// @todo support copy from float32 & float16 to uint8 data
     348            0 :   switch (from.getDataType()) {
     349              :   case ml::train::TensorDim::DataType::UINT4:
     350            0 :     copy(from.getData());
     351              :     break;
     352            0 :   default:
     353            0 :     throw std::invalid_argument("Error: Unsupported data type");
     354              :     break;
     355              :   }
     356            0 : }
     357              : 
     358            0 : void Uint4QTensor::copy_with_stride(const Tensor &input, Tensor &output) {
     359            0 :   for (unsigned int b = 0; b < output.batch(); ++b) {
     360            0 :     for (unsigned int c = 0; c < output.channel(); ++c) {
     361            0 :       for (unsigned int h = 0; h < output.height(); ++h) {
     362            0 :         for (unsigned int w = 0; w < output.width(); ++w) {
     363            0 :           output.setValue(b, c, h, w, input.getValue<uint8_t>(b, c, h, w));
     364              :         }
     365              :       }
     366              :     }
     367              :   }
     368            0 : }
     369              : 
     370            2 : void Uint4QTensor::save(std::ostream &file) {
     371              :   /// @note Save quantization information
     372            2 :   save_quantization_info(file);
     373              : 
     374            2 :   std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
     375              : 
     376            2 :   NNTR_THROW_IF(sz < 0, std::invalid_argument)
     377            0 :     << "save size: " << getMemoryBytes()
     378              :     << " is too big. It cannot be represented by std::streamsize";
     379              : 
     380            2 :   checkedWrite(file, (char *)getData(), sz,
     381              :                "[Uint4QTensor::save] operation failed");
     382            2 :   putData();
     383            2 : }
     384              : 
     385            2 : void Uint4QTensor::read(std::ifstream &file, size_t start_offset,
     386              :                         bool read_from_offset) {
     387            2 :   if (start_offset == std::numeric_limits<size_t>::max()) {
     388            0 :     start_offset = file_offset;
     389              :   }
     390            2 :   read_quantization_info(file, start_offset, read_from_offset);
     391              : 
     392            2 :   std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
     393              : 
     394            2 :   NNTR_THROW_IF(sz < 0, std::invalid_argument)
     395            0 :     << "read size: " << getMemoryBytes()
     396              :     << " is too big. It cannot be represented by std::streamsize";
     397              : 
     398            2 :   if (read_from_offset) {
     399            0 :     start_offset += sizeof(uint16_t);
     400              :   }
     401              : 
     402            2 :   checkedRead(file, (char *)getData(), sz,
     403              :               "[Uint4QTensor::read] operation failed", start_offset,
     404              :               read_from_offset);
     405            2 :   putData();
     406            2 : }
     407              : 
     408            0 : void Uint4QTensor::read(ReadSource src, size_t start_offset,
     409              :                         bool read_from_offset) {
     410            0 :   if (start_offset == std::numeric_limits<size_t>::max()) {
     411            0 :     start_offset = file_offset;
     412              :   }
     413            0 :   read_quantization_info(src, start_offset, read_from_offset);
     414              : 
     415            0 :   std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
     416              : 
     417            0 :   NNTR_THROW_IF(sz < 0, std::invalid_argument)
     418            0 :     << "read size: " << getMemoryBytes()
     419              :     << " is too big. It cannot be represented by std::streamsize";
     420              : 
     421            0 :   if (read_from_offset) {
     422            0 :     start_offset += sizeof(uint16_t);
     423              :   }
     424              : 
     425            0 :   checkedRead(src, (char *)getData(), sz,
     426              :               "[Uint4QTensor::read] operation failed", start_offset,
     427              :               read_from_offset);
     428            0 :   putData();
     429            0 : }
     430              : 
     431            0 : std::vector<unsigned int> Uint4QTensor::argmax() const {
     432              :   std::vector<unsigned int> result;
     433            0 :   const uint8_t *data = (uint8_t *)getData();
     434              :   size_t batch_size = batch();
     435            0 :   size_t feature_len = dim.getFeatureLen();
     436            0 :   result.resize(batch_size);
     437              : 
     438            0 :   for (unsigned int b = 0; b < batch_size; ++b) {
     439              :     uint8_t curr_val, max_val = 0;
     440              :     unsigned int max_element_idx = 0;
     441            0 :     for (unsigned int idx = 0; idx < feature_len; ++idx) {
     442            0 :       curr_val = getValue(idx + b * feature_len);
     443              : 
     444            0 :       if (curr_val > max_val) {
     445              :         max_val = curr_val;
     446              :         max_element_idx = idx;
     447              :       }
     448              :     }
     449            0 :     result[b] = max_element_idx;
     450              :   }
     451            0 :   return result;
     452            0 : }
     453              : 
     454            0 : std::vector<unsigned int> Uint4QTensor::argmin() const {
     455              :   std::vector<unsigned int> result;
     456            0 :   const uint8_t *data = (uint8_t *)getData();
     457              :   size_t batch_size = batch();
     458            0 :   size_t feature_len = dim.getFeatureLen();
     459            0 :   result.resize(batch_size);
     460              : 
     461            0 :   for (unsigned int b = 0; b < batch_size; ++b) {
     462              :     uint8_t curr_val, min_val = 15;
     463              :     unsigned int min_element_idx = 0;
     464            0 :     for (unsigned int idx = 0; idx < feature_len; ++idx) {
     465            0 :       curr_val = getValue(idx + b * feature_len);
     466              : 
     467            0 :       if (curr_val < min_val) {
     468              :         min_val = curr_val;
     469              :         min_element_idx = idx;
     470              :       }
     471              :     }
     472            0 :     result[b] = min_element_idx;
     473              :   }
     474            0 :   return result;
     475            0 : }
     476              : 
     477            0 : float Uint4QTensor::max_abs() const {
     478              :   uint8_t abs_max_val = 0;
     479              :   uint8_t curr_val;
     480            0 :   for (unsigned int idx = 0; idx < size(); ++idx) {
     481            0 :     curr_val = getValue(idx);
     482            0 :     abs_max_val = (curr_val > abs_max_val) ? curr_val : abs_max_val;
     483              : 
     484              :     // Terminate search when abs_max_val is an Uint4 absolute max value 15
     485            0 :     if (abs_max_val == 15)
     486              :       return abs_max_val;
     487              :   }
     488              : 
     489            0 :   return abs_max_val;
     490              : }
     491              : 
     492            0 : float Uint4QTensor::maxValue() const {
     493              :   uint8_t max_val = 0;
     494              :   uint8_t curr_val;
     495            0 :   for (unsigned int idx = 0; idx < size(); ++idx) {
     496            0 :     curr_val = getValue(idx);
     497            0 :     max_val = (curr_val > max_val) ? curr_val : max_val;
     498              : 
     499              :     // Terminate search when max_val is an Uint4 max value 15
     500            0 :     if (max_val == 15)
     501              :       return max_val;
     502              :   }
     503              : 
     504            0 :   return max_val;
     505              : }
     506              : 
     507            0 : float Uint4QTensor::minValue() const {
     508              :   uint8_t min_val = 15;
     509              :   uint8_t curr_val;
     510            0 :   for (unsigned int idx = 0; idx < size(); ++idx) {
     511            0 :     curr_val = getValue(idx);
     512            0 :     min_val = (curr_val < min_val) ? curr_val : min_val;
     513              : 
     514              :     // Terminate search when min_val is an Uint4 min value 0
     515            0 :     if (min_val == 0)
     516              :       return min_val;
     517              :   }
     518              : 
     519            0 :   return min_val;
     520              : }
     521              : 
     522            0 : void Uint4QTensor::print(std::ostream &out) const {
     523            0 :   const uint8_t *data = (uint8_t *)getData();
     524            0 :   unsigned int len = size();
     525            0 :   out << "data addr: " << reinterpret_cast<const float *>(data) << '\n';
     526            0 :   out << dim;
     527              : 
     528            0 :   if (len > 100) {
     529            0 :     out << '[' << (int)getValue(0) << ' ' << (int)getValue(1) << ' '
     530            0 :         << (int)getValue(2) << " ... " << (int)getValue(len - 3) << ' '
     531            0 :         << (int)getValue(len - 2) << ' ' << (int)getValue(len - 1) << ']'
     532              :         << std::endl;
     533            0 :     return;
     534              :   }
     535              : 
     536            0 :   std::ios init(NULL);
     537            0 :   init.copyfmt(out);
     538            0 :   if (getFormat() == Tformat::NCHW) {
     539            0 :     for (unsigned int k = 0; k < batch(); k++) {
     540            0 :       for (unsigned int l = 0; l < channel(); l++) {
     541            0 :         for (unsigned int i = 0; i < height(); i++) {
     542            0 :           for (unsigned int j = 0; j < width(); j++) {
     543            0 :             out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
     544              :           }
     545              :           out << std::endl;
     546              :         }
     547              :         out << std::endl;
     548              :       }
     549              :       out << "-------" << std::endl;
     550              :     }
     551              :   } else {
     552            0 :     for (unsigned int k = 0; k < batch(); k++) {
     553            0 :       for (unsigned int i = 0; i < height(); i++) {
     554            0 :         for (unsigned int j = 0; j < width(); j++) {
     555            0 :           for (unsigned int l = 0; l < channel(); l++) {
     556            0 :             out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
     557              :           }
     558              :           out << std::endl;
     559              :         }
     560              :         out << std::endl;
     561              :       }
     562              :       out << "-------" << std::endl;
     563              :     }
     564            0 :     out.copyfmt(init);
     565              :   }
     566              : 
     567              :   /// print quantization information
     568            0 :   const float *q_scales = (float *)getScale();
     569            0 :   const unsigned int *q_zero_points = getZeroPoint();
     570              : 
     571            0 :   if (scale_size() > 50) {
     572            0 :     out << "Scale factors: [" << q_scales[0] << ' ' << q_scales[1] << ' '
     573            0 :         << q_scales[2] << " ... " << q_scales[len - 3] << ' '
     574            0 :         << q_scales[len - 2] << ' ' << q_scales[len - 1] << ']' << std::endl;
     575              : 
     576            0 :     out << "Zero points: [" << q_zero_points[0] << ' ' << q_zero_points[1]
     577            0 :         << ' ' << q_zero_points[2] << " ... " << q_zero_points[len - 3] << ' '
     578            0 :         << q_zero_points[len - 2] << ' ' << q_zero_points[len - 1] << ']'
     579              :         << std::endl;
     580              :     return;
     581              :   }
     582              : 
     583            0 :   out << "Scale factors: ";
     584            0 :   for (unsigned i = 0; i < scale_size(); ++i) {
     585            0 :     out << q_scales[i] << " ";
     586              :   }
     587              :   out << std::endl;
     588              : 
     589            0 :   out << "Zero points: ";
     590            0 :   for (unsigned i = 0; i < scale_size(); ++i) {
     591            0 :     out << q_zero_points[i] << " ";
     592              :   }
     593              :   out << std::endl;
     594              : }
     595              : 
     596            0 : size_t Uint4QTensor::getMemoryBytes() const {
     597            0 :   return ((size() + 1) / 2) * dim.getDataTypeSize() +
     598            0 :          scale_size() * sizeof(float) + scale_size() * sizeof(unsigned int);
     599              : }
     600              : 
     601            0 : size_t Uint4QTensor::scale_size() const {
     602            0 :   switch (qscheme) {
     603              :   case QScheme::PER_TENSOR_AFFINE:
     604              :     return 1;
     605              :     break;
     606            0 :   case QScheme::PER_CHANNEL_AFFINE:
     607            0 :     return height();
     608              :     break;
     609              :   default:
     610              :     break;
     611              :   }
     612            0 :   return 0;
     613              : }
     614              : 
     615            1 : QScheme Uint4QTensor::q_scheme() const { return qscheme; }
     616              : 
     617            0 : void Uint4QTensor::copy(const void *buf) {
     618            0 :   NNTR_THROW_IF(!contiguous, std::invalid_argument)
     619              :     << getName() << " is not contiguous, cannot copy.";
     620              : 
     621            0 :   if (buf == getData()) {
     622              :     return;
     623              :   }
     624              :   // copy tensor data
     625            0 :   scopy((size() + 1) / 2, (uint8_t *)buf, 1, (uint8_t *)getData(), 1);
     626              : 
     627              :   // copy scale factor data
     628            0 :   float *scales = (float *)(((uint8_t *)buf) + (size() + 1) / 2);
     629            0 :   scopy(scale_size(), scales, 1, (float *)getScale(), 1);
     630              : 
     631              :   // copy zero points
     632              :   unsigned int *zps =
     633            0 :     (unsigned int *)((float *)(((uint8_t *)buf) + size()) + scale_size());
     634              : 
     635            0 :   memcpy(getZeroPoint(), zps, scale_size() * sizeof(unsigned int));
     636              : }
     637              : 
     638            2 : void Uint4QTensor::save_quantization_info(std::ostream &file) {
     639            2 :   checkedWrite(file, (char *)&qscheme, sizeof(uint16_t),
     640              :                "[Uint4QTensor::save] failed to write quantization information");
     641            2 : }
     642              : 
     643            2 : void Uint4QTensor::read_quantization_info(std::ifstream &file,
     644              :                                           size_t start_offset,
     645              :                                           bool read_from_offset) {
     646            2 :   checkedRead(file, (char *)&qscheme, sizeof(uint16_t),
     647              :               "[Uint4QTensor::read] failed to read quantization information",
     648              :               start_offset, read_from_offset);
     649            2 : }
     650              : 
     651            0 : void Uint4QTensor::read_quantization_info(ReadSource src, size_t start_offset,
     652              :                                           bool read_from_offset) {
     653            0 :   checkedRead(src, (char *)&qscheme, sizeof(uint16_t),
     654              :               "[Uint4QTensor::read] failed to read quantization information",
     655              :               start_offset, read_from_offset);
     656            0 : }
     657              : 
     658              : } // namespace nntrainer
        

Generated by: LCOV version 2.0-1