LCOV - coverage_filtered.info - nntrainer/tensor/uint

LCOV - code coverage report

Current view:	top level - nntrainer/tensor - uint_tensor.cpp (source / functions)		Coverage	Total	Hit
Test:	coverage_filtered.info	Lines:	64.2 %	307	197
Test Date:	2025-12-14 20:38:17	Functions:	48.5 %	132	64

            Line data    Source code

       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * @file        uint_tensor.cpp
       4              :  * @date        02 April 2024
       5              :  * @brief       This is UIntTensor class for unsigned integer calculation
       6              :  *          This uint_tensor.cpp contains some codes to define
       7              :  *          UIntTensor template methods. This file cannot be used directly but
       8              :  *          included by uint_tensor.h only.
       9              :  * @see         https://github.com/nnstreamer/nntrainer
      10              :  * @author      Donghyeon Jeong <dhyeon.jeong@samsung.com>
      11              :  * @author      Eunju Yang <ej.yang@samsung.com>
      12              :  * @bug         No known bugs except for NYI items
      13              :  */
      14              : 
      15              : #ifdef __UINT_TENSOR_H__
      16              : 
      17              : template <typename T>
      18            3 : UIntTensor<T>::UIntTensor(std::string name_, Tformat fm, QScheme qscheme_) :
      19            9 :   TensorBase(name_, fm, checkTensorDataType()), qscheme(qscheme_) {}
      20              : 
      21              : template <typename T>
      22           52 : UIntTensor<T>::UIntTensor(const TensorDim &d, bool alloc_now, Initializer init,
      23              :                           std::string name, QScheme qscheme_) :
      24           52 :   TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
      25           52 :   if (alloc_now)
      26           52 :     allocate();
      27           52 : }
      28              : 
      29              : template <typename T>
      30           40 : UIntTensor<T>::UIntTensor(const TensorDim &d, const void *buf,
      31              :                           QScheme qscheme_) :
      32           40 :   UIntTensor(d, true, Initializer::NONE, "", qscheme_) {
      33           40 :   if (d.getDataLen() != 0) {
      34           40 :     if (buf != nullptr)
      35            5 :       copy(buf);
      36              :   }
      37           40 : }
      38              : 
      39              : template <typename T>
      40            7 : UIntTensor<T>::UIntTensor(
      41              :   std::vector<std::vector<std::vector<std::vector<T>>>> const &d,
      42              :   std::vector<float> const &scales,
      43              :   std::vector<unsigned int> const &zero_points, Tformat fm, QScheme qscheme_) :
      44           14 :   qscheme(qscheme_) {
      45            7 :   if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty() ||
      46           12 :       scales.empty() || zero_points.empty()) {
      47            2 :     throw std::out_of_range(
      48              :       "[Tensor] trying to initialize UIntTensor from empty vector");
      49              :   }
      50              : 
      51            5 :   dim.setTensorDim(0, d.size());
      52            5 :   if (fm == Tformat::NCHW) {
      53            5 :     dim.setTensorDim(1, d[0].size());
      54            5 :     dim.setTensorDim(2, d[0][0].size());
      55            5 :     dim.setTensorDim(3, d[0][0][0].size());
      56              :   } else {
      57            0 :     dim.setTensorDim(2, d[0].size());
      58            0 :     dim.setTensorDim(3, d[0][0].size());
      59            0 :     dim.setTensorDim(1, d[0][0][0].size());
      60              :   }
      61              : 
      62            5 :   dim.setTensorType({fm, checkTensorDataType()});
      63              : 
      64            9 :   if (scale_size() != scales.size() || scale_size() != zero_points.size()) {
      65            4 :     throw std::invalid_argument("[Tensor] Scales vector or zero point vector "
      66              :                                 "size is invalid. scale size: " +
      67              :                                 std::to_string(scale_size()));
      68              :   }
      69              : 
      70            4 :   strides = dim.computeStrides();
      71            4 :   contiguous = true;
      72            4 :   initializer = Initializer::NONE;
      73              : 
      74            4 :   MemoryData *mem_data = new MemoryData(
      75           94 :     (void *)(new T[dim.getDataLen() + (sizeof(float) + sizeof(unsigned int)) /
      76            4 :                                         sizeof(T) * scale_size()]()));
      77            7 :   data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
      78            4 :     delete[] mem_data->getAddr<T>();
      79            4 :     delete mem_data;
      80              :   });
      81              : 
      82            4 :   offset = 0;
      83              : 
      84              :   // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2]
      85              :   // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch,
      86              :   // dim[1] == height, dim[2] == width, dim[3] == channel
      87            4 :   if (fm == Tformat::NCHW) {
      88            8 :     for (unsigned int i = 0; i < batch(); ++i)
      89           20 :       for (unsigned int j = 0; j < channel(); ++j)
      90           48 :         for (unsigned int k = 0; k < height(); ++k)
      91           96 :           for (unsigned int l = 0; l < width(); ++l)
      92           64 :             this->setValue(i, j, k, l, static_cast<float>(d[i][j][k][l]));
      93              :   } else {
      94            0 :     for (unsigned int i = 0; i < batch(); ++i)
      95            0 :       for (unsigned int j = 0; j < height(); ++j)
      96            0 :         for (unsigned int k = 0; k < width(); ++k)
      97            0 :           for (unsigned int l = 0; l < channel(); ++l)
      98            0 :             this->setValue(i, l, j, k, static_cast<float>(d[i][j][k][l]));
      99              :   }
     100              : 
     101              :   // copy scale factors
     102            8 :   scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1);
     103              : 
     104            4 :   unsigned int *zps = getZeroPoint();
     105              : 
     106              :   // copy zero points
     107            9 :   for (size_t i = 0; i < zero_points.size(); ++i) {
     108            5 :     zps[i] = zero_points[i];
     109              :   }
     110            7 : }
     111              : 
     112              : template <typename T>
     113           17 : bool UIntTensor<T>::operator==(const UIntTensor<T> &rhs) const {
     114           17 :   if (qscheme != rhs.qscheme)
     115              :     return false;
     116              : 
     117              :   // compare quantized data
     118           17 :   const T *_data = (T *)getData();
     119           17 :   const T *_rdata = (T *)rhs.getData();
     120         2212 :   for (size_t i = 0; i < size(); ++i) {
     121         2201 :     if (_data[i] != _rdata[i])
     122              :       return false;
     123              :   }
     124              : 
     125              :   // compare scale factors
     126           11 :   const float *_scales = (float *)getScale();
     127           11 :   const float *_rscales = (float *)rhs.getScale();
     128           22 :   for (size_t i = 0; i < scale_size(); ++i) {
     129           11 :     if (std::fabs(_scales[i] - _rscales[i]) > 1e-5)
     130              :       return false;
     131              :   }
     132              : 
     133              :   // compare zero points
     134           11 :   const unsigned int *_zps = getZeroPoint();
     135           11 :   const unsigned int *_rzps = rhs.getZeroPoint();
     136           22 :   for (size_t i = 0; i < scale_size(); ++i) {
     137           11 :     if (_zps[i] != _rzps[i])
     138              :       return false;
     139              :   }
     140              : 
     141              :   return true;
     142              : }
     143              : 
     144           65 : template <typename T> void UIntTensor<T>::allocate() {
     145           65 :   if (empty() || data)
     146              :     return;
     147              : 
     148           54 :   if (src_tensor) {
     149              :     /// allocate data based on the source tensor
     150            2 :     allocateSrcTensor();
     151              :     /** as this memory is shared, do NOT initialize */
     152              :   } else {
     153              :     /// allocate new memory for the tensor data
     154              :     MemoryData *mem_data;
     155              : 
     156           52 :     mem_data = new MemoryData(
     157         5317 :       (void *)(new T[dim.getDataLen() + (sizeof(float) + sizeof(unsigned int)) /
     158           52 :                                           sizeof(T) * scale_size()]{}));
     159           52 :     data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
     160           52 :       delete[] mem_data->template getAddr<T>();
     161           52 :       delete mem_data;
     162              :     });
     163              : 
     164           52 :     offset = 0;
     165           52 :     initialize();
     166              :   }
     167              : }
     168              : 
     169            1 : template <typename T> void UIntTensor<T>::deallocate() {
     170              :   data = nullptr;
     171            1 :   offset = 0;
     172            1 : }
     173         1714 : template <typename T> void *UIntTensor<T>::getData() const {
     174         1714 :   if (!data)
     175              :     return nullptr;
     176              : 
     177              :   data->validate();
     178         1714 :   return data->getAddr<T>() + offset;
     179              : }
     180              : 
     181            2 : template <typename T> void *UIntTensor<T>::getData(size_t idx) const {
     182            2 :   if (!data)
     183              :     return nullptr;
     184              : 
     185              :   data->validate();
     186            2 :   return data->getAddr<T>() + offset + idx;
     187              : }
     188              : 
     189           43 : template <typename T> void *UIntTensor<T>::getScale() const {
     190           43 :   if (!data)
     191              :     return nullptr;
     192              : 
     193              :   data->validate();
     194           43 :   return ((T *)getData()) + size();
     195              : }
     196              : 
     197            0 : template <typename T> void *UIntTensor<T>::getScale(size_t idx) const {
     198            0 :   NNTR_THROW_IF(idx > scale_size(), std::invalid_argument)
     199              :     << "Tensor::getScale() index is not valid";
     200              : 
     201            0 :   if (!data)
     202              :     return nullptr;
     203              : 
     204              :   data->validate();
     205            0 :   return (float *)((T *)getData() + size()) + idx;
     206              : }
     207              : 
     208           43 : template <typename T> unsigned int *UIntTensor<T>::getZeroPoint() const {
     209           43 :   if (!data)
     210              :     return nullptr;
     211              : 
     212              :   data->validate();
     213           43 :   return ((unsigned int *)((float *)((T *)getData() + size()))) + scale_size();
     214              : }
     215              : 
     216              : template <typename T>
     217            0 : unsigned int *UIntTensor<T>::getZeroPoint(size_t idx) const {
     218            0 :   NNTR_THROW_IF(idx > scale_size(), std::invalid_argument)
     219              :     << "Tensor::getZeroPoint() index is not valid";
     220              : 
     221            0 :   if (!data)
     222              :     return nullptr;
     223              : 
     224              :   data->validate();
     225            0 :   return (((unsigned int *)((float *)((T *)getData() + size()))) +
     226            0 :           scale_size()) +
     227            0 :          idx;
     228              : }
     229              : 
     230            0 : template <typename T> void *UIntTensor<T>::getAddress(unsigned int i) {
     231            0 :   size_t index = getIndex(batch(), channel(), height(), width());
     232            0 :   if (i > index) {
     233              :     return nullptr;
     234              :   }
     235            0 :   return &((T *)getData())[i];
     236              : }
     237              : 
     238              : template <typename T>
     239            0 : const void *UIntTensor<T>::getAddress(unsigned int i) const {
     240            0 :   size_t index = getIndex(batch(), channel(), height(), width());
     241            0 :   if (i > index) {
     242              :     return nullptr;
     243              :   }
     244            0 :   return &((T *)getData())[i];
     245              : }
     246              : 
     247            0 : template <typename T> const T &UIntTensor<T>::getValue(unsigned int i) const {
     248            0 :   return ((T *)getData())[i];
     249              : }
     250              : 
     251            0 : template <typename T> T &UIntTensor<T>::getValue(unsigned int i) {
     252            0 :   return ((T *)getData())[i];
     253              : }
     254              : 
     255              : template <typename T>
     256            0 : const T &UIntTensor<T>::getValue(unsigned int b, unsigned int c, unsigned int h,
     257              :                                  unsigned int w) const {
     258            0 :   return getValue(getIndex(b, c, h, w));
     259              : }
     260              : 
     261              : template <typename T>
     262            0 : T &UIntTensor<T>::getValue(unsigned int b, unsigned int c, unsigned int h,
     263              :                            unsigned int w) {
     264            0 :   return getValue(getIndex(b, c, h, w));
     265              : }
     266              : 
     267           12 : template <typename T> void UIntTensor<T>::setValue(float value) {
     268           12 :   T *data = (T *)getData();
     269           12 :   std::fill(data, data + size(), static_cast<T>(value));
     270           12 : }
     271              : 
     272              : template <typename T>
     273            5 : void UIntTensor<T>::addValue(unsigned int b, unsigned int c, unsigned int h,
     274              :                              unsigned int w, float value, float beta) {
     275            5 :   auto const &idx = getIndex(b, c, h, w);
     276            5 :   float output = static_cast<float>(((T *)getData())[idx]);
     277            5 :   output *= beta;
     278            5 :   output += value;
     279              : 
     280            5 :   ((T *)getData())[idx] = static_cast<T>(output);
     281            5 : }
     282              : 
     283              : template <typename T>
     284         1287 : void UIntTensor<T>::setValue(unsigned int b, unsigned int c, unsigned int h,
     285              :                              unsigned int w, float value) {
     286         1287 :   ((T *)getData())[getIndex(b, c, h, w)] = static_cast<T>(value);
     287         1287 : }
     288              : 
     289            3 : template <typename T> void UIntTensor<T>::setZero() {
     290              :   /// @todo replace with apply_i or scal
     291            3 :   setValue(0);
     292            3 : }
     293              : 
     294           58 : template <typename T> void UIntTensor<T>::initialize() {
     295           58 :   if (empty() || !isAllocated())
     296              :     return;
     297              : 
     298              :   /// @note Sampling from the normal/uniform distribution is invalid
     299           58 :   switch (initializer) {
     300            3 :   case Initializer::ZEROS:
     301            3 :     setZero();
     302            3 :     break;
     303            6 :   case Initializer::ONES:
     304            6 :     setValue(1.0f);
     305            6 :     break;
     306              :   case Initializer::NONE:
     307              :     break;
     308            3 :   default:
     309            9 :     throw std::invalid_argument("Initializer not valid for " +
     310            3 :                                 getStringDataType());
     311              :     break;
     312              :   }
     313              : 
     314           55 :   putData();
     315              : }
     316              : 
     317            6 : template <typename T> void UIntTensor<T>::initialize(Initializer init) {
     318            6 :   initializer = init;
     319            6 :   initialize();
     320            3 : }
     321              : 
     322            2 : template <typename T> void UIntTensor<T>::copy(const Tensor &from) {
     323            2 :   reshape(from.getDim());
     324            2 :   copy(from.getData());
     325            2 : }
     326              : 
     327            2 : template <typename T> void UIntTensor<T>::copyData(const Tensor &from) {
     328            2 :   NNTR_THROW_IF(!contiguous, std::invalid_argument)
     329              :     << getName() << " is not contiguous, cannot copy.";
     330              : 
     331            2 :   NNTR_THROW_IF(size() != from.size(), std::invalid_argument)
     332              :     << "Size of tensor to copy must match";
     333              : 
     334              :   // copy data with the same data type T
     335            2 :   if (from.getDataType() == getDataType()) {
     336            0 :     copy(from.getData<T>());
     337            0 :     return;
     338              :   }
     339              : 
     340              :   /// @todo support copy from other data types
     341            2 :   switch (from.getDataType()) {
     342            0 :   case ml::train::TensorDim::DataType::FP32: {
     343            0 :     copy_fp32(from.size(), from.getData<float>(), (T *)getData());
     344            0 :     break;
     345              :   }
     346            2 :   default:
     347            2 :     throw std::invalid_argument("Error: Unsupported data type");
     348              :     break;
     349              :   }
     350              : }
     351              : 
     352              : template <typename T>
     353            1 : void UIntTensor<T>::copy_with_stride(const Tensor &input, Tensor &output) {
     354            4 :   for (unsigned int b = 0; b < output.batch(); ++b) {
     355            6 :     for (unsigned int c = 0; c < output.channel(); ++c) {
     356           12 :       for (unsigned int h = 0; h < output.height(); ++h) {
     357           54 :         for (unsigned int w = 0; w < output.width(); ++w) {
     358           45 :           output.setValue(b, c, h, w,
     359           45 :                           static_cast<float>(input.getValue<T>(b, c, h, w)));
     360              :         }
     361              :       }
     362              :     }
     363              :   }
     364            1 : }
     365              : 
     366            1 : template <typename T> void UIntTensor<T>::save(std::ostream &file) {
     367              :   /// @note Save quantization information
     368            1 :   save_quantization_info(file);
     369              : 
     370            1 :   std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
     371              : 
     372            1 :   NNTR_THROW_IF(sz < 0, std::invalid_argument)
     373            0 :     << "save size: " << getMemoryBytes()
     374              :     << " is too big. It cannot be represented by std::streamsize";
     375              : 
     376            1 :   checkedWrite(file, (char *)getData(), sz,
     377              :                "[UIntTensor::save] operation failed");
     378            1 :   putData();
     379            1 : }
     380              : 
     381              : template <typename T>
     382            1 : void UIntTensor<T>::read(std::ifstream &file, size_t start_offset,
     383              :                          bool read_from_offset) {
     384            1 :   if (start_offset == std::numeric_limits<size_t>::max()) {
     385            0 :     start_offset = file_offset;
     386              :   }
     387            1 :   read_quantization_info(file, start_offset, read_from_offset);
     388              : 
     389            1 :   std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
     390              : 
     391            1 :   NNTR_THROW_IF(sz < 0, std::invalid_argument)
     392            0 :     << "read size: " << getMemoryBytes()
     393              :     << " is too big. It cannot be represented by std::streamsize";
     394              : 
     395            1 :   if (read_from_offset) {
     396            0 :     start_offset += sizeof(uint16_t);
     397              :   }
     398              : 
     399            1 :   checkedRead(file, (char *)getData(), sz,
     400              :               "[UIntTensor::read] operation failed", start_offset,
     401              :               read_from_offset);
     402            1 :   putData();
     403            1 : }
     404              : 
     405              : template <typename T>
     406            0 : void UIntTensor<T>::read(ReadSource src, size_t start_offset,
     407              :                          bool read_from_offset) {
     408            0 :   if (start_offset == std::numeric_limits<size_t>::max()) {
     409            0 :     start_offset = file_offset;
     410              :   }
     411            0 :   read_quantization_info(src, start_offset, read_from_offset);
     412              : 
     413            0 :   std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
     414              : 
     415            0 :   NNTR_THROW_IF(sz < 0, std::invalid_argument)
     416            0 :     << "read size: " << getMemoryBytes()
     417              :     << " is too big. It cannot be represented by std::streamsize";
     418              : 
     419            0 :   if (read_from_offset) {
     420            0 :     start_offset += sizeof(uint16_t);
     421              :   }
     422              : 
     423            0 :   checkedRead(src, (char *)getData(), sz, "[UIntTensor::read] operation failed",
     424              :               start_offset, read_from_offset);
     425            0 :   putData();
     426            0 : }
     427              : 
     428            1 : template <typename T> std::vector<unsigned int> UIntTensor<T>::argmax() const {
     429              :   std::vector<unsigned int> result;
     430            1 :   const T *data = (T *)getData();
     431              :   size_t batch_size = batch();
     432            1 :   size_t feature_len = dim.getFeatureLen();
     433              : 
     434            1 :   result.resize(batch_size);
     435              : 
     436            4 :   for (unsigned int b = 0; b < batch_size; b++) {
     437              :     auto max_iter =
     438            3 :       std::max_element(data + b * feature_len, data + (b + 1) * feature_len);
     439            3 :     result[b] = std::distance(data, max_iter) - (b * feature_len);
     440              :   }
     441            1 :   return result;
     442            0 : }
     443              : 
     444            0 : template <typename T> std::vector<unsigned int> UIntTensor<T>::argmin() const {
     445              :   std::vector<unsigned int> result;
     446            0 :   const T *data = (T *)getData();
     447              :   size_t batch_size = batch();
     448            0 :   size_t feature_len = dim.getFeatureLen();
     449              : 
     450            0 :   result.resize(batch_size);
     451              : 
     452            0 :   for (unsigned int b = 0; b < batch_size; b++) {
     453              :     auto min_iter =
     454            0 :       std::min_element(data + b * feature_len, data + (b + 1) * feature_len);
     455            0 :     result[b] = std::distance(data, min_iter) - (b * feature_len);
     456              :   }
     457            0 :   return result;
     458            0 : }
     459              : 
     460            1 : template <typename T> float UIntTensor<T>::max_abs() const {
     461            1 :   return maxValue();
     462              : }
     463              : 
     464            1 : template <typename T> float UIntTensor<T>::maxValue() const {
     465            1 :   const T *data = (T *)getData();
     466            1 :   return static_cast<float>(*std::max_element(data, data + size()));
     467              : }
     468              : 
     469            2 : template <typename T> float UIntTensor<T>::minValue() const {
     470            2 :   const T *data = (T *)getData();
     471            2 :   return static_cast<float>(*std::min_element(data, data + size()));
     472              : }
     473              : 
     474            0 : template <typename T> void UIntTensor<T>::print(std::ostream &out) const {
     475            0 :   const T *data = (T *)getData();
     476            0 :   unsigned int len = size();
     477            0 :   out << "data addr: " << reinterpret_cast<const float *>(data) << '\n';
     478            0 :   out << dim;
     479              : 
     480            0 :   if (len > 512) {
     481            0 :     out << '[' << (int)data[0] << ' ' << (int)data[1] << ' ' << (int)data[2]
     482            0 :         << " ... " << (int)data[len - 3] << ' ' << (int)data[len - 2] << ' '
     483            0 :         << (int)data[len - 1] << ']' << std::endl;
     484            0 :     return;
     485              :   }
     486              : 
     487            0 :   std::ios init(NULL);
     488            0 :   init.copyfmt(out);
     489            0 :   if (getFormat() == Tformat::NCHW) {
     490            0 :     for (unsigned int k = 0; k < batch(); k++) {
     491            0 :       for (unsigned int l = 0; l < channel(); l++) {
     492            0 :         for (unsigned int i = 0; i < height(); i++) {
     493            0 :           for (unsigned int j = 0; j < width(); j++) {
     494            0 :             out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
     495              :           }
     496              :           out << std::endl;
     497              :         }
     498              :         out << std::endl;
     499              :       }
     500              :       out << "-------" << std::endl;
     501              :     }
     502              :   } else {
     503            0 :     for (unsigned int k = 0; k < batch(); k++) {
     504            0 :       for (unsigned int i = 0; i < height(); i++) {
     505            0 :         for (unsigned int j = 0; j < width(); j++) {
     506            0 :           for (unsigned int l = 0; l < channel(); l++) {
     507            0 :             out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
     508              :           }
     509              :           out << std::endl;
     510              :         }
     511              :         out << std::endl;
     512              :       }
     513              :       out << "-------" << std::endl;
     514              :     }
     515            0 :     out.copyfmt(init);
     516              :   }
     517              : 
     518              :   /// print quantization information
     519            0 :   const float *q_scales = (float *)getScale();
     520            0 :   const unsigned int *q_zero_points = getZeroPoint();
     521              : 
     522            0 :   if (scale_size() > 50) {
     523            0 :     out << "Scale factors: [" << q_scales[0] << ' ' << q_scales[1] << ' '
     524            0 :         << q_scales[2] << " ... " << q_scales[len - 3] << ' '
     525            0 :         << q_scales[len - 2] << ' ' << q_scales[len - 1] << ']' << std::endl;
     526              : 
     527            0 :     out << "Zero points: [" << q_zero_points[0] << ' ' << q_zero_points[1]
     528            0 :         << ' ' << q_zero_points[2] << " ... " << q_zero_points[len - 3] << ' '
     529            0 :         << q_zero_points[len - 2] << ' ' << q_zero_points[len - 1] << ']'
     530              :         << std::endl;
     531              :     return;
     532              :   }
     533              : 
     534            0 :   out << "Scale factors: ";
     535            0 :   for (unsigned i = 0; i < scale_size(); ++i) {
     536            0 :     out << q_scales[i] << " ";
     537              :   }
     538              :   out << std::endl;
     539              : 
     540            0 :   out << "Zero points: ";
     541            0 :   for (unsigned i = 0; i < scale_size(); ++i) {
     542            0 :     out << q_zero_points[i] << " ";
     543              :   }
     544              :   out << std::endl;
     545              : }
     546              : 
     547            2 : template <typename T> size_t UIntTensor<T>::getMemoryBytes() const {
     548            4 :   return bytes() + scale_size() * sizeof(float) +
     549            2 :          scale_size() * sizeof(unsigned int);
     550              : }
     551              : 
     552              : template <typename T>
     553            1 : void UIntTensor<T>::save_quantization_info(std::ostream &file) {
     554            1 :   checkedWrite(file, (char *)&qscheme, sizeof(uint16_t),
     555              :                "[CharTensor::save] failed to write quantization information");
     556            1 : }
     557              : 
     558              : template <typename T>
     559            1 : void UIntTensor<T>::read_quantization_info(std::ifstream &file,
     560              :                                            size_t start_offset,
     561              :                                            bool read_from_offset) {
     562            1 :   checkedRead(file, (char *)&qscheme, sizeof(uint16_t),
     563              :               "[CharTensor::read] failed to read quantization information",
     564              :               start_offset, read_from_offset);
     565            1 : }
     566              : 
     567              : template <typename T>
     568            0 : void UIntTensor<T>::read_quantization_info(ReadSource src, size_t start_offset,
     569              :                                            bool read_from_offset) {
     570            0 :   checkedRead(src, (char *)&qscheme, sizeof(uint16_t),
     571              :               "[CharTensor::read] failed to read quantization information",
     572              :               start_offset, read_from_offset);
     573            0 : }
     574              : 
     575          168 : template <typename T> size_t UIntTensor<T>::scale_size() const {
     576          186 :   switch (qscheme) {
     577              :   case QScheme::PER_TENSOR_AFFINE:
     578              :     return 1;
     579            6 :   case QScheme::PER_CHANNEL_AFFINE:
     580            6 :     return width();
     581              :   default:
     582              :     break;
     583              :   }
     584            0 :   return 0;
     585              : }
     586              : 
     587            0 : template <typename T> QScheme UIntTensor<T>::q_scheme() const {
     588            0 :   return qscheme;
     589              : }
     590              : 
     591            7 : template <typename T> void UIntTensor<T>::copy(const void *buf) {
     592            7 :   NNTR_THROW_IF(!contiguous, std::invalid_argument)
     593              :     << getName() << " is not contiguous, cannot copy.";
     594              : 
     595            7 :   if (buf == getData()) {
     596              :     return;
     597              :   }
     598              : 
     599              :   if (std::is_same<T, uint16_t>::value) {
     600              :     const uint16_t *data = (const uint16_t *)buf;
     601            3 :     uint16_t *rdata = (uint16_t *)getData();
     602            3 :     copy_u16((const unsigned int)size(), data, rdata);
     603              :   } else {
     604              :     /// @todo need to optimize
     605            4 :     memcpy(getData(), buf, size() * (sizeof(T)));
     606              :   }
     607              : 
     608              :   // copy scale factors
     609            7 :   float *scales = (float *)(((T *)buf) + size());
     610            7 :   scopy(scale_size(), scales, 1, (float *)getScale(), 1);
     611              : 
     612              :   // copy zero points
     613            7 :   unsigned int *zps =
     614            7 :     (unsigned int *)((float *)(((T *)buf) + size()) + scale_size());
     615              : 
     616            7 :   memcpy(getZeroPoint(), zps, scale_size() * sizeof(unsigned int));
     617              : }
     618              : 
     619              : #endif

Generated by: LCOV version 2.0-1