LCOV - code coverage report
Current view: top level - nntrainer/tensor - char_tensor.cpp (source / functions)
Test:      coverage_filtered.info
Test Date: 2025-12-14 20:38:17

             Coverage    Total    Hit
Lines:         77.2 %      298    230
Functions:     81.4 %       43     35

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * @file        char_tensor.cpp
       4              :  * @date        02 April 2024
       5              :  * @brief       This is the CharTensor class for 8-bit integer calculation
       6              :  * @see         https://github.com/nnstreamer/nntrainer
       7              :  * @author      Donghyeon Jeong <dhyeon.jeong@samsung.com>
       8              :  * @bug         No known bugs except for NYI items
       9              :  */
      10              : 
      11              : #include <iomanip>
      12              : #include <iostream>
      13              : 
      14              : #include <char_tensor.h>
      15              : #include <cpu_backend.h>
      16              : #include <tensor.h>
      17              : 
      18              : namespace nntrainer {
      19              : 
      20            2 : CharTensor::CharTensor(std::string name_, Tformat fm, QScheme qscheme_) :
      21            4 :   TensorBase(name_, fm, Tdatatype::QINT8), qscheme(qscheme_) {}
      22              : 
      23           58 : CharTensor::CharTensor(const TensorDim &d, bool alloc_now, Initializer init,
      24           58 :                        std::string name, QScheme qscheme_) :
      25           58 :   TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
      26           58 :   if (alloc_now)
      27           54 :     allocate();
      28           58 : }
      29              : 
      30           32 : CharTensor::CharTensor(const TensorDim &d, const void *buf, QScheme qscheme_) :
      31           32 :   CharTensor(d, true, Initializer::NONE, "", qscheme_) {
      32           32 :   if (d.getDataLen() != 0) {
      33           32 :     if (buf != nullptr)
      34            8 :       copy(buf);
      35              :   }
      36           32 : }
      37              : 
      38            2 : CharTensor::CharTensor(
      39              :   std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
      40            4 :   std::vector<float> const &scales, Tformat fm, QScheme qscheme_) {
      41            2 :   if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
      42              :     throw std::out_of_range(
      43            1 :       "[Tensor] trying to initialize CharTensor from empty vector");
      44              :   }
      45              : 
      46            1 :   dim.setTensorDim(0, d.size());
      47            1 :   if (fm == Tformat::NCHW) {
      48            1 :     dim.setTensorDim(1, d[0].size());
      49            1 :     dim.setTensorDim(2, d[0][0].size());
      50            1 :     dim.setTensorDim(3, d[0][0][0].size());
      51              :   } else {
      52            0 :     dim.setTensorDim(2, d[0].size());
      53            0 :     dim.setTensorDim(3, d[0][0].size());
      54            0 :     dim.setTensorDim(1, d[0][0][0].size());
      55              :   }
      56              : 
      57              :   dim.setTensorType({fm, Tdatatype::QINT8});
      58              : 
      59            1 :   strides = dim.computeStrides();
      60            1 :   contiguous = true;
      61            1 :   initializer = Initializer::NONE;
      62            1 :   qscheme = qscheme_;
      63              : 
      64            1 :   NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
      65              :     << "invalid scale factor size " << scales.size();
      66              : 
      67              :   MemoryData *mem_data = new MemoryData(
      68          131 :     (void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]()));
      69            2 :   data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
      70            1 :     delete[] mem_data->getAddr<int8_t>();
      71            1 :     delete mem_data;
      72              :   });
      73              : 
      74            1 :   offset = 0;
      75              : 
      76              :   // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2]
      77              :   // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch,
      78              :   // dim[1] == height, dim[2] == width, dim[3] == channel
      79            1 :   if (fm == Tformat::NCHW) {
      80            2 :     for (unsigned int i = 0; i < batch(); ++i)
      81            4 :       for (unsigned int j = 0; j < channel(); ++j)
      82           12 :         for (unsigned int k = 0; k < height(); ++k)
      83           99 :           for (unsigned int l = 0; l < width(); ++l)
      84           90 :             this->setValue(i, j, k, l, d[i][j][k][l]);
      85              :   } else {
      86            0 :     for (unsigned int i = 0; i < batch(); ++i)
      87            0 :       for (unsigned int j = 0; j < height(); ++j)
      88            0 :         for (unsigned int k = 0; k < width(); ++k)
      89            0 :           for (unsigned int l = 0; l < channel(); ++l)
      90            0 :             this->setValue(i, l, j, k, d[i][j][k][l]);
      91              :   }
      92              : 
      93              :   // copy scale factors
      94            1 :   scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1);
      95            2 : }
      96              : 
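A minimal usage sketch of the nested-vector constructor above (illustrative only; it assumes the nntrainer headers used in this file and that the Tformat/QScheme aliases are reachable through the nntrainer namespace):

#include <cstdint>
#include <vector>

#include <char_tensor.h>

void construct_from_vector_example() {
  // 1x1x2x2 NCHW data, values already quantized to int8
  std::vector<std::vector<std::vector<std::vector<int8_t>>>> d = {
    {{{10, -20}, {30, -40}}}};
  std::vector<float> scales = {0.05f}; // one factor: PER_TENSOR_AFFINE

  nntrainer::CharTensor t(d, scales, nntrainer::Tformat::NCHW,
                          nntrainer::QScheme::PER_TENSOR_AFFINE);
  // the real value of element (0, 0, 0, 0) is 10 * 0.05f = 0.5f
}
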
      97           10 : bool CharTensor::operator==(const CharTensor &rhs) const {
      98           10 :   if (qscheme != rhs.qscheme)
      99              :     return false;
     100              : 
     101              :   // compare quantized data
     102           10 :   const int8_t *_data = (int8_t *)getData();
     103           10 :   const int8_t *_rdata = (int8_t *)rhs.getData();
     104          741 :   for (size_t i = 0; i < size(); ++i) {
     105          734 :     if (_data[i] != _rdata[i])
     106              :       return false;
     107              :   }
     108              : 
     109              :   // compare scale factors
     110            7 :   const float *_scales = (float *)getScale();
     111            7 :   const float *_rscales = (float *)rhs.getScale();
     112           14 :   for (size_t i = 0; i < scale_size(); ++i) {
     113            7 :     if (std::fabs(_scales[i] - _rscales[i]) > 1e-5)
     114              :       return false;
     115              :   }
     116              : 
     117              :   return true;
     118              : }
     119              : 
     120           57 : void CharTensor::allocate() {
     121           57 :   if (empty() || data)
     122              :     return;
     123              : 
     124           56 :   if (src_tensor) {
     125              :     /// allocate data based on the source tensor
     126            2 :     allocateSrcTensor();
     127              :     /** as this memory is shared, do NOT initialize */
     128              :   } else {
     129              :     /// allocate new memory for the tensor data
     130              :     MemoryData *mem_data;
     131              : 
     132              :     mem_data = new MemoryData(
      133       607844 :       (void *)(new int8_t[dim.getDataLen() + sizeof(float) * scale_size()]{}));
     134           54 :     data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
     135           54 :       delete[] mem_data->template getAddr<int8_t>();
     136           54 :       delete mem_data;
     137              :     });
     138              : 
     139           54 :     offset = 0;
     140           54 :     initialize();
     141              :   }
     142              : }
     143              : 
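allocate() above reserves a single int8 buffer that holds the quantized values followed by the float scale factors, which is why getScale() below returns getData() + size(). A standalone sketch of that packed layout (plain C++, not the nntrainer API; assumes the data length keeps the scale area 4-byte aligned, as the member functions above do):

#include <cstddef>
#include <cstdint>
#include <vector>

struct PackedQint8 {
  std::vector<int8_t> buf; // [int8 data][float scale factors]
  size_t data_len;

  PackedQint8(size_t data_len, size_t scale_count) :
    buf(data_len + sizeof(float) * scale_count, 0), data_len(data_len) {}

  int8_t *data() { return buf.data(); }
  float *scales() { // same offset computation as CharTensor::getScale()
    return reinterpret_cast<float *>(buf.data() + data_len);
  }
};
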
     144            1 : void CharTensor::deallocate() {
     145              :   data = nullptr;
     146            1 :   offset = 0;
     147            1 : }
     148              : 
     149         6597 : void *CharTensor::getData() const {
     150         6597 :   if (!data)
     151              :     return nullptr;
     152              : 
     153              :   data->validate();
     154         6597 :   return data->getAddr<int8_t>() + offset;
     155              : }
     156              : 
     157            1 : void *CharTensor::getData(size_t idx) const {
     158            1 :   if (!data)
     159              :     return nullptr;
     160              : 
     161              :   data->validate();
     162            1 :   return data->getAddr<int8_t>() + offset + idx;
     163              : }
     164              : 
     165           40 : void *CharTensor::getScale() const {
     166           40 :   if (!data)
     167              :     return nullptr;
     168              : 
     169              :   data->validate();
     170           40 :   return ((int8_t *)getData()) + size();
     171              : }
     172              : 
     173            0 : void *CharTensor::getScale(size_t idx) const {
     174            0 :   NNTR_THROW_IF(idx > scale_size(), std::invalid_argument)
     175              :     << "Tensor::getScale() index is not valid";
     176              : 
     177            0 :   if (!data)
     178              :     return nullptr;
     179              : 
     180              :   data->validate();
     181            0 :   return ((float *)getScale()) + idx;
     182              : }
     183              : 
     184            0 : void *CharTensor::getAddress(unsigned int i) {
     185            0 :   size_t index = getIndex(batch(), channel(), height(), width());
     186            0 :   if (i > index) {
     187              :     return nullptr;
     188              :   }
     189            0 :   return &((int8_t *)getData())[i];
     190              : }
     191              : 
     192            0 : const void *CharTensor::getAddress(unsigned int i) const {
     193            0 :   size_t index = getIndex(batch(), channel(), height(), width());
     194            0 :   if (i > index) {
     195              :     return nullptr;
     196              :   }
     197            0 :   return &((int8_t *)getData())[i];
     198              : }
     199              : 
     200           44 : const int8_t &CharTensor::getValue(unsigned int i) const {
     201           44 :   return ((int8_t *)getData())[i];
     202              : }
     203              : 
     204           29 : int8_t &CharTensor::getValue(unsigned int i) {
     205           29 :   return ((int8_t *)getData())[i];
     206              : }
     207              : 
     208           44 : const int8_t &CharTensor::getValue(unsigned int b, unsigned int c,
     209              :                                    unsigned int h, unsigned int w) const {
     210           44 :   return getValue(getIndex(b, c, h, w));
     211              : }
     212              : 
     213            5 : int8_t &CharTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
     214              :                              unsigned int w) {
     215            5 :   return getValue(getIndex(b, c, h, w));
     216              : }
     217              : 
     218           13 : void CharTensor::setValue(float value) {
     219           13 :   int8_t *data = (int8_t *)getData();
     220           13 :   std::fill(data, data + size(), static_cast<int8_t>(value));
     221           13 : }
     222              : 
     223            6 : void CharTensor::addValue(unsigned int b, unsigned int c, unsigned int h,
     224              :                           unsigned int w, float value, float beta) {
     225            6 :   auto const &idx = getIndex(b, c, h, w);
     226            6 :   float output = ((int8_t *)getData())[idx];
     227            6 :   output *= beta;
     228            6 :   output += value;
     229              : 
     230            6 :   ((int8_t *)getData())[idx] = static_cast<int8_t>(std::trunc(output));
     231            6 : }
     232              : 
     233         6124 : void CharTensor::setValue(unsigned int b, unsigned int c, unsigned int h,
     234              :                           unsigned int w, float value) {
     235         6124 :   ((int8_t *)getData())[getIndex(b, c, h, w)] = static_cast<int8_t>(value);
     236         6124 : }
     237              : 
     238            6 : void CharTensor::setZero() {
     239              :   /// @todo replace with apply_i or scal
     240            6 :   setValue(0);
     241            6 : }
     242              : 
     243           63 : void CharTensor::initialize() {
     244           63 :   if (empty() || !isAllocated())
     245              :     return;
     246              : 
     247              :   /// @note Sampling from the normal/uniform distribution is invalid
     248           63 :   switch (initializer) {
     249            5 :   case Initializer::ZEROS:
     250            5 :     setZero();
     251            5 :     break;
     252            4 :   case Initializer::ONES:
     253            4 :     setValue(1.0f);
     254            4 :     break;
     255              :   case Initializer::NONE:
     256              :     break;
     257            1 :   default:
     258            1 :     throw std::invalid_argument("Initializer not valid for " +
     259            3 :                                 getStringDataType());
     260              :     break;
     261              :   }
     262              : 
     263           62 :   putData();
     264              : }
     265              : 
     266            2 : void CharTensor::initialize(Initializer init) {
     267            2 :   initializer = init;
     268            2 :   initialize();
     269            1 : }
     270              : 
     271            0 : int CharTensor::multiply_i(float const &value) {
     272              :   // multiply value to scale factors
     273            0 :   float *g_scale = (float *)getScale();
     274              : 
     275            0 :   sscal(scale_size(), value, g_scale, 1);
     276            0 :   return ML_ERROR_NONE;
     277              : }
     278              : 
     279            1 : Tensor &CharTensor::multiply(Tensor const &input, Tensor &output,
     280              :                              const float scale) const {
     281            1 :   CREATE_IF_EMPTY_DIMS(output, dim, nullptr, q_scheme());
     282              : 
     283            1 :   NNTR_THROW_IF(q_scheme() != input.q_scheme(), std::invalid_argument)
     284              :     << "[Tensor] Cannot multiply tensors with different quantization schemes.";
     285              : 
     286              :   /// @note remove after vector scale multiply is implemented
     287            1 :   NNTR_THROW_IF(q_scheme() != QScheme::PER_TENSOR_AFFINE, std::invalid_argument)
     288              :     << "Multiplication other than per tensor affine quantization scheme is "
     289              :        "NYI.";
     290              : 
     291            1 :   float lhs_scale = *(float *)getScale();
     292            1 :   float rhs_scale = *input.getScale<float>();
     293              : 
     294              :   /// @note current impl assumes pre-established quantization parameters are set
     295              :   /// @todo 1. verify result_scale is valid 2. calculate qparams if not given
     296            0 :   NNTR_THROW_IF(std::fpclassify(lhs_scale) == FP_ZERO ||
     297              :                   std::fpclassify(rhs_scale) == FP_ZERO ||
     298              :                   std::fpclassify(scale) == FP_ZERO,
     299              :                 std::invalid_argument)
     300              :     << "scale factors not set, cannot multiply";
     301              : 
     302            1 :   float multiplier = lhs_scale * rhs_scale / scale;
     303              : 
     304            1 :   int8_t *lhs = (int8_t *)getData();
     305              :   int8_t *rhs = input.getData<int8_t>();
     306              :   int8_t *result = output.getData<int8_t>();
     307              : 
     308           17 :   for (unsigned int i = 0; i < size(); ++i) {
     309           16 :     int32_t accum_val =
     310           16 :       static_cast<int32_t>(lhs[i]) * static_cast<int32_t>(rhs[i]);
     311              : 
     312           16 :     result[i] =
     313           32 :       std::max(-128, std::min((int)std::lround(multiplier * accum_val), 127));
     314              :   }
     315              : 
     316            1 :   *output.getScale<float>() = scale;
     317              : 
     318            1 :   return output;
     319              : }
     320              : 
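The element-wise loop above is ordinary per-tensor affine requantization: real values are q * scale, so the product of two quantized elements is rescaled by lhs_scale * rhs_scale / out_scale and clamped to the int8 range. A standalone sketch of that arithmetic (not the nntrainer API):

#include <algorithm>
#include <cmath>
#include <cstdint>

// q_out = clamp(round(multiplier * q_lhs * q_rhs), -128, 127)
// with multiplier = lhs_scale * rhs_scale / out_scale
int8_t requantized_mul(int8_t q_lhs, float lhs_scale, int8_t q_rhs,
                       float rhs_scale, float out_scale) {
  float multiplier = lhs_scale * rhs_scale / out_scale;
  int32_t accum = static_cast<int32_t>(q_lhs) * static_cast<int32_t>(q_rhs);
  int v = static_cast<int>(std::lround(multiplier * accum));
  return static_cast<int8_t>(std::max(-128, std::min(v, 127)));
}
// e.g. q_lhs = 20 (scale 0.1f, i.e. 2.0f) and q_rhs = 30 (scale 0.1f, i.e.
// 3.0f) with out_scale = 0.5f give accum = 600 and result 12, i.e. 6.0f
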
     321            1 : Tensor &CharTensor::add(Tensor const &input, Tensor &output,
     322              :                         float const scale) const {
     323            1 :   CREATE_IF_EMPTY_DIMS(output, dim, nullptr, qscheme);
     324              : 
     325            1 :   NNTR_THROW_IF(q_scheme() != input.q_scheme(), std::invalid_argument)
      326              :     << "[Tensor] Cannot add tensors with different quantization schemes.";
     327              : 
      328              :   /// @note remove after vector scale addition is implemented
     329            1 :   NNTR_THROW_IF(q_scheme() != QScheme::PER_TENSOR_AFFINE, std::invalid_argument)
     330              :     << "Tensor addition other than per tensor affine quantization scheme is "
     331              :        "NYI.";
     332              : 
     333            1 :   float lhs_scale = *(float *)getScale();
     334            1 :   float rhs_scale = *input.getScale<float>();
     335              : 
     336              :   /// @note current impl assumes pre-established quantization parameters are set
     337              :   /// @todo 1. verify result_scale is valid 2. calculate qparams if not given
     338              :   ///       3. check qscheme is per tensor affine
     339            0 :   NNTR_THROW_IF(std::fpclassify(lhs_scale) == FP_ZERO ||
     340              :                   std::fpclassify(rhs_scale) == FP_ZERO ||
     341              :                   std::fpclassify(scale) == FP_ZERO,
     342              :                 std::invalid_argument)
      343              :     << "scale factors not set, cannot add";
     344              : 
     345              :   /// @todo check whether the following method has faster execution speed.
     346              :   /// 1. clone input A and B to A_fp32 and B_fp32
     347              :   /// 2. dequantize A_fp32 and B_fp32
     348              :   /// 3. perform addition: A_fp32.add(B_fp32, output_fp32)
     349              :   /// 4. quantize output_fp32
     350            2 :   for (unsigned int b = 0; b < batch(); ++b) {
     351            2 :     for (unsigned int c = 0; c < channel(); ++c) {
     352            5 :       for (unsigned int h = 0; h < height(); ++h) {
     353           20 :         for (unsigned int w = 0; w < width(); ++w) {
     354           16 :           float val = getValue(b, c, h, w) * lhs_scale +
     355           16 :                       input.getValue<int8_t>(b, c, h, w) * rhs_scale;
     356              : 
     357           16 :           output.setValue(
     358              :             b, c, h, w,
     359              :             static_cast<int8_t>(
     360           32 :               std::max(-128, std::min((int)std::lround(val / scale), 127))));
     361              :         }
     362              :       }
     363              :     }
     364              :   }
     365            1 :   *output.getScale<float>() = scale;
     366              : 
     367            1 :   return output;
     368              : }
     369              : 
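The addition above dequantizes each pair of elements with its own scale factor, adds the real values, then requantizes into the output scale; a standalone per-element sketch (not the nntrainer API):

#include <algorithm>
#include <cmath>
#include <cstdint>

// q_out = clamp(round((q_a * a_scale + q_b * b_scale) / out_scale), -128, 127)
int8_t requantized_add(int8_t q_a, float a_scale, int8_t q_b, float b_scale,
                       float out_scale) {
  float real_sum = q_a * a_scale + q_b * b_scale;
  int v = static_cast<int>(std::lround(real_sum / out_scale));
  return static_cast<int8_t>(std::max(-128, std::min(v, 127)));
}
// e.g. q_a = 40 (scale 0.1f, i.e. 4.0f) and q_b = 25 (scale 0.2f, i.e. 5.0f)
// with out_scale = 0.1f give round(9.0f / 0.1f) = 90, i.e. 9.0f
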
     370            1 : void CharTensor::copy(const Tensor &from) {
     371            1 :   reshape(from.getDim());
     372            1 :   copy(from.getData());
     373            1 : }
     374              : 
     375            2 : void CharTensor::copyData(const Tensor &from) {
     376            2 :   NNTR_THROW_IF(!contiguous, std::invalid_argument)
     377              :     << getName() << " is not contiguous, cannot copy.";
     378              : 
     379            2 :   NNTR_THROW_IF(size() != from.size(), std::invalid_argument)
     380              :     << "Size of tensor to copy must match";
     381              : 
     382              :   /// @todo support copy from float32 & float16 to int8 data
     383              :   /// @note this could require scale factor
     384            2 :   switch (from.getDataType()) {
     385              :   case ml::train::TensorDim::DataType::QINT8:
     386            0 :     copy(from.getData());
     387            0 :     break;
     388            2 :   case ml::train::TensorDim::DataType::FP32:
     389            4 :     copy_fp32(from.size(), from.getData<float>(), (int8_t *)getData());
     390            2 :     break;
     391            0 :   default:
     392            0 :     throw std::invalid_argument("Error: Unsupported data type");
     393              :     break;
     394              :   }
     395            2 : }
     396              : 
     397            1 : void CharTensor::copy_with_stride(const Tensor &input, Tensor &output) {
     398            4 :   for (unsigned int b = 0; b < output.batch(); ++b) {
     399            6 :     for (unsigned int c = 0; c < output.channel(); ++c) {
     400           12 :       for (unsigned int h = 0; h < output.height(); ++h) {
     401           54 :         for (unsigned int w = 0; w < output.width(); ++w) {
     402           45 :           output.setValue(b, c, h, w, input.getValue<int8_t>(b, c, h, w));
     403              :         }
     404              :       }
     405              :     }
     406              :   }
     407            1 : }
     408              : 
     409            0 : void CharTensor::save(std::ostream &file) {
     410              :   /// @note Save quantization information
     411            0 :   save_quantization_info(file);
     412              : 
     413            0 :   std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
     414              : 
     415            0 :   NNTR_THROW_IF(sz < 0, std::invalid_argument)
     416            0 :     << "save size: " << getMemoryBytes()
     417              :     << " is too big. It cannot be represented by std::streamsize";
     418              : 
     419            0 :   checkedWrite(file, (char *)getData(), sz,
     420              :                "[CharTensor::save] operation failed");
     421            0 :   putData();
     422            0 : }
     423              : 
     424            0 : void CharTensor::read(std::ifstream &file, size_t start_offset,
     425              :                       bool read_from_offset) {
     426            0 :   if (start_offset == std::numeric_limits<size_t>::max()) {
     427            0 :     start_offset = file_offset;
     428              :   }
     429            0 :   read_quantization_info(file, start_offset, read_from_offset);
     430              : 
     431            0 :   std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
     432              : 
     433            0 :   NNTR_THROW_IF(sz < 0, std::invalid_argument)
     434            0 :     << "read size: " << getMemoryBytes()
     435              :     << " is too big. It cannot be represented by std::streamsize";
     436              : 
     437            0 :   if (read_from_offset) {
     438            0 :     start_offset += sizeof(uint16_t);
     439              :   }
     440              : 
     441            0 :   checkedRead(file, (char *)getData(), sz,
     442              :               "[CharTensor::read] operation failed", start_offset,
     443              :               read_from_offset);
     444            0 :   putData();
     445            0 : }
     446              : 
     447            2 : std::vector<unsigned int> CharTensor::argmax() const {
     448              :   std::vector<unsigned int> result;
     449            2 :   const int8_t *data = (int8_t *)getData();
     450              :   size_t batch_size = batch();
     451            2 :   size_t feature_len = dim.getFeatureLen();
     452              : 
     453            2 :   result.resize(batch_size);
     454              : 
     455            7 :   for (unsigned int b = 0; b < batch_size; b++) {
     456              :     auto max_iter =
     457            5 :       std::max_element(data + b * feature_len, data + (b + 1) * feature_len);
     458            5 :     result[b] = std::distance(data, max_iter) - (b * feature_len);
     459              :   }
     460            2 :   return result;
     461            0 : }
     462              : 
     463            1 : std::vector<unsigned int> CharTensor::argmin() const {
     464              :   std::vector<unsigned int> result;
     465            1 :   const int8_t *data = (int8_t *)getData();
     466              :   size_t batch_size = batch();
     467            1 :   size_t feature_len = dim.getFeatureLen();
     468              : 
     469            1 :   result.resize(batch_size);
     470              : 
     471            3 :   for (unsigned int b = 0; b < batch_size; b++) {
     472              :     auto min_iter =
     473            2 :       std::min_element(data + b * feature_len, data + (b + 1) * feature_len);
     474            2 :     result[b] = std::distance(data, min_iter) - (b * feature_len);
     475              :   }
     476            1 :   return result;
     477            0 : }
     478              : 
     479            3 : float CharTensor::max_abs() const {
     480            3 :   const int8_t *data = (int8_t *)getData();
     481              :   unsigned int idx;
     482              : 
      483            3 :   int max_val = (data[0] >= 0) ? data[0] : -static_cast<int>(data[0]);
      484         5278 :   for (unsigned int i = 1; i < size(); i += 1) {
      485         5275 :     int cur_val = (data[i] >= 0) ? data[i] : -static_cast<int>(data[i]);
     486         5275 :     if (cur_val > max_val) {
     487              :       max_val = cur_val;
     488              :     }
     489              :   }
     490              : 
     491            3 :   return max_val;
     492              : }
     493              : 
     494            1 : float CharTensor::maxValue() const {
     495            1 :   const int8_t *data = (int8_t *)getData();
     496            1 :   return *std::max_element(data, data + size());
     497              : }
     498              : 
     499            3 : float CharTensor::minValue() const {
     500            3 :   const int8_t *data = (int8_t *)getData();
     501            3 :   return *std::min_element(data, data + size());
     502              : }
     503              : 
     504            3 : void CharTensor::print(std::ostream &out) const {
     505            3 :   const int8_t *data = (int8_t *)getData();
     506            3 :   unsigned int len = size();
     507            3 :   out << "data addr: " << reinterpret_cast<const float *>(data) << '\n';
     508            3 :   out << dim;
     509              : 
     510            3 :   if (len > 100) {
     511            1 :     out << '[' << (int)data[0] << ' ' << (int)data[1] << ' ' << (int)data[2]
     512            2 :         << " ... " << (int)data[len - 3] << ' ' << (int)data[len - 2] << ' '
     513            1 :         << (int)data[len - 1] << ']' << std::endl;
     514            1 :     return;
     515              :   }
     516              : 
     517            2 :   std::ios init(NULL);
     518            2 :   init.copyfmt(out);
     519            2 :   if (getFormat() == Tformat::NCHW) {
     520            7 :     for (unsigned int k = 0; k < batch(); k++) {
     521           10 :       for (unsigned int l = 0; l < channel(); l++) {
     522           19 :         for (unsigned int i = 0; i < height(); i++) {
     523           42 :           for (unsigned int j = 0; j < width(); j++) {
     524           28 :             out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
     525              :           }
     526              :           out << std::endl;
     527              :         }
     528              :         out << std::endl;
     529              :       }
     530              :       out << "-------" << std::endl;
     531              :     }
     532              :   } else {
     533            0 :     for (unsigned int k = 0; k < batch(); k++) {
     534            0 :       for (unsigned int i = 0; i < height(); i++) {
     535            0 :         for (unsigned int j = 0; j < width(); j++) {
     536            0 :           for (unsigned int l = 0; l < channel(); l++) {
     537            0 :             out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
     538              :           }
     539              :           out << std::endl;
     540              :         }
     541              :         out << std::endl;
     542              :       }
     543              :       out << "-------" << std::endl;
     544              :     }
     545            0 :     out.copyfmt(init);
     546              :   }
     547              : 
     548              :   /// print quantization information
     549            2 :   const float *q_scales = (float *)getScale();
     550              : 
     551            2 :   if (scale_size() > 50) {
     552            0 :     out << "Scale factors: [" << q_scales[0] << ' ' << q_scales[1] << ' '
      553            0 :         << q_scales[2] << " ... " << q_scales[scale_size() - 3] << ' '
      554            0 :         << q_scales[scale_size() - 2] << ' ' << q_scales[scale_size() - 1] << ']' << std::endl;
     555              :     return;
     556              :   }
     557              : 
     558            2 :   out << "Scale factors: ";
     559            4 :   for (unsigned i = 0; i < scale_size(); ++i) {
     560            2 :     out << q_scales[i] << " ";
     561              :   }
     562              :   out << std::endl;
     563              : }
     564              : 
     565            9 : size_t CharTensor::getMemoryBytes() const {
     566            9 :   return bytes() + scale_size() * sizeof(float);
     567              : }
     568              : 
     569          101 : size_t CharTensor::scale_size() const {
     570          101 :   switch (qscheme) {
     571              :   case QScheme::PER_TENSOR_AFFINE:
     572              :     return 1;
     573              :     break;
     574           15 :   case QScheme::PER_CHANNEL_AFFINE:
     575           15 :     return width();
     576              :     break;
     577              :   default:
     578              :     break;
     579              :   }
     580            0 :   return 0;
     581              : }
     582              : 
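getMemoryBytes() and scale_size() above imply the total footprint: the int8 elements plus one trailing float per scale factor (one factor for PER_TENSOR_AFFINE, width() factors for PER_CHANNEL_AFFINE). A small arithmetic sketch under those assumptions:

#include <cstddef>
#include <cstdint>

// mirrors getMemoryBytes(): int8 payload plus trailing float scale factors
size_t qint8_memory_bytes(size_t element_count, size_t scale_count) {
  return element_count * sizeof(int8_t) + scale_count * sizeof(float);
}
// e.g. a 1x1x4x8 PER_CHANNEL_AFFINE tensor: 32 * 1 + 8 * 4 = 64 bytes
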
     583            6 : QScheme CharTensor::q_scheme() const { return qscheme; }
     584              : 
     585            9 : void CharTensor::copy(const void *buf) {
     586            9 :   NNTR_THROW_IF(!contiguous, std::invalid_argument)
     587              :     << getName() << " is not contiguous, cannot copy.";
     588              : 
     589            9 :   if (buf == getData()) {
     590              :     return;
     591              :   }
     592              : 
     593            9 :   scopy(size(), (int8_t *)buf, 1, (int8_t *)getData(), 1);
     594              : 
     595            9 :   float *scales = (float *)(((int8_t *)buf) + size());
     596            9 :   scopy(scale_size(), scales, 1, (float *)getScale(), 1);
     597              : }
     598              : 
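copy(const void *buf) above reads size() int8 values and then scale_size() floats from the same source buffer, so callers must hand it the packed layout. A sketch of building such a buffer (illustrative helper, not part of nntrainer):

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

// pack quantized values and scale factors into [int8 data][float scales]
std::vector<int8_t> pack_qint8(const std::vector<int8_t> &values,
                               const std::vector<float> &scales) {
  std::vector<int8_t> buf(values.size() + scales.size() * sizeof(float));
  std::copy(values.begin(), values.end(), buf.begin());
  if (!scales.empty())
    std::memcpy(buf.data() + values.size(), scales.data(),
                scales.size() * sizeof(float));
  return buf;
}
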
     599            0 : void CharTensor::save_quantization_info(std::ostream &file) {
     600            0 :   checkedWrite(file, (char *)&qscheme, sizeof(uint16_t),
     601              :                "[CharTensor::save] failed to write quantization information");
     602            0 : }
     603              : 
     604            0 : void CharTensor::read_quantization_info(std::ifstream &file,
     605              :                                         size_t start_offset,
     606              :                                         bool read_from_offset) {
     607            0 :   checkedRead(file, (char *)&qscheme, sizeof(uint16_t),
     608              :               "[CharTensor::read] failed to read quantization information",
     609              :               start_offset, read_from_offset);
     610            0 : }
     611              : 
     612              : } // namespace nntrainer
        

Generated by: LCOV version 2.0-1