Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * @file short_tensor.cpp
4 : * @date 10 January 2025
5 : * @brief This is the ShortTensor class for 16-bit signed integer calculation
6 : * @see https://github.com/nnstreamer/nntrainer
7 : * @author Donghyeon Jeong <dhyeon.jeong@samsung.com>
8 : * @bug No known bugs except for NYI items
9 : */
10 :
11 : #include <iomanip>
12 : #include <iostream>
13 :
14 : #include <cpu_backend.h>
15 : #include <short_tensor.h>
16 : #include <tensor.h>
17 :
18 : namespace nntrainer {
19 :
20 1 : ShortTensor::ShortTensor(std::string name_, Tformat fm, QScheme qscheme_) :
21 2 : TensorBase(name_, fm, Tdatatype::QINT16), qscheme(qscheme_) {}
22 :
23 21 : ShortTensor::ShortTensor(const TensorDim &d, bool alloc_now, Initializer init,
24 21 : std::string name, QScheme qscheme_) :
25 21 : TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
26 21 : if (alloc_now)
27 21 : allocate();
28 21 : }
29 :
30 6 : ShortTensor::ShortTensor(const TensorDim &d, const void *buf,
31 6 : QScheme qscheme_) :
32 6 : ShortTensor(d, true, Initializer::NONE, "", qscheme_) {
33 6 : if (d.getDataLen() != 0) {
34 6 : if (buf != nullptr)
35 1 : copy(buf);
36 : }
37 6 : }
38 :
39 1 : ShortTensor::ShortTensor(
40 : std::vector<std::vector<std::vector<std::vector<int16_t>>>> const &d,
41 1 : std::vector<float> const &scales, Tformat fm, QScheme qscheme_) :
42 2 : qscheme(qscheme_) {
43 1 : if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
44 : throw std::out_of_range(
45 0 : "[Tensor] trying to initialize ShortTensor from empty vector");
46 : }
47 :
48 1 : dim.setTensorDim(0, d.size());
49 1 : if (fm == Tformat::NCHW) {
50 1 : dim.setTensorDim(1, d[0].size());
51 1 : dim.setTensorDim(2, d[0][0].size());
52 1 : dim.setTensorDim(3, d[0][0][0].size());
53 : } else {
54 0 : dim.setTensorDim(2, d[0].size());
55 0 : dim.setTensorDim(3, d[0][0].size());
56 0 : dim.setTensorDim(1, d[0][0][0].size());
57 : }
58 :
59 : dim.setTensorType({fm, Tdatatype::QINT16});
60 :
61 1 : strides = dim.computeStrides();
62 1 : contiguous = true;
63 1 : initializer = Initializer::NONE;
64 :
65 : MemoryData *mem_data = new MemoryData(
66 1 : (void *)(new int16_t[dim.getDataLen() +
67 98 : sizeof(float) / sizeof(int16_t) * scale_size()]()));
68 1 : data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
69 1 : delete[] mem_data->getAddr<int16_t>();
70 1 : delete mem_data;
71 : });
72 :
73 1 : offset = 0;
74 :
75 : // if fm == Tformat::NCHW: dim[0] == batch, dim[1] == channel, dim[2] ==
76 : // height, dim[3] == width; if fm == Tformat::NHWC: dim[0] == batch, dim[1]
77 : // == height, dim[2] == width, dim[3] == channel (see the sketch below)
78 1 : if (fm == Tformat::NCHW) {
79 2 : for (unsigned int i = 0; i < batch(); ++i)
80 4 : for (unsigned int j = 0; j < channel(); ++j)
81 12 : for (unsigned int k = 0; k < height(); ++k)
82 99 : for (unsigned int l = 0; l < width(); ++l)
83 90 : this->setValue(i, j, k, l, d[i][j][k][l]);
84 : } else {
85 0 : for (unsigned int i = 0; i < batch(); ++i)
86 0 : for (unsigned int j = 0; j < height(); ++j)
87 0 : for (unsigned int k = 0; k < width(); ++k)
88 0 : for (unsigned int l = 0; l < channel(); ++l)
89 0 : this->setValue(i, l, j, k, d[i][j][k][l]);
90 : }
91 :
92 : // copy scale factors
93 1 : scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1);
94 1 : }
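
// Illustrative sketch (editorial addition, not part of the covered source):
// how the nested-vector constructor above lays out its input. The type,
// format, and scheme names come from the code above; the values themselves
// are hypothetical.
//
//   std::vector<std::vector<std::vector<std::vector<int16_t>>>> d =
//     {{{{1, 2, 3}, {4, 5, 6}}}};           // NCHW shape 1 x 1 x 2 x 3
//   std::vector<float> scales = {0.02f};    // PER_TENSOR_AFFINE -> 1 scale
//   ShortTensor t(d, scales, Tformat::NCHW, QScheme::PER_TENSOR_AFFINE);
//   // t.getValue(0, 0, 1, 2) == 6 and *(float *)t.getScale() == 0.02f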
95 :
96 4 : bool ShortTensor::operator==(const ShortTensor &rhs) const {
97 4 : if (qscheme != rhs.qscheme)
98 : return false;
99 :
100 : // compare quantized data
101 4 : const int16_t *_data = (int16_t *)getData();
102 4 : const int16_t *_rdata = (int16_t *)rhs.getData();
103 474 : for (size_t i = 0; i < size(); ++i) {
104 470 : if (_data[i] != _rdata[i])
105 : return false;
106 : }
107 :
108 : // compare scale factors
109 4 : const float *_scales = (float *)getScale();
110 4 : const float *_rscales = (float *)rhs.getScale();
111 8 : for (size_t i = 0; i < scale_size(); ++i) {
112 4 : if (std::fabs(_scales[i] - _rscales[i]) > 1e-5)
113 : return false;
114 : }
115 :
116 : return true;
117 : }
118 :
119 21 : void ShortTensor::allocate() {
120 21 : if (empty() || data)
121 : return;
122 :
123 21 : if (src_tensor) {
124 : /// allocate data based on the source tensor
125 0 : allocateSrcTensor();
126 : /** as this memory is shared, do NOT initialize */
127 : } else {
128 : /// allocate new memory for the tensor data
129 : MemoryData *mem_data;
130 :
131 : mem_data = new MemoryData(
132 21 : (void *)(new int16_t[dim.getDataLen() +
133 1111 : sizeof(float) / sizeof(int16_t) * scale_size()]{}));
134 21 : data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
135 21 : delete[] mem_data->template getAddr<int16_t>();
136 21 : delete mem_data;
137 : });
138 :
139 21 : offset = 0;
140 21 : initialize();
141 : }
142 : }
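
// Memory layout note (editorial addition): allocate() and the constructor
// above keep the quantized data and the scale factors in one int16_t
// allocation, with getScale() pointing right after the data. A worked
// example with hypothetical dimensions, assuming PER_TENSOR_AFFINE:
//
//   TensorDim 1 x 2 x 3 x 5   -> dim.getDataLen() == 30 int16_t elements
//   scale_size() == 1         -> sizeof(float) / sizeof(int16_t) * 1 == 2
//   allocation: new int16_t[30 + 2]            // 32 slots == 64 bytes
//   getScale() == (int16_t *)getData() + 30    // last 2 slots hold 1 float
//   getMemoryBytes() == bytes() + scale_size() * sizeof(float) == 60 + 4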
143 :
144 0 : void ShortTensor::deallocate() {
145 : data = nullptr;
146 0 : offset = 0;
147 0 : }
148 :
149 770 : void *ShortTensor::getData() const {
150 770 : if (!data)
151 : return nullptr;
152 :
153 : data->validate();
154 770 : return data->getAddr<int16_t>() + offset;
155 : }
156 :
157 1 : void *ShortTensor::getData(size_t idx) const {
158 1 : if (!data)
159 : return nullptr;
160 :
161 : data->validate();
162 1 : return data->getAddr<int16_t>() + offset + idx;
163 : }
164 :
165 15 : void *ShortTensor::getScale() const {
166 15 : if (!data)
167 : return nullptr;
168 :
169 : data->validate();
170 15 : return ((int16_t *)getData()) + size();
171 : }
172 :
173 0 : void *ShortTensor::getScale(size_t idx) const {
174 0 : NNTR_THROW_IF(idx > scale_size(), std::invalid_argument)
175 : << "Tensor::getScale() index is not valid";
176 :
177 0 : if (!data)
178 : return nullptr;
179 :
180 : data->validate();
181 0 : return ((float *)getScale()) + idx;
182 : }
183 :
184 0 : void *ShortTensor::getAddress(unsigned int i) {
185 0 : size_t index = getIndex(batch(), channel(), height(), width());
186 0 : if (i > index) {
187 : return nullptr;
188 : }
189 0 : return &((int16_t *)getData())[i];
190 : }
191 :
192 0 : const void *ShortTensor::getAddress(unsigned int i) const {
193 0 : size_t index = getIndex(batch(), channel(), height(), width());
194 0 : if (i > index) {
195 : return nullptr;
196 : }
197 0 : return &((int16_t *)getData())[i];
198 : }
199 :
200 4 : const int16_t &ShortTensor::getValue(unsigned int i) const {
201 4 : return ((int16_t *)getData())[i];
202 : }
203 :
204 29 : int16_t &ShortTensor::getValue(unsigned int i) {
205 29 : return ((int16_t *)getData())[i];
206 : }
207 :
208 4 : const int16_t &ShortTensor::getValue(unsigned int b, unsigned int c,
209 : unsigned int h, unsigned int w) const {
210 4 : return getValue(getIndex(b, c, h, w));
211 : }
212 :
213 5 : int16_t &ShortTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
214 : unsigned int w) {
215 5 : return getValue(getIndex(b, c, h, w));
216 : }
217 :
218 7 : void ShortTensor::setValue(float value) {
219 7 : int16_t *data = (int16_t *)getData();
220 7 : std::fill(data, data + size(), static_cast<int16_t>(value));
221 7 : }
222 :
223 1 : void ShortTensor::addValue(unsigned int b, unsigned int c, unsigned int h,
224 : unsigned int w, float value, float beta) {
225 1 : auto const &idx = getIndex(b, c, h, w);
226 1 : float output = ((int16_t *)getData())[idx];
227 1 : output *= beta;
228 1 : output += value;
229 :
230 1 : ((int16_t *)getData())[idx] = static_cast<int16_t>(std::trunc(output));
231 1 : }
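
// Worked example (editorial addition) for addValue() above, with
// hypothetical inputs: if the stored value at (b, c, h, w) is 5, beta is
// 0.5f, and value is 2.3f, then output = 5 * 0.5 + 2.3 = 4.8, and
// std::trunc(4.8) stores 4.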
232 :
233 601 : void ShortTensor::setValue(unsigned int b, unsigned int c, unsigned int h,
234 : unsigned int w, float value) {
235 601 : ((int16_t *)getData())[getIndex(b, c, h, w)] = static_cast<int16_t>(value);
236 601 : }
237 :
238 5 : void ShortTensor::setZero() {
239 : /// @todo replace with apply_i or scal
240 5 : setValue(0);
241 5 : }
242 :
243 24 : void ShortTensor::initialize() {
244 24 : if (empty() || !isAllocated())
245 : return;
246 :
247 : /// @note Sampling from the normal/uniform distribution is invalid
248 24 : switch (initializer) {
249 4 : case Initializer::ZEROS:
250 4 : setZero();
251 4 : break;
252 2 : case Initializer::ONES:
253 2 : setValue(1.0f);
254 2 : break;
255 : case Initializer::NONE:
256 : break;
257 0 : default:
258 0 : throw std::invalid_argument("Initializer not valid for " +
259 0 : getStringDataType());
260 : break;
261 : }
262 :
263 24 : putData();
264 : }
265 :
266 0 : void ShortTensor::initialize(Initializer init) {
267 0 : initializer = init;
268 0 : initialize();
269 0 : }
270 :
271 1 : void ShortTensor::copy(const Tensor &from) {
272 1 : reshape(from.getDim());
273 1 : copy(from.getData());
274 1 : }
275 :
276 0 : void ShortTensor::copyData(const Tensor &from) {
277 0 : NNTR_THROW_IF(!contiguous, std::invalid_argument)
278 : << getName() << " is not contiguous, cannot copy.";
279 :
280 0 : NNTR_THROW_IF(size() != from.size(), std::invalid_argument)
281 : << "Size of tensor to copy must match";
282 :
283 : /// @todo support copy from other data types
284 0 : switch (from.getDataType()) {
285 : case ml::train::TensorDim::DataType::QINT16:
286 0 : copy(from.getData());
287 0 : break;
288 0 : case ml::train::TensorDim::DataType::FP32:
289 0 : copy_fp32(from.size(), from.getData<float>(), (int16_t *)getData());
290 0 : break;
291 0 : default:
292 0 : throw std::invalid_argument("Error: Unsupported data type");
293 : break;
294 : }
295 0 : }
296 :
297 0 : void ShortTensor::copy_with_stride(const Tensor &input, Tensor &output) {
298 0 : for (unsigned int b = 0; b < output.batch(); ++b) {
299 0 : for (unsigned int c = 0; c < output.channel(); ++c) {
300 0 : for (unsigned int h = 0; h < output.height(); ++h) {
301 0 : for (unsigned int w = 0; w < output.width(); ++w) {
302 0 : output.setValue(b, c, h, w, input.getValue<int16_t>(b, c, h, w));
303 : }
304 : }
305 : }
306 : }
307 0 : }
308 :
309 1 : void ShortTensor::save(std::ostream &file) {
310 : /// @note Save quantization information
311 1 : save_quantization_info(file);
312 :
313 1 : std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
314 :
315 1 : NNTR_THROW_IF(sz < 0, std::invalid_argument)
316 0 : << "save size: " << getMemoryBytes()
317 : << " is too big. It cannot be represented by std::streamsize";
318 :
319 1 : checkedWrite(file, (char *)getData(), sz,
320 : "[ShortTensor::save] operation failed");
321 1 : putData();
322 1 : }
323 :
324 1 : void ShortTensor::read(std::ifstream &file, size_t start_offset,
325 : bool read_from_offset) {
326 1 : if (start_offset == std::numeric_limits<size_t>::max()) {
327 0 : start_offset = file_offset;
328 : }
329 1 : read_quantization_info(file, start_offset, read_from_offset);
330 :
331 1 : std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
332 :
333 1 : NNTR_THROW_IF(sz < 0, std::invalid_argument)
334 0 : << "read size: " << getMemoryBytes()
335 : << " is too big. It cannot be represented by std::streamsize";
336 :
337 1 : if (read_from_offset) {
338 0 : start_offset += sizeof(uint16_t);
339 : }
340 :
341 1 : checkedRead(file, (char *)getData(), sz,
342 : "[ShortTensor::read] operation failed", start_offset,
343 : read_from_offset);
344 1 : putData();
345 1 : }
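
// Serialized layout (editorial addition), as produced by save() and read
// back by read() above; the sizes follow directly from the code:
//
//   [ qscheme        : sizeof(uint16_t) bytes             ]
//   [ quantized data : size() * sizeof(int16_t) bytes     ]
//   [ scale factors  : scale_size() * sizeof(float) bytes ]
//
// i.e. sizeof(uint16_t) + getMemoryBytes() bytes in total; data and scales
// are written in a single block because they are contiguous in memory.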
346 :
347 1 : std::vector<unsigned int> ShortTensor::argmax() const {
348 : std::vector<unsigned int> result;
349 1 : const int16_t *data = (int16_t *)getData();
350 : size_t batch_size = batch();
351 1 : size_t feature_len = dim.getFeatureLen();
352 :
353 1 : result.resize(batch_size);
354 :
355 3 : for (unsigned int b = 0; b < batch_size; b++) {
356 : auto max_iter =
357 2 : std::max_element(data + b * feature_len, data + (b + 1) * feature_len);
358 2 : result[b] = std::distance(data, max_iter) - (b * feature_len);
359 : }
360 1 : return result;
361 0 : }
362 :
363 1 : std::vector<unsigned int> ShortTensor::argmin() const {
364 : std::vector<unsigned int> result;
365 1 : const int16_t *data = (int16_t *)getData();
366 : size_t batch_size = batch();
367 1 : size_t feature_len = dim.getFeatureLen();
368 :
369 1 : result.resize(batch_size);
370 :
371 3 : for (unsigned int b = 0; b < batch_size; b++) {
372 : auto min_iter =
373 2 : std::min_element(data + b * feature_len, data + (b + 1) * feature_len);
374 2 : result[b] = std::distance(data, min_iter) - (b * feature_len);
375 : }
376 1 : return result;
377 0 : }
378 :
379 1 : float ShortTensor::max_abs() const {
380 1 : const int16_t *data = (int16_t *)getData();
381 : unsigned int idx;
382 :
383 1 : int16_t max_val = data[0];
384 4 : for (unsigned int i = 1; i < size(); i += 1) {
385 3 : int16_t cur_val = (data[i] >= 0) ? data[i] : -1 * data[i];
386 3 : if (cur_val > max_val) {
387 : max_val = cur_val;
388 : }
389 : }
390 :
391 1 : return max_val;
392 : }
393 :
394 1 : float ShortTensor::maxValue() const {
395 1 : const int16_t *data = (int16_t *)getData();
396 1 : return *std::max_element(data, data + size());
397 : }
398 :
399 1 : float ShortTensor::minValue() const {
400 1 : const int16_t *data = (int16_t *)getData();
401 1 : return *std::min_element(data, data + size());
402 : }
403 :
404 1 : void ShortTensor::print(std::ostream &out) const {
405 1 : const int16_t *data = (int16_t *)getData();
406 1 : unsigned int len = size();
407 1 : out << "data addr: " << reinterpret_cast<const float *>(data) << '\n';
408 1 : out << dim;
409 :
410 1 : if (len > 512) {
411 0 : out << '[' << (int)data[0] << ' ' << (int)data[1] << ' ' << (int)data[2]
412 0 : << " ... " << (int)data[len - 3] << ' ' << (int)data[len - 2] << ' '
413 0 : << (int)data[len - 1] << ']' << std::endl;
414 0 : return;
415 : }
416 :
417 1 : std::ios init(NULL);
418 1 : init.copyfmt(out);
419 1 : if (getFormat() == Tformat::NCHW) {
420 2 : for (unsigned int k = 0; k < batch(); k++) {
421 2 : for (unsigned int l = 0; l < channel(); l++) {
422 3 : for (unsigned int i = 0; i < height(); i++) {
423 6 : for (unsigned int j = 0; j < width(); j++) {
424 4 : out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
425 : }
426 : out << std::endl;
427 : }
428 : out << std::endl;
429 : }
430 : out << "-------" << std::endl;
431 : }
432 : } else {
433 0 : for (unsigned int k = 0; k < batch(); k++) {
434 0 : for (unsigned int i = 0; i < height(); i++) {
435 0 : for (unsigned int j = 0; j < width(); j++) {
436 0 : for (unsigned int l = 0; l < channel(); l++) {
437 0 : out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
438 : }
439 : out << std::endl;
440 : }
441 : out << std::endl;
442 : }
443 : out << "-------" << std::endl;
444 : }
445 0 : out.copyfmt(init);
446 : }
447 :
448 : /// print quantization information
449 1 : const float *q_scales = (float *)getScale();
450 :
451 1 : if (scale_size() > 50) {
452 0 : out << "Scale factors: [" << q_scales[0] << ' ' << q_scales[1] << ' '
453 0 : << q_scales[2] << " ... " << q_scales[scale_size() - 3] << ' '
454 0 : << q_scales[scale_size() - 2] << ' ' << q_scales[scale_size() - 1] << ']' << std::endl;
455 : return;
456 : }
457 :
458 1 : out << "Scale factors: ";
459 2 : for (unsigned i = 0; i < scale_size(); ++i) {
460 1 : out << q_scales[i] << " ";
461 : }
462 : out << std::endl;
463 : }
464 :
465 3 : size_t ShortTensor::getMemoryBytes() const {
466 3 : return bytes() + scale_size() * sizeof(float);
467 : }
468 :
469 41 : size_t ShortTensor::scale_size() const {
470 41 : switch (qscheme) {
471 : case QScheme::PER_TENSOR_AFFINE:
472 : return 1;
473 : break;
474 2 : case QScheme::PER_CHANNEL_AFFINE:
475 2 : return height();
476 : break;
477 : default:
478 : break;
479 : }
480 0 : return 0;
481 : }
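
// Example (editorial addition) of how scale_size() above depends on the
// quantization scheme, for a hypothetical 1 x 1 x 8 x 16 tensor:
//
//   QScheme::PER_TENSOR_AFFINE  -> 1 scale factor for the whole tensor
//   QScheme::PER_CHANNEL_AFFINE -> height() == 8 scale factors
//   any other scheme            -> 0 (no scale factors are stored)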
482 :
483 0 : QScheme ShortTensor::q_scheme() const { return qscheme; }
484 :
485 2 : void ShortTensor::copy(const void *buf) {
486 2 : NNTR_THROW_IF(!contiguous, std::invalid_argument)
487 : << getName() << " is not contiguous, cannot copy.";
488 :
489 2 : if (buf == getData()) {
490 : return;
491 : }
492 :
493 2 : copy_s16(size(), (int16_t *)buf, (int16_t *)getData());
494 :
495 2 : float *scales = (float *)(((int16_t *)buf) + size());
496 2 : scopy(scale_size(), scales, 1, (float *)getScale(), 1);
497 : }
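
// Note (editorial addition): copy(const void *buf) above expects buf to use
// the same packed layout as this tensor's own buffer, i.e. size() int16_t
// values immediately followed by scale_size() float scale factors.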
498 :
499 1 : void ShortTensor::save_quantization_info(std::ostream &file) {
500 1 : checkedWrite(file, (char *)&qscheme, sizeof(uint16_t),
501 : "[ShortTensor::save] failed to write quantization information");
502 1 : }
503 1 : void ShortTensor::read_quantization_info(std::ifstream &file,
504 : size_t start_offset,
505 : bool read_from_offset) {
506 1 : checkedRead(file, (char *)&qscheme, sizeof(uint16_t),
507 : "[ShortTensor::read] failed to read quantization information",
508 : start_offset, read_from_offset);
509 1 : }
510 :
511 : } // namespace nntrainer