Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * @file uint4_tensor.cpp
4 : * @date 20 March 2025
5 : * @brief This is Uint4QTensor class for quantized 4-bit unsigned integer
6 : * calculation
7 : * @see https://github.com/nnstreamer/nntrainer
8 : * @author Donghyeon Jeong <dhyeon.jeong@samsung.com>
9 : * @bug No known bugs except for NYI items
10 : */
11 :
12 : #include <iomanip>
13 : #include <iostream>
14 :
15 : #include <cpu_backend.h>
16 : #include <tensor.h>
17 : #include <uint4_tensor.h>
18 :
19 : namespace nntrainer {
20 :
21 0 : Uint4QTensor::Uint4QTensor(std::string name_, Tformat fm, QScheme qscheme_) :
22 0 : TensorBase(name_, fm, Tdatatype::UINT4), qscheme(qscheme_) {}
23 :
24 8 : Uint4QTensor::Uint4QTensor(const TensorDim &d, bool alloc_now, Initializer init,
25 8 : std::string name, QScheme qscheme_) :
26 8 : TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
27 8 : if (alloc_now)
28 0 : allocate();
29 8 : }
30 :
31 0 : Uint4QTensor::Uint4QTensor(const TensorDim &d, const void *buf,
32 0 : QScheme qscheme_) :
33 0 : Uint4QTensor(d, true, Initializer::NONE, "", qscheme_) {
34 0 : if (d.getDataLen() != 0) {
35 0 : if (buf != nullptr)
36 0 : copy(buf);
37 : }
38 0 : }
39 :
40 0 : Uint4QTensor::Uint4QTensor(
41 : std::vector<std::vector<std::vector<std::vector<uint8_t>>>> const &d,
42 : std::vector<float> const &scales,
43 0 : std::vector<unsigned int> const &zero_points, Tformat fm, QScheme qscheme_) :
44 0 : qscheme(qscheme_) {
45 0 : if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
46 : throw std::out_of_range(
47 0 : "[Tensor] trying to initialize Uint4QTensor from empty vector");
48 : }
49 :
50 0 : NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
51 : << "invalid scale factor size " << scales.size();
52 :
53 0 : dim.setTensorDim(0, d.size());
54 0 : if (fm == Tformat::NCHW) {
55 0 : dim.setTensorDim(1, d[0].size());
56 0 : dim.setTensorDim(2, d[0][0].size());
57 0 : dim.setTensorDim(3, d[0][0][0].size());
58 : } else {
59 0 : dim.setTensorDim(2, d[0].size());
60 0 : dim.setTensorDim(3, d[0][0].size());
61 0 : dim.setTensorDim(1, d[0][0][0].size());
62 : }
63 :
64 : dim.setTensorType({fm, Tdatatype::UINT4});
65 :
66 0 : strides = dim.computeStrides();
67 0 : contiguous = true;
68 0 : initializer = Initializer::NONE;
69 0 : qscheme = qscheme_;
70 :
71 : /// @note sizeof(float) * scale_size() assumes scale factors are in
72 : /// full-precision fp.
73 : MemoryData *mem_data = new MemoryData((
74 : void
75 0 : *)(new uint8_t[(dim.getDataLen() + 1) / 2 + sizeof(float) * scale_size() +
76 0 : sizeof(unsigned int) * scale_size()]()));
77 0 : data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *ptr) {
78 0 : delete[] ptr->getAddr<uint8_t>();
79 0 : delete ptr;
80 : });
81 :
82 0 : offset = 0;
83 :
84 0 : if (fm == Tformat::NCHW) {
85 0 : for (unsigned int i = 0; i < batch(); ++i)
86 0 : for (unsigned int j = 0; j < channel(); ++j)
87 0 : for (unsigned int k = 0; k < height(); ++k)
88 0 : for (unsigned int l = 0; l < width(); ++l)
89 0 : this->setValue(i, j, k, l, d[i][j][k][l]);
90 : } else {
91 0 : for (unsigned int i = 0; i < batch(); ++i)
92 0 : for (unsigned int j = 0; j < height(); ++j)
93 0 : for (unsigned int k = 0; k < width(); ++k)
94 0 : for (unsigned int l = 0; l < channel(); ++l)
95 0 : this->setValue(i, l, j, k, d[i][j][k][l]);
96 : }
97 :
98 : // copy scale factors
99 0 : scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1);
100 :
101 0 : unsigned int *zps = getZeroPoint();
102 :
103 : // copy zero points
104 0 : for (size_t i = 0; i < zero_points.size(); ++i) {
105 0 : zps[i] = zero_points[i];
106 : }
107 0 : }
108 :
109 1 : bool Uint4QTensor::operator==(const Uint4QTensor &rhs) const {
110 1 : if (qscheme != rhs.qscheme)
111 : return false;
112 :
113 : // compare quantized data
114 1 : const uint8_t *_data = (uint8_t *)getData();
115 1 : const uint8_t *_rdata = (uint8_t *)rhs.getData();
116 110593 : for (size_t i = 0; i < (size() + 1) / 2; ++i) {
117 110592 : if (_data[i] != _rdata[i])
118 : return false;
119 : }
120 :
121 : // compare scale factors
122 1 : const float *_scales = (float *)getScale();
123 1 : const float *_rscales = (float *)rhs.getScale();
124 1 : for (size_t i = 0; i < scale_size(); ++i) {
125 0 : if (std::fabs(_scales[i] - _rscales[i]) > 1e-5)
126 : return false;
127 : }
128 :
129 : return true;
130 : }
131 :
132 0 : void Uint4QTensor::allocate() {
133 0 : if (empty() || data)
134 : return;
135 :
136 0 : if (src_tensor) {
137 : /// allocate data based on the source tensor
138 0 : allocateSrcTensor();
139 : /** as this memory is shared, do NOT initialize */
140 : } else {
141 : /// allocate new memory for the tensor data
142 : MemoryData *mem_data;
143 :
144 : /// quantized 4-bit is stored as a 8-bit signed integer (uint4x2)
145 : mem_data = new MemoryData(
146 0 : (void *)(new uint8_t[(dim.getDataLen() + 1) / 2 +
147 0 : sizeof(float) * scale_size() +
148 0 : sizeof(unsigned int) * scale_size()]{}));
149 0 : data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
150 0 : delete[] mem_data->template getAddr<uint8_t>();
151 0 : delete mem_data;
152 : });
153 :
154 0 : offset = 0;
155 0 : initialize();
156 : }
157 : }
158 :
159 0 : void Uint4QTensor::deallocate() {
160 : data = nullptr;
161 0 : offset = 0;
162 0 : }
163 :
164 16 : void *Uint4QTensor::getData() const {
165 16 : if (!data)
166 : return nullptr;
167 :
168 : data->validate();
169 15 : return data->getAddr<uint8_t>() + offset;
170 : }
171 :
172 0 : void *Uint4QTensor::getData(size_t idx) const {
173 0 : if (!data)
174 : return nullptr;
175 :
176 : data->validate();
177 0 : return data->getAddr<uint8_t>() + offset + (idx / 2);
178 : }
179 :
180 2 : void *Uint4QTensor::getScale() const {
181 2 : if (!data)
182 : return nullptr;
183 :
184 : data->validate();
185 2 : return ((uint8_t *)getData()) + (size() + 1) / 2;
186 : }
187 :
188 0 : void *Uint4QTensor::getScale(size_t idx) const {
189 0 : NNTR_THROW_IF(idx > scale_size(), std::invalid_argument)
190 : << "Tensor::getScale() index is not valid";
191 :
192 0 : if (!data)
193 : return nullptr;
194 :
195 : data->validate();
196 0 : return ((float *)getScale()) + idx;
197 : }
198 :
199 0 : unsigned int *Uint4QTensor::getZeroPoint() const {
200 0 : if (!data)
201 : return nullptr;
202 :
203 : data->validate();
204 0 : return ((unsigned int *)((float *)((uint8_t *)getData() +
205 0 : ((size() + 1) / 2)))) +
206 0 : scale_size();
207 : }
208 :
209 0 : unsigned int *Uint4QTensor::getZeroPoint(size_t idx) const {
210 0 : NNTR_THROW_IF(idx > scale_size(), std::invalid_argument)
211 : << "Uint4QTensor::getZeroPoint() index is not valid";
212 :
213 0 : if (!data)
214 : return nullptr;
215 :
216 : data->validate();
217 0 : return (((unsigned int *)((float *)((uint8_t *)getData() +
218 0 : ((size() + 1) / 2)))) +
219 0 : scale_size()) +
220 0 : idx;
221 : }
222 :
223 0 : void *Uint4QTensor::getAddress(unsigned int i) {
224 0 : size_t index = getIndex(batch(), channel(), height(), width());
225 0 : if (i > index) {
226 : return nullptr;
227 : }
228 0 : return &((uint8_t *)getData())[i / 2];
229 : }
230 :
231 0 : const void *Uint4QTensor::getAddress(unsigned int i) const {
232 0 : size_t index = getIndex(batch(), channel(), height(), width());
233 0 : if (i > index) {
234 : return nullptr;
235 : }
236 0 : return &((uint8_t *)getData())[i / 2];
237 : }
238 :
239 0 : const uint8_t Uint4QTensor::getValue(unsigned int i) const {
240 0 : uint8_t value = ((uint8_t *)getData())[i / 2];
241 0 : return (i % 2 == 0) ? ((value >> 4) & 0xF) : (value & 0x0F);
242 : }
243 :
244 0 : uint8_t Uint4QTensor::getValue(unsigned int i) {
245 0 : uint8_t value = ((uint8_t *)getData())[i / 2];
246 0 : return (i % 2 == 0) ? ((value >> 4) & 0xF) : (value & 0x0F);
247 : }
248 :
249 0 : const uint8_t Uint4QTensor::getValue(unsigned int b, unsigned int c,
250 : unsigned int h, unsigned int w) const {
251 0 : return getValue(getIndex(b, c, h, w));
252 : }
253 :
254 0 : uint8_t Uint4QTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
255 : unsigned int w) {
256 0 : return getValue(getIndex(b, c, h, w));
257 : }
258 :
259 : /// @todo this func should be template function
260 0 : void Uint4QTensor::setValue(float value) {
261 0 : NNTR_THROW_IF(value > 15 || value < 0, std::out_of_range)
262 : << "Value must be in range [0, 15]. Input value: " << value;
263 :
264 0 : uint8_t val = static_cast<uint8_t>(value);
265 0 : uint8_t *data = (uint8_t *)getData();
266 0 : std::fill(data, data + (size() + 1) / 2, (val << 4) | (val & 0x0f));
267 0 : }
268 :
269 : /// @todo this func should be template function
270 0 : void Uint4QTensor::addValue(unsigned int b, unsigned int c, unsigned int h,
271 : unsigned int w, float value, float beta) {
272 0 : auto const &idx = getIndex(b, c, h, w);
273 0 : float output = getValue(idx);
274 0 : output *= beta;
275 0 : output += value;
276 :
277 : // if result value is out of range, clamp to max/min value
278 : uint8_t val =
279 0 : static_cast<uint8_t>(std::trunc(std::clamp((int)output, 0, 15)));
280 :
281 : // encode result value to uint8 data
282 0 : ((uint8_t *)getData())[idx / 2] =
283 0 : (idx % 2 == 0) ? (val << 4) | (((uint8_t *)getData())[idx / 2] & 0x0f)
284 0 : : (((uint8_t *)getData())[idx / 2] & 0xf0) | (val & 0x0f);
285 0 : }
286 :
287 : /// @todo this func should be template function
288 0 : void Uint4QTensor::setValue(unsigned int b, unsigned int c, unsigned int h,
289 : unsigned int w, float value) {
290 0 : NNTR_THROW_IF(value > 15 || value < 0, std::out_of_range)
291 : << "Value must be in range [0, 15]. Input value: " << value;
292 :
293 0 : auto const &idx = getIndex(b, c, h, w);
294 0 : uint8_t val = static_cast<uint8_t>(value);
295 :
296 0 : ((uint8_t *)getData())[idx / 2] =
297 0 : (idx % 2 == 0) ? (val << 4) | (((uint8_t *)getData())[idx / 2] & 0x0f)
298 0 : : (((uint8_t *)getData())[idx / 2] & 0xf0) | (val & 0x0f);
299 0 : }
300 :
301 0 : void Uint4QTensor::setZero() {
302 : /// @todo accelerate with SIMD
303 0 : setValue(0);
304 0 : }
305 :
306 6 : void Uint4QTensor::initialize() {
307 6 : if (empty() || !isAllocated())
308 : return;
309 :
310 : /// @note Sampling from the normal/uniform distribution is invalid
311 6 : switch (initializer) {
312 0 : case Initializer::ZEROS:
313 0 : setZero();
314 0 : break;
315 0 : case Initializer::ONES:
316 0 : setValue(1.0f);
317 0 : break;
318 : case Initializer::NONE:
319 : break;
320 0 : default:
321 : throw std::invalid_argument(
322 0 : "Initializer other than zero and one is not valid for " +
323 0 : getStringDataType());
324 : break;
325 : }
326 :
327 6 : putData();
328 : }
329 :
330 0 : void Uint4QTensor::initialize(Initializer init) {
331 0 : initializer = init;
332 0 : initialize();
333 0 : }
334 :
335 0 : void Uint4QTensor::copy(const Tensor &from) {
336 0 : reshape(from.getDim());
337 0 : copy(from.getData());
338 0 : }
339 :
340 0 : void Uint4QTensor::copyData(const Tensor &from) {
341 0 : NNTR_THROW_IF(!contiguous, std::invalid_argument)
342 : << getName() << " is not contiguous, cannot copy.";
343 :
344 0 : NNTR_THROW_IF(size() != from.size(), std::invalid_argument)
345 : << "Size of the tensor to copy must match.";
346 :
347 : /// @todo support copy from float32 & float16 to uint8 data
348 0 : switch (from.getDataType()) {
349 : case ml::train::TensorDim::DataType::UINT4:
350 0 : copy(from.getData());
351 : break;
352 0 : default:
353 0 : throw std::invalid_argument("Error: Unsupported data type");
354 : break;
355 : }
356 0 : }
357 :
358 0 : void Uint4QTensor::copy_with_stride(const Tensor &input, Tensor &output) {
359 0 : for (unsigned int b = 0; b < output.batch(); ++b) {
360 0 : for (unsigned int c = 0; c < output.channel(); ++c) {
361 0 : for (unsigned int h = 0; h < output.height(); ++h) {
362 0 : for (unsigned int w = 0; w < output.width(); ++w) {
363 0 : output.setValue(b, c, h, w, input.getValue<uint8_t>(b, c, h, w));
364 : }
365 : }
366 : }
367 : }
368 0 : }
369 :
370 2 : void Uint4QTensor::save(std::ostream &file) {
371 : /// @note Save quantization information
372 2 : save_quantization_info(file);
373 :
374 2 : std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
375 :
376 2 : NNTR_THROW_IF(sz < 0, std::invalid_argument)
377 0 : << "save size: " << getMemoryBytes()
378 : << " is too big. It cannot be represented by std::streamsize";
379 :
380 2 : checkedWrite(file, (char *)getData(), sz,
381 : "[Uint4QTensor::save] operation failed");
382 2 : putData();
383 2 : }
384 :
385 2 : void Uint4QTensor::read(std::ifstream &file, size_t start_offset,
386 : bool read_from_offset) {
387 2 : if (start_offset == std::numeric_limits<size_t>::max()) {
388 0 : start_offset = file_offset;
389 : }
390 2 : read_quantization_info(file, start_offset, read_from_offset);
391 :
392 2 : std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
393 :
394 2 : NNTR_THROW_IF(sz < 0, std::invalid_argument)
395 0 : << "read size: " << getMemoryBytes()
396 : << " is too big. It cannot be represented by std::streamsize";
397 :
398 2 : if (read_from_offset) {
399 0 : start_offset += sizeof(uint16_t);
400 : }
401 :
402 2 : checkedRead(file, (char *)getData(), sz,
403 : "[Uint4QTensor::read] operation failed", start_offset,
404 : read_from_offset);
405 2 : putData();
406 2 : }
407 :
408 0 : void Uint4QTensor::read(ReadSource src, size_t start_offset,
409 : bool read_from_offset) {
410 0 : if (start_offset == std::numeric_limits<size_t>::max()) {
411 0 : start_offset = file_offset;
412 : }
413 0 : read_quantization_info(src, start_offset, read_from_offset);
414 :
415 0 : std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());
416 :
417 0 : NNTR_THROW_IF(sz < 0, std::invalid_argument)
418 0 : << "read size: " << getMemoryBytes()
419 : << " is too big. It cannot be represented by std::streamsize";
420 :
421 0 : if (read_from_offset) {
422 0 : start_offset += sizeof(uint16_t);
423 : }
424 :
425 0 : checkedRead(src, (char *)getData(), sz,
426 : "[Uint4QTensor::read] operation failed", start_offset,
427 : read_from_offset);
428 0 : putData();
429 0 : }
430 :
431 0 : std::vector<unsigned int> Uint4QTensor::argmax() const {
432 : std::vector<unsigned int> result;
433 0 : const uint8_t *data = (uint8_t *)getData();
434 : size_t batch_size = batch();
435 0 : size_t feature_len = dim.getFeatureLen();
436 0 : result.resize(batch_size);
437 :
438 0 : for (unsigned int b = 0; b < batch_size; ++b) {
439 : uint8_t curr_val, max_val = 0;
440 : unsigned int max_element_idx = 0;
441 0 : for (unsigned int idx = 0; idx < feature_len; ++idx) {
442 0 : curr_val = getValue(idx + b * feature_len);
443 :
444 0 : if (curr_val > max_val) {
445 : max_val = curr_val;
446 : max_element_idx = idx;
447 : }
448 : }
449 0 : result[b] = max_element_idx;
450 : }
451 0 : return result;
452 0 : }
453 :
454 0 : std::vector<unsigned int> Uint4QTensor::argmin() const {
455 : std::vector<unsigned int> result;
456 0 : const uint8_t *data = (uint8_t *)getData();
457 : size_t batch_size = batch();
458 0 : size_t feature_len = dim.getFeatureLen();
459 0 : result.resize(batch_size);
460 :
461 0 : for (unsigned int b = 0; b < batch_size; ++b) {
462 : uint8_t curr_val, min_val = 15;
463 : unsigned int min_element_idx = 0;
464 0 : for (unsigned int idx = 0; idx < feature_len; ++idx) {
465 0 : curr_val = getValue(idx + b * feature_len);
466 :
467 0 : if (curr_val < min_val) {
468 : min_val = curr_val;
469 : min_element_idx = idx;
470 : }
471 : }
472 0 : result[b] = min_element_idx;
473 : }
474 0 : return result;
475 0 : }
476 :
477 0 : float Uint4QTensor::max_abs() const {
478 : uint8_t abs_max_val = 0;
479 : uint8_t curr_val;
480 0 : for (unsigned int idx = 0; idx < size(); ++idx) {
481 0 : curr_val = getValue(idx);
482 0 : abs_max_val = (curr_val > abs_max_val) ? curr_val : abs_max_val;
483 :
484 : // Terminate search when abs_max_val is an Uint4 absolute max value 15
485 0 : if (abs_max_val == 15)
486 : return abs_max_val;
487 : }
488 :
489 0 : return abs_max_val;
490 : }
491 :
492 0 : float Uint4QTensor::maxValue() const {
493 : uint8_t max_val = 0;
494 : uint8_t curr_val;
495 0 : for (unsigned int idx = 0; idx < size(); ++idx) {
496 0 : curr_val = getValue(idx);
497 0 : max_val = (curr_val > max_val) ? curr_val : max_val;
498 :
499 : // Terminate search when max_val is an Uint4 max value 15
500 0 : if (max_val == 15)
501 : return max_val;
502 : }
503 :
504 0 : return max_val;
505 : }
506 :
507 0 : float Uint4QTensor::minValue() const {
508 : uint8_t min_val = 15;
509 : uint8_t curr_val;
510 0 : for (unsigned int idx = 0; idx < size(); ++idx) {
511 0 : curr_val = getValue(idx);
512 0 : min_val = (curr_val < min_val) ? curr_val : min_val;
513 :
514 : // Terminate search when min_val is an Uint4 min value 0
515 0 : if (min_val == 0)
516 : return min_val;
517 : }
518 :
519 0 : return min_val;
520 : }
521 :
522 0 : void Uint4QTensor::print(std::ostream &out) const {
523 0 : const uint8_t *data = (uint8_t *)getData();
524 0 : unsigned int len = size();
525 0 : out << "data addr: " << reinterpret_cast<const float *>(data) << '\n';
526 0 : out << dim;
527 :
528 0 : if (len > 100) {
529 0 : out << '[' << (int)getValue(0) << ' ' << (int)getValue(1) << ' '
530 0 : << (int)getValue(2) << " ... " << (int)getValue(len - 3) << ' '
531 0 : << (int)getValue(len - 2) << ' ' << (int)getValue(len - 1) << ']'
532 : << std::endl;
533 0 : return;
534 : }
535 :
536 0 : std::ios init(NULL);
537 0 : init.copyfmt(out);
538 0 : if (getFormat() == Tformat::NCHW) {
539 0 : for (unsigned int k = 0; k < batch(); k++) {
540 0 : for (unsigned int l = 0; l < channel(); l++) {
541 0 : for (unsigned int i = 0; i < height(); i++) {
542 0 : for (unsigned int j = 0; j < width(); j++) {
543 0 : out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
544 : }
545 : out << std::endl;
546 : }
547 : out << std::endl;
548 : }
549 : out << "-------" << std::endl;
550 : }
551 : } else {
552 0 : for (unsigned int k = 0; k < batch(); k++) {
553 0 : for (unsigned int i = 0; i < height(); i++) {
554 0 : for (unsigned int j = 0; j < width(); j++) {
555 0 : for (unsigned int l = 0; l < channel(); l++) {
556 0 : out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
557 : }
558 : out << std::endl;
559 : }
560 : out << std::endl;
561 : }
562 : out << "-------" << std::endl;
563 : }
564 0 : out.copyfmt(init);
565 : }
566 :
567 : /// print quantization information
568 0 : const float *q_scales = (float *)getScale();
569 0 : const unsigned int *q_zero_points = getZeroPoint();
570 :
571 0 : if (scale_size() > 50) {
572 0 : out << "Scale factors: [" << q_scales[0] << ' ' << q_scales[1] << ' '
573 0 : << q_scales[2] << " ... " << q_scales[len - 3] << ' '
574 0 : << q_scales[len - 2] << ' ' << q_scales[len - 1] << ']' << std::endl;
575 :
576 0 : out << "Zero points: [" << q_zero_points[0] << ' ' << q_zero_points[1]
577 0 : << ' ' << q_zero_points[2] << " ... " << q_zero_points[len - 3] << ' '
578 0 : << q_zero_points[len - 2] << ' ' << q_zero_points[len - 1] << ']'
579 : << std::endl;
580 : return;
581 : }
582 :
583 0 : out << "Scale factors: ";
584 0 : for (unsigned i = 0; i < scale_size(); ++i) {
585 0 : out << q_scales[i] << " ";
586 : }
587 : out << std::endl;
588 :
589 0 : out << "Zero points: ";
590 0 : for (unsigned i = 0; i < scale_size(); ++i) {
591 0 : out << q_zero_points[i] << " ";
592 : }
593 : out << std::endl;
594 : }
595 :
596 0 : size_t Uint4QTensor::getMemoryBytes() const {
597 0 : return ((size() + 1) / 2) * dim.getDataTypeSize() +
598 0 : scale_size() * sizeof(float) + scale_size() * sizeof(unsigned int);
599 : }
600 :
601 0 : size_t Uint4QTensor::scale_size() const {
602 0 : switch (qscheme) {
603 : case QScheme::PER_TENSOR_AFFINE:
604 : return 1;
605 : break;
606 0 : case QScheme::PER_CHANNEL_AFFINE:
607 0 : return height();
608 : break;
609 : default:
610 : break;
611 : }
612 0 : return 0;
613 : }
614 :
615 1 : QScheme Uint4QTensor::q_scheme() const { return qscheme; }
616 :
617 0 : void Uint4QTensor::copy(const void *buf) {
618 0 : NNTR_THROW_IF(!contiguous, std::invalid_argument)
619 : << getName() << " is not contiguous, cannot copy.";
620 :
621 0 : if (buf == getData()) {
622 : return;
623 : }
624 : // copy tensor data
625 0 : scopy((size() + 1) / 2, (uint8_t *)buf, 1, (uint8_t *)getData(), 1);
626 :
627 : // copy scale factor data
628 0 : float *scales = (float *)(((uint8_t *)buf) + (size() + 1) / 2);
629 0 : scopy(scale_size(), scales, 1, (float *)getScale(), 1);
630 :
631 : // copy zero points
632 : unsigned int *zps =
633 0 : (unsigned int *)((float *)(((uint8_t *)buf) + size()) + scale_size());
634 :
635 0 : memcpy(getZeroPoint(), zps, scale_size() * sizeof(unsigned int));
636 : }
637 :
638 2 : void Uint4QTensor::save_quantization_info(std::ostream &file) {
639 2 : checkedWrite(file, (char *)&qscheme, sizeof(uint16_t),
640 : "[Uint4QTensor::save] failed to write quantization information");
641 2 : }
642 :
643 2 : void Uint4QTensor::read_quantization_info(std::ifstream &file,
644 : size_t start_offset,
645 : bool read_from_offset) {
646 2 : checkedRead(file, (char *)&qscheme, sizeof(uint16_t),
647 : "[Uint4QTensor::read] failed to read quantization information",
648 : start_offset, read_from_offset);
649 2 : }
650 :
651 0 : void Uint4QTensor::read_quantization_info(ReadSource src, size_t start_offset,
652 : bool read_from_offset) {
653 0 : checkedRead(src, (char *)&qscheme, sizeof(uint16_t),
654 : "[Uint4QTensor::read] failed to read quantization information",
655 : start_offset, read_from_offset);
656 0 : }
657 :
658 : } // namespace nntrainer
|