Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * @file q4_k_tensor.cpp
4 : * @date 23 April 2025
5 : * @brief This is Q4_K_Tensor class for Q4_K quantized tensor.
6 : * @see https://github.com/nnstreamer/nntrainer
7 : * @author Donghyeon Jeong <dhyeon.jeong@samsung.com>
8 : * @bug No known bugs except for NYI items
9 : */
10 :
11 : #include <cpu_backend.h>
12 : #include <q4_k_tensor.h>
13 : #include <tensor.h>
14 :
15 : namespace nntrainer {
16 :
17 0 : Q4_K_Tensor::Q4_K_Tensor(std::string name_, Tformat fm, QScheme qscheme_) :
18 0 : Uint4QTensor(name_, fm, QScheme::Q4_Kx8) {}
19 :
20 8 : Q4_K_Tensor::Q4_K_Tensor(const TensorDim &d, bool alloc_now, Initializer init,
21 8 : std::string name, QScheme qscheme_) :
22 8 : Uint4QTensor(d, false, init, name, qscheme_) {
23 9 : NNTR_THROW_IF(d.batch() != 1 || d.channel() != 1 ||
24 : (d.height() % 256 != 0 && d.width() % 256 != 0),
25 : std::invalid_argument)
26 : << "Q4_K_Tensor must be 2 dimensional tensor with batch size 1 and "
27 : "height or width must be divisible by 256";
28 :
29 7 : if (qscheme_ == QScheme::Q4_Kx8) {
30 9 : NNTR_THROW_IF(d.height() % 8 != 0 || d.width() % 8 != 0,
31 : std::invalid_argument)
32 : << "Q4_Kx8 Tensor must have height or width must be divisible by 8";
33 : }
34 :
35 6 : if (alloc_now)
36 5 : allocate();
37 6 : }
38 :
39 0 : Q4_K_Tensor::Q4_K_Tensor(const TensorDim &d, const void *buf,
40 0 : QScheme qscheme_) :
41 0 : Q4_K_Tensor(d, true, Initializer::NONE, "", qscheme_) {
42 0 : if (d.getDataLen() != 0) {
43 0 : if (buf != nullptr)
44 0 : copy_q4k(buf);
45 : }
46 0 : }
47 :
48 6 : void Q4_K_Tensor::allocate() {
49 6 : if (empty() || data)
50 : return;
51 :
52 6 : if (src_tensor) {
53 : /// allocate data based on the source tensor
54 0 : allocateSrcTensor();
55 : /** as this memory is shared, do NOT initialize */
56 : } else {
57 : /// allocate new memory for the tensor data
58 : MemoryData *mem_data;
59 :
60 1844358 : mem_data = new MemoryData((void *)(new uint8_t[size()]{}));
61 6 : data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
62 6 : delete[] mem_data->template getAddr<uint8_t>();
63 6 : delete mem_data;
64 : });
65 :
66 6 : offset = 0;
67 6 : initialize();
68 : }
69 : }
70 :
71 110621 : size_t Q4_K_Tensor::size() const {
72 110621 : if (qscheme == QScheme::Q4_Kx8) {
73 110621 : size_t num_blocks = height() * width() / (256 * 8);
74 110621 : return Q4_Kx8_SIZE * num_blocks;
75 : } else {
76 0 : size_t num_blocks = height() * width() / 256;
77 0 : return Q4_K_SIZE * num_blocks;
78 : }
79 : }
80 :
81 4 : size_t Q4_K_Tensor::getMemoryBytes() const { return size() * sizeof(uint8_t); }
82 :
83 1 : size_t Q4_K_Tensor::scale_size() const { return 0; }
84 :
85 0 : void Q4_K_Tensor::copy_q4k(const void *buf) {
86 0 : NNTR_THROW_IF(!contiguous, std::invalid_argument)
87 : << getName() << " is not contiguous, cannot copy.";
88 :
89 0 : if (buf == getData()) {
90 : return;
91 : }
92 : // copy tensor data
93 0 : scopy(size(), (uint8_t *)buf, 1, (uint8_t *)getData(), 1);
94 : }
95 :
96 : } // namespace nntrainer
|