Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * @file tensor.cpp
4 : * @date 01 December 2023
5 : * @brief This is a Tensor class
6 : * @see https://github.com/nnstreamer/nntrainer
7 : * @author Jijoong Moon <jijoong.moon@samsung.com>
8 : * @author Donghyeon Jeong <dhyeon.jeong@samsung.com>
9 : * @bug No known bugs except for NYI items
10 : */
11 :
12 : #include <numeric>
13 :
14 : #include <char_tensor.h>
15 : #include <float_tensor.h>
16 : #include <int4_tensor.h>
17 : #include <lazy_tensor.h>
18 : #include <q4_0_tensor.h>
19 : #include <q4_k_tensor.h>
20 : #include <q6_k_tensor.h>
21 : #include <short_tensor.h>
22 : #include <tensor.h>
23 : #include <uint4_tensor.h>
24 : #include <uint_tensor.h>
25 :
26 : #ifdef ENABLE_FP16
27 : #include <half_tensor.h>
28 : #endif
29 :
30 : #ifdef ENABLE_BIQGEMM
31 : #include <bcq_tensor.h>
32 : #endif
33 :
34 : #include <fcntl.h>
35 :
36 : #if defined(__unix__) || defined(__ANDROID__) || defined(__arm__)
37 : #include <sys/mman.h>
38 : #include <sys/stat.h>
39 : #include <unistd.h>
40 : #endif
41 :
42 : namespace nntrainer {
43 :
44 1 : Tensor::Tensor(
45 : std::vector<std::vector<std::vector<std::vector<int16_t>>>> const &d,
46 : std::vector<float> const &scales, ml::train::TensorDim::TensorType t_type,
47 1 : QScheme qscheme_) {
48 : switch (qscheme_) {
49 : case QScheme::PER_TENSOR_AFFINE:
50 : break;
51 : case QScheme::PER_CHANNEL_AFFINE:
52 : break;
53 : default:
54 : break;
55 : }
56 1 : itensor_ = std::make_unique<ShortTensor>(d, scales, t_type.format, qscheme_);
57 1 : }
58 :
59 7 : Tensor::Tensor(
60 : std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
61 : std::vector<float> const &scales, ml::train::TensorDim::TensorType t_type,
62 7 : QScheme qscheme_) {
63 7 : if (t_type.data_type == Tdatatype::QINT4) {
64 : itensor_ =
65 9 : std::make_unique<Int4QTensor>(d, scales, t_type.format, qscheme_);
66 2 : } else if (t_type.data_type == Tdatatype::QINT8) {
67 4 : itensor_ = std::make_unique<CharTensor>(d, scales, t_type.format, qscheme_);
68 : } else {
69 : throw std::invalid_argument(
70 : "Error: Tensor cannot be constructed because the given data type is "
71 0 : "incorrect. The supported d_types are: QINT4, QINT8");
72 : }
73 5 : }
74 :
75 31 : Tensor::Tensor(
76 : std::vector<std::vector<std::vector<std::vector<float>>>> const &d,
77 31 : ml::train::TensorDim::TensorType t_type) {
78 31 : itensor_ = std::make_unique<FloatTensor>(d, t_type.format);
79 30 : }
80 :
81 1 : Tensor::Tensor(
82 : std::vector<std::vector<std::vector<std::vector<uint8_t>>>> const &d,
83 : std::vector<float> const &scales,
84 : std::vector<unsigned int> const &zero_points,
85 1 : ml::train::TensorDim::TensorType t_type, QScheme qscheme_) {
86 1 : if (t_type.data_type == Tdatatype::UINT4) {
87 0 : itensor_ = std::make_unique<Uint4QTensor>(d, scales, zero_points,
88 : t_type.format, qscheme_);
89 1 : } else if (t_type.data_type == Tdatatype::UINT8) {
90 2 : itensor_ = std::make_unique<UInt8Tensor>(d, scales, zero_points,
91 : t_type.format, qscheme_);
92 : } else {
93 : throw std::invalid_argument(
94 : "Error: Tensor cannot be constructed because the given data type is "
95 0 : "incorrect. The supported d_types are: UINT4, UINT8");
96 : }
97 1 : }
98 :
99 5 : Tensor::Tensor(
100 : std::vector<std::vector<std::vector<std::vector<uint16_t>>>> const &d,
101 : std::vector<float> const &scales,
102 : std::vector<unsigned int> const &zero_points,
103 5 : ml::train::TensorDim::TensorType t_type, QScheme qscheme_) {
104 5 : itensor_ = std::make_unique<UInt16Tensor>(d, scales, zero_points,
105 : t_type.format, qscheme_);
106 2 : }
107 :
108 1 : Tensor::Tensor(
109 : std::vector<std::vector<std::vector<std::vector<uint32_t>>>> const &d,
110 : std::vector<float> const &scales,
111 : std::vector<unsigned int> const &zero_points,
112 1 : ml::train::TensorDim::TensorType t_type, QScheme qscheme_) {
113 1 : itensor_ = std::make_unique<UInt32Tensor>(d, scales, zero_points,
114 : t_type.format, qscheme_);
115 1 : }
116 :
117 282561 : Tensor::Tensor(std::string name_, Tformat fm, Tdatatype d_type) {
118 : itensor_ = nullptr;
119 :
120 : if (d_type == Tdatatype::FP32) {
121 565114 : itensor_ = std::make_unique<FloatTensor>(name_, fm);
122 : } else if (d_type == Tdatatype::FP16) {
123 : #ifdef ENABLE_FP16
124 : itensor_ = std::make_unique<HalfTensor>(name_, fm);
125 : #else
126 0 : throw std::invalid_argument("Error: enable-fp16 is not enabled");
127 : #endif
128 : } else if (d_type == Tdatatype::Q4_K) {
129 0 : itensor_ = std::make_unique<Q4_K_Tensor>(name_, fm);
130 : } else if (d_type == Tdatatype::Q6_K) {
131 0 : itensor_ = std::make_unique<Q6_K_Tensor>(name_, fm);
132 : } else if (d_type == Tdatatype::Q4_0) {
133 0 : itensor_ = std::make_unique<Q4_0_Tensor>(name_, fm);
134 : } else if (d_type == Tdatatype::UINT4) {
135 0 : itensor_ = std::make_unique<Uint4QTensor>(name_, fm);
136 : } else if (d_type == Tdatatype::UINT8) {
137 2 : itensor_ = std::make_unique<UInt8Tensor>(name_, fm);
138 : } else if (d_type == Tdatatype::UINT16) {
139 2 : itensor_ = std::make_unique<UInt16Tensor>(name_, fm);
140 : } else if (d_type == Tdatatype::UINT32) {
141 2 : itensor_ = std::make_unique<UInt32Tensor>(name_, fm);
142 : } else if (d_type == Tdatatype::QINT16) {
143 0 : itensor_ = std::make_unique<ShortTensor>(name_, fm);
144 : } else if (d_type == Tdatatype::QINT8) {
145 2 : itensor_ = std::make_unique<CharTensor>(name_, fm);
146 : } else if (d_type == Tdatatype::QINT4) {
147 0 : itensor_ = std::make_unique<Int4QTensor>(name_, fm);
148 : } else if (d_type == Tdatatype::BCQ) {
149 : #ifdef ENABLE_BIQGEMM
150 : itensor_ = std::make_unique<BCQTensor>(name_, fm);
151 : #else
152 : throw std::invalid_argument("Error: enable-biqgemm is not activated. "
153 0 : "Enable only if your system supports BiQGEMM.");
154 : #endif
155 : } else {
156 : throw std::invalid_argument(
157 : "Error: Tensor cannot be constructed because the given d_type is not "
158 : "compatible with itensor. The supported d_types are: FP32, FP16 "
159 0 : "(if built with ENABLE_FP16).");
160 : }
161 282561 : }
162 :
163 39609 : Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
164 39609 : std::string name, QScheme qscheme, bool is_virtual) {
165 : itensor_ = nullptr;
166 39609 : this->is_virtual = is_virtual;
167 :
168 : if (d.getDataType() == Tdatatype::FP32) {
169 79138 : itensor_ = std::make_unique<FloatTensor>(d, alloc_now, init, name);
170 : } else if (d.getDataType() == Tdatatype::FP16) {
171 : #ifdef ENABLE_FP16
172 : itensor_ = std::make_unique<HalfTensor>(d, alloc_now, init, name);
173 : #else
174 0 : throw std::invalid_argument("Error: enable-fp16 is not enabled");
175 : #endif
176 : } else if (d.getDataType() == Tdatatype::Q4_K) {
177 6 : itensor_ = std::make_unique<Q4_K_Tensor>(d, alloc_now, init, name);
178 : } else if (d.getDataType() == Tdatatype::Q6_K) {
179 4 : itensor_ = std::make_unique<Q6_K_Tensor>(d, alloc_now, init, name);
180 : } else if (d.getDataType() == Tdatatype::Q4_0) {
181 6 : itensor_ = std::make_unique<Q4_0_Tensor>(d, alloc_now, init, name);
182 : } else if (d.getDataType() == Tdatatype::UINT4) {
183 5 : if (qscheme != QScheme::Q4_Kx8) {
184 : itensor_ =
185 0 : std::make_unique<Uint4QTensor>(d, alloc_now, init, name, qscheme);
186 : } else {
187 : itensor_ =
188 8 : std::make_unique<Q4_K_Tensor>(d, alloc_now, init, name, qscheme);
189 : }
190 : } else if (d.getDataType() == Tdatatype::UINT8) {
191 8 : itensor_ = std::make_unique<UInt8Tensor>(d, alloc_now, init, name);
192 : } else if (d.getDataType() == Tdatatype::UINT16) {
193 8 : itensor_ = std::make_unique<UInt16Tensor>(d, alloc_now, init, name);
194 : } else if (d.getDataType() == Tdatatype::UINT32) {
195 8 : itensor_ = std::make_unique<UInt32Tensor>(d, alloc_now, init, name);
196 : } else if (d.getDataType() == Tdatatype::QINT16) {
197 0 : itensor_ = std::make_unique<ShortTensor>(d, alloc_now, init, name, qscheme);
198 : } else if (d.getDataType() == Tdatatype::QINT8) {
199 18 : itensor_ = std::make_unique<CharTensor>(d, alloc_now, init, name, qscheme);
200 : } else if (d.getDataType() == Tdatatype::QINT4) {
201 14 : itensor_ = std::make_unique<Int4QTensor>(d, alloc_now, init, name, qscheme);
202 : } else if (d.getDataType() == Tdatatype::BCQ) {
203 : #ifdef ENABLE_BIQGEMM
204 : itensor_ = std::make_unique<BCQTensor>(d, alloc_now, init, name);
205 : #else
206 : throw std::invalid_argument("Error: enable-biqgemm is not activated. "
207 0 : "Enable only if your system supports BiQGEMM.");
208 : #endif
209 : } else {
210 : throw std::invalid_argument(
211 : "Error: Tensor cannot be constructed because the given d_type is not "
212 : "compatible with itensor. The supported d_types are: FP32, FP16 "
213 0 : "(if built with ENABLE_FP16).");
214 : }
215 39603 : }
216 :
217 336059 : Tensor::Tensor(const TensorDim &d, const void *buf, QScheme qscheme) {
218 : itensor_ = nullptr;
219 :
220 : if (d.getDataType() == Tdatatype::FP32) {
221 671952 : itensor_ = std::make_unique<FloatTensor>(d, buf);
222 : } else if (d.getDataType() == Tdatatype::FP16) {
223 : #ifdef ENABLE_FP16
224 : itensor_ = std::make_unique<HalfTensor>(d, buf);
225 : #else
226 0 : throw std::invalid_argument("Error: enable-fp16 is not enabled");
227 : #endif
228 : } else if (d.getDataType() == Tdatatype::Q4_K) {
229 0 : itensor_ = std::make_unique<Q4_K_Tensor>(d, buf);
230 : } else if (d.getDataType() == Tdatatype::Q6_K) {
231 6 : itensor_ = std::make_unique<Q6_K_Tensor>(d, buf);
232 : } else if (d.getDataType() == Tdatatype::Q4_0) {
233 0 : itensor_ = std::make_unique<Q4_0_Tensor>(d, buf);
234 : } else if (d.getDataType() == Tdatatype::UINT4) {
235 0 : if (qscheme != QScheme::Q4_Kx8)
236 0 : itensor_ = std::make_unique<Uint4QTensor>(d, buf, qscheme);
237 : else
238 0 : itensor_ = std::make_unique<Q4_K_Tensor>(d, buf, qscheme);
239 : } else if (d.getDataType() == Tdatatype::UINT8) {
240 16 : itensor_ = std::make_unique<UInt8Tensor>(d, buf);
241 : } else if (d.getDataType() == Tdatatype::UINT16) {
242 46 : itensor_ = std::make_unique<UInt16Tensor>(d, buf);
243 : } else if (d.getDataType() == Tdatatype::UINT32) {
244 18 : itensor_ = std::make_unique<UInt32Tensor>(d, buf);
245 : } else if (d.getDataType() == Tdatatype::QINT16) {
246 12 : itensor_ = std::make_unique<ShortTensor>(d, buf, qscheme);
247 : } else if (d.getDataType() == Tdatatype::QINT8) {
248 64 : itensor_ = std::make_unique<CharTensor>(d, buf, qscheme);
249 : } else if (d.getDataType() == Tdatatype::QINT4) {
250 4 : itensor_ = std::make_unique<Int4QTensor>(d, buf);
251 : } else if (d.getDataType() == Tdatatype::BCQ) {
252 : #ifdef ENABLE_BIQGEMM
253 : itensor_ = std::make_unique<BCQTensor>(d, buf);
254 : #else
255 : throw std::invalid_argument("Error: enable-biqgemm is not activated. "
256 0 : "Enable only if your system supports BiQGEMM.");
257 : #endif
258 : } else {
259 : throw std::invalid_argument(
260 : "Error: Tensor cannot be constructed because the given d_type is not "
261 : "compatible with itensor. The supported d_types are: FP32, FP16 "
262 0 : "(if built with ENABLE_FP16).");
263 : }
264 336059 : }
265 :
266 533568 : Tensor::Tensor(const Tensor &rhs) {
267 533568 : if (rhs.getDataType() == Tdatatype::FP32) {
268 1067108 : itensor_ = std::make_unique<FloatTensor>(*rhs.itensor_);
269 14 : } else if (rhs.getDataType() == Tdatatype::FP16) {
270 : #ifdef ENABLE_FP16
271 : itensor_ = std::make_unique<HalfTensor>(*rhs.itensor_);
272 : #else
273 0 : throw std::invalid_argument("Error: enable-fp16 is not enabled");
274 : #endif
275 14 : } else if (rhs.getDataType() == Tdatatype::Q4_K) {
276 0 : itensor_ = std::make_unique<Q4_K_Tensor>(*rhs.itensor_);
277 14 : } else if (rhs.getDataType() == Tdatatype::Q6_K) {
278 0 : itensor_ = std::make_unique<Q6_K_Tensor>(*rhs.itensor_);
279 14 : } else if (rhs.getDataType() == Tdatatype::Q4_0) {
280 0 : itensor_ = std::make_unique<Q4_0_Tensor>(*rhs.itensor_);
281 14 : } else if (rhs.getDataType() == Tdatatype::UINT4) {
282 0 : itensor_ = std::make_unique<Uint4QTensor>(*rhs.itensor_);
283 14 : } else if (rhs.getDataType() == Tdatatype::UINT8) {
284 2 : itensor_ = std::make_unique<UInt8Tensor>(*rhs.itensor_);
285 13 : } else if (rhs.getDataType() == Tdatatype::UINT16) {
286 6 : itensor_ = std::make_unique<UInt16Tensor>(*rhs.itensor_);
287 10 : } else if (rhs.getDataType() == Tdatatype::UINT32) {
288 14 : itensor_ = std::make_unique<UInt32Tensor>(*rhs.itensor_);
289 3 : } else if (rhs.getDataType() == Tdatatype::QINT16) {
290 0 : itensor_ = std::make_unique<ShortTensor>(*rhs.itensor_);
291 3 : } else if (rhs.getDataType() == Tdatatype::QINT8) {
292 6 : itensor_ = std::make_unique<CharTensor>(*rhs.itensor_);
293 0 : } else if (rhs.getDataType() == Tdatatype::QINT4) {
294 0 : itensor_ = std::make_unique<Int4QTensor>(*rhs.itensor_);
295 0 : } else if (rhs.getDataType() == Tdatatype::BCQ) {
296 : #ifdef ENABLE_BIQGEMM
297 : itensor_ = std::make_unique<BCQTensor>(*rhs.itensor_);
298 : #else
299 : throw std::invalid_argument("Error: enable-biqgemm is not activated. "
300 0 : "Enable only if your system supports BiQGEMM.");
301 : #endif
302 : }
303 :
304 : /** copy tensor properties */
305 533568 : this->is_virtual = rhs.is_virtual;
306 533568 : this->fd = rhs.fd;
307 533568 : this->read_offset = rhs.read_offset;
308 533568 : this->mapped_ptr = rhs.mapped_ptr;
309 533568 : }
310 :
311 2 : Tensor::Tensor(const std::unique_ptr<TensorBase> &rhs) {
312 3 : NNTR_THROW_IF(rhs.get() == nullptr, std::invalid_argument)
313 : << "Error: received a nullptr. Tensor cannot be constructed";
314 :
315 : if (rhs->getDataType() == Tdatatype::FP32) {
316 2 : itensor_ = std::make_unique<FloatTensor>(*rhs.get());
317 : } else if (rhs->getDataType() == Tdatatype::FP16) {
318 : #ifdef ENABLE_FP16
319 : itensor_ = std::make_unique<HalfTensor>(*rhs.get());
320 : #else
321 0 : throw std::invalid_argument("Error: enable-fp16 is not enabled");
322 : #endif
323 : } else if (rhs->getDataType() == Tdatatype::UINT4) {
324 0 : itensor_ = std::make_unique<Uint4QTensor>(*rhs.get());
325 : } else if (rhs->getDataType() == Tdatatype::UINT8) {
326 0 : itensor_ = std::make_unique<UInt8Tensor>(*rhs.get());
327 : } else if (rhs->getDataType() == Tdatatype::UINT16) {
328 0 : itensor_ = std::make_unique<UInt16Tensor>(*rhs.get());
329 : } else if (rhs->getDataType() == Tdatatype::UINT32) {
330 0 : itensor_ = std::make_unique<UInt32Tensor>(*rhs.get());
331 : } else if (rhs->getDataType() == Tdatatype::QINT16) {
332 0 : itensor_ = std::make_unique<ShortTensor>(*rhs.get());
333 : } else if (rhs->getDataType() == Tdatatype::QINT8) {
334 0 : itensor_ = std::make_unique<CharTensor>(*rhs.get());
335 : } else if (rhs->getDataType() == Tdatatype::QINT4) {
336 1 : itensor_ = std::make_unique<Int4QTensor>(*rhs.get());
337 : } else if (rhs->getDataType() == Tdatatype::BCQ) {
338 : #ifdef ENABLE_BIQGEMM
339 : itensor_ = std::make_unique<BCQTensor>(*rhs.get());
340 : #else
341 : throw std::invalid_argument("Error: enable-biqgemm is not activated. "
342 0 : "Enable only if your system supports BiQGEMM.");
343 : #endif
344 : }
345 1 : }
346 :
347 6741 : Tensor &Tensor::operator=(const Tensor &rhs) {
348 6741 : if (rhs.getDataType() == Tdatatype::FP32) {
349 13482 : itensor_ = std::make_unique<FloatTensor>(*rhs.itensor_);
350 0 : } else if (rhs.getDataType() == Tdatatype::FP16) {
351 : #ifdef ENABLE_FP16
352 : itensor_ = std::make_unique<HalfTensor>(*rhs.itensor_);
353 : #else
354 0 : throw std::invalid_argument("Error: enable-fp16 is not enabled");
355 : #endif
356 0 : } else if (rhs.getDataType() == Tdatatype::Q4_K) {
357 0 : itensor_ = std::make_unique<Q4_K_Tensor>(*rhs.itensor_);
358 0 : } else if (rhs.getDataType() == Tdatatype::Q6_K) {
359 0 : itensor_ = std::make_unique<Q6_K_Tensor>(*rhs.itensor_);
360 0 : } else if (rhs.getDataType() == Tdatatype::Q4_0) {
361 0 : itensor_ = std::make_unique<Q4_0_Tensor>(*rhs.itensor_);
362 0 : } else if (rhs.getDataType() == Tdatatype::UINT4) {
363 0 : itensor_ = std::make_unique<Uint4QTensor>(*rhs.itensor_);
364 0 : } else if (rhs.getDataType() == Tdatatype::UINT8) {
365 0 : itensor_ = std::make_unique<UInt8Tensor>(*rhs.itensor_);
366 0 : } else if (rhs.getDataType() == Tdatatype::UINT16) {
367 0 : itensor_ = std::make_unique<UInt16Tensor>(*rhs.itensor_);
368 0 : } else if (rhs.getDataType() == Tdatatype::UINT32) {
369 0 : itensor_ = std::make_unique<UInt32Tensor>(*rhs.itensor_);
370 0 : } else if (rhs.getDataType() == Tdatatype::QINT16) {
371 0 : itensor_ = std::make_unique<ShortTensor>(*rhs.itensor_);
372 0 : } else if (rhs.getDataType() == Tdatatype::QINT8) {
373 0 : itensor_ = std::make_unique<CharTensor>(*rhs.itensor_);
374 0 : } else if (rhs.getDataType() == Tdatatype::QINT4) {
375 0 : itensor_ = std::make_unique<Int4QTensor>(*rhs.itensor_);
376 0 : } else if (rhs.getDataType() == Tdatatype::BCQ) {
377 : #ifdef ENABLE_BIQGEMM
378 : itensor_ = std::make_unique<BCQTensor>(*rhs.itensor_);
379 : #else
380 : throw std::invalid_argument("Error: enable-biqgemm is not activated. "
381 0 : "Enable only if your system supports BiQGEMM.");
382 : #endif
383 : }
384 :
385 : /** copy tensor properties */
386 6741 : this->is_virtual = rhs.is_virtual;
387 6741 : this->fd = rhs.fd;
388 6741 : this->read_offset = rhs.read_offset;
389 6741 : this->mapped_ptr = rhs.mapped_ptr;
390 6741 : return *this;
391 : }
392 :
393 10826 : bool Tensor::operator==(const Tensor &rhs) const {
394 : /// compares tensor information
395 10826 : if (*itensor_.get() == *rhs.itensor_.get()) {
396 : /// compares tensor data
397 10824 : if (getDataType() == Tdatatype::FP32) {
398 10793 : return itensorCompare<FloatTensor>(itensor_.get(), rhs.itensor_.get());
399 31 : } else if (getDataType() == Tdatatype::FP16) {
400 : #ifdef ENABLE_FP16
401 : return itensorCompare<HalfTensor>(itensor_.get(), rhs.itensor_.get());
402 : #else
403 : throw std::invalid_argument(
404 : "Error: HalfTensor cannot be created or used when FP16 is not enabled. "
405 0 : "Please check if the tensor data type is set properly.");
406 : #endif
407 31 : } else if (getDataType() == Tdatatype::Q4_K) {
408 0 : return itensorCompare<Q4_K_Tensor>(itensor_.get(), rhs.itensor_.get());
409 31 : } else if (getDataType() == Tdatatype::Q6_K) {
410 0 : return itensorCompare<Q6_K_Tensor>(itensor_.get(), rhs.itensor_.get());
411 31 : } else if (getDataType() == Tdatatype::Q4_0) {
412 0 : return itensorCompare<Q4_0_Tensor>(itensor_.get(), rhs.itensor_.get());
413 31 : } else if (getDataType() == Tdatatype::UINT4) {
414 1 : return itensorCompare<Uint4QTensor>(itensor_.get(), rhs.itensor_.get());
415 30 : } else if (getDataType() == Tdatatype::UINT8) {
416 6 : return itensorCompare<UInt8Tensor>(itensor_.get(), rhs.itensor_.get());
417 24 : } else if (getDataType() == Tdatatype::UINT16) {
418 7 : return itensorCompare<UInt16Tensor>(itensor_.get(), rhs.itensor_.get());
419 17 : } else if (getDataType() == Tdatatype::UINT32) {
420 4 : return itensorCompare<UInt32Tensor>(itensor_.get(), rhs.itensor_.get());
421 13 : } else if (getDataType() == Tdatatype::QINT16) {
422 3 : return itensorCompare<ShortTensor>(itensor_.get(), rhs.itensor_.get());
423 10 : } else if (getDataType() == Tdatatype::QINT8) {
424 8 : return itensorCompare<CharTensor>(itensor_.get(), rhs.itensor_.get());
425 2 : } else if (getDataType() == Tdatatype::QINT4) {
426 2 : return itensorCompare<Int4QTensor>(itensor_.get(), rhs.itensor_.get());
427 0 : } else if (getDataType() == Tdatatype::BCQ) {
428 : #ifdef ENABLE_BIQGEMM
429 : return itensorCompare<BCQTensor>(itensor_.get(), rhs.itensor_.get());
430 : #else
431 : throw std::invalid_argument(
432 : "Error: enable-biqgemm is not activated. "
433 0 : "Enable only if your system supports BiQGEMM.");
434 : #endif
435 : }
436 : }
437 : return false;
438 : }
439 :
440 31 : void Tensor::allocate() { itensor_->allocate(); }
441 :
442 908 : void Tensor::deallocate() { itensor_->deallocate(); }
443 :
444 246930 : bool Tensor::isAllocated() { return itensor_->isAllocated(); }
445 :
446 81081 : void Tensor::setValue(float value) { itensor_->setValue(value); }
447 :
448 35432469 : void Tensor::setValue(unsigned int b, unsigned int c, unsigned int h,
449 : unsigned int w, float value) {
450 35432469 : itensor_->setValue(b, c, h, w, value);
451 35432469 : }
452 :
453 10925 : void Tensor::addValue(unsigned int b, unsigned int c, unsigned int h,
454 : unsigned int w, float value, float beta) noexcept {
455 10925 : itensor_->addValue(b, c, h, w, value, beta);
456 10925 : }
457 :
458 36711 : void Tensor::setZero() { itensor_->setZero(); }
459 :
460 108 : void Tensor::setRandNormal(float mean, float stddev) {
461 108 : itensor_->setRandNormal(mean, stddev);
462 108 : }
463 :
464 15807 : void Tensor::setRandUniform(float min, float max) {
465 15807 : itensor_->setRandUniform(min, max);
466 15807 : }
467 :
468 3 : void Tensor::setRandBernoulli(float probability) {
469 3 : itensor_->setRandBernoulli(probability);
470 3 : }
471 :
472 19183 : void Tensor::initialize() { itensor_->initialize(); }
473 :
474 18 : void Tensor::initialize(Initializer init) { itensor_->initialize(init); }
475 :
476 0 : Tensor Tensor::apply(std::function<Tensor(Tensor)> f) const { return f(*this); }
477 :
478 883 : Tensor &Tensor::apply(std::function<Tensor &(Tensor, Tensor &)> f,
479 : Tensor &output) const {
480 1766 : return f(*this, output);
481 : }
482 :
483 2846 : int Tensor::multiply_i_strided(Tensor const &m, const float beta) {
484 : try {
485 2846 : this->multiply_strided(m, *this, beta);
486 2 : } catch (std::exception &err) {
487 2 : ml_loge("%s %s", typeid(err).name(), err.what());
488 : return ML_ERROR_INVALID_PARAMETER;
489 2 : }
490 :
491 : return ML_ERROR_NONE;
492 : }
493 :
494 31 : Tensor Tensor::multiply_strided(Tensor const &m, const float beta) const {
495 31 : Tensor t("", getFormat(), getDataType());
496 59 : return this->multiply_strided(m, t, beta);
497 31 : }
498 :
499 12608 : Tensor &Tensor::multiply_strided(Tensor const &m, Tensor &output,
500 : const float beta) const {
501 12608 : itensor_->multiply_strided(m, output, beta);
502 12602 : return output;
503 : }
504 :
505 2608 : int Tensor::multiply_i(float const &value) {
506 2608 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
507 0 : << getName() << " is not contiguous, cannot multiply";
508 :
509 2608 : return itensor_->multiply_i(value);
510 : }
511 :
512 6633 : Tensor Tensor::multiply(float const &value) const {
513 6633 : Tensor t("", getFormat(), getDataType());
514 13266 : return multiply(value, t);
515 6633 : }
516 :
517 7691 : Tensor &Tensor::multiply(float const &value, Tensor &out) const {
518 7691 : itensor_->multiply(value, out);
519 7691 : return out;
520 : }
521 :
522 8602 : int Tensor::multiply_i(Tensor const &m, const float beta) {
523 : try {
524 8602 : this->multiply(m, *this, beta);
525 135 : } catch (std::exception &err) {
526 135 : ml_loge("%s %s", typeid(err).name(), err.what());
527 : return ML_ERROR_INVALID_PARAMETER;
528 135 : }
529 :
530 : return ML_ERROR_NONE;
531 : }
532 :
533 3179 : Tensor Tensor::multiply(Tensor const &m, const float beta) const {
534 3179 : Tensor t("", getFormat(), getDataType());
535 6353 : return multiply(m, t, beta);
536 3179 : }
537 :
538 17216 : Tensor &Tensor::multiply(Tensor const &m, Tensor &output,
539 : const float beta) const {
540 17216 : NNTR_THROW_IF(m.getFormat() != this->getFormat(), std::invalid_argument)
541 0 : << "Tensor Format of " << getName() << ":"
542 0 : << ((bool)(this->getFormat()) ? "NHWC" : "NCHW") << " does not match. ("
543 0 : << ((bool)(m.getFormat()) ? "NHWC" : "NCHW") << ")";
544 :
545 17218 : NNTR_THROW_IF(!getContiguous() || !m.getContiguous() ||
546 : !output.getContiguous(),
547 : std::invalid_argument)
548 2 : << getName() << " is not contiguous, cannot multiply";
549 :
550 17214 : itensor_->multiply(m, output, beta);
551 17075 : return output;
552 : }
553 :
554 6237 : int Tensor::divide_i(float const &value) {
555 6237 : if (value == 0.0f) {
556 : return ML_ERROR_INVALID_PARAMETER;
557 : }
558 6236 : this->divide(value, *this);
559 6236 : return ML_ERROR_NONE;
560 : }
561 :
562 3 : Tensor Tensor::divide(float const &value) const {
563 3 : Tensor output("", getFormat(), getDataType());
564 5 : return divide(value, output);
565 3 : }
566 :
567 6274 : Tensor &Tensor::divide(float const &value, Tensor &output) const {
568 : /// @todo add unittest, ZeroDivisionError
569 6274 : if (value == 0.0f) {
570 1 : std::stringstream ss;
571 1 : ss << "[Tensor] divide by value failed, value: " << value;
572 3 : throw std::invalid_argument(ss.str().c_str());
573 1 : }
574 6273 : itensor_->divide(value, output);
575 6273 : return output;
576 : }
577 :
578 182 : int Tensor::divide_i(Tensor const &m) {
579 : try {
580 182 : this->divide(m, *this);
581 5 : } catch (std::exception &err) {
582 5 : ml_loge("%s %s", typeid(err).name(), err.what());
583 : return ML_ERROR_INVALID_PARAMETER;
584 5 : }
585 :
586 : return ML_ERROR_NONE;
587 : }
588 :
589 11 : Tensor Tensor::divide(Tensor const &m) const {
590 11 : Tensor output("", getFormat(), getDataType());
591 18 : return this->divide(m, output);
592 11 : }
593 :
594 200 : Tensor &Tensor::divide(Tensor const &m, Tensor &output) const {
595 202 : NNTR_THROW_IF(!getContiguous() || !m.getContiguous() ||
596 : !output.getContiguous(),
597 : std::invalid_argument)
598 2 : << getName() << " is not contiguous, cannot divide";
599 198 : itensor_->divide(m, output);
600 190 : return output;
601 : }
602 :
603 207 : int Tensor::add_i_strided(Tensor const &input, const float beta) {
604 : try {
605 207 : this->add_strided(input, *this, beta);
606 0 : } catch (std::exception &err) {
607 0 : ml_loge("%s %s", typeid(err).name(), err.what());
608 : return ML_ERROR_INVALID_PARAMETER;
609 0 : }
610 :
611 : return ML_ERROR_NONE;
612 : }
613 :
614 4 : Tensor Tensor::add_strided(Tensor const &input, const float beta) const {
615 4 : Tensor output("", getFormat(), getDataType());
616 6 : return this->add_strided(input, output, beta);
617 4 : }
618 :
619 212 : Tensor &Tensor::add_strided(Tensor const &input, Tensor &output,
620 : const float beta) const {
621 216 : CREATE_IF_EMPTY_DIMS(output, getDim(), nullptr);
622 :
623 212 : if (size() != input.size() || size() != output.size())
624 : throw std::invalid_argument(
625 1 : "Strided addition does not support broadcasting");
626 :
627 211 : itensor_->add_strided(input, output, beta);
628 :
629 209 : return output;
630 : }
631 :
632 1231 : int Tensor::add_i(float const &value) {
633 1231 : this->add(value, *this);
634 1231 : return ML_ERROR_NONE;
635 : }
636 :
637 6179 : Tensor Tensor::add(float const &value) const {
638 6179 : Tensor t("", getFormat(), getDataType());
639 12358 : return add(value, t);
640 6179 : }
641 :
642 7432 : Tensor &Tensor::add(float const &value, Tensor &output) const {
643 7432 : itensor_->add(value, output);
644 7432 : return output;
645 : }
646 :
647 46231 : int Tensor::add_i(Tensor const &m, float const alpha) {
648 : try {
649 46231 : itensor_->add(m, *this, alpha);
650 23 : } catch (std::exception &err) {
651 23 : ml_loge("%s %s", typeid(err).name(), err.what());
652 : return ML_ERROR_INVALID_PARAMETER;
653 23 : }
654 : return ML_ERROR_NONE;
655 : }
656 :
657 2698 : int Tensor::add_i_partial(unsigned int len, unsigned int addr_idx, Tensor &m,
658 : unsigned int incX, unsigned int incY,
659 : const Tensor alphas, unsigned int alpha_idx) {
660 2698 : return itensor_->add_i_partial(len, addr_idx, m, incX, incY, alphas,
661 2698 : alpha_idx);
662 : }
663 :
664 5007 : Tensor Tensor::add(Tensor const &m, float const alpha) const {
665 5007 : Tensor t("", getFormat(), getDataType());
666 10009 : return this->add(m, t, alpha);
667 5007 : }
668 :
669 13158 : Tensor &Tensor::add(Tensor const &m, Tensor &output, float const alpha) const {
670 13158 : NNTR_THROW_IF(m.getFormat() != this->getFormat(), std::invalid_argument)
671 0 : << "Tensor Format of " << getName() << ":"
672 0 : << ((bool)(this->getFormat()) ? "NHWC" : "NCHW") << " does not match. ("
673 0 : << ((bool)(m.getFormat()) ? "NHWC" : "NCHW") << ")";
674 :
675 13162 : NNTR_THROW_IF(!itensor_->getContiguous() || !m.getContiguous() ||
676 : !output.getContiguous(),
677 : std::invalid_argument)
678 4 : << getName() << " is not contiguous, cannot add";
679 13154 : itensor_->add(m, output, alpha);
680 13146 : return output;
681 : }
682 :
683 171 : int Tensor::subtract_i(float const &value) {
684 171 : this->subtract(value, *this);
685 171 : return ML_ERROR_NONE;
686 : }
687 :
688 3 : Tensor Tensor::subtract(float const &value) const {
689 3 : Tensor output("", getFormat(), getDataType());
690 6 : return subtract(value, output);
691 3 : }
692 :
693 174 : Tensor &Tensor::subtract(float const &value, Tensor &output) const {
694 174 : itensor_->subtract(value, output);
695 174 : return output;
696 : }
697 :
698 5698 : int Tensor::subtract_i(Tensor const &m) { return add_i(m, -1); }
699 :
700 5866 : Tensor Tensor::subtract(Tensor const &m) const {
701 5866 : Tensor t("", getFormat(), getDataType());
702 11727 : return this->subtract(m, t);
703 5866 : }
704 :
705 8127 : Tensor &Tensor::subtract(Tensor const &m, Tensor &output) const {
706 8127 : return add(m, output, -1);
707 : }
708 :
709 : /**
710 : * This sums the Tensor data for each batch (over the feature dimensions).
711 : * Therefore the result has dimension (dim.batch(), 1, 1, 1).
712 : */
713 453 : Tensor Tensor::sum_by_batch() const {
714 453 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
715 0 : << getName() << " is not contiguous, cannot sum";
716 :
717 453 : Tensor output(batch(), 1, 1, 1, this->getFormat(), getDataType());
718 453 : itensor_->sum_by_batch(output);
719 453 : return output;
720 0 : }
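// Editor's usage sketch (illustrative only, not part of the build): for a
// contiguous FP32 tensor shaped (batch=2, channel=3, height=4, width=5),
// sum_by_batch() reduces the 3*4*5 = 60 feature elements of each sample:
//   Tensor t(2, 3, 4, 5);             // assuming the (b, c, h, w) constructor in tensor.h
//   t.setValue(1.0f);
//   Tensor s = t.sum_by_batch();      // s has dimension (2, 1, 1, 1), each entry 60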
721 :
722 60390 : Tensor Tensor::sum(unsigned int axis, float alpha) const {
723 60390 : Tensor output("", this->getFormat(), this->getDataType());
724 120777 : return sum(axis, output, alpha, 0);
725 60390 : }
726 :
727 126666 : Tensor &Tensor::sum(unsigned int axis, Tensor &output, float alpha,
728 : float beta) const {
729 126666 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
730 0 : << getName() << " is not contiguous, cannot sum";
731 :
732 126666 : itensor_->sum(axis, output, alpha, beta);
733 126663 : return output;
734 : }
735 :
736 52083 : Tensor Tensor::sum(const std::vector<unsigned int> &axes, float alpha) const {
737 52083 : Tensor output("", this->getFormat());
738 104165 : return sum(axes, output, alpha);
739 52083 : }
740 :
741 59867 : Tensor &Tensor::sum(const std::vector<unsigned int> &axes, Tensor &output,
742 : float alpha) const {
743 59867 : if (axes.empty())
744 1 : throw std::invalid_argument("empty axes given");
745 :
746 59866 : if (axes.size() == 1) {
747 776 : this->sum(axes[0], output, alpha);
748 : } else {
749 :
750 : /** club axes together */
751 59090 : Tensor new_reshaped = Tensor(getDim());
752 59090 : new_reshaped.copy(*this);
753 59090 : std::vector<unsigned int> continuous_order = {0, 3, 1, 2};
754 59090 : std::vector<unsigned int> new_axes = {axes[0]};
755 :
756 229295 : for (unsigned int i = 1; i < axes.size(); ++i) {
757 170205 : if (checkContinuous(axes[i - 1], axes[i])) {
758 169889 : new_reshaped.mergeAxis(axes[i - 1], axes[i]);
759 169889 : new_axes.back() = axes[i];
760 : } else {
761 316 : new_axes.push_back(axes[i]);
762 : }
763 : }
764 :
765 59090 : Tensor ret = new_reshaped.sum(new_axes[0]);
766 59093 : for (unsigned int i = 1; i < new_axes.size() - 1; ++i)
767 6 : ret = ret.sum(axes[i]);
768 59090 : ret.sum(new_axes.back(), output, alpha);
769 59090 : }
770 59866 : return output;
771 : }
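// Editor's note (a sketch of the behaviour above, not a normative statement):
// adjacent axes in the reduction list are merged before summing, e.g.
//   t.sum({1, 2});                    // on an NCHW tensor
// first folds the channel and height axes into one via mergeAxis(1, 2), and the
// reduction is then carried out over the merged axis rather than axis by axis.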
772 :
773 0 : Tensor &Tensor::abs(Tensor &output) const {
774 0 : if (size() != output.size() || getDataType() != output.getDataType() ||
775 0 : getFormat() != output.getFormat())
776 : throw std::invalid_argument(
777 : "Error: Tensor::abs requires output tensor to be same size, data type "
778 0 : "and format as input tensor.");
779 0 : return itensor_->abs(output);
780 : }
781 :
782 5009 : Tensor Tensor::average(unsigned int axis) const {
783 5009 : Tensor output("", this->getFormat(), this->getDataType());
784 10016 : return average(axis, output);
785 5009 : }
786 :
787 5009 : Tensor &Tensor::average(unsigned int axis, Tensor &output) const {
788 5009 : if (axis >= TensorDim::MAXDIM)
789 : throw std::out_of_range(
790 2 : "negative axis or axis more then MAXDIM is invalid");
791 :
792 5007 : unsigned int axis_size = getDim()[axis];
793 5007 : if (axis_size == 1)
794 0 : output.copy(*this);
795 : else
796 5007 : this->sum(axis, output, 1.0 / ((float)axis_size));
797 :
798 5007 : return output;
799 : }
800 :
801 5 : Tensor Tensor::average(const std::vector<unsigned int> &axes) const {
802 5 : Tensor output("", this->getFormat(), this->getDataType());
803 9 : return average(axes, output);
804 5 : }
805 :
806 948 : Tensor &Tensor::average(const std::vector<unsigned int> &axes,
807 : Tensor &output) const {
808 948 : if (axes.empty())
809 0 : return this->average(output);
810 :
811 948 : TensorDim ret_shape(getTensorType());
812 :
813 2207 : for (const auto &idx : axes) {
814 1260 : if (idx >= TensorDim::MAXDIM) {
815 1 : throw std::out_of_range("axis greater than or equal to MAXDIM is invalid");
816 : }
817 1259 : ret_shape.setTensorDim(idx, getDim().getTensorDim(idx));
818 : }
819 :
820 947 : return this->sum(axes, output, 1.0 / (float)ret_shape.getDataLen());
821 : }
822 :
823 5003 : Tensor Tensor::average() const {
824 5003 : Tensor output = *this;
825 : unsigned int axis = 0;
826 5003 : if (this->getFormat() == Tformat::NHWC) {
827 0 : output.reshape({1, getDim().getDataLen(), 1, 1, this->getTensorType()});
828 : axis = 1;
829 : } else {
830 5003 : output.reshape({1, 1, 1, getDim().getDataLen(), this->getTensorType()});
831 : axis = 3;
832 : }
833 10006 : return output.average(axis);
834 5003 : }
835 :
836 0 : Tensor &Tensor::average(Tensor &output) const {
837 0 : Tensor result = *this;
838 0 : result.reshape({1, 1, 1, getDim().getDataLen()});
839 0 : return result.average(3, output);
840 0 : }
841 :
842 22 : int Tensor::pow_i(float exponent) {
843 22 : pow(exponent, *this);
844 22 : return ML_ERROR_NONE;
845 : }
846 :
847 13 : Tensor Tensor::pow(float exponent) const {
848 13 : Tensor output("", getFormat(), getDataType());
849 26 : return pow(exponent, output);
850 13 : }
851 :
852 790 : Tensor &Tensor::pow(float exponent, Tensor &output) const {
853 790 : itensor_->pow(exponent, output);
854 790 : return output;
855 : }
856 :
857 0 : int Tensor::sqrt_i() {
858 0 : this->sqrt(*this);
859 0 : return ML_ERROR_NONE;
860 : }
861 :
862 0 : Tensor Tensor::sqrt() const {
863 0 : Tensor output("", getFormat(), getDataType());
864 0 : return sqrt(output);
865 0 : };
866 :
867 6 : Tensor &Tensor::sqrt(Tensor &output) const {
868 12 : if (size() != output.size() || getDataType() != output.getDataType() ||
869 6 : getFormat() != output.getFormat())
870 : throw std::invalid_argument(
871 : "Error: Tensor::sqrt requires output tensor to be same size, data type "
872 0 : "and format as input tensor.");
873 :
874 6 : itensor_->sqrt(output);
875 6 : return output;
876 : };
877 :
878 0 : Tensor Tensor::neg() const {
879 0 : Tensor output("", getFormat(), getDataType());
880 0 : return neg(output);
881 0 : };
882 :
883 0 : Tensor &Tensor::neg(Tensor &output) const {
884 0 : if (size() != output.size() || getDataType() != output.getDataType() ||
885 0 : getFormat() != output.getFormat())
886 : throw std::invalid_argument(
887 : "Error: Tensor::sqrt requires output tensor to be same size, data type "
888 0 : "and format as input tensor.");
889 :
890 0 : itensor_->multiply(-1, output);
891 0 : return output;
892 : };
893 :
894 0 : int Tensor::erf_i() {
895 0 : erf(*this);
896 0 : return ML_ERROR_NONE;
897 : }
898 :
899 1 : Tensor Tensor::erf() const {
900 1 : Tensor output("", getFormat(), getDataType());
901 2 : return erf(output);
902 1 : }
903 :
904 1 : Tensor &Tensor::erf(Tensor &output) const {
905 1 : itensor_->erf(output);
906 1 : return output;
907 : }
908 :
909 12 : void Tensor::sin(Tensor &out, float alpha) const {
910 12 : if (size() != out.size())
911 1 : throw std::invalid_argument("Error: output tensor size of Tensor::sin must match input size");
912 :
913 11 : itensor_->sin(out, alpha);
914 11 : }
915 :
916 14 : void Tensor::cos(Tensor &out, float alpha) const {
917 14 : if (size() != out.size())
918 0 : throw std::invalid_argument("Error: output tensor size of Tensor::cos must match input size");
919 :
920 14 : itensor_->cos(out, alpha);
921 14 : }
922 :
923 6 : void Tensor::tan(Tensor &output, float alpha) const {
924 12 : if (size() != output.size() || getDataType() != output.getDataType() ||
925 6 : getFormat() != output.getFormat())
926 : throw std::invalid_argument(
927 : "Error: Tensor::abs requires output tensor to be same size, data type "
928 0 : "and format as input tensor.");
929 :
930 6 : itensor_->tan(output, alpha);
931 6 : }
932 :
933 1 : void Tensor::inv_sqrt_i() { itensor_->inv_sqrt(*this); }
934 :
935 3 : Tensor Tensor::inv_sqrt(Tensor &out) const {
936 3 : itensor_->inv_sqrt(out);
937 3 : return out;
938 : }
939 :
940 5019 : LazyTensor Tensor::chain() const { return LazyTensor(*this); }
941 :
942 1931 : float Tensor::l2norm() const { return itensor_->l2norm(); }
943 :
944 167 : void Tensor::normalization_i() {
945 167 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
946 0 : << getName() << " is not contiguous, cannot do normalization.";
947 :
948 167 : const float min = minValue();
949 167 : const float max = maxValue();
950 :
951 167 : if (max == min) {
952 1 : Tensor tmp = *this;
953 1 : this->subtract_i(tmp);
954 1 : } else {
955 166 : this->subtract_i(min);
956 166 : this->divide_i(max - min);
957 : }
958 167 : }
959 :
960 0 : void Tensor::standardization_i() {
961 0 : Tensor mean_by_batch = this->sum_by_batch();
962 0 : mean_by_batch.divide_i(static_cast<float>(getDim().getFeatureLen()));
963 :
964 0 : this->subtract_i(mean_by_batch);
965 0 : Tensor std_dev_by_batch(batch(), 1, 1, 1, getFormat(), getDataType());
966 0 : std_dev_by_batch.setZero();
967 :
968 : /// @todo remove conditional statement
969 0 : if (getDataType() == ml::train::TensorDim::DataType::FP32) {
970 : float *std_dev = std_dev_by_batch.getData<float>();
971 :
972 0 : for (unsigned int k = 0; k < batch(); ++k) {
973 0 : Tensor sub_this = this->getBatchSlice(k, 1);
974 0 : std_dev[k] = sub_this.l2norm();
975 0 : }
976 0 : } else if (getDataType() == ml::train::TensorDim::DataType::FP16) {
977 : #ifdef ENABLE_FP16
978 : _FP16 *std_dev = std_dev_by_batch.getData<_FP16>();
979 :
980 : for (unsigned int k = 0; k < batch(); ++k) {
981 : Tensor sub_this = this->getBatchSlice(k, 1);
982 : std_dev[k] = static_cast<_FP16>(sub_this.l2norm());
983 : }
984 : #else
985 0 : throw std::invalid_argument("Error: enable-fp16 is not enabled");
986 : #endif
987 : }
988 :
989 0 : std_dev_by_batch.divide_i(static_cast<float>(getDim().getFeatureLen()));
990 0 : this->divide_i(std_dev_by_batch);
991 0 : }
992 :
993 0 : void Tensor::dot(std::vector<Tensor *> input, std::vector<Tensor *> output,
994 : bool trans, bool trans_in, float beta) const {
995 0 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
996 0 : << getName() << " is not contiguous. Cannot dot product.";
997 :
998 0 : itensor_->dot(input, output, trans, trans_in, beta);
999 0 : }
1000 :
1001 2079 : Tensor Tensor::dot(Tensor const &input, bool trans, bool trans_in) const {
1002 2079 : Tensor output("", getFormat(), getDataType());
1003 2079 : dot(input, output, trans, trans_in);
1004 :
1005 2076 : return output;
1006 3 : }
1007 :
1008 : /**
1009 : * @note: This dot product flattens the first 3 axes for the purpose of
1010 : * computation, so the operands behave as 2-D matrices while the
1011 : * operation is performed. The original dimensions are restored in the
1012 : * returned tensor when trans is false.
1013 : */
1014 36830 : Tensor &Tensor::dot(Tensor const &input, Tensor &output, bool trans,
1015 : bool trans_in, float beta) const {
1016 36830 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
1017 0 : << getName() << " is not contiguous. Cannot dot product.";
1018 :
1019 36830 : itensor_->dot(input, output, trans, trans_in, beta);
1020 36827 : return output;
1021 : }
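// Editor's sketch of the note above (illustrative, not part of the build):
// because the first three axes are flattened, an NCHW tensor of dimension
// (B, C, H, W) is treated as a (B*C*H) x W matrix inside dot(). For example,
//   Tensor a(1, 1, 4, 8);               // behaves as a 4 x 8 matrix
//   Tensor b(1, 1, 8, 3);               // behaves as an 8 x 3 matrix
//   Tensor c = a.dot(b, false, false);  // c has dimension (1, 1, 4, 3)
// The trans / trans_in flags transpose the flattened views before multiplying.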
1022 :
1023 1457 : Tensor &Tensor::dot_deriv_wrt_1(Tensor const &m, Tensor const &output_deriv,
1024 : bool trans, bool trans_m, float beta) {
1025 : bool deriv_trans_m = true;
1026 : bool deriv_trans = false;
1027 : /** @todo handle all cases of trans and trans_m */
1028 1457 : if (!trans && trans_m) {
1029 : deriv_trans_m = false;
1030 : }
1031 :
1032 1457 : return output_deriv.dot(m, *this, deriv_trans, deriv_trans_m, beta);
1033 : }
1034 :
1035 : /**
1036 : * @brief compute the derivative wrt m in the m tensor
1037 : * @note The caller tensor must be the same tensor as the one which called the
1038 : * dot() product.
1039 : */
1040 6636 : Tensor &Tensor::dot_deriv_wrt_2(Tensor &m_deriv, Tensor const &output_deriv,
1041 : bool trans, bool trans_m, float beta) const {
1042 : bool deriv_trans_m = false;
1043 : bool deriv_trans = true;
1044 : /** @todo handle all cases of trans and trans_m */
1045 :
1046 6636 : if (!trans && trans_m) {
1047 0 : output_deriv.dot(*this, m_deriv, deriv_trans, deriv_trans_m, beta);
1048 0 : return m_deriv;
1049 : } else {
1050 6636 : return dot(output_deriv, m_deriv, deriv_trans, deriv_trans_m, beta);
1051 : }
1052 : }
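// Editor's note on the two *_deriv_wrt_* helpers above (a sketch of the math,
// not a normative statement): for C = A.dot(B) with both trans flags false,
// backpropagation gives
//   dA = dC.dot(B, false, true);    // dC * B^T, computed by dot_deriv_wrt_1
//   dB = A.dot(dC, true,  false);   // A^T * dC, computed by dot_deriv_wrt_2
// which matches the deriv_trans / deriv_trans_m combinations chosen above.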
1053 :
1054 759 : Tensor &Tensor::dotBatched(Tensor const &m, Tensor &result, bool trans,
1055 : bool trans_m, float beta) const {
1056 759 : if (!result.isAllocated())
1057 : throw std::invalid_argument(
1058 0 : "Output tensor must be preallocated for dotBatched operation");
1059 :
1060 759 : size_t lcm = std::lcm(batch(), m.batch());
1061 759 : size_t group_size = lcm / batch();
1062 759 : size_t m_group_size = lcm / m.batch();
1063 :
1064 761 : NNTR_THROW_IF(!((lcm == batch() || lcm == m.batch())), std::invalid_argument)
1065 : << "The batch size of the given twon tensors must be the same"
1066 : "or the bigger one should be a multiple of the smaller one";
1067 :
1068 4941 : for (unsigned int b = 0; b < lcm; b++) {
1069 : /** @todo try using transpose to speedup the operation */
1070 4184 : const Tensor this_b = this->getBatchSlice(b / group_size, 1);
1071 4184 : Tensor m_b = m.getBatchSlice(b / m_group_size, 1);
1072 4184 : Tensor result_b = result.getBatchSlice(b, 1);
1073 :
1074 4184 : this_b.dot(m_b, result_b, trans, trans_m, beta);
1075 4184 : }
1076 :
1077 757 : return result;
1078 : }
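// Editor's sketch (illustrative only): dotBatched broadcasts over the batch
// axis via the least common multiple of the two batch sizes. E.g. dotting a
// batch-1 tensor with a batch-4 tensor runs 4 slice-wise dot() calls, reusing
// the single batch of the smaller tensor for every slice of the larger one.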
1079 :
1080 176 : Tensor &Tensor::dot_batched_deriv_wrt_1(Tensor const &m,
1081 : Tensor const &output_deriv, bool trans,
1082 : bool trans_m, float beta) {
1083 : bool deriv_trans_m = true;
1084 : bool deriv_trans = false;
1085 : /** @todo handle all cases of trans and trans_m */
1086 176 : if (!trans && trans_m) {
1087 : deriv_trans_m = false;
1088 : }
1089 :
1090 176 : return output_deriv.dotBatched(m, *this, deriv_trans, deriv_trans_m, beta);
1091 : }
1092 :
1093 176 : Tensor &Tensor::dot_batched_deriv_wrt_2(Tensor &m_deriv,
1094 : Tensor const &output_deriv, bool trans,
1095 : bool trans_m, float beta) const {
1096 : bool deriv_trans_m = false;
1097 : bool deriv_trans = true;
1098 : /** @todo handle all cases of trans and trans_m */
1099 :
1100 176 : if (!trans && trans_m) {
1101 88 : output_deriv.dotBatched(*this, m_deriv, deriv_trans, deriv_trans_m, beta);
1102 88 : return m_deriv;
1103 : } else {
1104 88 : return dotBatched(output_deriv, m_deriv, deriv_trans, deriv_trans_m, beta);
1105 : }
1106 : }
1107 :
1108 0 : Tensor Tensor::dropout_mask(float dropout) const {
1109 0 : Tensor output(getDim());
1110 0 : output.dropout_mask(dropout);
1111 0 : return output;
1112 0 : }
1113 :
1114 10 : void Tensor::dropout_mask(float dropout) {
1115 : /// @todo add unittest
1116 10 : NNTR_THROW_IF(dropout < 0 || dropout > 1, std::invalid_argument)
1117 : << "[Tensor::dropout_mask] Dropout rate should be between 0 and 1";
1118 :
1119 : // if the rate is zero, no change is needed
1120 : if (std::fpclassify(dropout) == FP_ZERO)
1121 : return;
1122 :
1123 10 : setRandUniform(0.0, 1.0);
1124 10 : itensor_->dropout_mask(dropout);
1125 : }
1126 :
1127 0 : void Tensor::filter_mask(const Tensor &mask_len, bool reverse) {
1128 : /// @todo add unittest
1129 0 : itensor_->filter_mask(mask_len, reverse);
1130 0 : }
1131 :
1132 3 : Tensor Tensor::zoneout_mask(float zoneout) {
1133 3 : Tensor output(getDim());
1134 3 : zoneout_mask(output, zoneout);
1135 3 : return output;
1136 0 : }
1137 :
1138 4 : void Tensor::zoneout_mask(Tensor &opposite, float zoneout) {
1139 5 : NNTR_THROW_IF(getDim() != opposite.getDim(), std::invalid_argument)
1140 : << "[Tensor::zoneout_mask] opposite dimension does not match";
1141 :
1142 3 : NNTR_THROW_IF(zoneout < 0 || zoneout > 1, std::invalid_argument)
1143 : << "[Tensor::zoneout_mask] Zoneout rate should be between 0 and 1";
1144 :
1145 : // if the rate is zero, no change is needed
1146 : if (std::fpclassify(zoneout) == FP_ZERO)
1147 : return;
1148 :
1149 3 : itensor_->zoneout_mask(opposite, zoneout);
1150 : }
1151 :
1152 8 : std::vector<Tensor> Tensor::split(unsigned num_size, int axis) {
1153 10 : NNTR_THROW_IF(num_size == 0, std::invalid_argument)
1154 : << "num size cannot be zero";
1155 :
1156 6 : if (axis == -1) {
1157 : axis = 3;
1158 : }
1159 :
1160 6 : NNTR_THROW_IF(!(0 <= axis && axis < 4), std::invalid_argument)
1161 : << "cannot split axis of axis: " << axis;
1162 :
1163 7 : NNTR_THROW_IF(getDim().getTensorDim(axis) % num_size != 0,
1164 : std::invalid_argument)
1165 : << "axis is not divisible by num_size, axis: " << axis
1166 : << " num size: " << num_size;
1167 :
1168 : std::vector<size_t> sizes;
1169 5 : sizes.resize(num_size);
1170 :
1171 5 : unsigned int sz = getDim().getTensorDim(axis) / num_size;
1172 : std::fill(sizes.begin(), sizes.end(), sz);
1173 :
1174 10 : return split(sizes, axis);
1175 5 : }
1176 :
1177 14 : std::vector<Tensor> Tensor::split(std::vector<size_t> sizes, int axis) {
1178 14 : NNTR_THROW_IF(sizes.size() == 0, std::invalid_argument)
1179 : << "num size cannot be zero";
1180 :
1181 14 : NNTR_THROW_IF(!(-1 <= axis && axis < 4), std::invalid_argument)
1182 : << "cannot split axis of axis: " << axis;
1183 :
1184 15 : NNTR_THROW_IF(
1185 : std::any_of(sizes.begin(), sizes.end(), [](size_t sz) { return !sz; }),
1186 : std::invalid_argument)
1187 : << "among given sizes at least one of size is 0";
1188 :
1189 13 : return itensor_->split(sizes, axis);
1190 : }
1191 :
1192 5 : Tensor Tensor::concat(const std::vector<Tensor> &tensors, int axis,
1193 : Tensor &output) {
1194 5 : return itensor_->concat(tensors, axis, output);
1195 : }
1196 :
1197 11 : Tensor Tensor::cat(const std::vector<Tensor> &tensors, int axis) {
1198 11 : if (axis == -1) {
1199 : axis = 3;
1200 : }
1201 :
1202 : // Create an output tensor to store the concatenation result
1203 11 : TensorDim out_dim = Tensor::calculateConcatOutputDim(tensors, axis);
1204 5 : Tensor output = Tensor(out_dim);
1205 :
1206 10 : return output.concat(tensors, axis, output);
1207 5 : }
1208 :
1209 1 : Tensor Tensor::cat(const std::vector<Tensor> &tensors, int axis,
1210 : Tensor &output) {
1211 1 : if (axis == -1) {
1212 : axis = 3;
1213 : }
1214 :
1215 : // Check if the given output tensor dimension is valid
1216 1 : TensorDim out_dim = Tensor::calculateConcatOutputDim(tensors, axis);
1217 :
1218 1 : NNTR_THROW_IF(out_dim != output.getDim(), std::invalid_argument)
1219 2 : << "invalid output dim for concatenation " << output.getDim()
1220 : << "expected output dim " << out_dim;
1221 :
1222 0 : return output.concat(tensors, axis, output);
1223 : }
1224 :
1225 11 : void Tensor::print(std::ostream &out) const {
1226 11 : printInstance(out, this);
1227 11 : itensor_->print(out);
1228 11 : }
1229 :
1230 0 : void Tensor::putData() const { itensor_->putData(); }
1231 :
1232 138468 : void Tensor::setData(const std::shared_ptr<MemoryData> buf, size_t off,
1233 : bool init) {
1234 276936 : itensor_->setMemoryData(buf, off);
1235 :
1236 138468 : if (buf && init) {
1237 19084 : initialize();
1238 : }
1239 138468 : }
1240 :
1241 34174 : const std::shared_ptr<MemoryData> Tensor::getMemoryData() const {
1242 34174 : return itensor_->getMemoryData();
1243 : }
1244 :
1245 34173 : size_t Tensor::getOffset() const { return itensor_->getOffset(); }
1246 :
1247 97458 : void Tensor::copy(const Tensor &from) {
1248 : /// @todo enable copy to non-contiguous tensor
1249 97458 : if (!itensor_->getContiguous() || !from.getContiguous()) {
1250 3 : throw std::runtime_error("Cannot copy non-contiguous tensor");
1251 : }
1252 :
1253 279418 : if (from.size() != 0 && size() == from.size() &&
1254 266471 : scale_size() == from.scale_size() &&
1255 84508 : getDataType() == from.getDataType()) {
1256 : // if tensor size and data type match, copy data
1257 84508 : itensor_->copy(from);
1258 : } else {
1259 12947 : Tensor t = Tensor(from.getDim(), from.getData<char>());
1260 : swap(t, *this);
1261 12947 : }
1262 97455 : }
1263 :
1264 13734 : void Tensor::copyData(const Tensor &from) { itensor_->copyData(from); }
1265 :
1266 3511 : void Tensor::copy_with_stride(const Tensor &from) {
1267 3511 : if (itensor_->getDim() == from.getDim()) {
1268 : // If the tensor dim matches, copy the data. This also applies to
1269 : // non-contiguous tensors.
1270 5 : itensor_->copy_with_stride(from, *this);
1271 : } else {
1272 : // replace with a new tensor that has the same data as the given tensor
1273 3506 : Tensor t = Tensor(from.getDim(), true);
1274 3506 : itensor_->copy_with_stride(from, t);
1275 : swap(t, *this);
1276 3506 : }
1277 3511 : }
1278 :
1279 230846 : Tensor Tensor::getBatchSlice(size_t offset, unsigned int size) const {
1280 230846 : TensorDim dim_ = getDim();
1281 230846 : dim_.batch(size);
1282 :
1283 461692 : return getSharedDataTensor(dim_, offset * this->getDim().getFeatureLen(),
1284 692536 : true, "");
1285 : }
1286 :
1287 107 : Tensor Tensor::getBatchSlice(const std::vector<unsigned int> &indices) const {
1288 :
1289 : // Validate tensor contiguity
1290 107 : NNTR_THROW_IF(!this->getContiguous(), std::runtime_error)
1291 : << "getBatchSlice requires contiguous tensor layer";
1292 :
1293 : // Validate indices vector is not empty
1294 108 : NNTR_THROW_IF(indices.empty(), std::invalid_argument)
1295 : << "Indices vector cannot be empty";
1296 :
1297 : // Validate indices
1298 106 : const unsigned batch_size = getDim().batch();
1299 217 : for (auto idx : indices) {
1300 113 : NNTR_THROW_IF(idx >= batch_size, std::out_of_range)
1301 : << "Batch index " << idx << " out of range [0," << batch_size << ")";
1302 : }
1303 :
1304 : // Get original tensor dimensions
1305 105 : const TensorDim &orig_dim = this->getDim();
1306 105 : const size_t element_size = orig_dim.getDataTypeSize();
1307 :
1308 : // Calculate single batch size in elements
1309 105 : const size_t single_batch_size = orig_dim.getFeatureLen();
1310 :
1311 : // Create output tensor with selected batches
1312 105 : TensorDim new_dim = orig_dim;
1313 105 : new_dim.batch(indices.size());
1314 105 : Tensor output(new_dim);
1315 :
1316 : // Validate output tensor size
1317 105 : const size_t output_bytes = output.bytes();
1318 105 : const size_t single_batch_bytes = single_batch_size * element_size;
1319 :
1320 : // Get raw data pointers
1321 : const unsigned char *src_data =
1322 : static_cast<const unsigned char *>(this->getData<unsigned char>());
1323 : unsigned char *dst_data =
1324 : static_cast<unsigned char *>(output.getData<void>());
1325 :
1326 : // Parallel copy using OpenMP
1327 105 : #pragma omp parallel for schedule(static)
1328 : for (int i = 0; i < static_cast<int>(indices.size()); ++i) {
1329 : const unsigned batch_idx = indices[i];
1330 :
1331 : // Calculate memory offsets
1332 : const size_t src_offset =
1333 : static_cast<size_t>(batch_idx) * single_batch_bytes;
1334 : const size_t dst_offset = static_cast<size_t>(i) * single_batch_bytes;
1335 :
1336 : // Bounds check for destination buffer
1337 : NNTR_THROW_IF(dst_offset + single_batch_bytes > output_bytes,
1338 : std::runtime_error)
1339 : << "Destination buffer overflow detected";
1340 :
1341 : // Perform memory copy
1342 : std::memcpy(dst_data + dst_offset, src_data + src_offset,
1343 : single_batch_bytes);
1344 : }
1345 :
1346 105 : return output;
1347 0 : }
1348 :
1349 7492 : Tensor Tensor::clone() const {
1350 14984 : Tensor output(getName(), getFormat(), getDataType());
1351 7492 : output.copy(*this);
1352 7492 : return output;
1353 0 : }
1354 :
1355 1086 : Tensor Tensor::clone(ml::train::TensorDim::DataType type) const {
1356 1086 : if (getDataType() == type)
1357 1078 : return clone();
1358 8 : TensorDim dim = getDim();
1359 : dim.setDataType(type);
1360 8 : Tensor output(dim, true);
1361 8 : output.copyData(*this);
1362 8 : output.setName(getName());
1363 : return output;
1364 8 : }
1365 :
1366 0 : void Tensor::readFSU() { itensor_->readFSU(); }
1367 :
1368 2133 : void Tensor::save(std::ostream &file) {
1369 2133 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
1370 0 : << getName() << " is not contiguous, cannot save.";
1371 :
1372 2133 : itensor_->save(file);
1373 2133 : }
1374 :
1375 23197 : void Tensor::read(std::ifstream &file, size_t start_offset,
1376 : bool read_from_offset, int file_fd) {
1377 23197 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
1378 0 : << getName() << " is not contiguous, cannot read.";
1379 :
1380 : // save the start_offset_info
1381 23197 : read_offset = start_offset;
1382 :
1383 : // Do not read now but save file_fd in tensor
1384 23197 : if (is_virtual) {
1385 0 : fd = file_fd;
1386 0 : return;
1387 : }
1388 :
1389 23197 : itensor_->read(file, start_offset, read_from_offset);
1390 : }
1391 :
1392 0 : void Tensor::read(ReadSource src, size_t start_offset, bool read_from_offset) {
1393 0 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
1394 0 : << getName() << " is not contiguous, cannot read.";
1395 :
1396 0 : itensor_->read(src, start_offset, read_from_offset);
1397 0 : }
1398 :
1399 650 : std::vector<unsigned int> Tensor::argmax() const {
1400 650 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
1401 0 : << getName() << " is not contiguous, cannot get argmax.";
1402 650 : return itensor_->argmax();
1403 : }
1404 :
1405 0 : std::vector<unsigned int> Tensor::argmin() const {
1406 0 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
1407 0 : << getName() << " is not contiguous, cannot get argmin.";
1408 0 : return itensor_->argmin();
1409 : }
1410 :
1411 9 : std::pair<Tensor, Tensor> Tensor::topK(unsigned int k) const {
1412 :
1413 : // Create output tensor with modified W dimension
1414 9 : TensorDim output_dim = getDim();
1415 9 : TensorDim indices_dim = getDim();
1416 : Tformat format = output_dim.getFormat();
1417 :
1418 : // Validate k is within width dimension size
1419 9 : unsigned int width_size = output_dim.width();
1420 11 : NNTR_THROW_IF(k == 0 || k > width_size, std::invalid_argument)
1421 : << "k must be between 1 and width dimension size (" << width_size << ")";
1422 :
1423 : // Set new width dimension to k
1424 7 : output_dim.width(k);
1425 7 : indices_dim.width(k);
1426 : indices_dim.setDataType(Tdatatype::UINT32); // Set indices data type to UINT32
1427 :
1428 : // Create output tensor
1429 7 : Tensor output(output_dim);
1430 7 : output.allocate();
1431 7 : Tensor indices(indices_dim);
1432 7 : indices.allocate();
1433 :
1434 : // Prepare output buffer
1435 : void *output_data = output.getData<void>();
1436 : uint32_t *indices_data = indices.getData<uint32_t>();
1437 :
1438 : // Call TopK implementation
1439 7 : itensor_->topK(k, output_data, indices_data);
1440 :
1441 12 : return {output, indices};
1442 8 : }
1443 :
1444 7 : float Tensor::max_abs() const {
1445 7 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
1446 0 : << getName() << " is not contiguous, cannot get max_abs.";
1447 7 : return itensor_->max_abs();
1448 : }
1449 :
1450 168 : float Tensor::maxValue() const { return itensor_->maxValue(); }
1451 :
1452 175 : float Tensor::minValue() const { return itensor_->minValue(); }
1453 :
1454 1107 : Tensor Tensor::transpose(const std::string &direction) const {
1455 1107 : Tensor output(getDim());
1456 1107 : transpose(direction, output);
1457 1107 : return output;
1458 0 : }
1459 :
1460 1114 : Tensor &Tensor::transpose(const std::string &direction, Tensor &output) const {
1461 1114 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
1462 0 : << getName() << " is not contiguous. Cannot transpose.";
1463 :
1464 1114 : if (output.getData<char>() == getData<char>()) {
1465 0 : Tensor result = clone();
1466 0 : return result.transpose(direction, output);
1467 0 : }
1468 :
1469 1114 : itensor_->transpose(direction, output);
1470 :
1471 1113 : return output;
1472 : }
1473 :
1474 29183 : void Tensor::reshape(const TensorDim &d) { itensor_->reshape(d); }
1475 :
1476 3261 : void Tensor::fill(const Tensor &from, bool allocate) {
1477 3261 : if (allocate && this->empty()) {
1478 1 : this->copy(from);
1479 1 : return;
1480 : }
1481 :
1482 3260 : if (!from.getContiguous() || !getContiguous()) {
1483 : /// @todo enable this if needed
1484 : throw nntrainer::exception::not_supported(
1485 0 : "[Tensor::fill] non-contiguous tensors are not supported");
1486 : }
1487 :
1488 3260 : if (getDim() != from.getDim()) {
1489 2 : throw std::invalid_argument("[Tensor::fill] dimension must be the same");
1490 : }
1491 :
1492 3258 : if (getStrides() != from.getStrides()) {
1493 : /// @todo length does not represent buffer size, there should be way to
1494 : /// get the buffer size
1495 0 : throw std::invalid_argument("[Tensor::fill] buffer size must be the same");
1496 : }
1497 :
1498 3258 : copyData(from);
1499 : }
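                     : // Illustrative usage sketch (comment only): given an allocated Tensor src,
                     : // copy its contents into another tensor of identical shape and stride;
                     : // mismatched dimensions raise std::invalid_argument as checked above.
                     : //   Tensor dst(src.getDim());
                     : //   dst.allocate();
                     : //   dst.fill(src, false);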
1500 :
1501 876169 : TensorDim Tensor::getDim() const { return itensor_->getDim(); }
1502 :
1503 15500 : TensorDim::TensorType Tensor::getTensorType() const {
1504 15500 : return itensor_->getTensorType();
1505 : };
1506 :
1507 1434 : Initializer Tensor::getInitializer() const {
1508 1434 : return itensor_->getInitializer();
1509 : }
1510 :
1511 421497 : TensorDim::Format Tensor::getFormat() const { return itensor_->getFormat(); }
1512 :
1513 963682 : Tdatatype Tensor::getDataType() const { return itensor_->getDataType(); }
1514 :
1515 20419 : void Tensor::updateBatch(unsigned int batch) { itensor_->updateBatch(batch); }
1516 :
1517 0 : void Tensor::updateDimension(TensorDim dimension) {
1518 0 : itensor_->updateDimension(dimension);
1519 0 : }
1520 :
1521 679340 : const bool Tensor::getContiguous() const noexcept {
1522 679340 : return itensor_->getContiguous();
1523 : }
1524 :
1525 : const std::array<size_t, TensorDim::MAXDIM>
1526 81265 : Tensor::getStrides() const noexcept {
1527 81265 : return itensor_->getStrides();
1528 : }
1529 :
1530 170205 : bool Tensor::checkContinuous(unsigned int np1, unsigned int np2) const {
1531 170205 : if (np1 > 3 || np2 > 3) {
1532 : throw std::invalid_argument(
1533 0 : "Error: Input value must be within the range of 0 to 3.");
1534 : }
1535 :
1536 170205 : if (getFormat() == Tformat::NCHW) {
1537 170205 : if (np1 + 1 == np2)
1538 : return true;
1539 : } else {
1540 0 : std::vector<unsigned int> continuous_order_nhwc = {0, 3, 1, 2};
1541 0 : if (continuous_order_nhwc[np2] == continuous_order_nhwc[np1] + 1)
1542 : return true;
1543 0 : }
1544 :
1545 : return false;
1546 : }
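                     : // Worked example for checkContinuous(): the indices refer to the logical
                     : // NCHW axes (0=N, 1=C, 2=H, 3=W). For NCHW layout two axes are continuous
                     : // exactly when np2 == np1 + 1. For NHWC the memory order is N,H,W,C, so the
                     : // table {0, 3, 1, 2} gives each logical axis its position in memory:
                     : //   (np1=2, np2=3): positions 1 and 2 -> continuous (H and W are adjacent)
                     : //   (np1=1, np2=2): positions 3 and 1 -> not continuous (C is last in memory)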
1547 :
1548 0 : void Tensor::setFileOffset(const size_t file_offset) {
1549 0 : itensor_->setFileOffset(file_offset);
1550 0 : }
1551 :
1552 0 : size_t Tensor::getFileOffset() const { return itensor_->getFileOffset(); }
1553 :
1554 1199 : void Tensor::setName(const std::string &name_) { itensor_->setName(name_); }
1555 :
1556 85744 : const std::string &Tensor::getName() const { return itensor_->getName(); }
1557 :
1558 61724067 : size_t Tensor::getIndex(unsigned int b, unsigned int c, unsigned int h,
1559 : unsigned int w) const noexcept {
1560 61724067 : return itensor_->getIndex(b, c, h, w);
1561 : }
1562 :
1563 19476946 : size_t Tensor::size() const { return itensor_->size(); }
1564 :
1565 645013 : bool Tensor::empty() const { return itensor_->empty(); }
1566 :
1567 129 : size_t Tensor::bytes() const { return itensor_->bytes(); }
1568 :
1569 38188 : size_t Tensor::getMemoryBytes() const { return itensor_->getMemoryBytes(); }
1570 :
1571 118275 : size_t Tensor::batch() const { return itensor_->batch(); }
1572 :
1573 56588 : size_t Tensor::channel() const { return itensor_->channel(); }
1574 :
1575 75988 : size_t Tensor::height() const { return itensor_->height(); }
1576 :
1577 234536 : size_t Tensor::width() const { return itensor_->width(); }
1578 :
1579 169020 : size_t Tensor::scale_size() const { return itensor_->scale_size(); }
1580 :
1581 5 : QScheme Tensor::q_scheme() const { return itensor_->q_scheme(); }
1582 :
1583 169889 : void Tensor::mergeAxis(unsigned int axis1, unsigned int axis2) {
1584 169889 : NNTR_THROW_IF(!getContiguous(), std::invalid_argument)
1585 0 : << getName() << " is not contiguous, cannot merge axis";
1586 :
1587 169889 : if (axis2 != axis1 + 1)
1588 0 : if (!checkContinuous(axis1, axis2))
1589               0 :       throw std::invalid_argument("axes must be adjacent (axis2 == axis1 + 1) or continuous in memory to be merged.");
1590 :
1591 169889 : itensor_->mergeAxis(axis1, axis2);
1592 169889 : }
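                     : // Illustrative note: merging two adjacent axes collapses them into one, e.g.
                     : // merging axes 1 and 2 of a (B, C, H, W) tensor produces a single axis of
                     : // size C * H (the exact dimension placement is handled by the underlying
                     : // itensor_ implementation).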
1593 :
1594 0 : void Tensor::createSharedDataTensor(const Tensor &src, Tensor &dest,
1595 : size_t offset) const {
1596 0 : itensor_->createSharedDataTensor(src.itensor_.get(), dest.itensor_.get(),
1597 : offset);
1598 0 : }
1599 :
1600 301856 : Tensor Tensor::getSharedDataTensor(const TensorDim dim_, size_t offset,
1601 : bool reset_stride,
1602 : const std::string &name_) const {
1603 301856 : Tensor ret = *this;
1604 301856 : itensor_->getSharedDataTensor(dim_, offset, reset_stride, name_,
1605 : ret.itensor_.get());
1606 301854 : return ret;
1607 2 : }
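                     : // Illustrative usage sketch (comment only): create a view that shares the
                     : // underlying buffer instead of copying, starting at the given offset into
                     : // the source tensor.
                     : //   Tensor whole(TensorDim(1, 1, 1, 8));
                     : //   whole.allocate();
                     : //   Tensor half = whole.getSharedDataTensor(TensorDim(1, 1, 1, 4), 4, true, "half");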
1608 :
1609 0 : void Tensor::activate() {
1610 :
1611 0 : NNTR_THROW_IF(!is_virtual, std::invalid_argument)
1612 : << "non-virtual tensor cannot call activate()";
1613 : #if defined(_WIN32)
1614 : NNTR_THROW_IF(true, std::invalid_argument)
1615 : << "[Error/VirtualTensor] virtual tensor is not supported on Windows";
1616 : #else
1617 :
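                     :   // mmap() requires the file offset to be page-aligned, so round the tensor's
                     :   // file offset down to a page boundary (4096 bytes assumed) and remember the
                     :   // remainder so the data pointer can be moved back up after mapping.
                     :   // e.g. file_offset = 10000 -> off = 8192, diff = 1808, len = bytes + 1808.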
1618 0 : auto file_offset = getFileOffset();
1619 0 : size_t off = (file_offset / 4096) * 4096;
1620 0 : size_t diff = file_offset - off;
1621 0 : size_t len = getMemoryBytes() + diff;
1622 :
1623               0 :   mapped_ptr = mmap(NULL, len, PROT_READ, MAP_PRIVATE, this->fd, off);
1624               0 :   if (mapped_ptr == MAP_FAILED) {
1625               0 :     std::cerr << "[activate] mmap failed: " << strerror(errno) << std::endl;
                     :     return;
1626                 :   }
1627                 : #ifdef __ANDROID__
1628                 :   madvise(mapped_ptr, len, MADV_WILLNEED);
1629                 : #endif
1630               0 :   itensor_->activate((void *)&((uint8_t *)mapped_ptr)[diff]);
1631 : #endif
1632 0 : }
1633 :
1634 0 : void Tensor::deactivate() {
1635 :
1636 0 : NNTR_THROW_IF(!is_virtual, std::invalid_argument)
1637 : << "non-virtual tensor cannot call deactivate()";
1638 : #if defined(_WIN32)
1639 : NNTR_THROW_IF(true, std::invalid_argument)
1640 : << "[Error/VirtualTensor] virtual tensor is not supported on Windows";
1641 : #else
1642 :
1643 0 : if (mapped_ptr == nullptr) {
1644 0 : return;
1645                 :   }
1646 :
1647 0 : auto file_offset = getFileOffset();
1648 : size_t off = (file_offset / 4096) * 4096;
1649 : size_t diff = file_offset - off;
1650 0 : size_t len = getMemoryBytes() + diff;
1651 :
1652 0 : auto ret_munmap = munmap((void *)mapped_ptr, len);
1653 : const size_t error_buflen = 100;
1654 : char error_buf[error_buflen];
1655 0 : NNTR_THROW_IF(ret_munmap == -1, std::runtime_error)
1656 : << "[deactivate] munmap failed: "
1657 0 : << SAFE_STRERROR(errno, error_buf, error_buflen);
1658 :
1659 0 : mapped_ptr = nullptr;
1660 0 : itensor_->deactivate();
1661 : #endif
1662 : }
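                     : // Illustrative lifecycle sketch (comment only) for a virtual, file-backed
                     : // tensor whose read() only recorded the file descriptor and offset:
                     : //   t.activate();   // mmap the weight region and hand the pointer to itensor_
                     : //   // ... use t for computation ...
                     : //   t.deactivate(); // munmap the region and detach itensor_ from it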
1663 :
1664 46136 : void Tensor::setTensorVar(TensorDim d, void *buf, size_t offset) {
1665 46136 : itensor_->setTensorVar(d, buf, offset);
1666 46136 : }
1667 :
1668 12 : TensorDim Tensor::calculateConcatOutputDim(const std::vector<Tensor> &tensors,
1669 : int axis) {
1670 : // Check axis, in which the tensors are concatenated, is valid.
1671 12 : NNTR_THROW_IF(!(-1 <= axis && axis < 4), std::invalid_argument)
1672 : << "cannot concatenate tensors along an axis: " << axis;
1673                 :     << "cannot concatenate tensors along an invalid axis: " << axis;
1674 : // Check if the number of input tensors is valid.
1675 14 : NNTR_THROW_IF(tensors.size() <= 1, std::invalid_argument)
1676 : << "received an invalid tensor vector. size must be greater than 1.";
1677 :
1678 10 : auto out_dim = tensors.front().getDim();
1679 :
1680 : // Check if all tensor data types are the same.
1681 30 : for (auto &t : tensors) {
1682 22 : NNTR_THROW_IF(t.getDataType() != out_dim.getDataType(),
1683 : std::invalid_argument)
1684 : << "cannot concatenate tensors with different data types.";
1685 : }
1686 :
1687 : // Compute the dimensions of an output tensor.
1688 9 : out_dim.setTensorDim(axis, 1);
1689 31 : NNTR_THROW_IF(!std::all_of(tensors.begin(), tensors.end(),
1690 : [&out_dim, axis](const Tensor &t) {
1691 : auto cur_dim = t.getDim();
1692 : cur_dim.setTensorDim(axis, 1);
1693 : return out_dim == cur_dim;
1694 : }),
1695 : std::invalid_argument)
1696                 :     << "all tensors must have the same dimension except along the concat axis, out_dim: "
1697 : << out_dim << " axis : " << axis;
1698 :
1699 6 : auto axis_dim = std::accumulate(tensors.begin(), tensors.end(), 0u,
1700 13 : [axis](unsigned cur, const Tensor &t) {
1701 13 : return cur += t.getDim().getTensorDim(axis);
1702 : });
1703 :
1704 6 : out_dim.setTensorDim(axis, axis_dim);
1705 6 : return out_dim;
1706 : }
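                     : // Worked example (comment only): concatenating tensors of dims (1,2,3,4),
                     : // (1,2,5,4) and (1,2,1,4) along axis 2 passes the checks above (all dims
                     : // match once the concat axis is forced to 1) and yields an output dim of
                     : // (1, 2, 3+5+1, 4) = (1, 2, 9, 4).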
1707 :
1708 10 : std::ostream &operator<<(std::ostream &out, Tensor const &input) {
1709 10 : input.print(out);
1710 10 : return out;
1711 : }
1712 :
1713 : } // namespace nntrainer
|