Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * @file int4_tensor.cpp
4 : * @date 23 January 2025
5 : * @brief This is Int4QTensor class for quantized 4-bit integer calculation
6 : * @see https://github.com/nnstreamer/nntrainer
7 : * @author Donghyeon Jeong <dhyeon.jeong@samsung.com>
8 : * @bug No known bugs except for NYI items
9 : */
10 :
11 : #include <iomanip>
12 : #include <iostream>
13 :
14 : #include <cpu_backend.h>
15 : #include <int4_tensor.h>
16 : #include <tensor.h>
17 :
18 : namespace nntrainer {
19 :
// Group size used by group-wise (PER_CHANNEL_AFFINE) quantization.
// NOTE(review): this is a class-static, so it is shared by every
// Int4QTensor instance and rewritten by each constructor's g_size —
// confirm per-instance group sizes are not required.
size_t Int4QTensor::group_size = 32;
21 :
/**
 * @brief Construct an unallocated, named Int4QTensor with the given
 *        format and quantization scheme.
 *
 * NOTE(review): group_size is static; this assignment affects all
 * Int4QTensor instances, not just this one — confirm intended.
 */
Int4QTensor::Int4QTensor(std::string name_, Tformat fm, QScheme qscheme_,
                         size_t g_size) :
  TensorBase(name_, fm, Tdatatype::QINT4), qscheme(qscheme_) {
  group_size = g_size;
}
27 :
/**
 * @brief Construct an Int4QTensor with the given dimension, optionally
 *        allocating the packed int4 payload (plus scale storage) now.
 */
Int4QTensor::Int4QTensor(const TensorDim &d, bool alloc_now, Initializer init,
                         std::string name, QScheme qscheme_, size_t g_size) :
  TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
  group_size = g_size; // static member: shared across instances
  if (alloc_now)
    allocate();
}
35 :
36 2 : Int4QTensor::Int4QTensor(const TensorDim &d, const void *buf, QScheme qscheme_,
37 2 : size_t g_size) :
38 2 : Int4QTensor(d, true, Initializer::NONE, "", qscheme_, g_size) {
39 2 : if (d.getDataLen() != 0) {
40 2 : if (buf != nullptr)
41 0 : copy(buf);
42 : }
43 2 : }
44 :
45 5 : Int4QTensor::Int4QTensor(
46 : std::vector<std::vector<std::vector<std::vector<int8_t>>>> const &d,
47 : std::vector<float> const &scales, Tformat fm, QScheme qscheme_,
48 5 : size_t g_size) :
49 5 : qscheme(qscheme_) {
50 5 : group_size = g_size;
51 5 : if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
52 : throw std::out_of_range(
53 0 : "[Tensor] trying to initialize Int4QTensor from empty vector");
54 : }
55 :
56 6 : NNTR_THROW_IF(scales.size() != scale_size(), std::invalid_argument)
57 : << "invalid scale factor size " << scales.size();
58 :
59 4 : dim.setTensorDim(0, d.size());
60 4 : if (fm == Tformat::NCHW) {
61 4 : dim.setTensorDim(1, d[0].size());
62 4 : dim.setTensorDim(2, d[0][0].size());
63 4 : dim.setTensorDim(3, d[0][0][0].size());
64 : } else {
65 0 : dim.setTensorDim(2, d[0].size());
66 0 : dim.setTensorDim(3, d[0][0].size());
67 0 : dim.setTensorDim(1, d[0][0][0].size());
68 : }
69 :
70 : dim.setTensorType({fm, Tdatatype::QINT4});
71 :
72 4 : strides = dim.computeStrides();
73 4 : contiguous = true;
74 4 : initializer = Initializer::NONE;
75 4 : qscheme = qscheme_;
76 :
77 : /// @note sizeof(float) * scale_size() assumes scale factors are in
78 : /// full-precision fp.
79 : MemoryData *mem_data =
80 4 : new MemoryData((void *)(new int8_t[(dim.getDataLen() + 1) / 2 +
81 104 : sizeof(float) * scale_size()]()));
82 5 : data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
83 4 : delete[] mem_data->getAddr<int8_t>();
84 4 : delete mem_data;
85 : });
86 :
87 4 : offset = 0;
88 :
89 4 : if (fm == Tformat::NCHW) {
90 10 : for (unsigned int i = 0; i < batch(); ++i)
91 17 : for (unsigned int j = 0; j < channel(); ++j)
92 48 : for (unsigned int k = 0; k < height(); ++k)
93 195 : for (unsigned int l = 0; l < width(); ++l)
94 158 : this->setValue(i, j, k, l, d[i][j][k][l]);
95 : } else {
96 0 : for (unsigned int i = 0; i < batch(); ++i)
97 0 : for (unsigned int j = 0; j < height(); ++j)
98 0 : for (unsigned int k = 0; k < width(); ++k)
99 0 : for (unsigned int l = 0; l < channel(); ++l)
100 0 : this->setValue(i, l, j, k, d[i][j][k][l]);
101 : }
102 :
103 : // copy scale factors
104 4 : scopy(scale_size(), scales.data(), 1, (float *)getScale(), 1);
105 5 : }
106 :
107 2 : bool Int4QTensor::operator==(const Int4QTensor &rhs) const {
108 2 : if (qscheme != rhs.qscheme)
109 : return false;
110 :
111 : // compare quantized data
112 2 : const int8_t *_data = (int8_t *)getData();
113 2 : const int8_t *_rdata = (int8_t *)rhs.getData();
114 14 : for (size_t i = 0; i < (size() + 1) / 2; ++i) {
115 13 : if (_data[i] != _rdata[i])
116 : return false;
117 : }
118 :
119 : // compare scale factors
120 1 : const float *_scales = (float *)getScale();
121 1 : const float *_rscales = (float *)rhs.getScale();
122 2 : for (size_t i = 0; i < scale_size(); ++i) {
123 1 : if (std::fabs(_scales[i] - _rscales[i]) > 1e-5)
124 : return false;
125 : }
126 :
127 : return true;
128 : }
129 :
/**
 * @brief Allocate backing memory for the packed int4 payload and the
 *        scale factors. No-op when the tensor is empty or already has data.
 */
void Int4QTensor::allocate() {
  if (empty() || data)
    return;

  if (src_tensor) {
    /// allocate data based on the source tensor
    allocateSrcTensor();
    /** as this memory is shared, do NOT initialize */
  } else {
    /// allocate new memory for the tensor data
    MemoryData *mem_data;

    /// quantized 4-bit is stored as a 8-bit signed integer (int4x2)
    /// layout: ceil(len/2) packed bytes followed by scale_size() floats
    mem_data =
      new MemoryData((void *)(new int8_t[(dim.getDataLen() + 1) / 2 +
                                         sizeof(float) * scale_size()]{}));
    // custom deleter frees both the int8 buffer and the MemoryData wrapper
    data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
      delete[] mem_data->template getAddr<int8_t>();
      delete mem_data;
    });

    offset = 0;
    initialize();
  }
}
155 :
156 0 : void Int4QTensor::deallocate() {
157 : data = nullptr;
158 0 : offset = 0;
159 0 : }
160 :
161 496 : void *Int4QTensor::getData() const {
162 496 : if (!data)
163 : return nullptr;
164 :
165 : data->validate();
166 496 : return data->getAddr<int8_t>() + offset;
167 : }
168 :
169 0 : void *Int4QTensor::getData(size_t idx) const {
170 0 : if (!data)
171 : return nullptr;
172 :
173 : data->validate();
174 0 : return data->getAddr<int8_t>() + offset + (idx / 2);
175 : }
176 :
177 7 : void *Int4QTensor::getScale() const {
178 7 : if (!data)
179 : return nullptr;
180 :
181 : data->validate();
182 7 : return ((int8_t *)getData()) + (size() + 1) / 2;
183 : }
184 :
/**
 * @brief Pointer to the idx-th scale factor.
 * @throws std::invalid_argument when idx exceeds scale_size()
 *
 * NOTE(review): the guard is `idx > scale_size()`, so idx == scale_size()
 * (one past the last scale) is allowed — possibly intended for end
 * pointers, but confirm it is not an off-by-one.
 */
void *Int4QTensor::getScale(size_t idx) const {
  NNTR_THROW_IF(idx > scale_size(), std::invalid_argument)
    << "Tensor::getScale() index is not valid";

  if (!data)
    return nullptr;

  data->validate();
  return ((float *)getScale()) + idx;
}
195 :
/**
 * @brief Address of the byte holding element i's nibble, or nullptr when
 *        i is out of range.
 *
 * NOTE(review): the bound is getIndex(batch(), channel(), height(),
 * width()) — an index one past the last element — and the check is
 * `i > index`, so i == index passes; this mirrors the sibling tensor
 * classes but verify the intended inclusive/exclusive bound.
 */
void *Int4QTensor::getAddress(unsigned int i) {
  size_t index = getIndex(batch(), channel(), height(), width());
  if (i > index) {
    return nullptr;
  }
  // two int4 elements share each byte
  return &((int8_t *)getData())[i / 2];
}
203 :
/**
 * @brief Const overload of getAddress(); see the non-const version for
 *        the bound-check caveat.
 */
const void *Int4QTensor::getAddress(unsigned int i) const {
  size_t index = getIndex(batch(), channel(), height(), width());
  if (i > index) {
    return nullptr;
  }
  // two int4 elements share each byte
  return &((int8_t *)getData())[i / 2];
}
211 :
212 125 : const int8_t Int4QTensor::getValue(unsigned int i) const {
213 125 : int8_t value = ((int8_t *)getData())[i / 2];
214 125 : return (i % 2 == 0) ? value >> 4 : ((int8_t)(value << 4) >> 4);
215 : }
216 :
217 2 : int8_t Int4QTensor::getValue(unsigned int i) {
218 2 : int8_t value = ((int8_t *)getData())[i / 2];
219 2 : return (i % 2 == 0) ? value >> 4 : ((int8_t)(value << 4) >> 4);
220 : }
221 :
222 0 : const int8_t Int4QTensor::getValue(unsigned int b, unsigned int c,
223 : unsigned int h, unsigned int w) const {
224 0 : return getValue(getIndex(b, c, h, w));
225 : }
226 :
227 0 : int8_t Int4QTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
228 : unsigned int w) {
229 0 : return getValue(getIndex(b, c, h, w));
230 : }
231 :
232 : /// @todo this func should be template function
233 5 : void Int4QTensor::setValue(float value) {
234 6 : NNTR_THROW_IF(value < -8 || value > 7, std::out_of_range)
235 : << "Value must be in range [-8, 7]. Input value: " << value;
236 :
237 4 : int8_t val = static_cast<int8_t>(value);
238 4 : int8_t *data = (int8_t *)getData();
239 4 : std::fill(data, data + (size() + 1) / 2, (val << 4) | (val & 0x0f));
240 4 : }
241 :
242 : /// @todo this func should be template function
243 2 : void Int4QTensor::addValue(unsigned int b, unsigned int c, unsigned int h,
244 : unsigned int w, float value, float beta) {
245 2 : auto const &idx = getIndex(b, c, h, w);
246 2 : float output = getValue(idx);
247 2 : output *= beta;
248 2 : output += value;
249 :
250 : // if result value is out of range, clamp to max/min value
251 2 : int8_t val = static_cast<int8_t>(std::trunc(std::clamp((int)output, -8, 7)));
252 :
253 : // encode result value to int8 data
254 2 : ((int8_t *)getData())[idx / 2] =
255 2 : (idx % 2 == 0) ? (val << 4) | (((int8_t *)getData())[idx / 2] & 0x0f)
256 0 : : (((int8_t *)getData())[idx / 2] & 0xf0) | (val & 0x0f);
257 2 : }
258 :
259 : /// @todo this func should be template function
260 158 : void Int4QTensor::setValue(unsigned int b, unsigned int c, unsigned int h,
261 : unsigned int w, float value) {
262 158 : NNTR_THROW_IF(value < -8 || value > 7, std::out_of_range)
263 : << "Value must be in range [-8, 7]. Input value: " << value;
264 :
265 158 : auto const &idx = getIndex(b, c, h, w);
266 158 : int8_t val = static_cast<int8_t>(value);
267 :
268 158 : ((int8_t *)getData())[idx / 2] =
269 158 : (idx % 2 == 0) ? (val << 4) | (((int8_t *)getData())[idx / 2] & 0x0f)
270 78 : : (((int8_t *)getData())[idx / 2] & 0xf0) | (val & 0x0f);
271 158 : }
272 :
/**
 * @brief Set every element to zero (both nibbles of every byte).
 */
void Int4QTensor::setZero() {
  /// @todo accelerate with SIMD
  setValue(0);
}
277 :
278 9 : void Int4QTensor::initialize() {
279 9 : if (empty() || !isAllocated())
280 : return;
281 :
282 : /// @note Sampling from the normal/uniform distribution is invalid
283 9 : switch (initializer) {
284 1 : case Initializer::ZEROS:
285 1 : setZero();
286 1 : break;
287 3 : case Initializer::ONES:
288 3 : setValue(1.0f);
289 3 : break;
290 : case Initializer::NONE:
291 : break;
292 1 : default:
293 : throw std::invalid_argument(
294 1 : "Initializer other than zero and one is not valid for " +
295 3 : getStringDataType());
296 : break;
297 : }
298 :
299 8 : putData();
300 : }
301 :
/**
 * @brief Replace the initializer, then re-run initialization.
 */
void Int4QTensor::initialize(Initializer init) {
  initializer = init;
  initialize();
}
306 :
/**
 * @brief Reshape to match `from`, then copy its raw buffer.
 *
 * NOTE(review): no dtype check here — this assumes `from` already holds
 * packed QINT4 data plus float scales in this tensor's layout; the
 * checked entry point is copyData(). Confirm callers guarantee that.
 */
void Int4QTensor::copy(const Tensor &from) {
  reshape(from.getDim());
  copy(from.getData());
}
311 :
312 0 : void Int4QTensor::copyData(const Tensor &from) {
313 0 : NNTR_THROW_IF(!contiguous, std::invalid_argument)
314 : << getName() << " is not contiguous, cannot copy.";
315 :
316 0 : NNTR_THROW_IF(size() != from.size(), std::invalid_argument)
317 : << "Size of the tensor to copy must match.";
318 :
319 : /// @todo support copy from float32 & float16 to int8 data
320 0 : switch (from.getDataType()) {
321 : case ml::train::TensorDim::DataType::QINT4:
322 0 : copy(from.getData());
323 : break;
324 0 : default:
325 0 : throw std::invalid_argument("Error: Unsupported data type");
326 : break;
327 : }
328 0 : }
329 :
330 0 : void Int4QTensor::copy_with_stride(const Tensor &input, Tensor &output) {
331 0 : for (unsigned int b = 0; b < output.batch(); ++b) {
332 0 : for (unsigned int c = 0; c < output.channel(); ++c) {
333 0 : for (unsigned int h = 0; h < output.height(); ++h) {
334 0 : for (unsigned int w = 0; w < output.width(); ++w) {
335 0 : output.setValue(b, c, h, w, input.getValue<int8_t>(b, c, h, w));
336 : }
337 : }
338 : }
339 : }
340 0 : }
341 :
/**
 * @brief Serialize the tensor: quantization header first, then
 *        getMemoryBytes() raw bytes of the payload.
 */
void Int4QTensor::save(std::ostream &file) {
  /// @note Save quantization information
  save_quantization_info(file);

  std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());

  // guard against a size_t -> streamsize overflow going negative
  NNTR_THROW_IF(sz < 0, std::invalid_argument)
    << "save size: " << getMemoryBytes()
    << " is too big. It cannot be represented by std::streamsize";

  checkedWrite(file, (char *)getData(), sz,
               "[Int4QTensor::save] operation failed");
  putData();
}
356 :
/**
 * @brief Deserialize the tensor from a file stream: quantization header
 *        first, then getMemoryBytes() raw bytes into the payload.
 * @param start_offset     byte offset to read from; max() means "use the
 *                         tensor's stored file_offset"
 * @param read_from_offset when true, seek to start_offset before reading
 */
void Int4QTensor::read(std::ifstream &file, size_t start_offset,
                       bool read_from_offset) {
  if (start_offset == std::numeric_limits<size_t>::max()) {
    start_offset = file_offset;
  }
  read_quantization_info(file, start_offset, read_from_offset);

  std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());

  // guard against a size_t -> streamsize overflow going negative
  NNTR_THROW_IF(sz < 0, std::invalid_argument)
    << "read size: " << getMemoryBytes()
    << " is too big. It cannot be represented by std::streamsize";

  if (read_from_offset) {
    // skip the uint16 qscheme header written by save_quantization_info()
    start_offset += sizeof(uint16_t);
  }

  checkedRead(file, (char *)getData(), sz,
              "[Int4QTensor::read] operation failed", start_offset,
              read_from_offset);
  putData();
}
379 :
/**
 * @brief Deserialize the tensor from a generic ReadSource; mirrors the
 *        std::ifstream overload (header, then raw payload bytes).
 * @param start_offset     byte offset to read from; max() means "use the
 *                         tensor's stored file_offset"
 * @param read_from_offset when true, seek to start_offset before reading
 */
void Int4QTensor::read(ReadSource src, size_t start_offset,
                       bool read_from_offset) {
  if (start_offset == std::numeric_limits<size_t>::max()) {
    start_offset = file_offset;
  }
  read_quantization_info(src, start_offset, read_from_offset);

  std::streamsize sz = static_cast<std::streamsize>(getMemoryBytes());

  // guard against a size_t -> streamsize overflow going negative
  NNTR_THROW_IF(sz < 0, std::invalid_argument)
    << "read size: " << getMemoryBytes()
    << " is too big. It cannot be represented by std::streamsize";

  if (read_from_offset) {
    // skip the uint16 qscheme header written by save_quantization_info()
    start_offset += sizeof(uint16_t);
  }

  checkedRead(src, (char *)getData(), sz,
              "[Int4QTensor::read] operation failed", start_offset,
              read_from_offset);
  putData();
}
402 :
403 1 : std::vector<unsigned int> Int4QTensor::argmax() const {
404 : std::vector<unsigned int> result;
405 1 : const int8_t *data = (int8_t *)getData();
406 : size_t batch_size = batch();
407 1 : size_t feature_len = dim.getFeatureLen();
408 1 : result.resize(batch_size);
409 :
410 4 : for (unsigned int b = 0; b < batch_size; ++b) {
411 : int8_t curr_val, max_val = -8;
412 : unsigned int max_element_idx = 0;
413 30 : for (unsigned int idx = 0; idx < feature_len; ++idx) {
414 27 : curr_val = getValue(idx + b * feature_len);
415 :
416 27 : if (curr_val > max_val) {
417 : max_val = curr_val;
418 : max_element_idx = idx;
419 : }
420 : }
421 3 : result[b] = max_element_idx;
422 : }
423 1 : return result;
424 0 : }
425 :
426 0 : std::vector<unsigned int> Int4QTensor::argmin() const {
427 : std::vector<unsigned int> result;
428 0 : const int8_t *data = (int8_t *)getData();
429 : size_t batch_size = batch();
430 0 : size_t feature_len = dim.getFeatureLen();
431 0 : result.resize(batch_size);
432 :
433 0 : for (unsigned int b = 0; b < batch_size; ++b) {
434 : int8_t curr_val, min_val = 7;
435 : unsigned int min_element_idx = 0;
436 0 : for (unsigned int idx = 0; idx < feature_len; ++idx) {
437 0 : curr_val = getValue(idx + b * feature_len);
438 :
439 0 : if (curr_val < min_val) {
440 : min_val = curr_val;
441 : min_element_idx = idx;
442 : }
443 : }
444 0 : result[b] = min_element_idx;
445 : }
446 0 : return result;
447 0 : }
448 :
449 1 : float Int4QTensor::max_abs() const {
450 : int8_t abs_max_val = 0;
451 : int8_t curr_val;
452 23 : for (unsigned int idx = 0; idx < size(); ++idx) {
453 23 : curr_val = std::abs(getValue(idx));
454 23 : abs_max_val = (curr_val > abs_max_val) ? curr_val : abs_max_val;
455 :
456 : // Terminate search when abs_max_val is an Int4 absolute max value 8
457 23 : if (abs_max_val == 8)
458 : return abs_max_val;
459 : }
460 :
461 0 : return abs_max_val;
462 : }
463 :
464 0 : float Int4QTensor::maxValue() const {
465 : int8_t max_val = -8;
466 : int8_t curr_val;
467 0 : for (unsigned int idx = 0; idx < size(); ++idx) {
468 0 : curr_val = getValue(idx);
469 0 : max_val = (curr_val > max_val) ? curr_val : max_val;
470 :
471 : // Terminate search when max_val is an Int4 max value 7
472 0 : if (max_val == 7)
473 : return max_val;
474 : }
475 :
476 0 : return max_val;
477 : }
478 :
479 3 : float Int4QTensor::minValue() const {
480 : int8_t min_val = 7;
481 : int8_t curr_val;
482 77 : for (unsigned int idx = 0; idx < size(); ++idx) {
483 75 : curr_val = getValue(idx);
484 75 : min_val = (curr_val < min_val) ? curr_val : min_val;
485 :
486 : // Terminate search when min_val is an Int4 min value -8
487 75 : if (min_val == -8)
488 : return min_val;
489 : }
490 :
491 2 : return min_val;
492 : }
493 :
494 0 : void Int4QTensor::print(std::ostream &out) const {
495 0 : const int8_t *data = (int8_t *)getData();
496 0 : unsigned int len = size();
497 0 : out << "data addr: " << reinterpret_cast<const float *>(data) << '\n';
498 0 : out << dim;
499 :
500 0 : if (len > 100) {
501 0 : out << '[' << (int)getValue(0) << ' ' << (int)getValue(1) << ' '
502 0 : << (int)getValue(2) << " ... " << (int)getValue(len - 3) << ' '
503 0 : << (int)getValue(len - 2) << ' ' << (int)getValue(len - 1) << ']'
504 : << std::endl;
505 0 : return;
506 : }
507 :
508 0 : std::ios init(NULL);
509 0 : init.copyfmt(out);
510 0 : if (getFormat() == Tformat::NCHW) {
511 0 : for (unsigned int k = 0; k < batch(); k++) {
512 0 : for (unsigned int l = 0; l < channel(); l++) {
513 0 : for (unsigned int i = 0; i < height(); i++) {
514 0 : for (unsigned int j = 0; j < width(); j++) {
515 0 : out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
516 : }
517 : out << std::endl;
518 : }
519 : out << std::endl;
520 : }
521 : out << "-------" << std::endl;
522 : }
523 : } else {
524 0 : for (unsigned int k = 0; k < batch(); k++) {
525 0 : for (unsigned int i = 0; i < height(); i++) {
526 0 : for (unsigned int j = 0; j < width(); j++) {
527 0 : for (unsigned int l = 0; l < channel(); l++) {
528 0 : out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
529 : }
530 : out << std::endl;
531 : }
532 : out << std::endl;
533 : }
534 : out << "-------" << std::endl;
535 : }
536 0 : out.copyfmt(init);
537 : }
538 :
539 : /// print quantization information
540 0 : const float *q_scales = (float *)getScale();
541 :
542 0 : if (scale_size() > 50) {
543 0 : out << "Scale factors: [" << q_scales[0] << ' ' << q_scales[1] << ' '
544 0 : << q_scales[2] << " ... " << q_scales[len - 3] << ' '
545 0 : << q_scales[len - 2] << ' ' << q_scales[len - 1] << ']' << std::endl;
546 : return;
547 : }
548 :
549 0 : out << "Scale factors: ";
550 0 : for (unsigned i = 0; i < scale_size(); ++i) {
551 0 : out << q_scales[i] << " ";
552 : }
553 : out << std::endl;
554 : }
555 :
/**
 * @brief Total byte size used by save()/read(): ceil(size()/2) packed
 *        data bytes plus the scale factors.
 *
 * NOTE(review): scales are counted as sizeof(uint16_t) here, while
 * allocate() and the vector constructor reserve sizeof(float) per scale —
 * confirm whether the on-disk scale format is fp16 or fp32; this size
 * drives checkedRead/checkedWrite.
 */
size_t Int4QTensor::getMemoryBytes() const {
  return ((size() + 1) / 2) * dim.getDataTypeSize() +
         scale_size() * sizeof(uint16_t);
}
560 :
/**
 * @brief Number of scale factors for the current quantization scheme:
 *        1 for PER_TENSOR_AFFINE, height()*width()/group_size for
 *        PER_CHANNEL_AFFINE, 0 for anything else.
 *
 * NOTE(review): the PER_CHANNEL_AFFINE count is group-wise (per
 * group_size elements of h*w), not literally per channel — confirm the
 * naming matches QScheme's intent. Also assumes group_size != 0 and that
 * it divides height()*width() evenly (integer division truncates).
 */
size_t Int4QTensor::scale_size() const {
  switch (qscheme) {
  case QScheme::PER_TENSOR_AFFINE:
    return 1;
    break;
  case QScheme::PER_CHANNEL_AFFINE:
    return height() * width() / group_size;
    break;
  default:
    break;
  }
  return 0;
}
574 :
/// @brief Quantization scheme of this tensor.
QScheme Int4QTensor::q_scheme() const { return qscheme; }
576 :
/**
 * @brief Copy from a raw buffer laid out exactly like this tensor's
 *        storage: ceil(size()/2) packed int4 bytes immediately followed
 *        by scale_size() float scale factors.
 */
void Int4QTensor::copy(const void *buf) {
  NNTR_THROW_IF(!contiguous, std::invalid_argument)
    << getName() << " is not contiguous, cannot copy.";

  // self-copy is a no-op
  if (buf == getData()) {
    return;
  }
  // copy tensor data (packed bytes)
  scopy((size() + 1) / 2, (int8_t *)buf, 1, (int8_t *)getData(), 1);

  // copy scale factor data, located right after the packed payload
  float *scales = (float *)(((int8_t *)buf) + (size() + 1) / 2);
  scopy(scale_size(), scales, 1, (float *)getScale(), 1);
}
591 :
/**
 * @brief Write the quantization header: qscheme serialized as a 16-bit
 *        value (read() later skips sizeof(uint16_t) to reach the payload).
 */
void Int4QTensor::save_quantization_info(std::ostream &file) {
  checkedWrite(file, (char *)&qscheme, sizeof(uint16_t),
               "[Int4QTensor::save] failed to write quantization information");
}
596 :
/**
 * @brief Read the 16-bit qscheme header from a file stream.
 *
 * NOTE(review): group_size is force-reset to 32 afterwards (marked
 * "Remove me") — the group size is not (yet) serialized, so any
 * constructor-supplied g_size is discarded on load. Confirm before
 * removing.
 */
void Int4QTensor::read_quantization_info(std::ifstream &file,
                                         size_t start_offset,
                                         bool read_from_offset) {
  checkedRead(file, (char *)&qscheme, sizeof(uint16_t),
              "[Int4QTensor::read] failed to read quantization information",
              start_offset, read_from_offset);
  group_size = 32; /// Remove me
}
605 :
/**
 * @brief Read the 16-bit qscheme header from a generic ReadSource;
 *        mirrors the std::ifstream overload, including the temporary
 *        group_size reset (see the note there).
 */
void Int4QTensor::read_quantization_info(ReadSource src, size_t start_offset,
                                         bool read_from_offset) {
  checkedRead(src, (char *)&qscheme, sizeof(uint16_t),
              "[Int4QTensor::read] failed to read quantization information",
              start_offset, read_from_offset);
  group_size = 32; /// Remove me
}
613 :
/// @brief Class-wide quantization group size (static; shared by all instances).
size_t Int4QTensor::getGroupSize() { return group_size; }
615 :
616 : } // namespace nntrainer
|