Line data Source code
1 : /**
2 : * Copyright (C) 2019 Samsung Electronics Co., Ltd. All Rights Reserved.
3 : *
4 : * Licensed under the Apache License, Version 2.0 (the "License");
5 : * you may not use this file except in compliance with the License.
6 : * You may obtain a copy of the License at
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : * Unless required by applicable law or agreed to in writing, software
9 : * distributed under the License is distributed on an "AS IS" BASIS,
10 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 : * See the License for the specific language governing permissions and
12 : * limitations under the License.
13 : *
14 : *
15 : * @file neuralnet.cpp
16 : * @date 04 December 2019
17 : * @brief This is the Neural Network Class
18 : * @see https://github.com/nnstreamer/nntrainer
19 : * @author Jijoong Moon <jijoong.moon@samsung.com>
20 : * @bug No known bugs except for NYI items
21 : *
22 : */
23 :
24 : #include "layer_context.h"
25 : #include "model.h"
26 : #include "model_common_properties.h"
27 : #include <cmath>
28 : #include <cstring>
29 : #include <fstream>
30 : #include <future>
31 : #include <iomanip>
32 : #include <sstream>
33 :
34 : #include <activation_realizer.h>
35 : #include <adamw.h>
36 : #include <common_properties.h>
37 : #include <databuffer.h>
38 : #include <flatten_realizer.h>
39 : #include <ini_interpreter.h>
40 : #include <ini_wrapper.h>
41 : #include <input_realizer.h>
42 : #include <model_loader.h>
43 : #include <multiout_realizer.h>
44 : #include <neuralnet.h>
45 : #include <nntrainer_error.h>
46 : #include <nntrainer_log.h>
47 : #include <node_exporter.h>
48 : #include <optimizer_context.h>
49 : #include <optional>
50 : #include <previous_input_realizer.h>
51 : #include <profiler.h>
52 : #include <recurrent_realizer.h>
53 : #include <remap_realizer.h>
54 : #include <slice_realizer.h>
55 : #include <util_func.h>
56 :
57 : #ifdef ENABLE_TFLITE_INTERPRETER
58 : #include <tflite_interpreter.h>
59 : #endif
60 :
61 : /**
62 : * @brief Internal enum values for nntrainer to summarize model accuracy & loss
63 : */
64 : #define ML_TRAIN_SUMMARY_MODEL_TRAIN_LOSS 101
65 : #define ML_TRAIN_SUMMARY_MODEL_VALID_LOSS 102
66 : #define ML_TRAIN_SUMMARY_MODEL_VALID_ACCURACY 103
67 :
68 : namespace nntrainer {
69 :
70 835 : NeuralNetwork::NeuralNetwork() :
71 835 : model_props(props::LossType(), {}, {}, props::ClipGradByGlobalNorm(),
72 1670 : props::LossScale()),
73 2505 : model_flex_props(props::Epochs(), props::TrainingBatchSize(),
74 1670 : props::SavePath(), props::ContinueTrain(),
75 1670 : props::SaveBestPath(), props::MemoryOptimization(),
76 2505 : props::Fsu(), props::FsuPath(), props::FsuLookahead(),
77 1670 : props::TensorFormat(), props::ModelTensorDataType()),
78 : load_path(std::string()),
79 835 : epoch_idx(0),
80 835 : iter(0),
81 835 : loss(0.0f),
82 835 : data_buffers({nullptr, nullptr, nullptr}),
83 835 : initialized(false),
84 835 : compiled(false),
85 835 : loadedFromConfig(false),
86 835 : exec_mode(ExecutionMode::TRAIN),
87 1670 : ct_engine(&Engine::Global()) {}
88 :
89 1 : NeuralNetwork::NeuralNetwork(const Engine *ct_engine_) :
90 1 : model_props(props::LossType(), {}, {}, props::ClipGradByGlobalNorm(),
91 2 : props::LossScale()),
92 3 : model_flex_props(props::Epochs(), props::TrainingBatchSize(),
93 2 : props::SavePath(), props::ContinueTrain(),
94 2 : props::SaveBestPath(), props::MemoryOptimization(),
95 3 : props::Fsu(), props::FsuPath(), props::FsuLookahead(),
96 2 : props::TensorFormat(), props::ModelTensorDataType()),
97 : load_path(std::string()),
98 1 : epoch_idx(0),
99 1 : iter(0),
100 1 : loss(0.0f),
101 1 : data_buffers({nullptr, nullptr, nullptr}),
102 1 : initialized(false),
103 1 : compiled(false),
104 1 : loadedFromConfig(false),
105 1 : exec_mode(ExecutionMode::TRAIN),
106 2 : ct_engine(ct_engine_) {}
107 :
108 696 : int NeuralNetwork::loadFromConfig(const std::string &config) {
109 696 : if (loadedFromConfig == true) {
110 30 : ml_loge("can not do loadFromConfig twice");
111 30 : return ML_ERROR_INVALID_PARAMETER;
112 : }
113 :
114 666 : ModelLoader loader(ct_engine);
115 666 : NeuralNetwork tempNet(*this);
116 :
117 666 : int status = loader.loadFromContext(tempNet);
118 666 : if (status != ML_ERROR_NONE) {
119 : return status;
120 : }
121 :
122 666 : status = loader.loadFromConfig(config, tempNet);
123 666 : if (status != ML_ERROR_NONE) {
124 : return status;
125 : }
126 :
127 540 : tempNet.loadedFromConfig = true;
128 540 : swap(tempNet, *this);
129 :
130 : return ML_ERROR_NONE;
131 666 : }
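
/**
 * loadFromConfig follows a copy-and-swap pattern: the loader populates a
 * temporary network, and swap() is reached only after every step succeeds,
 * so a failed load leaves *this untouched. A minimal usage sketch (the
 * path is illustrative):
 * @code
 * nntrainer::NeuralNetwork net;
 * if (net.loadFromConfig("model.ini") != ML_ERROR_NONE) {
 *   // net is still in its pre-load state here
 * }
 * @endcode
 */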
132 :
133 0 : unsigned int NeuralNetwork::getCurrentEpoch() {
134 : #ifdef DEBUG
135 : ml_logd("[NNTrainer] Current epoch: %d", epoch_idx);
136 : #endif
137 0 : return epoch_idx;
138 : };
139 :
140 1490 : void NeuralNetwork::setProperty(const std::vector<std::string> &values) {
141 1490 : auto left_props = loadProperties(values, model_props);
142 1487 : setTrainConfig(left_props);
143 1487 : }
144 :
145 1514 : void NeuralNetwork::setTrainConfig(const std::vector<std::string> &values) {
146 1514 : auto left_props = loadProperties(values, model_flex_props);
147 1512 : NNTR_THROW_IF(left_props.size(), std::invalid_argument)
148 : << "Model has unparsed properties, size: " << left_props.size()
149 : << " of first element: " << left_props.front();
150 1511 : }
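
/**
 * setProperty first consumes model_props keys and passes the remainder to
 * setTrainConfig for model_flex_props; anything still unparsed raises
 * std::invalid_argument. A sketch of typical usage (the key spellings
 * follow the props:: classes above and are assumptions here):
 * @code
 * nntrainer::NeuralNetwork net;
 * net.setProperty({"loss=cross", "epochs=10", "batch_size=32"});
 * @endcode
 */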
151 :
152 697 : int NeuralNetwork::compile(ExecutionMode mode) {
153 :
154 697 : exec_mode = mode;
155 :
156 : std::string loss_type = std::get<props::LossType>(model_props).empty()
157 697 : ? std::string()
158 311 : : std::get<props::LossType>(model_props);
159 :
160 : auto &input_conn = std::get<std::vector<props::InputConnection>>(model_props);
161 : /// @note the label layer might need to be treated in a similar way as well
162 :
163 : /// @todo make NetworkGraph compile at construction instead of having
164 : /// graph.compile(); NeuralNetwork owns the list of layer nodes,
165 : /// which will be passed at compile time.
166 :
167 : std::vector<std::unique_ptr<GraphRealizer>> realizers;
168 :
169 697 : realizers.emplace_back(new PreviousInputRealizer(
170 1394 : std::vector<Connection>(input_conn.begin(), input_conn.end())));
171 697 : realizers.emplace_back(new MultioutRealizer());
172 697 : realizers.emplace_back(new FlattenRealizer());
173 697 : realizers.emplace_back(new ActivationRealizer());
174 :
175 3475 : for (auto &realizer : realizers) {
176 2781 : graph_representation = realizer->realize(graph_representation);
177 : }
178 :
179 694 : bool fsu = std::get<props::Fsu>(model_flex_props);
180 694 : const std::string fsu_path = std::get<props::FsuPath>(model_flex_props);
181 694 : unsigned int lookahead = std::get<props::FsuLookahead>(model_flex_props);
182 :
183 : const std::string tensor_format =
184 : to_string(std::get<props::TensorFormat>(model_flex_props));
185 :
186 : const std::string tensor_type =
187 : to_string(std::get<props::ModelTensorDataType>(model_flex_props));
188 :
189 : model_graph =
190 694 : NetworkGraph(fsu, mode, fsu_path, lookahead, tensor_format, tensor_type);
191 :
192 694 : model_graph.setMemoryOptimizations(
193 : std::get<props::MemoryOptimization>(model_flex_props));
194 5027 : for (auto &node : graph_representation) {
195 4333 : if (auto &prop = std::get<props::ClipGradByGlobalNorm>(model_props);
196 : !prop.empty()) {
197 0 : node->setProperty({"clip_grad_by_norm=" + to_string(prop)});
198 : }
199 : if (auto &prop = std::get<props::LossScale>(model_props); !prop.empty()) {
200 12999 : node->setProperty({"loss_scale=" + to_string(prop)});
201 : }
202 8666 : model_graph.addLayer(node);
203 : }
204 :
205 694 : int status = model_graph.compile(loss_type);
206 694 : NN_RETURN_STATUS();
207 :
208 624 : compiled = true;
209 :
210 624 : return status;
211 697 : }
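
/**
 * The realizers above form a small rewrite pipeline: each GraphRealizer
 * consumes the current graph_representation and returns a transformed one,
 * so the emplace_back order is the pass order. A reduced sketch of the
 * same idiom:
 * @code
 * std::vector<std::unique_ptr<GraphRealizer>> passes;
 * passes.emplace_back(new FlattenRealizer());
 * passes.emplace_back(new ActivationRealizer());
 * for (auto &pass : passes)
 *   graph_representation = pass->realize(graph_representation);
 * @endcode
 */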
212 :
213 839 : int NeuralNetwork::initialize(ExecutionMode mode) {
214 : int status = ML_ERROR_NONE;
215 :
216 839 : if (mode != exec_mode) {
217 0 : if (mode == ExecutionMode::INFERENCE) {
218 0 : ml_logd("Execution mode mismatch : train mode @compile & inference mode "
219 : "@ initialize");
220 0 : exec_mode = mode;
221 : } else {
222 0 : NNTR_THROW_IF(exec_mode == ExecutionMode::TRAIN, std::invalid_argument)
223 : << "Execution mode mismatch : trying to train with compiled for "
224 : "inference";
225 : }
226 : }
227 :
228 839 : if (initialized) {
229 81 : ml_loge("Error: Initializing the model again");
230 81 : return ML_ERROR_NOT_SUPPORTED;
231 : }
232 :
233 758 : if (!compiled) {
234 137 : ml_loge("Error: Need to compile first");
235 137 : return ML_ERROR_NOT_SUPPORTED;
236 : }
237 :
238 : unsigned int n_layers = (unsigned int)model_graph.size();
239 :
240 1242 : ml_logd("initializing neural network, layer size: %d", n_layers);
241 : PROFILE_MEM_ANNOTATE("Initialize");
242 :
243 : auto &input_conn_prop =
244 : std::get<std::vector<props::InputConnection>>(model_props);
245 : auto &label_layer_prop =
246 : std::get<std::vector<props::LabelLayer>>(model_props);
247 :
248 : std::vector<Connection> input_conn(input_conn_prop.begin(),
249 621 : input_conn_prop.end());
250 : std::vector<std::string> label_layers;
251 :
252 621 : if (!label_layer_prop.empty()) {
253 136 : label_layers = std::vector<std::string>(label_layer_prop.begin(),
254 68 : label_layer_prop.end());
255 : }
256 :
257 621 : status = model_graph.initialize(
258 : exec_mode, input_conn,
259 1242 : std::vector<Connection>(label_layers.begin(), label_layers.end()));
260 618 : NN_RETURN_STATUS();
261 :
262 618 : model_graph.setBatchSize(
263 : std::get<props::TrainingBatchSize>(model_flex_props));
264 :
265 : // If the execution mode is `train`, the optimizer and its relevant variables
266 : // are initialized. Throws an error if the optimizer is not set for training;
267 : // otherwise, the optimizer variables are initialized.
268 618 : if (exec_mode == ExecutionMode::TRAIN) {
269 :
270 617 : if (!opt) {
271 1 : ml_loge("Optimizer should be set before initialization for training.");
272 1 : return ML_ERROR_INVALID_PARAMETER;
273 : }
274 : /** TODO: update request of optimizer to be of same format as
275 : * Layer::requestTensor */
276 616 : opt->finalize();
277 : std::function<std::vector<TensorDim>(const TensorDim &)> cb =
278 : [this](const TensorDim &dim) {
279 3722 : return opt->getOptimizerVariableDim(dim);
280 : };
281 1232 : model_graph.requestOptimizerVariable(cb, true);
282 : }
283 :
284 : // Allocate weights
285 617 : model_graph.allocateWeights(exec_mode != ExecutionMode::INFERENCE);
286 : // enable this to save initialized weights for INFERENCE
287 : // model_graph.allocateWeights(true);
288 :
289 617 : initialized = true;
290 :
291 617 : if (!load_path.empty()) {
292 0 : load(load_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
293 : }
294 :
295 : return status;
296 624 : }
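
/**
 * The callback handed to requestOptimizerVariable maps a weight dimension
 * to the dimensions of the optimizer state tensors for that weight. A
 * sketch of such a callback's shape (adam, for instance, keeps two moment
 * tensors shaped like the weight):
 * @code
 * std::function<std::vector<TensorDim>(const TensorDim &)> cb =
 *   [](const TensorDim &dim) {
 *     return std::vector<TensorDim>{dim, dim};  // e.g. first/second moments
 *   };
 * @endcode
 */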
297 :
298 1 : int NeuralNetwork::reinitialize() {
299 : int status = ML_ERROR_NONE;
300 :
301 1 : if (!initialized) {
302 0 : ml_loge("Error: Need to initialize first");
303 0 : return ML_ERROR_NOT_SUPPORTED;
304 : }
305 :
306 : unsigned int n_layers = (unsigned int)model_graph.size();
307 :
308 2 : ml_logd("reinitializing neural network, layer size: %d", n_layers);
309 : PROFILE_MEM_ANNOTATE("Reinitialize");
310 :
311 : auto &input_conn_prop =
312 : std::get<std::vector<props::InputConnection>>(model_props);
313 : auto &label_layer_prop =
314 : std::get<std::vector<props::LabelLayer>>(model_props);
315 :
316 : std::vector<Connection> input_conn(input_conn_prop.begin(),
317 1 : input_conn_prop.end());
318 : std::vector<std::string> label_layers;
319 :
320 1 : if (!label_layer_prop.empty()) {
321 0 : label_layers = std::vector<std::string>(label_layer_prop.begin(),
322 0 : label_layer_prop.end());
323 : }
324 :
325 1 : status = model_graph.reinitialize(
326 : input_conn,
327 2 : std::vector<Connection>(label_layers.begin(), label_layers.end()));
328 : NN_RETURN_STATUS();
329 :
330 : return status;
331 1 : }
332 :
333 : /**
334 : * @brief free layers
335 : */
336 2086 : NeuralNetwork::~NeuralNetwork() {
337 : try {
338 1534 : deallocate();
339 0 : } catch (const std::runtime_error &e) {
340 0 : std::cerr << "Error occurred during destroying NeuralNetwork: " << e.what()
341 : << std::endl;
342 0 : }
343 :
344 : /** close the fd if neuralnet opened one */
345 1534 : if (model_file_fd != -1)
346 0 : close(model_file_fd);
347 5154 : }
348 :
349 : /**
350 : * @brief forward propagation using the layer nodes in the model graph
351 : */
352 6821 : sharedConstTensors NeuralNetwork::forwarding(
353 : bool training, std::function<bool(void *userdata)> stop_cb, void *userdata) {
354 :
355 6821 : unsigned int lookahead = std::get<props::FsuLookahead>(model_flex_props);
356 6821 : bool fsu_mode = std::get<props::Fsu>(model_flex_props);
357 6821 : if (fsu_mode) {
358 0 : for (unsigned int i = 0; i < lookahead; ++i) {
359 0 : model_graph.LoadTensors(i);
360 : }
361 : }
362 : std::function<void(std::shared_ptr<LayerNode>, bool)> forwarding_op =
363 61408 : [this, stop_cb, lookahead, fsu_mode](std::shared_ptr<LayerNode> node,
364 : bool training) -> void {
365 : (void)this;
366 : PROFILE_MEM_ANNOTATE("Forwarding for layer: " + node->getName());
367 :
368 : auto f = std::get<0>(node->getExecutionOrder());
369 27303 : if (exec_mode == ExecutionMode::TRAIN or
370 5 : (exec_mode == ExecutionMode::INFERENCE and !fsu_mode)) {
371 27303 : model_graph.flushCacheExcept(f);
372 27303 : node->forwarding(training);
373 : } else {
374 : /**
375 : Currently, it supports FSU async mode for inference. The procedure of
376 : FSU is as follows.
377 :
378 : Prerequisite: this function is called node by node by the forwarding
379 : function in the network graph.
380 :
381 : Step 1. If the execution order is the first (f == 0), it will try
382 : to load the tensors used by layer 0.
383 :
384 : Step 2. It checks whether the tensors from Step 1 are loaded, then
385 : does the forwarding of the first node.
386 :
387 : Step 3. It then checks the lookahead, which says how many layers'
388 : weights need to be loaded before running, to hide the overhead
389 : due to FSU.
390 :
391 : Step 4. It asks the thread pool to fetch the tensors for the layers
392 : within the lookahead window.
393 :
394 : Step 5. It tries to release the weights whose execution order is
395 : less than f.
396 :
397 : Step n. Repeat for the next layer, starting by checking whether its
398 : tensors are loaded; if so, run forwarding. Every time forwarding
399 : finishes, ask to load tensors for the next n layers.
400 : **/
401 0 : model_graph.checkLoadComplete(f);
402 0 : node->forwarding(training);
403 0 : model_graph.inActive(f);
404 0 : model_graph.LoadTensors(f + lookahead);
405 : }
406 6821 : };
407 :
408 27284 : return model_graph.forwarding(training, forwarding_op, stop_cb, userdata);
409 : }
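
/**
 * A condensed sketch of the FSU inference path implemented above, with the
 * graph bookkeeping elided; `nodes` and `n_layers` stand in for the sorted
 * layer nodes and their count:
 * @code
 * for (unsigned int i = 0; i < lookahead; ++i)
 *   model_graph.LoadTensors(i);                // prefetch the first window
 * for (unsigned int f = 0; f < n_layers; ++f) {
 *   model_graph.checkLoadComplete(f);          // block until weights for f arrive
 *   nodes[f]->forwarding(training);            // compute with loaded weights
 *   model_graph.inActive(f);                   // release weights already consumed
 *   model_graph.LoadTensors(f + lookahead);    // prefetch ahead of compute
 * }
 * @endcode
 */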
410 :
411 : /**
412 : * @brief forward propagation with the given input and label tensors
413 : */
414 1148 : sharedConstTensors NeuralNetwork::forwarding(sharedConstTensors input,
415 : sharedConstTensors label,
416 : bool training) {
417 1148 : auto current_batch = model_graph.getBatchSize();
418 1148 : if (current_batch != input[0]->batch()) {
419 0 : model_graph.setBatchSize(input[0]->batch());
420 0 : current_batch = model_graph.getBatchSize();
421 : }
422 :
423 1148 : NNTR_THROW_IF(input[0]->batch() != current_batch ||
424 : (!label.empty() && label[0]->batch() != current_batch),
425 : std::logic_error)
426 : << "Error: mismatch in batchsize for data and model."
427 0 : << " input_batch: " << input[0]->batch()
428 0 : << " label_batch: " << label[0]->batch()
429 : << " target_batch: " << current_batch;
430 :
431 1148 : model_graph.setInputsLabels(input, label);
432 :
433 2296 : return forwarding(training);
434 : }
435 :
436 0 : sharedConstTensors NeuralNetwork::incremental_forwarding(
437 : unsigned int from, unsigned int to, bool training,
438 : std::function<bool(void *userdata)> stop_cb, void *userdata) {
439 :
440 0 : unsigned int lookahead = std::get<props::FsuLookahead>(model_flex_props);
441 0 : bool fsu_mode = std::get<props::Fsu>(model_flex_props);
442 :
443 0 : if (fsu_mode) {
444 0 : for (unsigned int i = 0; i < lookahead; ++i) {
445 0 : model_graph.LoadTensors(i);
446 : }
447 : }
448 :
449 : std::function<void(std::shared_ptr<LayerNode>, bool)> forwarding_op =
450 0 : [this, from, to, stop_cb, fsu_mode,
451 0 : lookahead](std::shared_ptr<LayerNode> node, bool training) -> void {
452 : PROFILE_MEM_ANNOTATE("Forwarding for layer: " + node->getName());
453 :
454 : auto f = std::get<0>(node->getExecutionOrder());
455 0 : if (exec_mode == ExecutionMode::TRAIN or
456 0 : (exec_mode == ExecutionMode::INFERENCE and !fsu_mode)) {
457 : // auto start_layer =
458 : // std::chrono::high_resolution_clock::now(); // log the
459 : // start_prefill time
460 0 : model_graph.flushCacheExcept(f);
461 0 : node->incremental_forwarding(from, to, training);
462 : // auto end_layer =
463 : // std::chrono::high_resolution_clock::now(); // log th
464 : // auto duration_ =
465 : // std::chrono::duration_cast<std::chrono::nanoseconds>(end_layer-start_layer);
466 : // std::cout << node->getName() <<" : "<< duration_.count()<<"
467 : // ns"<<std::endl;
468 : } else {
469 0 : model_graph.checkLoadComplete(f);
470 0 : node->incremental_forwarding(from, to, training);
471 0 : model_graph.inActive(f);
472 0 : model_graph.LoadTensors(f + lookahead);
473 : }
474 0 : };
475 :
476 : return model_graph.incremental_forwarding(from, to, training, forwarding_op,
477 0 : stop_cb, userdata);
478 : }
479 :
480 : sharedConstTensors
481 0 : NeuralNetwork::incremental_forwarding(unsigned int from, unsigned int to,
482 : sharedConstTensors input,
483 : sharedConstTensors label, bool training) {
484 0 : auto current_batch = model_graph.getBatchSize();
485 0 : NNTR_THROW_IF(input[0]->batch() != current_batch ||
486 : (!label.empty() && label[0]->batch() != current_batch),
487 : std::logic_error)
488 : << "Error: mismatch in batchsize for data and model."
489 0 : << " input_batch: " << input[0]->batch()
490 0 : << " label_batch: " << label[0]->batch()
491 : << " target_batch: " << current_batch;
492 :
493 0 : model_graph.setInputsLabels(input, label);
494 :
495 0 : return incremental_forwarding(from, to, training);
496 : }
497 :
498 : /**
499 : * @brief back propagation
500 : * Calls the backwarding function of each layer in reverse order
501 : * There is no need to call it for the first input layer (no data to be updated)
502 : */
503 6132 : void NeuralNetwork::backwarding(int iteration,
504 : std::function<bool(void *userdata)> stop_cb,
505 : void *userdata) {
506 :
507 : #ifdef DEBUG
508 : NNTR_THROW_IF(!opt, std::invalid_argument) << "optimizer is null!";
509 : #endif
510 :
511 : std::function<void(std::shared_ptr<LayerNode>, bool)> forwarding_op =
512 18396 : [this, stop_cb](std::shared_ptr<LayerNode> node, bool training) -> void {
513 : (void)this;
514 : PROFILE_MEM_ANNOTATE("Forwarding for layer: " + node->getName());
515 :
516 : auto f = std::get<0>(node->getExecutionOrder());
517 0 : model_graph.flushCacheExcept(f);
518 :
519 0 : node->forwarding(training);
520 6132 : };
521 :
522 : std::function<bool(std::shared_ptr<LayerNode>, int)> backwarding_op =
523 24528 : [this, stop_cb, userdata](std::shared_ptr<LayerNode> node,
524 : int iteration) -> bool {
525 : /**
526 : * Do not change this order:
527 : * 1. calcGradient
528 : * 2. calcDerivative
529 : * 3. applyGradient
530 : * 4. gradientClippingOnLastAccess
531 : */
532 :
533 22292 : model_graph.flushCacheExcept(std::get<1>(node->getExecutionOrder()));
534 : PROFILE_MEM_ANNOTATE("CalcGradient: " + node->getName());
535 :
536 : bool apply_gradient = true;
537 22292 : if (node->getTrainable()) {
538 : /** If gradient optimization mode, then calculate gradient first */
539 7467 : if (dynamic_training_opt.isGradientMode())
540 0 : node->calcGradient();
541 :
542 : /**
543 : * If optimization is off, or the gradient must be applied, then this
544 : * will be true
545 : * @todo This apply_gradient flag should be passed to each weight and later
546 : * be queried when updating gradients at once. (after moving apply_gradient
547 : * out of this function)
548 : *
549 : */
550 : // auto &layer = node->getObject();
551 : // apply_gradient = dynamic_training_opt.checkIfApply(
552 : // layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0],
553 : // opt, iteration);
554 :
555 : /** If the gradient must be applied and it's not gradient mode, calculate
556 : * the gradient
557 : */
558 7467 : if (!dynamic_training_opt.isGradientMode() && apply_gradient) {
559 7467 : node->calcGradient();
560 :
561 7467 : RunLayerContext &rc = node->getRunContext();
562 7467 : if (model_graph.isMixedPrecision()) {
563 0 : for (auto w : rc.getWeights()) {
564 0 : if (w->hasGradient())
565 0 : if (!w->getGradientRef().isValid())
566 : return false;
567 0 : }
568 : }
569 : }
570 : }
571 :
572 22292 : model_graph.flushCacheExcept(std::get<2>(node->getExecutionOrder()));
573 : PROFILE_MEM_ANNOTATE("CalcDerivative: " + node->getName());
574 :
575 44584 : if (stop_cb(userdata)) {
576 : return true;
577 : }
578 :
579 22292 : if (node->needsCalcDerivative()) {
580 9774 : node->calcDerivative();
581 : }
582 :
583 22292 : model_graph.flushCacheExcept(std::get<3>(node->getExecutionOrder()));
584 : PROFILE_MEM_ANNOTATE("ApplyGradient: " + node->getName());
585 :
586 : if (apply_gradient) {
587 : /// Apply gradient only at the end of the last shared weight access
588 22292 : model_graph.applyGradients(
589 37875 : node.get(), [iteration, opt_ = opt.get()](Weight &w) {
590 : w.calcRegularizationGradient();
591 31166 : if (opt_->getType() != AdamW::type) {
592 : w.calcWeightDecayGradient();
593 : }
594 : RunOptimizerContext opt_context(&w, iteration,
595 15583 : opt_->getLearningRate(iteration));
596 15583 : opt_->applyGradient(opt_context);
597 15583 : });
598 : }
599 22292 : return true;
600 6132 : };
601 :
602 : std::function<void(Weight &, int)> lazy_apply_grad_op =
603 44 : [opt_ = opt.get()](Weight &w, int iteration) -> void {
604 : w.calcRegularizationGradient();
605 : w.calcWeightDecayGradient();
606 : RunOptimizerContext opt_context(&w, iteration,
607 44 : opt_->getLearningRate(iteration));
608 44 : opt_->applyGradient(opt_context);
609 44 : };
610 :
611 : // return false if the gradient is not valid
612 : bool ret = false;
613 :
614 12264 : while (!ret) {
615 12264 : ret = model_graph.backwarding(iteration, forwarding_op, backwarding_op,
616 : lazy_apply_grad_op, stop_cb, userdata);
617 : }
618 6132 : }
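
/**
 * Per node, backwarding_op above performs a fixed sequence; stripped of the
 * cache flushing and the regularization/weight-decay handling, it reduces
 * to the following sketch:
 * @code
 * if (node->getTrainable())
 *   node->calcGradient();                  // 1. gradient w.r.t. weights
 * if (node->needsCalcDerivative())
 *   node->calcDerivative();                // 2. gradient w.r.t. inputs
 * model_graph.applyGradients(              // 3. update at last shared access
 *   node.get(), [iteration, opt_ = opt.get()](Weight &w) {
 *     RunOptimizerContext ctx(&w, iteration, opt_->getLearningRate(iteration));
 *     opt_->applyGradient(ctx);
 *   });
 * @endcode
 */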
619 :
620 1263 : void NeuralNetwork::save(const std::string &file_path,
621 : ml::train::ModelFormat format) {
622 1265 : NNTR_THROW_IF(!initialized, std::runtime_error)
623 : << "Cannot save model if not initialized yet, path: " << file_path
624 2 : << " format: " << static_cast<unsigned>(format);
625 :
626 : /// @todo this switch case should be delegating the function call only. It's
627 : /// not delegating yet, as the required logic is still manageable inline.
628 1261 : switch (format) {
629 1021 : case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
630 : auto model_file = checkedOpenStream<std::ofstream>(
631 1021 : file_path, std::ios::out | std::ios::binary | std::ios::trunc);
632 :
633 4088 : for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
634 6134 : (*iter)->save(model_file, false, exec_mode);
635 : }
636 :
637 3063 : if (opt && istrequal(opt->getType(), "adam")) {
638 21 : std::string adam = "adam";
639 21 : model_file.write(adam.c_str(), 4);
640 88 : for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
641 : iter++) {
642 134 : (*iter)->save(model_file, true);
643 : }
644 : }
645 :
646 1021 : if (exec_mode == ml::train::ExecutionMode::TRAIN) {
647 1021 : model_file.write((char *)&epoch_idx, sizeof(epoch_idx));
648 1021 : model_file.write((char *)&iter, sizeof(iter));
649 : }
650 :
651 1021 : model_file.close();
652 : break;
653 1021 : }
654 240 : case ml::train::ModelFormat::MODEL_FORMAT_INI:
655 240 : saveModelIni(file_path);
656 240 : break;
657 0 : case ml::train::ModelFormat::MODEL_FORMAT_INI_WITH_BIN: {
658 : auto old_save_path = std::get<props::SavePath>(model_flex_props);
659 : auto bin_file_name =
660 0 : file_path.substr(0, file_path.find_last_of('.')) + ".bin";
661 :
662 0 : std::get<props::SavePath>(model_flex_props).set(bin_file_name);
663 0 : save(file_path, ml::train::ModelFormat::MODEL_FORMAT_INI);
664 0 : save(bin_file_name, ml::train::ModelFormat::MODEL_FORMAT_BIN);
665 : std::get<props::SavePath>(model_flex_props) = old_save_path;
666 : break;
667 : }
668 0 : case ml::train::ModelFormat::MODEL_FORMAT_ONNX: {
669 : throw nntrainer::exception::not_supported(
670 0 : "saving with ONNX format is not supported yet.");
671 : break;
672 : }
673 0 : default:
674 : throw nntrainer::exception::not_supported(
675 0 : "saving with given format is not supported yet");
676 : }
677 1261 : }
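
/**
 * Layout of MODEL_FORMAT_BIN as written above, in order: every layer's
 * weights in graph order, then (only when the optimizer is adam) the
 * 4-byte tag "adam" followed by the optimizer variables, and finally
 * epoch_idx and iter when saving in TRAIN mode. A usage sketch (the path
 * is illustrative):
 * @code
 * model.save("weights.bin", ml::train::ModelFormat::MODEL_FORMAT_BIN);
 * @endcode
 */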
678 :
679 345 : void NeuralNetwork::load(const std::string &file_path,
680 : ml::train::ModelFormat format) {
681 : /// @todo this switch case should be delegating the function call only. It's
682 : /// not delegating yet, as the required logic is still manageable inline.
683 :
684 345 : bool fsu_mode = std::get<props::Fsu>(model_flex_props);
685 :
686 345 : const std::regex reg_("\\s*\\;\\s*");
687 345 : auto v = split(file_path, reg_);
688 :
689 : size_t start_from = 0;
690 : std::vector<std::pair<size_t, size_t>> file_offset;
691 345 : for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
692 4 : auto weights = (*iter)->getRunContext().getWeights();
693 0 : for (auto weight : weights) {
694 0 : size_t size = weight->getVariable().getMemoryBytes();
695 0 : auto tensor_data_type = weight->getDim().getDataType();
696 0 : weight->getVariableRef().setFileOffset(start_from);
697 : ///@todo instead of checking the data type,
698 : /// we may need to create a common parent class for
699 : /// quantized tensors, requiring qparam to be saved
700 : /// and creating a common interface to check if qparam is needed
701 : /// this kind of type checking should be avoided
702 0 : if (tensor_data_type != TensorDim::DataType::FP32 &&
703 0 : tensor_data_type != TensorDim::DataType::FP16 &&
704 0 : tensor_data_type != TensorDim::DataType::Q6_K &&
705 : tensor_data_type != TensorDim::DataType::Q4_0) {
706 : // for tensor with qparam
707 0 : size += sizeof(uint16_t);
708 : }
709 0 : file_offset.emplace_back(std::make_pair(start_from, size));
710 0 : start_from += size;
711 : }
712 0 : }
713 :
714 343 : if (exec_mode == ExecutionMode::INFERENCE && fsu_mode) {
715 0 : model_graph.setFsuWeightPath((v.size() == 2) ? v[1] : v[0]);
716 0 : model_graph.setWeightOffset(file_offset);
717 : }
718 :
719 343 : switch (format) {
720 0 : case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
721 0 : NNTR_THROW_IF(!initialized, std::runtime_error)
722 : << "Cannot load if not initialized yet, path: " << file_path
723 : << " format: " << static_cast<unsigned>(format);
724 0 : auto f_path = (v.size() == 2) ? v[1] : v[0];
725 :
726 : auto model_file =
727 0 : checkedOpenStream<std::ifstream>(f_path, std::ios::in | std::ios::binary);
728 :
729 : #if defined(_WIN32)
730 : HANDLE hFile, hMap;
731 : #endif
732 :
733 0 : if (exec_mode == ml::train::ExecutionMode::INFERENCE) {
734 : if (!MMAP_READ) {
735 : ///@note for slim-tensor. This should be removed.
736 : model_file_fd = open(f_path.c_str(), O_RDONLY);
737 : NNTR_THROW_IF((model_file_fd == -1), std::invalid_argument)
738 : << "Cannot open file : " << f_path;
739 : }
740 : // std::vector<std::future<void>> futures;
741 : std::vector<std::thread> threads;
742 0 : threads.reserve(model_graph.size());
743 0 : for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
744 : ++iter) {
745 : auto node = *iter;
746 0 : auto exec_order = std::get<0>((*iter)->getExecutionOrder());
747 :
748 0 : threads.emplace_back([&, node]() {
749 : if (!MMAP_READ) {
750 : auto local_model_file = checkedOpenStream<std::ifstream>(
751 : (v.size() == 2) ? v[1] : v[0], std::ios::in | std::ios::binary);
752 : node->read(local_model_file, false, exec_mode, fsu_mode,
753 : std::numeric_limits<size_t>::max(), true, model_file_fd);
754 : } else {
755 : #if defined(_WIN32)
756 : // Map per-task, then unmap immediately after: enables early release
757 : // of pages
758 : HANDLE hFile =
759 : CreateFileA(f_path.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL,
760 : OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
761 : NNTR_THROW_IF((hFile == INVALID_HANDLE_VALUE), std::runtime_error)
762 : << "CreateFileA failed";
763 :
764 : HANDLE hMap =
765 : CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
766 : NNTR_THROW_IF((hMap == NULL), std::runtime_error)
767 : << "CreateFileMapping failed";
768 :
769 : char *view =
770 : static_cast<char *>(MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0));
771 : NNTR_THROW_IF((view == nullptr), std::runtime_error)
772 : << "MapViewOfFile failed";
773 :
774 : node->read(view, false, exec_mode, fsu_mode,
775 : std::numeric_limits<size_t>::max(), true);
776 :
777 : // Early unmap: let the OS reclaim the working set ASAP
778 : UnmapViewOfFile(view);
779 : CloseHandle(hMap);
780 : CloseHandle(hFile);
781 : #else
782 : // POSIX: map per-task, advise kernel, drop pages, unmap
783 0 : int fd = ::open(f_path.c_str(), O_RDONLY);
784 0 : NNTR_THROW_IF((fd == -1), std::invalid_argument)
785 0 : << "Cannot open file : " << f_path;
786 :
787 0 : struct stat st {};
788 0 : NNTR_THROW_IF((::fstat(fd, &st) == -1), std::invalid_argument)
789 0 : << "Cannot get file info (fstat): " << f_path;
790 :
791 0 : size_t f_size = static_cast<size_t>(st.st_size);
792 : void *mmap_ptr =
793 0 : ::mmap(nullptr, f_size, PROT_READ, MAP_PRIVATE, fd, 0);
794 0 : ::close(fd); // fd not needed after mmap
795 0 : NNTR_THROW_IF((mmap_ptr == MAP_FAILED), std::runtime_error)
796 : << "mmap failed";
797 :
798 : // Hint: many model loads touch scattered regions -> RANDOM helps
799 : // reduce readahead
800 0 : (void)::posix_madvise(mmap_ptr, f_size, POSIX_MADV_RANDOM);
801 :
802 : char *view = static_cast<char *>(mmap_ptr);
803 0 : node->read(view, false, exec_mode, fsu_mode,
804 : std::numeric_limits<size_t>::max(), true);
805 :
806 : // Early drop: pages no longer needed; helps lower peak RSS during
807 : // overlap
808 0 : (void)::posix_madvise(mmap_ptr, f_size, POSIX_MADV_DONTNEED);
809 :
810 0 : ::munmap(mmap_ptr, f_size);
811 : #endif
812 : }
813 0 : });
814 : }
815 0 : for (auto &t : threads) {
816 0 : if (t.joinable())
817 0 : t.join();
818 : }
819 0 : } else {
820 0 : for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
821 : ++iter) {
822 0 : (*iter)->read(model_file, false, exec_mode, fsu_mode);
823 : }
824 :
825 : try {
826 : /// this assumes that failure is allowed at the end of the file
827 : /// read, so after this line no additional read should be called
828 0 : if (opt && istrequal(opt->getType(), "adam")) {
829 : std::string opt_type;
830 : opt_type.resize(4);
831 0 : model_file.read((char *)&opt_type[0], 4);
832 :
833 0 : if (istrequal(opt_type, "adam")) {
834 0 : for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
835 : iter++) {
836 0 : (*iter)->read(model_file, true, exec_mode);
837 : }
838 : }
839 : }
840 :
841 0 : if (!fsu_mode && exec_mode == ml::train::ExecutionMode::TRAIN) {
842 :
843 0 : checkedRead(model_file, (char *)&epoch_idx, sizeof(epoch_idx),
844 : "[NeuralNetwork::readModel] failed to read epoch_idx");
845 0 : checkedRead(model_file, (char *)&iter, sizeof(iter),
846 : "[NeuralNetwork::readModel] failed to read iteration");
847 : }
848 0 : } catch (...) {
849 : std::cerr << "failed to read additional data like optimizer variable, "
850 0 : "iteration, proceeding with default\n";
851 0 : }
852 : }
853 :
854 0 : ml_logi("read modelfile: %s",
855 : (v.size() == 2) ? v[1].c_str() : v[0].c_str());
856 : break;
857 0 : }
858 :
859 1 : case ml::train::ModelFormat::MODEL_FORMAT_INI_WITH_BIN: {
860 1 : int ret = loadFromConfig((v.size() == 2) ? v[1] : v[0]);
861 1 : throw_status(ret);
862 : auto &save_path = std::get<props::SavePath>(model_flex_props);
863 1 : if (!save_path.empty()) {
864 0 : checkedOpenStream<std::ifstream>(save_path,
865 : std::ios::in | std::ios::binary);
866 0 : load_path = save_path;
867 : }
868 : break;
869 : }
870 342 : case ml::train::ModelFormat::MODEL_FORMAT_INI: {
871 342 : int ret = loadFromConfig((v.size() == 2) ? v[1] : v[0]);
872 342 : throw_status(ret);
873 : break;
874 : }
875 : case ml::train::ModelFormat::MODEL_FORMAT_FLATBUFFER: {
876 : break;
877 : }
878 :
879 0 : case ml::train::ModelFormat::MODEL_FORMAT_ONNX: {
880 0 : int ret = loadFromConfig((v.size() == 2) ? v[1] : v[0]);
881 0 : throw_status(ret);
882 : break;
883 : }
884 :
885 0 : case ml::train::ModelFormat::MODEL_FORMAT_QNN: {
886 : // for now, we only support the QNN binary format for inference mode.
887 : // the file paths for the qnn bin and the nntrainer bin are expected to
888 : // be separated by ";" : QNN bin (graph) ; NNTrainer bin (weight)
889 0 : NNTR_THROW_IF(exec_mode != ExecutionMode::INFERENCE, std::invalid_argument)
890 : << "Only support QNN biarny for Infernece";
891 0 : NNTR_THROW_IF(!isFileExist(props::FilePath(v[0])), std::invalid_argument)
892 : << "Cannot open QNN context bin file";
893 :
894 2 : std::thread qnn_load([this, &v]() {
895 : int ret =
896 0 : ct_engine->getRegisteredContext("qnn")->load(props::FilePath(v[0]));
897 0 : throw_status(ret);
898 0 : });
899 :
900 0 : if (!fsu_mode && v.size() > 1) {
901 0 : NNTR_THROW_IF(!isFileExist(props::FilePath(v[1])), std::invalid_argument)
902 : << "Cannot open weight bin file";
903 0 : load(props::FilePath(v[1]), ml::train::ModelFormat::MODEL_FORMAT_BIN);
904 0 : } else if (fsu_mode) {
905 0 : NNTR_THROW_IF(v.size() <= 1, std::invalid_argument)
906 : << "Swap mode should run with loading a weight-bin file";
907 0 : NNTR_THROW_IF(!isFileExist(props::FilePath(v[1])), std::invalid_argument)
908 : << "Cannot open weight bin file";
909 : // model_graph.setFsuWeightPath(v[1]);
910 : }
911 :
912 0 : qnn_load.join();
913 : break;
914 : }
915 0 : default:
916 : throw nntrainer::exception::not_supported(
917 0 : "loading with given format is not supported yet");
918 : }
919 349 : }
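
/**
 * The POSIX branch above maps the whole file per task, reads the needed
 * region, and drops the pages immediately; this trades repeated mmap/munmap
 * calls for a lower peak RSS while loader threads overlap. A self-contained
 * sketch of the same idiom (error handling elided):
 * @code
 * #include <fcntl.h>
 * #include <sys/mman.h>
 * #include <sys/stat.h>
 * #include <unistd.h>
 *
 * void read_mapped(const char *path, void (*consume)(const char *, size_t)) {
 *   int fd = ::open(path, O_RDONLY);
 *   struct stat st {};
 *   ::fstat(fd, &st);
 *   void *p = ::mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
 *   ::close(fd);                                        // fd not needed after mmap
 *   ::posix_madvise(p, st.st_size, POSIX_MADV_RANDOM);  // scattered access pattern
 *   consume(static_cast<const char *>(p), st.st_size);
 *   ::posix_madvise(p, st.st_size, POSIX_MADV_DONTNEED); // drop pages early
 *   ::munmap(p, st.st_size);
 * }
 * @endcode
 */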
920 :
921 11073 : float NeuralNetwork::getLoss() {
922 11073 : loss = 0.0f;
923 :
924 47747 : for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
925 73348 : loss += (*iter)->getLoss();
926 : }
927 11073 : return loss;
928 : }
929 :
930 0 : void NeuralNetwork::setLoss(float l) { loss = l; }
931 :
932 0 : NeuralNetwork &NeuralNetwork::copy(NeuralNetwork &from) {
933 0 : if (this != &from) {
934 : model_props = from.model_props;
935 : model_flex_props = from.model_flex_props;
936 0 : loss = from.loss;
937 : opt = from.opt;
938 :
939 : model_graph.copy(from.model_graph);
940 : }
941 0 : return *this;
942 : }
943 :
944 240 : void NeuralNetwork::saveModelIni(const std::string &file_path) {
945 480 : NNTR_THROW_IF(isFileExist(file_path), std::invalid_argument)
946 : << "There is already a file, overriding to the existing file is not "
947 : "permitted, path: "
948 : << file_path;
949 :
950 : std::vector<IniSection> sections;
951 :
952 240 : IniSection model_section = IniSection::FromExportable("model", *this);
953 480 : model_section.setEntry("type", "NeuralNetwork");
954 240 : sections.push_back(model_section);
955 :
956 960 : auto add_section_if_any = [§ions](const std::string §ion_name,
957 : auto obj_ptr, auto pred) {
958 240 : if (pred(obj_ptr)) {
959 242 : IniSection s = IniSection::FromExportable(section_name, *obj_ptr);
960 484 : s.setEntry("type", obj_ptr->getType());
961 242 : sections.push_back(s);
962 : }
963 1200 : };
964 :
965 720 : add_section_if_any("optimizer", opt,
966 : [](const auto &obj) { return static_cast<bool>(obj); });
967 :
968 : auto &[train_buffer, valid_buffer, test_buffer] = data_buffers;
969 : auto data_buffer_valid = [](const auto &buffer) {
970 722 : return buffer && buffer->isSerializable(
971 722 : ml::train::ExportMethods::METHOD_STRINGVECTOR);
972 : };
973 :
974 481 : add_section_if_any("train_set", train_buffer, data_buffer_valid);
975 481 : add_section_if_any("valid_set", valid_buffer, data_buffer_valid);
976 480 : add_section_if_any("test_set", test_buffer, data_buffer_valid);
977 :
978 240 : IniWrapper wrapper("model_saver", sections);
979 240 : wrapper.save_ini(file_path);
980 :
981 240 : IniGraphInterpreter interpreter;
982 240 : interpreter.serialize(graph_representation, file_path);
983 480 : }
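
/**
 * A sketch of the INI produced above (section names as set in this
 * function; the per-layer sections are appended afterwards by
 * IniGraphInterpreter::serialize):
 * @code{.ini}
 * [model]
 * type = NeuralNetwork
 * ; exported model properties ...
 *
 * [optimizer]          ; present only when an optimizer is set
 * type = adam          ; e.g.; taken from opt->getType()
 *
 * [train_set]          ; likewise valid_set / test_set when serializable
 * ; dataset properties ...
 * @endcode
 */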
984 :
985 415 : bool NeuralNetwork::validateInput(sharedConstTensors X) {
986 415 : auto input_dim = getInputDimension();
987 415 : if (X.size() != input_dim.size()) {
988 0 : ml_loge("Error: provided number of inputs %d, required %d", (int)X.size(),
989 : (int)input_dim.size());
990 0 : return false;
991 : }
992 :
993 1506 : for (unsigned int dim = 0; dim < input_dim.size(); dim++) {
994 676 : if (input_dim[dim] != X[dim]->getDim()) {
995 0 : ml_loge("Error: provided input shape does not match required shape");
996 0 : std::stringstream ss;
997 0 : ss << X[dim]->getDim();
998 0 : ml_loge("Provided tensor summary : %s", ss.str().c_str());
999 :
1000 0 : ss.str(std::string());
1001 0 : ss << input_dim[dim];
1002 0 : ml_loge("Required tensor summary : %s", ss.str().c_str());
1003 : return false;
1004 0 : }
1005 : }
1006 :
1007 : return true;
1008 415 : }
1009 :
1010 415 : sharedConstTensors NeuralNetwork::inference(sharedConstTensors X,
1011 : bool free_mem) {
1012 415 : return inference(X, {}, free_mem);
1013 : }
1014 :
1015 415 : sharedConstTensors NeuralNetwork::inference(sharedConstTensors X,
1016 : sharedConstTensors label,
1017 : bool free_mem) {
1018 415 : if (model_graph.getBatchSize() != X[0]->batch()) {
1019 0 : model_graph.setBatchSize(X[0]->batch());
1020 : }
1021 :
1022 : sharedConstTensors out;
1023 415 : if (!validateInput(X))
1024 0 : throw std::invalid_argument("Input validation failed.");
1025 :
1026 415 : allocate(ExecutionMode::INFERENCE);
1027 :
1028 : int nn_foward;
1029 : PROFILE_TIME_REGISTER_EVENT(nn_foward, "nn_forward");
1030 : PROFILE_TIME_START(nn_foward);
1031 415 : out = forwarding(X, label, false);
1032 : PROFILE_TIME_END(nn_foward);
1033 :
1034 415 : if (free_mem)
1035 : /**
1036 : * Free the memory needed for training before exiting.
1037 : * Note that this does not free the weights for the model.
1038 : * Weights of the model will be freed when the model is destroyed.
1039 : */
1040 : model_graph.deallocateTensors(false);
1041 :
1042 : /** Clear the set inputs and labels */
1043 415 : model_graph.setInputsLabels({}, {});
1044 :
1045 415 : return out;
1046 0 : }
1047 :
1048 : std::vector<float *>
1049 5 : NeuralNetwork::inference(unsigned int batch_size,
1050 : const std::vector<float *> &input,
1051 : const std::vector<float *> &label) {
1052 : sharedConstTensors input_tensors, output_tensors;
1053 5 : auto in_dim = getInputDimension();
1054 :
1055 5 : input_tensors.reserve(input.size());
1056 10 : for (unsigned int idx = 0; idx < in_dim.size(); idx++) {
1057 5 : in_dim[idx].batch(batch_size);
1058 15 : input_tensors.emplace_back(MAKE_SHARED_TENSOR(Tensor::Map(
1059 : input[idx], in_dim[idx].getDataLen() * sizeof(float), in_dim[idx], 0)));
1060 : }
1061 :
1062 5 : if (!label.empty()) {
1063 : sharedConstTensors label_tensors;
1064 0 : auto label_dim = getOutputDimension();
1065 0 : label_tensors.reserve(label.size());
1066 0 : for (unsigned int idx = 0; idx < label_dim.size(); idx++) {
1067 0 : label_dim[idx].batch(batch_size);
1068 0 : label_tensors.emplace_back(MAKE_SHARED_TENSOR(
1069 : Tensor::Map(label[idx], label_dim[idx].getDataLen() * sizeof(float),
1070 : label_dim[idx], 0)));
1071 : }
1072 0 : output_tensors = inference(input_tensors, label_tensors, false);
1073 0 : } else {
1074 5 : output_tensors = inference(input_tensors, false);
1075 : }
1076 :
1077 : std::vector<float *> output;
1078 5 : output.reserve(output_tensors.size());
1079 :
1080 10 : for (auto &out : output_tensors) {
1081 5 : auto out_t = *out.get();
1082 5 : output.push_back(out_t.getData());
1083 5 : }
1084 :
1085 5 : return output;
1086 5 : }
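
/**
 * This overload wraps the caller's raw buffers with Tensor::Map, so the
 * input is not copied and the returned float pointers alias the output
 * tensors. A minimal sketch, assuming a single input of `feature_len`
 * floats per sample (both names below are illustrative):
 * @code
 * unsigned int batch = 1;
 * std::vector<float> in(batch * feature_len);   // filled by the caller
 * std::vector<float *> inputs = {in.data()};
 * std::vector<float *> outs = model.inference(batch, inputs, {});
 * float first = outs[0][0];   // aliases the model's output tensor
 * @endcode
 */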
1087 :
1088 : sharedConstTensors
1089 0 : NeuralNetwork::incremental_inference(sharedConstTensors X,
1090 : unsigned int init_seq_len,
1091 : unsigned int from, unsigned int to) {
1092 0 : return incremental_inference(X, {}, init_seq_len, from, to);
1093 : }
1094 :
1095 0 : sharedConstTensors NeuralNetwork::incremental_inference(
1096 : sharedConstTensors X, sharedConstTensors label, unsigned int init_seq_len,
1097 : unsigned int from, unsigned int to) {
1098 0 : if (model_graph.getBatchSize() != X[0]->batch()) {
1099 0 : model_graph.setBatchSize(X[0]->batch());
1100 : }
1101 :
1102 : sharedConstTensors out;
1103 0 : if (!validateInput(X))
1104 0 : throw std::invalid_argument("Input validation failed.");
1105 :
1106 0 : if (!from) {
1107 0 : model_graph.allocateTensors(ExecutionMode::INFERENCE);
1108 : }
1109 :
1110 : int nn_foward;
1111 : PROFILE_TIME_REGISTER_EVENT(nn_foward, "nn_forward");
1112 : PROFILE_TIME_START(nn_foward);
1113 :
1114 0 : out = incremental_forwarding(from, to, X, label, false);
1115 :
1116 : PROFILE_TIME_END(nn_foward);
1117 :
1118 : /** @todo: deallocate tensor after incremental inference **/
1119 : /** Clear the set inputs and labels */
1120 0 : model_graph.setInputsLabels({}, {});
1121 :
1122 0 : return out;
1123 0 : }
1124 :
1125 0 : std::vector<float *> NeuralNetwork::incremental_inference(
1126 : unsigned int batch_size, const std::vector<float *> &input,
1127 : const std::vector<float *> &label, unsigned int init_seq_len,
1128 : unsigned int from, unsigned int to, bool output_hidden_state) {
1129 :
1130 : // auto start_in_neuralnet = std::chrono::high_resolution_clock::now();
1131 :
1132 : sharedConstTensors input_tensors, output_tensors;
1133 0 : auto in_dim = getInputDimension();
1134 :
1135 0 : input_tensors.reserve(input.size());
1136 0 : for (unsigned int idx = 0; idx < in_dim.size(); idx++) {
1137 0 : in_dim[idx].batch(batch_size);
1138 0 : input_tensors.emplace_back(MAKE_SHARED_TENSOR(Tensor::Map(
1139 : input[idx], in_dim[idx].getDataLen() * sizeof(float), in_dim[idx], 0)));
1140 : }
1141 :
1142 : // auto start_increment = std::chrono::high_resolution_clock::now();
1143 0 : if (!label.empty()) {
1144 : sharedConstTensors label_tensors;
1145 0 : auto label_dim = getOutputDimension();
1146 0 : label_tensors.reserve(label.size());
1147 0 : for (unsigned int idx = 0; idx < label_dim.size(); idx++) {
1148 0 : label_dim[idx].batch(batch_size);
1149 0 : label_tensors.emplace_back(MAKE_SHARED_TENSOR(
1150 : Tensor::Map(label[idx], label_dim[idx].getDataLen() * sizeof(float),
1151 : label_dim[idx], 0)));
1152 : }
1153 0 : output_tensors = incremental_inference(input_tensors, label_tensors,
1154 0 : init_seq_len, from, to);
1155 0 : } else {
1156 : output_tensors =
1157 0 : incremental_inference(input_tensors, init_seq_len, from, to);
1158 : }
1159 : // auto end_increment = std::chrono::high_resolution_clock::now();
1160 : std::vector<float *> output;
1161 :
1162 0 : unsigned int step = ((to - from) == 0) ? 0 : (to - from) - 1;
1163 :
1164 0 : for (auto &out : output_tensors) {
1165 0 : auto out_t = *out.get();
1166 : float *last_out_buf_data;
1167 :
1168 0 : if (output_hidden_state) {
1169 0 : last_out_buf_data = out_t.getData();
1170 : } else {
1171 0 : last_out_buf_data = new float[batch_size * out_t.width()];
1172 :
1173 0 : for (unsigned int batch = 0; batch < batch_size; ++batch) {
1174 0 : if (out->getDataType() == ml::train::TensorDim::DataType::FP16) {
1175 : #ifdef ENABLE_FP16
1176 : const _FP16 *out_t_batch_ptr =
1177 : out_t.getData<_FP16>() + batch * out_t.getDim().getFeatureLen() +
1178 : step * out_t.width();
1179 : scopy(out_t.width(), out_t_batch_ptr, 1,
1180 : last_out_buf_data + batch * out_t.width(), 1);
1181 :
1182 : #else
1183 0 : throw std::invalid_argument("Error: enable-fp16 is not set");
1184 : #endif
1185 0 : } else if (out->getDataType() == ml::train::TensorDim::DataType::FP32) {
1186 : const float *out_t_batch_ptr =
1187 0 : out_t.getData() + batch * out_t.getDim().getFeatureLen() +
1188 0 : step * out_t.width();
1189 : // std::memcpy( last_out_buf_data + batch * out_t.width(),
1190 : // out_t_batch_ptr, out_t.width()*sizeof(float));
1191 0 : scopy(out_t.width(), out_t_batch_ptr, 1,
1192 0 : last_out_buf_data + batch * out_t.width(), 1);
1193 : }
1194 : }
1195 : }
1196 :
1197 0 : output.push_back(last_out_buf_data);
1198 0 : }
1199 : // auto end_net_inference = std::chrono::high_resolution_clock::now();
1200 : // auto prepare =
1201 : // std::chrono::duration_cast<std::chrono::nanoseconds>(start_increment-start_in_neuralnet);
1202 : // auto run_inf =
1203 : // std::chrono::duration_cast<std::chrono::nanoseconds>(end_increment-start_increment);;
1204 : // auto out_gen =
1205 : // std::chrono::duration_cast<std::chrono::nanoseconds>(end_net_inference-end_increment);;
1206 : // auto net_gen =
1207 : // std::chrono::duration_cast<std::chrono::nanoseconds>(end_net_inference-start_in_neuralnet);
1208 :
1209 : // std::cout <<"prepare : "<< prepare.count() << " run_inf : "<<
1210 : // run_inf.count() << " out_gen : "<< out_gen.count()<<std::endl; std::cout <<
1211 : // "-------- net_inference: "<< net_gen.count() << std::endl;
1212 :
1213 0 : return output;
1214 0 : }
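
/**
 * A typical autoregressive use of this overload runs in two phases: one
 * prefill call covering the prompt, then repeated single-step calls. A
 * sketch, assuming batch size 1 and illustrative init_len / max_len:
 * @code
 * auto out = model.incremental_inference(1, inputs, {}, init_len,
 *                                        0, init_len, false);
 * for (unsigned int t = init_len; t < max_len; ++t)
 *   out = model.incremental_inference(1, inputs, {}, init_len,
 *                                     t, t + 1, false);
 * @endcode
 */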
1215 :
1216 0 : void NeuralNetwork::resetInputDimension(std::vector<TensorDim> dims) {
1217 0 : model_graph.resetInputDimension(dims);
1218 0 : }
1219 :
1220 30 : int NeuralNetwork::setDataset(const DatasetModeType &mode,
1221 : std::shared_ptr<ml::train::Dataset> dataset) {
1222 60 : return setDataBuffer(mode, std::static_pointer_cast<DataBuffer>(dataset));
1223 : }
1224 :
1225 678 : int NeuralNetwork::allocate(ExecutionMode mode) {
1226 : model_graph.deallocateTensors();
1227 678 : model_graph.allocateTensors(mode);
1228 :
1229 678 : return ML_ERROR_NONE;
1230 : }
1231 :
1232 1534 : int NeuralNetwork::deallocate() {
1233 : try {
1234 : model_graph.deallocateTensors(true);
1235 : return ML_ERROR_NONE;
1236 0 : } catch (const std::exception &e) {
1237 : std::cerr << "Error occurred during deallocation of NeuralNetwork: "
1238 0 : << e.what() << std::endl;
1239 : return ML_ERROR_UNKNOWN;
1240 0 : }
1241 : }
1242 :
1243 28 : int NeuralNetwork::train(const std::vector<std::string> &values,
1244 : std::function<bool(void *)> stop_cb,
1245 : void *stop_user_data,
1246 : std::function<void(void *)> epoch_complete_cb,
1247 : void *epoch_user_data) {
1248 : int status = ML_ERROR_NONE;
1249 :
1250 28 : if (data_buffers[static_cast<int>(DatasetModeType::MODE_TRAIN)] == nullptr) {
1251 0 : ml_loge("Cannot initialize the model without the train data buffer.");
1252 0 : return ML_ERROR_INVALID_PARAMETER;
1253 : }
1254 :
1255 28 : if (!opt) {
1256 1 : ml_loge("Cannot train network without optimizer.");
1257 1 : return ML_ERROR_INVALID_PARAMETER;
1258 : }
1259 :
1260 27 : setTrainConfig(values);
1261 :
1262 : /** set batch size just before training */
1263 23 : model_graph.setBatchSize(
1264 : std::get<props::TrainingBatchSize>(model_flex_props));
1265 :
1266 23 : status = allocate(ExecutionMode::TRAIN);
1267 23 : NN_RETURN_STATUS();
1268 :
1269 : status =
1270 46 : train_run(stop_cb, stop_user_data, epoch_complete_cb, epoch_user_data);
1271 23 : NN_RETURN_STATUS();
1272 :
1273 : /**
1274 : * Free the memory needed for training before exiting.
1275 : * Note that this does not free the weights for the model.
1276 : * Weights of the model will be freed when the model is destroyed.
1277 : */
1278 : model_graph.deallocateTensors(false);
1279 23 : return status;
1280 : }
1281 :
1282 : /**
1283 : * @brief Run NeuralNetwork train with callback function by user
1284 : */
1285 23 : int NeuralNetwork::train_run(
1286 : std::function<bool(void *userdata)> stop_cb, void *stop_user_data,
1287 : std::function<void(void *userdata)> epoch_complete_cb,
1288 : void *epoch_user_data) {
1289 : int status = ML_ERROR_NONE;
1290 :
1291 23 : if (!std::get<props::ContinueTrain>(model_flex_props)) {
1292 23 : epoch_idx = 0;
1293 23 : iter = 0;
1294 115 : for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
1295 184 : (*iter)->clearOptVar();
1296 : }
1297 : }
1298 :
1299 : auto batch_size = std::get<props::TrainingBatchSize>(model_flex_props);
1300 :
1301 23 : auto const &outputs = model_graph.getOutputTensors();
1302 23 : auto in_dims = model_graph.getInputDimension();
1303 23 : auto label_dims = model_graph.getOutputDimension();
1304 :
1305 : auto &[train_buffer, valid_buffer, test_buffer] = data_buffers;
1306 :
1307 23 : if (train_buffer == nullptr) {
1308 0 : ml_loge("[NeuralNetworks] there is no train dataset!");
1309 0 : return ML_ERROR_INVALID_PARAMETER;
1310 : }
1311 :
1312 : /**
1313 : * @brief run a single epoch with the given callbacks, @a auto is used
1314 : * instead of std::function for performance reasons
1315 : * @param buffer buffer to run
1316 : * @param shuffle whether to shuffle or not
1317 : * @param on_iteration_fetch function that receives references to the stat
1318 : * and buffer; called every time data is fetched and set
1319 : * @param on_epoch_end function that receives references to the stat and
1320 : * buffer; called at the end of the epoch
1321 : */
1322 1387 : auto run_epoch = [this, &in_dims, &label_dims, &outputs, batch_size](
1323 : DataBuffer *buffer, bool shuffle,
1324 : auto &&on_iteration_fetch, auto &&on_iteration_update_stat,
1325 : auto &&on_epoch_end, RunStats &stat) {
1326 : /// @todo managing metrics must be handled here as well!! for now it is
1327 : /// handled in individual callbacks
1328 : // RunStats stat;
1329 :
1330 1364 : stat.accuracy = 0.0;
1331 1364 : stat.loss = 0.0;
1332 1364 : stat.num_iterations = 0;
1333 1364 : stat.num_correct_predictions = 0;
1334 1364 : stat.max_epoch = getEpochs();
1335 1364 : stat.epoch_idx = epoch_idx;
1336 :
1337 1364 : std::future<std::shared_ptr<IterationQueue>> future_iq =
1338 : buffer->startFetchWorker(in_dims, label_dims, shuffle);
1339 1637 : while (true) {
1340 8297 : ScopedView<Iteration> iter_view = buffer->fetch();
1341 8297 : if (iter_view.isEmpty()) {
1342 : break;
1343 : }
1344 : auto &iteration = iter_view.get();
1345 6933 : if (iteration.batch() != static_cast<unsigned int>(batch_size)) {
1346 : /// @todo support partial batch
1347 : continue;
1348 : }
1349 :
1350 : auto const &labels = iteration.getLabelsRef();
1351 : auto const &inputs = iteration.getInputsRef();
1352 5673 : model_graph.setInputsLabels(inputs, labels);
1353 :
1354 5673 : on_iteration_fetch(stat, *buffer);
1355 273 : on_iteration_update_stat(stat, outputs, labels);
1356 : }
1357 1364 : future_iq.get();
1358 1364 : on_epoch_end(stat, *buffer);
1359 :
1360 1364 : if (stat.num_iterations == 0) {
1361 0 : throw std::runtime_error("No data came while buffer ran");
1362 : }
1363 :
1364 1364 : return stat;
1365 23 : };
1366 :
1367 : auto train_for_iteration =
1368 23 : [this, stop_cb, stop_user_data](RunStats &stat, DataBuffer &buffer) {
1369 5400 : ml_logi("train for iteration");
1370 5400 : forwarding(true, stop_cb, stop_user_data);
1371 5400 : backwarding(iter++, stop_cb, stop_user_data);
1372 :
1373 : // To avoid unconsidered memory leak, we need to clear the cache
1374 5400 : model_graph.flushCache();
1375 :
1376 10800 : if (!stop_cb(stop_user_data)) {
1377 10800 : std::cout << "#" << epoch_idx << "/" << getEpochs();
1378 5400 : ml_logi("# %d / %d", epoch_idx, getEpochs());
1379 5400 : auto loss = getLoss();
1380 5400 : buffer.displayProgress(stat.num_iterations, loss);
1381 : }
1382 5423 : };
1383 :
1384 : auto update_train_stat = [this](RunStats &stat,
1385 : const std::vector<Tensor> &outputs,
1386 : const std::vector<Tensor> &labels) {
1387 5400 : stat.loss += getLoss();
1388 5400 : stat.num_iterations++;
1389 23 : };
1390 :
1391 23 : auto train_epoch_end = [this, stop_cb, stop_user_data](RunStats &stat,
1392 : DataBuffer &buffer) {
1393 1233 : if (stat.num_iterations != 0) {
1394 1233 : stat.loss /= static_cast<float>(stat.num_iterations);
1395 : } else {
1396 : std::cerr << "stat.num_iterations is 0" << std::endl;
1397 0 : return;
1398 : }
1399 1233 : auto &save_path = std::get<props::SavePath>(model_flex_props);
1400 2466 : if (!stop_cb(stop_user_data)) {
1401 1233 : if (!save_path.empty()) {
1402 1021 : save(save_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
1403 : }
1404 :
1405 2466 : std::cout << "#" << epoch_idx << "/" << getEpochs()
1406 1233 : << " - Training Loss: " << stat.loss;
1407 1233 : ml_logi("# %d / %d - Training Loss: %f", epoch_idx, getEpochs(),
1408 : stat.loss);
1409 2466 : ml_logd("[NNTrainer] Training epoch %d / %d finished successfully.",
1410 : epoch_idx, getEpochs());
1411 : } else {
1412 0 : ml_logd("[NNTrainer] Training stopped by stop callback function during "
1413 : "epoch %d.",
1414 : epoch_idx);
1415 : }
1416 23 : };
1417 :
1418 273 : auto eval_for_iteration = [this, batch_size, stop_cb, stop_user_data](
1419 : RunStats &stat, DataBuffer &buffer) {
1420 273 : forwarding(false, stop_cb, stop_user_data);
1421 296 : };
1422 :
1423 273 : auto update_eval_stat = [batch_size, &update_train_stat](
1424 : RunStats &stat, const std::vector<Tensor> &outputs,
1425 : const std::vector<Tensor> &labels) {
1426 273 : auto model_out = outputs[0].argmax();
1427 273 : auto label_out = labels[0].argmax();
1428 :
1429 3996 : for (unsigned int b = 0; b < batch_size; b++) {
1430 3723 : if (model_out[b] == label_out[b])
1431 2498 : stat.num_correct_predictions++;
1432 : }
1433 :
1434 273 : update_train_stat(stat, outputs, labels);
1435 273 : };
1436 :
1437 154 : auto eval_epoch_end = [this, batch_size, max_acc = 0.0f,
1438 23 : min_loss = std::numeric_limits<float>::max()](
1439 : RunStats &stat, DataBuffer &buffer) mutable {
1440 131 : if (stat.num_iterations != 0) {
1441 131 : stat.loss /= static_cast<float>(stat.num_iterations);
1442 : } else {
1443 : std::cerr << "stat.num_iterations is 0" << std::endl;
1444 0 : return;
1445 : }
1446 262 : stat.accuracy = stat.num_correct_predictions /
1447 131 : static_cast<float>(stat.num_iterations * batch_size) *
1448 : 100.0f;
1449 :
1450 131 : if (stat.accuracy > max_acc ||
1451 104 : (stat.accuracy == max_acc && stat.loss < min_loss)) {
1452 48 : max_acc = stat.accuracy;
1453 : /// @note this is not actually 'the' min loss over the whole run; it is
1454 : /// recorded whenever the best accuracy changes
1455 48 : min_loss = stat.loss;
1456 48 : auto &save_best_path = std::get<props::SaveBestPath>(model_flex_props);
1457 48 : if (!save_best_path.empty()) {
1458 0 : save(save_best_path);
1459 : }
1460 : }
1461 131 : std::cout << " >> [ Accuracy: " << stat.accuracy
1462 131 : << "% - Validation Loss : " << stat.loss << " ]";
1463 262 : ml_logi("[ Accuracy: %.2f %% - Validation Loss: %.5f", stat.accuracy,
1464 : stat.loss);
1465 23 : };
1466 :
1467 : PROFILE_MEM_ANNOTATE("TRAIN START");
1468 : auto epochs = getEpochs();
1469 23 : ml_logd("[NNTrainer] Starts training. Current epoch: %d. Total epochs: %d.",
1470 : epoch_idx + 1, getEpochs());
1471 1256 : for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) {
1472 1233 : if (stop_cb(stop_user_data)) {
1473 0 : --epoch_idx;
1474 0 : break;
1475 : }
1476 1233 : training = run_epoch(train_buffer.get(), true, train_for_iteration,
1477 1233 : update_train_stat, train_epoch_end, training);
1478 1233 : if (valid_buffer) {
1479 131 : validation = run_epoch(valid_buffer.get(), false, eval_for_iteration,
1480 131 : update_eval_stat, eval_epoch_end, validation);
1481 : }
1482 1233 : std::cout << '\n';
1483 1233 : epoch_complete_cb(epoch_user_data);
1484 : }
1485 : PROFILE_MEM_ANNOTATE("TRAIN END");
1486 :
1487 23 : if (test_buffer) {
1488 0 : std::cout << "Evaluation with test data...\n";
1489 0 : testing = run_epoch(test_buffer.get(), false, eval_for_iteration,
1490 0 : update_eval_stat, eval_epoch_end, testing);
1491 : }
1492 :
1493 : /** Clear the set inputs and labels */
1494 23 : model_graph.setInputsLabels({}, {});
1495 :
1496 : return status;
1497 46 : }
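
/**
 * Putting train() and train_run() together, a minimal training call from
 * user code looks roughly like this (the property string and callbacks
 * are illustrative; train_set is a previously created dataset):
 * @code
 * model.setDataset(DatasetModeType::MODE_TRAIN, train_set);
 * model.train({"epochs=5"},
 *             [](void *) { return false; },   // stop_cb: never stop early
 *             nullptr,
 *             [](void *) {},                  // epoch_complete_cb
 *             nullptr);
 * @endcode
 */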
1498 :
1499 540 : void swap(NeuralNetwork &lhs, NeuralNetwork &rhs) {
1500 : {
1501 : using std::swap;
1502 :
1503 : swap(lhs.model_props, rhs.model_props);
1504 : swap(lhs.model_flex_props, rhs.model_flex_props);
1505 540 : swap(lhs.load_path, rhs.load_path);
1506 : swap(lhs.epoch_idx, rhs.epoch_idx);
1507 : swap(lhs.iter, rhs.iter);
1508 : swap(lhs.loss, rhs.loss);
1509 : swap(lhs.opt, rhs.opt);
1510 : swap(lhs.data_buffers, rhs.data_buffers);
1511 : swap(lhs.initialized, rhs.initialized);
1512 : swap(lhs.model_graph, rhs.model_graph);
1513 : swap(lhs.graph_representation, rhs.graph_representation);
1514 : swap(lhs.compiled, rhs.compiled);
1515 : swap(lhs.loadedFromConfig, rhs.loadedFromConfig);
1516 : }
1517 540 : }
1518 :
1519 4113 : int NeuralNetwork::addLayer(NodeType layer) {
1520 : int status = ML_ERROR_NONE;
1521 :
1522 4113 : if (initialized) {
1523 : return ML_ERROR_NOT_SUPPORTED;
1524 : }
1525 :
1526 : /** Insert the layer to the graph */
1527 4112 : model_graph.addLayer(layer);
1528 4111 : graph_representation.push_back(layer);
1529 :
1530 4111 : return status;
1531 : }
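/**
 * Example: layers must be appended before initialize(); afterwards addLayer()
 * returns ML_ERROR_NOT_SUPPORTED. A sketch using the ccapi layer factory
 * (the layer name and unit count are arbitrary):
 * @code
 * auto fc = ml::train::createLayer("fully_connected", {"name=fc1", "unit=10"});
 * model->addLayer(fc);
 * @endcode
 */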
1532 :
1533 3 : NeuralNetwork &NeuralNetwork::copyConfiguration(NeuralNetwork &from) {
1534 3 : if (this != &from) {
1535 : model_props = from.model_props;
1536 : model_flex_props = from.model_flex_props;
1537 3 : loss = from.loss;
1538 : opt = from.opt;
1539 :
1540 : NetworkGraph f_graph = from.getNetworkGraph();
1541 7 : for (auto &l_node : f_graph.getLayerNodes()) {
1542 9 : addLayer(static_cast<std::shared_ptr<ml::train::Layer>>(
1543 9 : l_node->cloneConfiguration()));
1544 3 : }
1545 3 : }
1546 2 : return *this;
1547 : }
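/**
 * copyConfiguration() clones topology and properties only: each node is taken
 * from cloneConfiguration(), so trained weights are not carried over. A sketch
 * (internal API):
 * @code
 * nntrainer::NeuralNetwork reference_net, clone;
 * // ... configure reference_net ...
 * clone.copyConfiguration(reference_net); // same structure, fresh weights
 * @endcode
 */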
1548 :
1549 : NeuralNetwork::GraphType
1550 0 : NeuralNetwork::getUnsortedLayers(const std::string &input_layer,
1551 : const std::string &output_layer) {
1552 0 : return model_graph.getUnsortedLayers(input_layer, output_layer);
1553 : }
1554 :
1555 741 : int NeuralNetwork::setOptimizer(
1556 : std::shared_ptr<ml::train::Optimizer> optimizer) {
1557 741 : if (initialized) {
1558 0 : ml_loge("Cannot set optimizer if already initialized");
1559 0 : return ML_ERROR_NOT_SUPPORTED;
1560 : }
1561 :
1562 741 : opt = std::static_pointer_cast<OptimizerWrapped>(optimizer);
1563 :
1564 741 : return ML_ERROR_NONE;
1565 : }
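/**
 * Example: the optimizer has to be attached before initialization, as the
 * check above enforces. A sketch using the ccapi optimizer factory:
 * @code
 * auto opt = ml::train::createOptimizer("adam", {"learning_rate=0.001"});
 * model->setOptimizer(opt);
 * @endcode
 */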
1566 :
1567 30 : int NeuralNetwork::setDataBuffer(const DatasetModeType &mode,
1568 : std::shared_ptr<DataBuffer> data_buffer) {
1569 30 : if (data_buffer == nullptr) {
1570 : return ML_ERROR_INVALID_PARAMETER;
1571 : }
1572 :
1573 30 : this->data_buffers[static_cast<int>(mode)] = data_buffer;
1574 :
1575 30 : return ML_ERROR_NONE;
1576 : }
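/**
 * Example: one data buffer can be registered per DatasetModeType slot
 * (train / valid / test). A sketch through the ccapi, where `getBatch` is an
 * assumed user-provided generator callback:
 * @code
 * auto dataset = ml::train::createDataset(ml::train::DatasetType::GENERATOR,
 *                                         getBatch, nullptr);
 * model->setDataset(ml::train::DatasetModeType::MODE_TRAIN, dataset);
 * @endcode
 */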
1577 :
1578 12 : int NeuralNetwork::getLayer(const char *name,
1579 : std::shared_ptr<ml::train::Layer> *layer) {
1580 : // Layers may be modified through this API; the validity of such changes is
1580 : // the user's responsibility.
1581 : //
1582 : // if (compiled) {
1583 : // ml_loge("Cannot get compiled layer.");
1584 : // return ML_ERROR_NOT_SUPPORTED;
1585 : // }
1586 :
1587 9 : *layer = std::static_pointer_cast<ml::train::Layer>(
1588 12 : model_graph.getLayerNode(std::string(name)));
1589 9 : return ML_ERROR_NONE;
1590 : }
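/**
 * Example: fetching a layer by name to inspect or adjust it after the graph
 * has been built; per the note above, correctness of such changes rests with
 * the caller. The layer name "fc1" is an assumption:
 * @code
 * std::shared_ptr<ml::train::Layer> layer;
 * model->getLayer("fc1", &layer);
 * layer->setProperty({"trainable=false"});
 * @endcode
 */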
1591 :
1592 19 : void NeuralNetwork::printMetrics(std::ostream &out, unsigned int flags) {
1593 19 : switch (flags) {
1594 4 : case ML_TRAIN_SUMMARY_MODEL_TRAIN_LOSS:
1595 4 : out << training.loss << std::endl;
1596 : break;
1597 :
1598 4 : case ML_TRAIN_SUMMARY_MODEL_VALID_LOSS:
1599 4 : out << validation.loss << std::endl;
1600 : break;
1601 :
1602 4 : case ML_TRAIN_SUMMARY_MODEL_VALID_ACCURACY:
1603 4 : out << validation.accuracy << std::endl;
1604 : break;
1605 :
1606 : default:
1607 : break;
1608 : }
1609 19 : }
1610 :
1611 19 : void NeuralNetwork::printPreset(std::ostream &out, unsigned int preset) {
1612 : /** print neuralnet metrics */
1613 19 : printMetrics(out, preset);
1614 19 : if (preset > ML_TRAIN_SUMMARY_TENSOR)
1615 : return;
1616 :
1617 : LayerNode::PrintPreset layer_preset = LayerNode::PrintPreset::PRINT_NONE;
1618 :
1619 : ///@todo match flags with preset
1620 : unsigned int flags = PRINT_INST_INFO | PRINT_GRAPH_INFO | PRINT_PROP |
1621 : PRINT_OPTIMIZER | PRINT_METRIC;
1622 :
1623 7 : switch (preset) {
1624 1 : case ML_TRAIN_SUMMARY_TENSOR:
1625 : layer_preset = LayerNode::PrintPreset::PRINT_ALL;
1626 1 : break;
1627 0 : case ML_TRAIN_SUMMARY_LAYER:
1628 0 : layer_preset = initialized ? LayerNode::PrintPreset::PRINT_SUMMARY
1629 : : LayerNode::PrintPreset::PRINT_SUMMARY_META;
1630 : break;
1631 : case ML_TRAIN_SUMMARY_MODEL:
1632 : break;
1633 : default:
1634 : throw std::invalid_argument("given verbosity is invalid");
1635 : }
1636 :
1637 7 : print(out, flags, layer_preset);
1638 : }
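/**
 * Example: the printing entry points above back the public summarize() call;
 * ML_TRAIN_SUMMARY_MODEL/LAYER/TENSOR print increasing levels of graph
 * detail, while the internal values 101-103 print a single metric:
 * @code
 * model->summarize(std::cout, ML_TRAIN_SUMMARY_MODEL);
 * @endcode
 */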
1639 :
1640 2 : void NeuralNetwork::addWithReferenceLayers(
1641 : const std::vector<std::shared_ptr<ml::train::Layer>> &reference,
1642 : const std::string &scope, const std::vector<std::string> &input_layers,
1643 : const std::vector<std::string> &start_layers,
1644 : const std::vector<std::string> &end_layers,
1645 : ml::train::ReferenceLayersType type,
1646 : const std::vector<std::string> &type_properties) {
1647 : std::vector<NodeType> casted_reference;
1648 2 : casted_reference.reserve(reference.size());
1649 6 : for (auto &node : reference) {
1650 8 : casted_reference.emplace_back(std::static_pointer_cast<LayerNode>(node));
1651 : }
1652 :
1653 2 : addWithReferenceLayers(casted_reference, scope, input_layers, start_layers,
1654 : end_layers, type, type_properties);
1655 2 : }
1656 :
1657 52 : void NeuralNetwork::addWithReferenceLayers(
1658 : const std::vector<std::shared_ptr<LayerNode>> &reference,
1659 : const std::string &scope, const std::vector<std::string> &input_layers,
1660 : const std::vector<std::string> &start_layers,
1661 : const std::vector<std::string> &end_layers,
1662 : ml::train::ReferenceLayersType type,
1663 : const std::vector<std::string> &type_properties) {
1664 : /// @todo the configuration below should be extracted into a free function to
1665 : /// make it more testable and reusable inside the graph interpreter
1666 :
1667 : /// @note finer-grained, connection-to-connection mapping is not supported
1668 : /// yet, but could easily be added here
1669 : std::vector<std::shared_ptr<LayerNode>> nodes;
1670 52 : nodes.reserve(reference.size());
1671 316 : for (auto &node : reference) {
1672 528 : nodes.push_back(node->cloneConfiguration());
1673 : }
1674 :
1675 : auto start_conns =
1676 52 : std::vector<Connection>(start_layers.begin(), start_layers.end());
1677 : auto input_conns =
1678 52 : std::vector<Connection>(input_layers.begin(), input_layers.end());
1679 : auto end_conns =
1680 52 : std::vector<Connection>(end_layers.begin(), end_layers.end());
1681 :
1682 : std::vector<std::unique_ptr<GraphRealizer>> realizers;
1683 :
1684 52 : realizers.emplace_back(new PreviousInputRealizer(start_conns));
1685 52 : realizers.emplace_back(new SliceRealizer(start_conns, end_conns));
1686 :
1687 52 : if (!input_conns.empty()) {
1688 51 : realizers.emplace_back(new InputRealizer(start_conns, input_conns));
1689 : }
1690 :
1691 52 : if (type == ml::train::ReferenceLayersType::RECURRENT) {
1692 51 : realizers.emplace_back(
1693 51 : new RecurrentRealizer(type_properties, input_conns, end_conns));
1694 : }
1695 :
1696 52 : if (!scope.empty()) {
1697 52 : realizers.emplace_back(
1698 52 : new RemapRealizer([&scope, &input_conns](std::string &name) {
1699 4394 : for (auto &i : input_conns) {
1700 3578 : if (i.getName() == name) {
1701 : return;
1702 : }
1703 : }
1704 1632 : name = scope + "/" + name;
1705 104 : }));
1706 : }
1707 :
1708 310 : for (auto &realizer : realizers) {
1709 258 : nodes = realizer->realize(nodes);
1710 : }
1711 :
1712 314 : for (auto &node : nodes) {
1713 524 : addLayer(node);
1714 : }
1715 52 : }
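/**
 * Example: this overload is the mechanism behind recurrent unrolling: the
 * reference layers are cloned, sliced between the start and end connections,
 * and re-scoped so every timestep gets unique names. A sketch with assumed
 * layer names and RecurrentRealizer properties:
 * @code
 * model.addWithReferenceLayers(
 *   cell_layers,        // layers describing a single timestep
 *   "recurrent",        // scope: nodes become "recurrent/<name>"
 *   {"input"}, {"cell_in"}, {"cell_out"},
 *   ml::train::ReferenceLayersType::RECURRENT,
 *   {"unroll_for=3", "recurrent_input=cell_in", "recurrent_output=cell_out"});
 * @endcode
 */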
1716 :
1717 240 : void NeuralNetwork::exportTo(Exporter &exporter,
1718 : const ml::train::ExportMethods &method) const {
1719 240 : exporter.saveResult(model_props, method, this);
1720 240 : exporter.saveResult(model_flex_props, method, this);
1721 240 : }
1722 :
1723 7 : void NeuralNetwork::print(std::ostream &out, unsigned int flags,
1724 : LayerNode::PrintPreset layerPrintPreset) {
1725 : if (flags & PRINT_INST_INFO) {
1726 : /// @todo uncomment this after implement getProperty (#1875)
1727 : // out << "===================";
1728 : // printInstance(out, this);
1729 : }
1730 :
1731 7 : if (flags & PRINT_GRAPH_INFO) {
1732 : unsigned int total_col_size = 80;
1733 7 : std::vector<unsigned int> column_size = {20, 20, 20, 20};
1734 : auto print_graph_layer_info =
1735 7 : [column_size](std::ostream &out, std::vector<std::string> layer_info) {
1736 140 : const auto &trim_string = [](std::string str,
1737 : unsigned int column_width) {
1738 140 : return str.size() < column_width ? str
1739 14 : : str.substr(0, column_width - 1);
1740 : };
1741 :
1742 175 : for (unsigned int i = 0; i < column_size.size(); ++i) {
1743 : out << std::setw(column_size[i])
1744 280 : << trim_string(layer_info[i], column_size[i]);
1745 : }
1746 35 : out << "\n";
1747 42 : };
1748 :
1749 7 : out << std::string(total_col_size, '=') << '\n';
1750 7 : print_graph_layer_info(
1751 : out, {"Layer name", "Layer type", "Output dimension", "Input layer"});
1752 7 : out << std::string(total_col_size, '=') << '\n';
1753 7 : if (compiled) {
1754 : props::GenericShape dim_property;
1755 :
1756 25 : for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
1757 : iter++) {
1758 : std::string first_dim;
1759 44 : if (iter->getOutputDimensions().empty()) {
1760 : first_dim = "";
1761 : } else {
1762 44 : dim_property.set(iter->getOutputDimensions()[0]);
1763 44 : first_dim = to_string(dim_property);
1764 : }
1765 : const std::vector<std::string> &input_layer_names =
1766 22 : iter->getInputConnections();
1767 : std::string first_input_name =
1768 41 : input_layer_names.empty() ? "" : input_layer_names[0];
1769 110 : print_graph_layer_info(
1770 : out, {iter->getName(), iter->getType(), first_dim, first_input_name});
1771 22 : for (unsigned int i = 1; i < input_layer_names.size(); ++i) {
1772 0 : dim_property.set(iter->getInputDimensions()[i]);
1773 0 : print_graph_layer_info(out, {"", "", "", input_layer_names[i]});
1774 : }
1775 44 : out << std::string(total_col_size,
1776 : iter == model_graph.cend() - 1 ? '=' : '-')
1777 22 : << '\n';
1778 22 : }
1779 : } else {
1780 : auto &input_connection =
1781 : std::get<std::vector<props::InputConnection>>(model_props);
1782 : auto model_input = std::vector<Connection>(input_connection.begin(),
1783 4 : input_connection.end());
1784 : auto is_actually_an_input_node =
1785 4 : [model_input](graph_const_iterator<LayerNode> node) {
1786 : return node->hasInputShapeProperty() or
1787 : std::any_of(model_input.begin(), model_input.end(),
1788 : [node](auto &conn) {
1789 : return node->getName() == conn.getName();
1790 : });
1791 4 : };
1792 :
1793 10 : for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
1794 : iter++) {
1795 : const std::vector<std::string> &input_layer_names =
1796 6 : iter->getInputConnections();
1797 :
1798 : /// @brief connection information.
1799 : // Intended comment.
1800 : // std::string first_input_name =
1801 : // input_layer_names.empty()
1802 : // ? (is_actually_an_input_node(iter) || iter ==
1803 : // model_graph.cbegin()
1804 : // ? ""
1805 : // : (iter - 1)->getName())
1806 : // : input_layer_names[0];
1807 30 : print_graph_layer_info(out, {iter->getName(), iter->getType(), "", ""});
1808 6 : for (unsigned int i = 1; i < input_layer_names.size(); ++i) {
1809 0 : print_graph_layer_info(out, {"", "", "", ""});
1810 : }
1811 6 : out << std::string(total_col_size,
1812 : iter == model_graph.cend() - 1 ? '=' : '-')
1813 6 : << '\n';
1814 6 : }
1815 4 : }
1816 7 : }
1817 :
1818 : if (flags & PRINT_PROP) {
1819 : /// @todo print neuralnet property
1820 : /// @todo print mode (if it is eval or training)
1821 : }
1822 :
1823 : if (flags & PRINT_OPTIMIZER) {
1824 : /// @todo print optimizer (with print optimizer prop)
1825 : }
1826 :
1827 : if (flags & PRINT_METRIC) {
1828 : /// @todo print metric (currently it is done at printPreset as a
1829 : /// workaround)
1830 : /// @todo print loss function when it is not initialized. (if it is
1831 : /// initialized, loss layer will be printed)
1832 : }
1833 :
1834 7 : if (model_graph.empty()) {
1835 : out << "model is empty!" << std::endl;
1836 0 : return;
1837 : }
1838 :
1839 : /** print layer properties */
1840 35 : for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++)
1841 56 : (*iter)->printPreset(out, layerPrintPreset);
1842 :
1843 : /// @todo Add status to check neuralnet has been run. #290
1844 118 : }
1845 :
1846 0 : void NeuralNetwork::forEachLayer(
1847 : std::function<void(ml::train::Layer &, RunLayerContext &, void *)> fn,
1848 : void *user_data) {
1849 0 : for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
1850 0 : auto ln = std::static_pointer_cast<LayerNode>(*iter).get();
1851 0 : fn(*ln, std::forward<RunLayerContext &>(ln->getRunContext()), user_data);
1852 : };
1853 0 : }
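/**
 * Example: forEachLayer() hands every node to the callback together with its
 * RunLayerContext, which is handy for inspecting run-time state. A sketch:
 * @code
 * model.forEachLayer(
 *   [](ml::train::Layer &layer, nntrainer::RunLayerContext &ctx, void *) {
 *     std::cout << layer.getName() << '\n';
 *   },
 *   nullptr);
 * @endcode
 */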
1854 :
1855 4 : void NeuralNetwork::exports(const ml::train::ExportMethods &method,
1856 : const std::string file_path) {
1857 4 : switch (method) {
1858 4 : case ml::train::ExportMethods::METHOD_TFLITE: {
1859 : #ifdef ENABLE_TFLITE_INTERPRETER
1860 4 : nntrainer::TfliteInterpreter interpreter;
1861 :
1862 : /// The "serialize" method is called on a model that has already been trained
1863 : /// or allocated, so deallocateTensors must be called first to ensure
1864 : /// `dealloc_weights == false`
1865 : model_graph.deallocateTensors();
1866 4 : model_graph.allocateTensors(ExecutionMode::INFERENCE);
1867 4 : model_graph.setBatchSize(1); // for now, fix the inference batch size to 1
1868 4 : interpreter.serialize(graph_representation, file_path);
1869 : model_graph.deallocateTensors();
1870 : #else
1871 : throw std::runtime_error{
1872 : "Export methods METHOD_TFLITE is not supported. Please enable tflite "
1873 : "interpreter by set ENABLE_TFLITE_INTERPRETER=1"};
1874 : #endif
1875 : break;
1876 : }
1877 0 : case ml::train::ExportMethods::METHOD_FLATBUFFER: {
1878 :
1879 : /**
1880 : * @todo The current FLATBUFFER exporter only supports the TRAIN execution
1881 : * mode. It should be updated to support both training and inference modes;
1882 : * supporting inference by default would be more natural, since tflite is
1883 : * typically used solely for inference.
1884 : */
1885 : model_graph.deallocateTensors();
1886 0 : model_graph.allocateTensors(ExecutionMode::TRAIN);
1887 0 : break;
1888 : }
1889 0 : default:
1890 0 : throw std::runtime_error{"Unsupported export method"};
1891 : }
1892 4 : }
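/**
 * Example: exporting a model to a .tflite file; the build must be configured
 * with ENABLE_TFLITE_INTERPRETER=1, otherwise the call throws as shown above:
 * @code
 * model.exports(ml::train::ExportMethods::METHOD_TFLITE, "model.tflite");
 * @endcode
 */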
1893 : } /* namespace nntrainer */