Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2020 Parichay Kapoor <pk.kapoor@samsung.com>
4 : *
5 : * @file manager.h
6 : * @date 30 Nov 2020
7 : * @brief This is the NNTrainer manager for all weights, i/o and intermediate
8 : * tensors
9 : * @see https://github.com/nnstreamer/nntrainer
10 : * @author Parichay Kapoor <pk.kapoor@samsung.com>
11 : * @author Jihoon Lee <jhoon.it.lee@samsung.com>
12 : * @bug No known bugs except for NYI items
13 : *
14 : *
15 : * @details Manager assumes that the layer inputs/outputs are being tracked by
16 : * the manager in the order of execution. If the order is not maintained, then
17 : * the optimizations cannot be performed and will result in wrong values.
18 : */
19 :
20 : #ifndef __MANAGER_H__
21 : #define __MANAGER_H__
22 : #include "tensor.h"
23 : #include "tensor_wrap_specs.h"
24 : #ifdef __cplusplus
25 :
26 : #include <functional>
27 : #include <memory>
28 : #include <unordered_map>
29 : #include <utility>
30 : #include <vector>
31 :
32 : #include <basic_planner.h>
33 : #include <common.h>
34 : #include <graph_node.h>
35 : #include <tensor_pool.h>
36 : #include <var_grad.h>
37 : #include <weight.h>
38 :
39 : #include "noncopyable.h"
40 : #include "nonmovable.h"
41 :
42 : namespace nntrainer {
43 : using ExecutionMode = ml::train::ExecutionMode;
44 :
45 : /**
46 : * @class MMapedMemory
47 : * @brief Memory handler that owns mmap'ed memory with a file descriptor
48 : */
49 : class MMapedMemory : public Noncopyable, public Nonmovable {
50 : public:
51 : /**
52 : * @brief Construct a new MMapedMemory object
53 : *
54 : * @param size byte size of the memory chunk
55 : * @param allocate_fd_ if true, back the memory with a shared memory file descriptor
56 : */
57 : MMapedMemory(size_t size, bool allocate_fd_ = false);
58 :
59 : /**
60 : * @brief Destroy the MMapedMemory object
61 : *
62 : */
63 : ~MMapedMemory() noexcept;
64 :
65 : /**
66 : * @brief Get the File descriptor.
67 : * Will return -1 except on Android
68 : * @todo make this available for other platforms
69 : *
70 : * @return -1 if fd is not allocated (or unable to allocate)
71 : */
72 : int getFd() noexcept { return fd; }
73 :
74 : /**
75 : * @brief get the size of managed memory
76 : *
77 : * @return size_t size
78 : */
79 : size_t size() noexcept { return buf_size; }
80 :
81 : /**
82 : * @brief get Typed buffer from the memory
83 : *
84 : * @tparam T Type of the buffer. The return value is reinterpreted as T*
85 : * @return T* Typed buffer, nullptr if empty
86 : */
87 : template <typename T> T *typedBuffer() noexcept {
88 : return reinterpret_cast<T *>(buf);
89 : }
90 :
91 : void *data() noexcept { return typedBuffer<void>(); }
92 :
93 : private:
94 : int fd; /**< fd to access the shared_memory */
95 : void *buf; /**< buffer object when use_shared_memory */
96 : size_t buf_size; /**< buffer size */
97 : bool allocate_fd; /**< option to choose to allocate an fd */
98 : };
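/**
 * A minimal usage sketch for MMapedMemory (illustrative only; the buffer size
 * and element type below are arbitrary assumptions):
 *
 * @code
 * MMapedMemory mem(1024 * sizeof(float)); // anonymous mapping, no fd requested
 * float *buf = mem.typedBuffer<float>();  // reinterpreted view of the mapping
 * if (buf != nullptr) {
 *   buf[0] = 1.0f;                        // write through the mapped memory
 * }
 * int fd = mem.getFd();                   // -1 unless an fd was allocated
 * size_t bytes = mem.size();              // byte size given at construction
 * @endcode
 */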
99 :
100 : /**
101 : * @class Manager
102 : * @brief manager of nntrainer
103 : */
104 : class Manager : public Noncopyable, public Nonmovable {
105 :
106 : public:
107 : /**
108 : * @brief Tensor Group Type
109 : * @note this is not a mutually exclusive list; a tensor might be identified
110 : * as an input as well as an output
111 : *
112 : */
113 : enum TensorGroupType {
114 : INPUT = 0, /**< Input of an operation */
115 : OUTPUT = 1, /**< Output of an operation */
116 : WEIGHT = 2, /**< Weight of an operation */
117 : TENSORS = 3, /**< Extra states of an operation */
118 : };
119 :
120 : constexpr inline static unsigned NUM_TENSOR_GROUP_TYPE =
121 : 4; /**< number of tensor group type */
122 :
123 : /**
124 : * @brief Constructor of Manager
125 : */
126 849 : Manager() :
127 849 : enable_fsu(false),
128 849 : enable_optimizations(true),
129 849 : fsu_lookahead(0),
130 849 : tensor_format("NCHW"),
131 1698 : tensor_dtype(split("FP32-FP32", getRegex("\\-"))),
132 1698 : exec_mode(ExecutionMode::TRAIN) {}
133 :
134 : /**
135 : * @brief Constructor of Manager
136 : */
137 694 : Manager(bool enable_fsu_, const std::string &fsu_path = "",
138 : unsigned int lookahead = 0, const std::string tensor_format_ = "NCHW",
139 : const std::string tensor_dtype_ = "FP32-FP32",
140 694 : ExecutionMode exec_mode_ = ExecutionMode::TRAIN) :
141 694 : weight_pool(enable_fsu_, fsu_path, "weight_pool", exec_mode_),
142 1388 : tensor_pool(enable_fsu_ && (exec_mode_ == ExecutionMode::TRAIN), fsu_path,
143 : "tensor_pool", exec_mode_),
144 694 : enable_fsu(enable_fsu_),
145 694 : enable_optimizations(true),
146 694 : fsu_lookahead(lookahead),
147 694 : tensor_format(tensor_format_),
148 694 : tensor_dtype(split(tensor_dtype_, getRegex("\\-"))),
149 694 : exec_mode(exec_mode_) {}
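/**
 * A minimal construction sketch (illustrative only; the FSU path and the
 * lookahead value are arbitrary assumptions):
 *
 * @code
 * Manager default_manager; // no FSU, "NCHW", "FP32-FP32", ExecutionMode::TRAIN
 *
 * // FSU-enabled manager that swaps tensors through "/tmp/fsu_weights.bin"
 * // and preloads 2 execution orders ahead.
 * Manager fsu_manager(true, "/tmp/fsu_weights.bin", 2);
 * @endcode
 */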
150 :
151 : /**
152 : * @brief Move Construct a new Manager object
153 : *
154 : */
155 : Manager(Manager &&) noexcept;
156 :
157 : /**
158 : * @brief Move assign a new Manager object
159 : *
160 : * @return Manager& reference to newly assign
161 : */
162 : Manager &operator=(Manager &&) noexcept;
163 :
164 : /**
165 : * @brief Destructor of Manager
166 : */
167 4629 : ~Manager() = default;
168 :
169 : /**
170 : * @brief Create weights with the given spec
171 : * @todo The max_exec_order can be reduced to the max exec order which
172 : * updates gradient
173 : *
174 : * @param node Graph node to extract node identifiers/info
175 : * @param weights_spec Specification for the weights
176 : * @param trainable make the weight trainable if true
177 : * @param shared_names names to refer to when the weights are borrowed from
178 : * the original source; if not shared, pass an empty vector
179 : *
180 : * @return created weights list
181 : */
182 : std::vector<Weight *>
183 : requestWeights(const GraphNode &node,
184 : const std::vector<Weight::Spec> &weights_spec, bool trainable,
185 : const std::vector<std::string> &shared_names);
186 :
187 : /**
188 : * @brief Create tensors with the given spec
189 : *
190 : * @param node Graph node to extract node identifiers/info
191 : * @param tensors_spec Specification for the tensors
192 : * @param trainable make the tensor trainable if true
193 : * @param shared_names if the tensor is shared, its name is needed
194 : *
195 : * @return created tensors list
196 : */
197 : std::vector<Var_Grad *> requestTensors(
198 : const GraphNode &node, const std::vector<Var_Grad::Spec> &tensors_spec,
199 : bool trainable, const std::vector<std::string> &shared_names = {});
200 :
201 : /**
202 : * @brief Create optimizer variable tensors with the given dimensions
203 : *
204 : * @param dims Dimensions of the optimizer variables to create
205 : * @param name Name of the weight the variables belong to
206 : * @param suffix Suffix to append to each variable name
207 : * @return created tensors list
208 : */
209 : std::vector<Tensor *> requestWeightOptimizerVariables(
210 : const std::vector<TensorDim> &dims, const std::string &name,
211 : const std::string &suffix, const TensorLifespan &lifespan,
212 : bool is_grad_clip, bool is_mixed_type,
213 : Initializer initializer = Initializer::NONE);
214 :
215 : /**
216 : * @brief Create input tensors with the given dimensions
217 : *
218 : * @param node Graph node to extract node identifiers/info
219 : * @param inputs_dim Dimensions of the input tensors
220 : * @param outputs_name Names of the already requested output tensors
221 : *
222 : * @return created tensors list
223 : *
224 : * @details create Var_Grads to be used as input of GraphNode with the
225 : * inputs_dim as their spec. If the outputs_name is provided, the returned
226 : * Var_Grads share tensors with the already allocated Var_Grads for outputs,
227 : * named with outputs_name. In this case, the inputs_dim and the shape of the
228 : * output tensors must match. If outputs_name is empty, then new tensors
229 : * will be allocated.
230 : */
231 : std::vector<Var_Grad *>
232 : requestInputs(const GraphNode &node, const std::vector<TensorDim> &inputs_dim,
233 : const std::vector<std::string> &outputs_name = {});
234 :
235 : /**
236 : * @brief Get all the weights which match the given condition
237 : *
238 : * @return the weights satisfying the given condition
239 : */
240 : std::vector<Weight *>
241 : getWeights(const std::function<bool(const Weight *)> &condition = nullptr);
242 :
243 : /**
244 : * @brief Get the tensor execution orders
245 : *
246 : * @param name name of the tensor
247 : * @param is_weight check if this should be queried in weight pool
248 : * @return std::vector<unsigned int>
249 : */
250 : std::vector<unsigned int> getTensorExecutionOrders(const std::string &name,
251 : bool is_weight);
252 :
253 : /**
254 : * @brief Get the Min Max of a tensor execution order
255 : *
256 : * @param name name of the tensor
257 : * @param is_weight check if this should be queried in weight pool
258 : * @return std::pair<unsigned int, unsigned int>
259 : */
260 : std::pair<unsigned int, unsigned int>
261 : getMinMaxTensorExecutionOrder(const std::string &name, bool is_weight);
262 :
263 : /**
264 : * @brief Get the second max of a tensor execution order
265 : *
266 : * @param name name of the tensor
267 : * @param is_weight check if this should be queried in weight pool
268 : * @return 2nd max execution order value
269 : */
270 : unsigned int getSecondMaxTensorExecutionOrder(const std::string &name,
271 : bool is_weight);
272 :
273 : /**
274 : * @brief check if given execution order is the first access
275 : *
276 : * @param name tensor name
277 : * @param current_execution current execution
278 : * @param is_weight check if this should be queried in weight pool
279 : * @return bool true if the given execution order is the first access
280 : */
281 : bool isFirstAccess(const std::string &name, unsigned current_execution,
282 : bool is_weight = false);
283 :
284 : /**
285 : * @brief check if given execution order is the last access
286 : *
287 : * @param name tensor name
288 : * @param current_execution current execution
289 : * @param is_weight check if this should be queried in weight pool
290 : * @return bool true if the given execution order is the last access
291 : */
292 : bool isLastAccess(const std::string &name, unsigned current_execution,
293 : bool is_weight = false);
294 :
295 : /**
296 : * @brief check if given execution order is the second last access
297 : *
298 : * @param name tensor name
299 : * @param current_execution current execution
300 : * @param is_weight check if this should be queried in weight pool
301 : * @return bool true if the given execution order is the second last access
302 : */
303 : bool isSecondLastAccess(const std::string &name, unsigned current_execution,
304 : bool is_weight = false);
305 :
306 : /**
307 : * @brief Check if the manager has allocated tensors
308 : *
309 : * @return true if tensors allocated, else false
310 : */
311 : bool isAllocated() const { return tensor_pool.isAllocated(); }
312 :
313 : /**
314 : * @brief Set the batch size for the inputs/outputs of the layers
315 : */
316 435 : void setBatchSize(unsigned int batch) {
317 : /**
318 : * All the tensors must be deallocated first by the caller and then
319 : * allocated again by the caller.
320 : */
321 4092 : for (auto &in : inputs_v2)
322 3657 : in->setBatchSize(batch);
323 3996 : for (auto &out : outputs_v2)
324 3561 : out->setBatchSize(batch);
325 435 : }
326 :
327 : /**
328 : * @brief Set the batch size for the given tensor
329 : *
330 : * @note this does not work for weights as they are supposed to be
331 : * independent of batch size.
332 : */
333 : void setBatchSize(const std::string &name, unsigned int batch) {
334 3053 : tensor_pool.setBatchSize(name, batch);
335 3053 : }
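/**
 * A sketch of the deallocate/allocate cycle around a batch size change
 * (illustrative only; `manager`, `new_batch` and `max_order` are assumed to
 * come from the surrounding graph setup):
 *
 * @code
 * manager.deallocateTensors();        // tensors must be freed before resizing
 * manager.setBatchSize(new_batch);    // resize the inputs/outputs of all layers
 * manager.allocateTensors(max_order); // re-plan and allocate with the new batch
 * @endcode
 */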
336 :
337 : /**
338 : * @brief Allocate memory for all the managed tensors
339 : *
340 : * @param[in] max_exec_order The maximum order of execution to determine
341 : * memory layout
342 : *
343 : * @note Any requested tensor which is not used inside the max_exec_order is
344 : * not initialized and will not be allocated. The initialization uses a memory
345 : * planner to plan the layout of all the tensors which are used at least once
346 : * before the max_exec_order.
347 : */
348 : void allocateTensors(unsigned int max_exec_order_);
349 :
350 : /**
351 : * @brief Deallocate memory for all the managed tensors
352 : */
353 : void deallocateTensors(bool dealloc_weights = false);
354 :
355 : /**
356 : * @brief Allocate memory for all the managed weights
357 : *
358 : * @param[in] max_exec_order The maximum order of execution to determine
359 : * memory layout
360 : *
361 : * @note Any requested tensor which is not used inside the max_exec_order is
362 : * not initialized and will not be allocated. The initialization uses a memory
363 : * planner to plan the layout of all the tensors which are used at least once
364 : * before the max_exec_order.
365 : *
366 : * @note this will make requests to the tensor pool and allocate the
367 : * corresponding weights
368 : */
369 : void allocateWeights(unsigned int max_exec_order_, bool init = true);
370 :
371 : /**
372 : * @brief Deallocate memory for all the weights
373 : */
374 : void deallocateWeights();
375 :
376 : /**
377 : * @brief Set optimizations for manager
378 : *
379 : * @param val true to enable, else false
380 : */
381 695 : void setOptimizations(bool val) { enable_optimizations = val; }
382 :
383 : /**
384 : * @brief Update externally dependent tensors
385 : *
386 : * @param name Name of the tensor
387 : * @param t External tensor
388 : */
389 : void fillPlaceholder(const std::string &name, const Tensor &t) {
390 15335 : tensor_pool.fillPlaceholder(name, t);
391 15335 : }
392 :
393 : /**
394 : * @brief Get the tensor of the given name
395 : *
396 : * @return ptr to the tensor with the given name
397 : * @throws if no tensor is found with the given name
398 : */
399 1041 : Tensor *getTensor(const std::string &name) {
400 : try {
401 1041 : return tensor_pool.getTensor(name);
402 0 : } catch (...) {
403 : return weight_pool.getTensor(name);
404 0 : }
405 : }
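/**
 * A sketch of binding an external input and reading a managed tensor back
 * (illustrative only; the tensor names used here are hypothetical):
 *
 * @code
 * Tensor input(TensorDim(1, 1, 28, 28));
 * manager.fillPlaceholder("input0", input);       // feed the external tensor
 * Tensor *out = manager.getTensor("fc1:output0"); // throws if the name is unknown
 * @endcode
 */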
406 :
407 : /**
408 : * @brief request Tensor with weight specification
409 : *
410 : * @param spec specification
411 : * @param identify_as group type to identify the tensor with
412 : * @return Tensor* tensor
413 : */
414 : Tensor *requestTensor(const WeightSpecV2 &spec, TensorGroupType identify_as);
415 :
416 : /**
417 : * @brief request Tensor with variable + gradient specification
418 : *
419 : * @param spec specification
420 : * @param identify_as group type to identify the tensor with
421 : * @param exec_order execution order to refer to
422 : * @param scope common scope to attach in front of current specification name
423 : * @param expose_var expose the variable tensor out of the graph; once
424 : * allocated, this tensor remains valid until max_exec_order
425 : * @param expose_grad expose the gradient tensor out of the graph; once
426 : * allocated, this tensor remains valid until max_exec_order
427 : * @return Var_Grad* tensor
428 : */
429 : Var_Grad *requestTensor(const VarGradSpecV2 &spec,
430 : TensorGroupType identify_as,
431 : const GraphNode::ExecutionOrder &exec_order,
432 : const std::string &scope = "",
433 : bool expose_var = false, bool expose_grad = false);
434 :
435 : /**
436 : * @brief request vector of tensors with variable + gradient specification
437 : *
438 : * @param specs specifications
439 : * @param identify_as group type to identify the tensors with
440 : * @param exec_order execution order to refer to
441 : * @param scope common scope to attach in front of current specification name
442 : * @param expose_var expose the variable tensors out of the graph; once
443 : * allocated, these tensors remain valid
444 : * until max_exec_order
445 : * @param expose_grad expose the gradient tensors out of the graph; once
446 : * allocated, these tensors remain valid until max_exec_order
447 : * @return created tensors list
448 : */
449 : std::vector<Var_Grad *> requestTensors(
450 : const std::vector<VarGradSpecV2> &specs, TensorGroupType identify_as,
451 : const GraphNode::ExecutionOrder &exec_order, const std::string &scope = "",
452 : bool expose_var = false, bool expose_grad = false);
453 :
454 : /**
455 : * @brief flush cache data
456 : */
457 : void flushCache();
458 :
459 : /**
460 : * @brief flush cache data except the order
461 : *
462 : * @param order execution order to exempt from flushing
463 : * @note preloading loads data for an execution order asynchronously,
464 : * up to the lookahead size. If a new flush request arrives,
465 : * it waits until the previous preloading is completed and then invokes a
466 : * new one.
467 : */
468 : void flushCacheExcept(unsigned int order);
469 :
470 : /**
471 : * @brief load cache data for the execution order
472 : *
473 : * @param order execution order
474 : * @param remainder_lookahead remaining lookahead
475 : * @note preloading loads data for the execution order asynchronously,
476 : * up to the lookahead size.
477 : */
478 : void LoadTensors(unsigned int order, unsigned int remainder_lookahead = 0);
479 :
480 : /**
481 : * @brief check completion of data loading for the execution order
482 : *
483 : * @param order execution order
484 : * @note checks whether the tensors preloaded for the execution order are ready.
485 : */
486 : bool checkLoadComplete(unsigned int order);
487 :
488 : /**
489 : * @brief check completion of data unloading for the execution order
490 : *
491 : * @param order execution order
492 : * @note checks whether the tensors of the execution order have been unloaded.
493 : */
494 : bool checkUnloadComplete(unsigned int order);
495 :
496 : /**
497 : * @brief load cache data for the execution order
498 : *
499 : * @param order execution order
500 : * @param lookahead look ahead value to load from execution order
501 : * @note preloading loads execution order data asynchronously,
502 : * for lookahead size.
503 : */
504 : void LoadFsuTensors(unsigned int order, unsigned int lookahead);
505 :
506 : /**
507 : * @brief flush load data for the execution order
508 : *
509 : * @param order execution order
510 : * @note flush tensors for execution order.
511 : */
512 : void UnloadTensors(unsigned int order);
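/**
 * A sketch of the asynchronous FSU load/unload cycle for one execution order
 * (illustrative only; driving the cycle from user code like this is an
 * assumption, in practice the graph executor calls these):
 *
 * @code
 * unsigned int order = 3;
 * manager.LoadTensors(order);           // kick off asynchronous preloading
 * if (manager.checkLoadComplete(order)) {
 *   // ... run the operations scheduled at this execution order ...
 *   manager.UnloadTensors(order);       // release the data once consumed
 *   manager.checkUnloadComplete(order);
 * }
 * @endcode
 */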
513 :
514 : /**
515 : * @brief reinitialize manager
516 : */
517 : void reinitialize();
518 :
519 : /**
520 : * @brief set Execution Mode
521 : */
522 : void setExecutionMode(ExecutionMode mode = ExecutionMode::TRAIN) {
523 622 : exec_mode = mode;
524 : };
525 :
526 : /**
527 : * @brief return if it is mixed precsion
528 : */
529 4826 : bool isMixedPrecision() { return !istrequal(tensor_dtype[0], "FP32"); }
530 :
531 : /**
532 : * @brief set Inactive elems in order
533 : *
534 : */
535 : unsigned int inActive(unsigned int order);
536 :
537 : /**
538 : * @brief set FSU weight path
539 : *
540 : * @param path FSU weight file path
541 : */
542 0 : void setFsuWeightPath(std::string path) {
543 0 : weight_pool.setFsuWeightPath(path);
544 0 : }
545 :
546 : /**
547 : * @brief set weight file offset for FSU loading
548 : *
549 : * @param offsets weight file offset
550 : */
551 0 : void setWeightOffset(std::vector<std::pair<size_t, size_t>> offsets) {
552 0 : weight_pool.setWeightOffset(offsets);
553 0 : }
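/**
 * A sketch of configuring FSU weight loading (illustrative only; the file
 * name and the offset pairs are hypothetical, and each pair is assumed to
 * describe a weight region inside the file):
 *
 * @code
 * manager.setFsuWeightPath("model_fsu.bin");
 * manager.setWeightOffset({{0, 4096}, {4096, 8192}});
 * @endcode
 */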
554 :
555 : private:
556 : /** @todo: merge these lists into one */
557 : std::vector<std::unique_ptr<Weight>> weights_v2; /**< weights for the layers
558 : */
559 : std::vector<std::unique_ptr<Var_Grad>>
560 : inputs_v2; /**< inputs for the layers */
561 : std::vector<std::unique_ptr<Var_Grad>>
562 : outputs_v2; /**< outputs for the layers */
563 : std::vector<std::unique_ptr<Var_Grad>>
564 : tensors_v2; /**< extra tensors required by the layers */
565 :
566 : std::array<std::vector<std::unique_ptr<Var_Grad>>, NUM_TENSOR_GROUP_TYPE>
567 : tensor_book; /**< reference to tensor book kept */
568 :
569 : TensorPool weight_pool; /**< tensor pool to request weights */
570 : TensorPool tensor_pool; /**< tensor pool to request tensors */
571 :
572 : /** async load task <execution order, weight completed id> */
573 : std::map<unsigned int, int> async_task_weight_load;
574 :
575 : /** async unload task <execution order, weight completed id> */
576 : std::map<unsigned int, int> async_task_weight_unload;
577 :
578 : /**< async tasks <execution order, <weight_pool completed id, tensor_pool
579 : * completed id>>
580 : */
581 :
582 : std::map<unsigned int, std::tuple<int, int>> async_task_eos;
583 : /**< async tasks <execution order, <weight_pool completed id, tensor_pool
584 : * completed id>>
585 : */
586 : std::map<unsigned int, std::tuple<int, int>> async_load_tensor;
587 :
588 : std::map<unsigned int, std::tuple<int, int>> async_unload_tensor;
589 :
590 : std::map<int, std::promise<bool>> completed;
591 :
592 : std::map<int, std::promise<bool>> completed_load_tensor;
593 :
594 : std::map<int, std::future<TaskExecutor::CompleteStatus>> completed_load_fut;
595 :
596 : std::map<int, std::promise<bool>> completed_unload_tensor;
597 :
598 : /**< async tasks completion <task id, promise> */
599 : std::mutex completed_mutex; /**< mutex for async tasks completion */
600 :
601 : std::mutex completed_load_mutex; /**< mutex for async load tasks completion */
602 :
603 : std::mutex completed_unload_mutex; /**< mutex for async unload tasks completion */
604 :
605 : bool enable_fsu; /**< to enable fsu */
606 :
607 : bool enable_optimizations; /**< to enable memory optimizations */
608 :
609 : unsigned int fsu_lookahead; /**< lookahead for memory fsu */
610 :
611 : std::string tensor_format; /**< tensor format, e.g., "NCHW" */
612 :
613 : std::vector<std::string> tensor_dtype; /**< tensor data types, split from e.g. "FP32-FP32" */
614 :
615 : ExecutionMode exec_mode; /**< execution mode */
616 :
617 : unsigned int max_exec_order = 0; /**< maximum execution order */
618 :
619 : /**
620 : * @brief Finalize the given tensor pool
621 : *
622 : * @param pool Tensor pool to finalize
623 : * @param start Start execution order
624 : * @param end End execution order
625 : */
626 : void finalizeTensorPool(TensorPool &pool, unsigned int start,
627 : unsigned int end);
628 : };
629 :
630 : } // namespace nntrainer
631 :
632 : #endif /* __cplusplus */
633 : #endif /* __MANAGER_H__ */