Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
4 : *
5 : * @file tensor_pool.cpp
6 : * @date 19 Aug 2021
7 : * @brief This is TensorPool for all requested tensors
8 : * @see https://github.com/nnstreamer/nntrainer
9 : * @author Parichay Kapoor <pk.kapoor@samsung.com>
10 : * @author Jihoon Lee <jhoon.it.lee@samsung.com>
11 : * @bug No known bugs except for NYI items
12 : *
13 : * @todo add checks for request/updates that finalize is not done
14 : * @todo check before allocate that finalize is done
15 : */
16 :
17 : #include <memory_pool.h>
18 : #include <nntrainer_log.h>
19 : #include <tensor.h>
20 : #include <tensor_pool.h>
21 : #include <tensor_wrap_specs.h>
22 : #include <util_func.h>
23 :
24 : namespace nntrainer {
25 :
26 : /**
27 : * @brief Request tensor with the given spec
28 : *
29 : * @note returns an empty tensor which will be filled when allocate is called.
30 : * @note we assume that the caller checks if the exec_order and lifespan are
31 : * compatible.
32 : */
33 22470 : Tensor *TensorPool::request(const std::string &name, const TensorDim &dim,
34 : const std::vector<unsigned int> &exec_order,
35 : TensorLifespan lifespan, const Initializer &init,
36 : bool is_weight_grad) {
37 :
38 22470 : bool is_virtual = lifespan == TensorLifespan::VIRTUAL;
39 22470 : lifespan = is_virtual ? TensorLifespan::UNMANAGED : lifespan;
40 22470 : return registerRequestSpec(
41 : {is_weight_grad,
42 44935 : std::make_unique<Tensor>(dim, false, init, name,
43 44935 : QScheme::PER_CHANNEL_AFFINE, is_virtual),
44 44935 : TensorPool::SourceDetails{0, lifespan, exec_order, {}}});
45 22475 : }
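/**
 * A minimal usage sketch for request(); the pool construction, tensor name,
 * shape and exec orders below are illustrative assumptions, and the
 * TensorDim(batch, channel, height, width) constructor and Initializer::ZEROS
 * are assumed to be available.
 *
 * @code
 *   TensorPool pool;
 *   // request a 1x1x4x4 tensor used at exec orders 0 and 1 during forward
 *   Tensor *t = pool.request("fc0:output", TensorDim(1, 1, 4, 4), {0, 1},
 *                            TensorLifespan::FORWARD_FUNC_LIFESPAN,
 *                            Initializer::ZEROS);
 *   // t is not backed by memory yet; finalize() and allocate() attach it
 * @endcode
 */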
46 :
47 : /**
48 : * @brief Request tensor with the given spec
49 : *
50 : * @note returns an empty tensor which will be filled when allocate is called.
51 : */
52 1612 : Tensor *TensorPool::placeholder(const std::string &name, const TensorDim &dim) {
53 1612 : return request(name, dim, {}, TensorLifespan::UNMANAGED);
54 : }
55 :
56 : /**
57 : * @brief Request a view into a tensor which has already been requested with
58 : * the given spec
59 : *
60 : * @note returns an empty tensor which will be filled when allocate is called.
61 : * @note we assume that the caller checks if the exec_order and lifespan are
62 : * compatible.
63 : */
64 11680 : Tensor *TensorPool::view(const std::string &name, const std::string &reference,
65 : const TensorDim &dim,
66 : const std::vector<unsigned int> &exec_order,
67 : TensorLifespan lifespan, const size_t offset) {
68 11680 : auto &spec = getSourceSpec(reference);
69 :
70 11676 : NNTR_THROW_IF(spec.tensor->getDataType() != dim.getDataType() ||
71 : spec.tensor->getFormat() != dim.getFormat(),
72 : std::invalid_argument)
73 : << "view tensor type != source tensor type, view tensor type: " << dim
74 0 : << " source tensor: " << spec.tensor->getDim();
75 :
76 : unsigned adjusted_offset = std::visit(
77 : [](const auto &s) {
78 : using T = std::decay_t<decltype(s)>;
79 : if constexpr (std::is_same_v<T, SourceDetails>) {
80 : return 0u;
81 : } else if constexpr (std::is_same_v<T, DependentDetails>) {
82 3905 : return s.offset;
83 : }
84 : return 0u;
85 : },
86 11676 : pool[name_map.at(reference)].details);
87 11676 : adjusted_offset += offset;
88 :
89 11676 : NNTR_THROW_IF(spec.tensor->getDim().getDataLen() <
90 : adjusted_offset + dim.getDataLen(),
91 : std::invalid_argument)
92 : << "view tensor size + offset > source tensor size, view tensor size: "
93 4 : << dim.getDataLen() << " offset: " << adjusted_offset
94 8 : << " source tensor: " << spec.tensor->getDim().getDataLen()
95 4 : << " name: " << spec.tensor->getName();
96 :
97 11672 : expandLifespan(spec, exec_order, lifespan);
98 11671 : std::get<SourceDetails>(spec.details).dependents.push_back(pool.size());
99 :
100 : /** @note below invalidates spec reference */
101 : /** @note in case of a view of a view, the internal data structure stores the
102 : * src-to-view index, not a view-to-view reference, in order to flatten depth */
103 11671 : auto parent_idx = name_map.at(spec.tensor->getName());
104 :
105 : /** @note default is_weight_grad for a view is false, as views are used for
106 : * activations. */
107 11671 : return registerRequestSpec(
108 23338 : {false, std::make_unique<Tensor>(dim, false, Initializer::NONE, name),
109 11667 : TensorPool::DependentDetails{parent_idx, adjusted_offset}});
110 11671 : }
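/**
 * A rough sketch of pairing request() and view(), given a TensorPool instance
 * named pool; names, shapes and exec orders are illustrative assumptions.
 * Each view shares the source tensor's memory starting at the given element
 * offset, so the source must be large enough to cover offset + view size.
 *
 * @code
 *   Tensor *src = pool.request("concat:output", TensorDim(1, 1, 1, 8), {2},
 *                              TensorLifespan::FORWARD_FUNC_LIFESPAN);
 *   // two views partitioning the 8-element source into two halves
 *   Tensor *first = pool.view("concat:out0", "concat:output",
 *                             TensorDim(1, 1, 1, 4), {2},
 *                             TensorLifespan::FORWARD_FUNC_LIFESPAN, 0);
 *   Tensor *second = pool.view("concat:out1", "concat:output",
 *                              TensorDim(1, 1, 1, 4), {2},
 *                              TensorLifespan::FORWARD_FUNC_LIFESPAN, 4);
 * @endcode
 */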
111 :
112 : /**
113 : * @brief finalize the requested tensors
114 : *
115 : * @details finalize the requested tensors, request memory for them and plan
116 : * layout for their allocations.
117 : */
118 1316 : void TensorPool::finalize(const MemoryPlanner &planner,
119 : unsigned int start_order, unsigned int end_order) {
120 1316 : mem_pool->clear();
121 : unsigned int bytes_requested = 0;
122 : /** if an execution order equals PERSIST_END_ORDER, the tensor is assumed to
123 : * need an extra execution order (e.g., for gradient clipping);
124 : * persist_end_order tracks whether end_order has already been extended */
125 : bool persist_end_order = false;
126 : unsigned int old_end_order = end_order;
127 :
128 41519 : for (auto &spec : pool) {
129 :
130 : auto details = std::get_if<SourceDetails>(&spec.details);
131 29069 : if (!details || details->lifespan == TensorLifespan::UNMANAGED ||
132 : details->exec_order.empty()) {
133 21109 : continue;
134 : }
135 27243 : details->token = 0;
136 :
137 : /**
138 : * 1. create the validity ranges for all the requested tensors.
139 : * validity_start/validity_end should be a value in the exec order of the
140 : * given tensor or a value out of range so as to not request memory for
141 : * this tensor
142 : */
143 27243 : unsigned int validity_start = end_order + 1;
144 111101 : for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) {
145 83858 : if (details->exec_order[idx] >= start_order)
146 83858 : validity_start = std::min(validity_start, details->exec_order[idx]);
147 : /** Clamp execution orders that exceed the backwarding end order; e.g.,
148 : * for the input layer, backwarding is never reached even though an
149 : * execution order is assigned.
150 : * */
151 83858 : if (details->exec_order[idx] > old_end_order &&
152 : details->exec_order[idx] != PERSIST_END_ORDER) {
153 29800 : details->exec_order[idx] = PERSIST_END_ORDER - 1;
154 : }
155 : }
156 27243 : unsigned int validity_end = validity_start;
157 111093 : for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) {
158 83858 : if (details->exec_order[idx] == PERSIST_END_ORDER) {
159 8 : if (!persist_end_order) {
160 : end_order = end_order + 1;
161 : persist_end_order = true;
162 : }
163 8 : validity_end = end_order;
164 8 : details->exec_order[idx] = validity_end;
165 8 : break;
166 : }
167 :
168 83850 : if (details->exec_order[idx] <= end_order) {
169 54050 : validity_end = std::max(validity_end, details->exec_order[idx]);
170 : }
171 : }
172 : /**
173 : * use lifespan to update the validity.
174 : * if the lifespan is long term, the tensor must stay valid for the
175 : * complete duration.
176 : */
177 27243 : if (isTensorLongTerm(details->lifespan)) {
178 5011 : validity_start = start_order;
179 5011 : validity_end = end_order;
180 : }
181 :
182 : /** 2. for each tensor request if it is in the provided range */
183 27243 : if (validity_end < start_order || validity_start > end_order) {
184 8149 : continue;
185 : }
186 :
187 : /**
188 : * 3. requestMemory for all the tensors and set their tokens
189 : * @note +1 is to make the validity_end exclusive in the interval range
190 : */
191 19094 : details->token = mem_pool->requestMemory(
192 : spec.tensor->getMemoryBytes(), validity_start, validity_end + 1,
193 19094 : details->exec_order, details->lifespan, spec.is_weight_grad);
194 : #ifdef DEBUG
195 : if (details->token == 0)
196 : throw std::runtime_error("Received invalid token from memory pool");
197 : #endif
198 :
199 19094 : bytes_requested += spec.tensor->getMemoryBytes();
200 : }
201 :
202 : /** 4. plan the layout of the memory pool. */
203 1316 : if (bytes_requested > 0) {
204 1309 : double efficiency = mem_pool->planLayout(planner);
205 2618 : ml_logd("Memory layout efficiency = %lf", efficiency);
206 : }
207 1316 : }
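/**
 * A sketch of the usual finalize/allocate/deallocate sequence on a TensorPool
 * instance named pool, assuming BasicPlanner (basic_planner.h) is available
 * as the MemoryPlanner implementation and that exec orders 0..3 cover the
 * requested tensors.
 *
 * @code
 *   BasicPlanner planner;
 *   pool.finalize(planner, 0, 3); // plan memory for validity range [0, 3]
 *   pool.allocate(false);         // attach planned memory to managed tensors
 *   // ... run the iterations that use the pooled tensors ...
 *   pool.deallocate();            // release memory, data pointers are reset
 * @endcode
 */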
208 :
209 : /**
210 : * @brief Set the batch size for the inputs/outputs of the layers
211 : */
212 3056 : void TensorPool::setBatchSize(const std::string &name, unsigned int batch) {
213 3056 : if (name_map.find(name) == name_map.end())
214 1 : throw std::invalid_argument("Requested tensor not found");
215 :
216 3055 : pool[name_map[name]].tensor->updateBatch(batch);
217 3054 : }
218 :
219 : /**
220 : * @brief Allocate memory for all the managed tensors
221 : */
222 1314 : void TensorPool::allocate(bool init) {
223 1314 : if (minMemoryRequirement() == 0)
224 : return;
225 1308 : mem_pool->allocate();
226 :
227 : /** set the pointers using the token for all the tensors */
228 41505 : for (auto &spec : pool) {
229 : auto details = std::get_if<SourceDetails>(&spec.details);
230 29063 : if (!details || details->token == 0) {
231 21105 : continue;
232 : }
233 19092 : spec.tensor->setData(mem_pool->getMemory(details->token), 0, init);
234 :
235 19092 : syncDependents(spec);
236 : }
237 :
238 1308 : if (cache_loader) {
239 0 : cache_loader->init();
240 : }
241 : }
242 :
243 : /**
244 : * @brief Deallocate memory for all the managed tensors
245 : */
246 4031 : void TensorPool::deallocate() {
247 4031 : if (cache_loader)
248 0 : cache_loader->finish();
249 :
250 4031 : mem_pool->deallocate();
251 :
252 : /** nullify the data pointers for the tensors */
253 89233 : for (auto &spec : pool) {
254 170404 : spec.tensor->setData(nullptr);
255 : }
256 4031 : }
257 :
258 : const std::vector<unsigned int> &
259 13559 : TensorPool::getExecutionOrder(const std::string &name) {
260 13559 : return std::get<SourceDetails>(getSourceSpec(name).details).exec_order;
261 : }
262 :
263 : /**
264 : * @brief Expand the lifespan of the tensor with the given name
265 : *
266 : */
267 : TensorPool::RequestSpec &
268 0 : TensorPool::expandLifespan(const std::string &name,
269 : const std::vector<unsigned> &exec_order,
270 : TensorLifespan lifespan) {
271 0 : auto &spec = getSourceSpec(name);
272 0 : expandLifespan(spec, exec_order, lifespan);
273 0 : return spec;
274 : }
275 :
276 13109 : void TensorPool::expandLifespan(RequestSpec &spec,
277 : const std::vector<unsigned int> &exec_order,
278 : TensorLifespan lifespan) {
279 : auto &details = std::get<SourceDetails>(spec.details);
280 13110 : NNTR_THROW_IF((details.lifespan != TensorLifespan::UNMANAGED &&
281 : lifespan == TensorLifespan::UNMANAGED),
282 : std::invalid_argument)
283 : << "Extending to lifespan to unmanaged is not possible for name: "
284 1 : << spec.tensor->getName();
285 :
286 13108 : if (details.lifespan != TensorLifespan::UNMANAGED) {
287 : /// update only if the existing lifespan is not unmanaged
288 11613 : details.lifespan =
289 : enum_class_or<TensorLifespan>(details.lifespan, lifespan);
290 : }
291 13108 : details.exec_order.insert(details.exec_order.end(), exec_order.begin(),
292 : exec_order.end());
293 13108 : }
294 :
295 34428 : void TensorPool::syncDependents(const RequestSpec &spec) {
296 : /// @note syncing dependents of dependents is invalid and will throw.
297 : auto &dependents = std::get<SourceDetails>(spec.details).dependents;
298 53265 : for (auto &dep : dependents) {
299 18837 : auto &dep_spec = pool.at(dep);
300 18837 : auto offset = std::get<DependentDetails>(dep_spec.details).offset;
301 :
302 37674 : dep_spec.tensor->setData(spec.tensor->getMemoryData(),
303 18837 : spec.tensor->getOffset() + offset);
304 : }
305 34428 : }
306 :
307 34141 : Tensor *TensorPool::registerRequestSpec(RequestSpec &&spec) {
308 34141 : auto &name = spec.tensor->getName();
309 34141 : if (name_map.find(name) != name_map.end())
310 5 : throw std::invalid_argument("Cannot request tensor with same name");
311 :
312 34136 : if (spec.tensor->empty())
313 2 : throw std::invalid_argument("Cannot request tensor with size 0");
314 :
315 34134 : if (name.empty())
316 2 : throw std::invalid_argument("Cannot request tensor with empty name");
317 :
318 34132 : pool.push_back(std::move(spec));
319 34132 : name_map[name] = pool.size() - 1;
320 :
321 34132 : return pool.back().tensor.get();
322 : }
323 :
324 42012 : TensorPool::RequestSpec &TensorPool::getSourceSpec(const std::string &name) {
325 42002 : RequestSpec *rs = &pool.at(name_map.at(name));
326 : while (auto dep_details = std::get_if<DependentDetails>(&rs->details)) {
327 3939 : rs = &pool.at(dep_details->parent_idx);
328 3939 : }
329 :
330 42002 : return *rs;
331 : }
332 :
333 15336 : void TensorPool::fillPlaceholder(const std::string &name, const Tensor &t) {
334 15336 : auto &spec = getSourceSpec(name);
335 : auto &details = std::get<SourceDetails>(spec.details);
336 15336 : NNTR_THROW_IF(details.lifespan != TensorLifespan::UNMANAGED,
337 : std::invalid_argument)
338 : << "Cannot set external tensor for non-zero lifespan for " << name;
339 :
340 16911 : NNTR_THROW_IF(t.size() == 0 && t.getData(), std::invalid_argument)
341 : << "Error: setting invalid external tensor size 0 for " << name;
342 :
343 15336 : NNTR_THROW_IF(t.size() != 0 && t.size() < spec.tensor->size(),
344 : std::invalid_argument)
345 : << "Error: setting external tensor of smaller size for "
346 0 : << spec.tensor->getName() << "(maybe view of " << name << ")";
347 :
348 15336 : spec.tensor->setData(t.getMemoryData(), t.getOffset());
349 15336 : syncDependents(spec);
350 15336 : }
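/**
 * A sketch of the placeholder()/fillPlaceholder() pairing for externally
 * owned data such as network inputs, given a TensorPool instance named pool;
 * the name, shape and the Tensor(dim, alloc_now) construction are
 * illustrative assumptions. The placeholder is UNMANAGED, so the pool never
 * allocates it and the external tensor must outlive its use.
 *
 * @code
 *   Tensor *in = pool.placeholder("input0", TensorDim(1, 3, 32, 32));
 *   Tensor user_data(TensorDim(1, 3, 32, 32), true); // externally owned
 *   user_data.setValue(0.5f);
 *   pool.fillPlaceholder("input0", user_data); // in now shares user_data
 * @endcode
 */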
351 :
352 1439 : Tensor *TensorPool::extend(const std::string &name, const TensorDim &dim,
353 : const std::vector<unsigned int> &exec_order,
354 : TensorLifespan lifespan) {
355 1441 : NNTR_THROW_IF(!tensorExist(name), std::invalid_argument)
356 : << " cannot extend tensor which does not exist, name: " << name;
357 1437 : auto &spec = getSourceSpec(name);
358 1437 : NNTR_THROW_IF(dim != spec.tensor->getDim(), std::invalid_argument)
359 : << "Cannot extend tensor with different dimension";
360 1437 : spec.is_weight_grad = false;
361 1437 : expandLifespan(spec, exec_order, lifespan);
362 1437 : return getTensor(name);
363 : }
364 :
365 3847 : Tensor *TensorPool::requestOrExtend(const std::string &name,
366 : const TensorDim &dim,
367 : const std::vector<unsigned int> &exec_order,
368 : TensorLifespan lifespan,
369 : const Initializer &init) {
370 3848 : NNTR_THROW_IF(lifespan == TensorLifespan::UNMANAGED, std::invalid_argument)
371 : << "unmanaged life span is not supported";
372 :
373 3846 : if (tensorExist(name)) {
374 : Tensor *t = getTensor(name);
375 1436 : NNTR_THROW_IF(t->getDim() != dim, std::invalid_argument)
376 : << "tensor dimension mismatch for requestOrExtend name: " << name;
377 1435 : NNTR_THROW_IF(t->getInitializer() != init, std::invalid_argument)
378 : << "tensor initializer mismatch for requestOrExtend name: " << name;
379 1433 : return extend(name, dim, exec_order, lifespan);
380 : } else {
381 2411 : return request(name, dim, exec_order, lifespan, init);
382 : }
383 : }
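/**
 * A sketch of requestOrExtend() for a tensor shared across layers, given a
 * TensorPool instance named pool; names, shapes and exec orders are
 * illustrative assumptions. The second call must match the first call's
 * dimension and initializer and merely extends the exec orders and lifespan
 * of the existing tensor.
 *
 * @code
 *   Tensor *w0 = pool.requestOrExtend("shared:w", TensorDim(1, 1, 8, 8), {0},
 *                                     TensorLifespan::MAX_LIFESPAN,
 *                                     Initializer::ZEROS);
 *   Tensor *w1 = pool.requestOrExtend("shared:w", TensorDim(1, 1, 8, 8), {5},
 *                                     TensorLifespan::MAX_LIFESPAN,
 *                                     Initializer::ZEROS);
 *   // w0 == w1: both refer to the same pooled tensor
 * @endcode
 */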
384 :
385 0 : void TensorPool::reidentifySource(const std::string &dest,
386 : const std::string &new_src,
387 : unsigned int offset) {
388 : /// @todo add test
389 : /// source tensor of dest tensor becomes a view of new_src
390 0 : auto &old_spec = getSourceSpec(dest);
391 : auto &old_details = std::get<SourceDetails>(old_spec.details);
392 :
393 : /// 1. extend new_src with old src
394 0 : auto &new_spec = getSourceSpec(new_src);
395 0 : expandLifespan(new_spec, old_details.exec_order, old_details.lifespan);
396 0 : auto &new_dependents = std::get<SourceDetails>(new_spec.details).dependents;
397 0 : new_dependents.insert(new_dependents.end(), old_details.dependents.begin(),
398 : old_details.dependents.end());
399 :
400 : /// 2. calculate base offset from the new_src
401 0 : auto new_parent_idx = name_map.at(new_src);
402 : unsigned base_offset = std::visit(
403 : [](const auto &s) {
404 : using T = std::decay_t<decltype(s)>;
405 : if constexpr (std::is_same_v<T, SourceDetails>) {
406 : return 0u;
407 : } else if constexpr (std::is_same_v<T, DependentDetails>) {
408 0 : return s.offset;
409 : }
410 : return 0u;
411 : },
412 0 : pool[new_parent_idx].details);
413 0 : base_offset += offset;
414 :
415 : /// 3. transform parent idx/offset of old src's dependents based on the offset
416 0 : for (auto &dep : old_details.dependents) {
417 0 : auto &dep_spec = pool.at(dep);
418 : auto &details = std::get<DependentDetails>(dep_spec.details);
419 0 : details.offset += base_offset;
420 0 : details.parent_idx = new_parent_idx;
421 : }
422 :
423 : /// 4. replace old details to dependent srcs
424 0 : old_spec.details = DependentDetails{new_parent_idx, base_offset};
425 0 : }
426 :
427 5285 : bool TensorPool::tensorExist(const std::string &name) {
428 : /// @todo consider using a helper function to check, e.g., something like
429 : /// getTensor()
430 5285 : return name_map.count(name);
431 : }
432 :
433 : /**
434 : * @brief Check if the lifespan leads to long term validity
435 : *
436 : */
437 27243 : bool TensorPool::isTensorLongTerm(const TensorLifespan &lifespan) {
438 27243 : switch (lifespan) {
439 : case TensorLifespan::EPOCH_LIFESPAN:
440 : [[fallthrough]];
441 : case TensorLifespan::FORWARD_INFER_LIFESPAN:
442 : [[fallthrough]];
443 : case TensorLifespan::MAX_LIFESPAN:
444 : return true;
445 22232 : case TensorLifespan::FORWARD_FUNC_LIFESPAN:
446 : [[fallthrough]];
447 : case TensorLifespan::BACKWARD_FUNC_LIFESPAN:
448 : [[fallthrough]];
449 : case TensorLifespan::ITERATION_LIFESPAN:
450 : [[fallthrough]];
451 : case TensorLifespan::UNMANAGED:
452 : [[fallthrough]];
453 : default:
454 22232 : return false;
455 : }
456 : }
457 :
458 10800 : void TensorPool::flushCache() {
459 10800 : if (auto pool = dynamic_cast<CachePool *>(mem_pool.get()))
460 0 : pool->flush();
461 10800 : }
462 :
463 188358 : void TensorPool::flushCacheExcept(unsigned int order) {
464 188358 : if (auto pool = dynamic_cast<CachePool *>(mem_pool.get()))
465 0 : pool->flushExcept(order);
466 188358 : }
467 :
468 0 : void TensorPool::loadCacheExec(unsigned int order) {
469 0 : if (dynamic_cast<CachePool *>(mem_pool.get()))
470 0 : cache_loader->loadAllinOrder(order);
471 0 : }
472 :
473 0 : int TensorPool::loadCacheExecAsync(
474 : unsigned int order, TaskExecutor::CompleteCallback complete_callback) {
475 :
476 0 : if (dynamic_cast<CachePool *>(mem_pool.get()))
477 0 : return cache_loader->loadAllinOrder(order);
478 : else
479 : return 0;
480 : }
481 :
482 0 : bool TensorPool::checkLoadComplete(unsigned int order) {
483 0 : if (dynamic_cast<CachePool *>(mem_pool.get()))
484 0 : return cache_loader->checkAllLoadComplete(order);
485 : else
486 : return true;
487 : }
488 :
489 0 : int TensorPool::flushCacheExecAsync(
490 : unsigned int order, TaskExecutor::CompleteCallback complete_callback) {
491 0 : if (dynamic_cast<CachePool *>(mem_pool.get()))
492 0 : return cache_loader->unloadAllinOrder(order);
493 : else
494 : return 0;
495 : }
496 :
497 0 : void TensorPool::loadCacheCancel(int id) {
498 0 : if (dynamic_cast<CachePool *>(mem_pool.get()) == nullptr)
499 : return;
500 :
501 0 : cache_loader->cancelAsync(id);
502 : }
503 :
504 0 : unsigned int TensorPool::inActive(unsigned int order) {
505 0 : return cache_loader->inActive(order);
506 : }
507 :
508 : } // namespace nntrainer