Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
4 : *
5 : * @file tensor_pool.cpp
6 : * @date 19 Aug 2021
7 : * @brief This is TensorPool for all requested tensors
8 : * @see https://github.com/nnstreamer/nntrainer
9 : * @author Parichay Kapoor <pk.kapoor@samsung.com>
10 : * @author Jihoon Lee <jhoon.it.lee@samsung.com>
11 : * @bug No known bugs except for NYI items
12 : *
13 : * @todo add checks for request/updates that finalize is not done
14 : * @todo check before allocate that finalize is done
15 : */
16 :
17 : #include <memory_pool.h>
18 : #include <nntrainer_log.h>
19 : #include <tensor.h>
20 : #include <tensor_pool.h>
21 : #include <tensor_wrap_specs.h>
22 : #include <util_func.h>
23 :
24 : namespace nntrainer {
25 :
26 : /**
27 : * @brief Request tensor with the given spec
28 : *
29 : * @note returns an empty tensor which will be filled when allocate is called.
30 : * @note we assume that the caller checks if the exec_order and lifespan are
31 : * compatible.
32 : */
33 22470 : Tensor *TensorPool::request(const std::string &name, const TensorDim &dim,
34 : const std::vector<unsigned int> &exec_order,
35 : TensorLifespan lifespan, const Initializer &init,
36 : bool is_weight_grad) {
37 :
38 22470 : bool is_virtual = lifespan == TensorLifespan::VIRTUAL;
39 22470 : lifespan = is_virtual ? TensorLifespan::UNMANAGED : lifespan;
40 22470 : return registerRequestSpec(
41 : {is_weight_grad,
42 44935 : std::make_unique<Tensor>(dim, false, init, name,
43 44935 : QScheme::PER_CHANNEL_AFFINE, is_virtual),
44 44935 : TensorPool::SourceDetails{0, lifespan, exec_order, {}}});
45 22475 : }
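/**
 * A minimal usage sketch for request(); the pool construction, tensor name,
 * shape and exec orders below are illustrative assumptions, and the
 * TensorDim(batch, channel, height, width) constructor and Initializer::ZEROS
 * are assumed to be available.
 *
 * @code
 *   TensorPool pool;
 *   // request a 1x1x4x4 tensor used at exec orders 0 and 1 during forward
 *   Tensor *t = pool.request("fc0:output", TensorDim(1, 1, 4, 4), {0, 1},
 *                            TensorLifespan::FORWARD_FUNC_LIFESPAN,
 *                            Initializer::ZEROS);
 *   // t is not backed by memory yet; finalize() and allocate() attach it
 * @endcode
 */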
46 :
47 : /**
48 : * @brief Request tensor with the given spec
49 : *
50 : * @note returns an empty tensor which will be filled when allocate is called.
51 : */
52 1612 : Tensor *TensorPool::placeholder(const std::string &name, const TensorDim &dim) {
53 1612 : return request(name, dim, {}, TensorLifespan::UNMANAGED);
54 : }
55 :
56 : /**
57 : * @brief Request a view into a tensor which has already been requested with
58 : * the given spec
59 : *
60 : * @note returns an empty tensor which will be filled when allocate is called.
61 : * @note we assume that the caller checks if the exec_order and lifespan are
62 : * compatible.
63 : */
64 11680 : Tensor *TensorPool::view(const std::string &name, const std::string &reference,
65 : const TensorDim &dim,
66 : const std::vector<unsigned int> &exec_order,
67 : TensorLifespan lifespan, const size_t offset) {
68 11680 : auto &spec = getSourceSpec(reference);
69 :
70 11676 : NNTR_THROW_IF(spec.tensor->getDataType() != dim.getDataType() ||
71 : spec.tensor->getFormat() != dim.getFormat(),
72 : std::invalid_argument)
73 : << "view tensor type != source tensor type, view tensor type: " << dim
74 0 : << " source tensor: " << spec.tensor->getDim();
75 :
76 : unsigned adjusted_offset = std::visit(
77 : [](const auto &s) {
78 : using T = std::decay_t<decltype(s)>;
79 : if constexpr (std::is_same_v<T, SourceDetails>) {
80 : return 0u;
81 : } else if constexpr (std::is_same_v<T, DependentDetails>) {
82 3905 : return s.offset;
83 : }
84 : return 0u;
85 : },
86 11676 : pool[name_map.at(reference)].details);
87 11676 : adjusted_offset += offset;
88 :
89 11676 : NNTR_THROW_IF(spec.tensor->getDim().getDataLen() <
90 : adjusted_offset + dim.getDataLen(),
91 : std::invalid_argument)
92 : << "view tensor size + offset > source tensor size, view tensor size: "
93 4 : << dim.getDataLen() << " offset: " << adjusted_offset
94 8 : << " source tensor: " << spec.tensor->getDim().getDataLen()
95 4 : << " name: " << spec.tensor->getName();
96 :
97 11672 : expandLifespan(spec, exec_order, lifespan);
98 11671 : std::get<SourceDetails>(spec.details).dependents.push_back(pool.size());
99 :
100 : /** @note below invalidates spec reference */
101 : /** @note in case of a view of a view, the internal data structure stores the
102 : * src-to-view index, not a view-to-view reference, in order to flatten depth */
103 11671 : auto parent_idx = name_map.at(spec.tensor->getName());
104 :
105 : /** @note default is_weight_grad for a view is false, as views are used for
106 : * activations. */
107 11671 : return registerRequestSpec(
108 23338 : {false, std::make_unique<Tensor>(dim, false, Initializer::NONE, name),
109 11667 : TensorPool::DependentDetails{parent_idx, adjusted_offset}});
110 11671 : }
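/**
 * A rough sketch of pairing request() and view(), given a TensorPool instance
 * named pool; names, shapes and exec orders are illustrative assumptions.
 * Each view shares the source tensor's memory starting at the given element
 * offset, so the source must be large enough to cover offset + view size.
 *
 * @code
 *   Tensor *src = pool.request("concat:output", TensorDim(1, 1, 1, 8), {2},
 *                              TensorLifespan::FORWARD_FUNC_LIFESPAN);
 *   // two views partitioning the 8-element source into two halves
 *   Tensor *first = pool.view("concat:out0", "concat:output",
 *                             TensorDim(1, 1, 1, 4), {2},
 *                             TensorLifespan::FORWARD_FUNC_LIFESPAN, 0);
 *   Tensor *second = pool.view("concat:out1", "concat:output",
 *                              TensorDim(1, 1, 1, 4), {2},
 *                              TensorLifespan::FORWARD_FUNC_LIFESPAN, 4);
 * @endcode
 */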
111 :
112 : /**
113 : * @brief finalize the requested tensors
114 : *
115 : * @details finalize the requested tensors, request memory for them and plan
116 : * layout for their allocations.
117 : */
118 1316 : void TensorPool::finalize(const MemoryPlanner &planner,
119 : unsigned int start_order, unsigned int end_order) {
120 1316 : mem_pool->clear();
121 : unsigned int bytes_requested = 0;
122 : /** if an execution order equals PERSIST_END_ORDER, the tensor is assumed to
123 : * need an extra execution order (e.g., for gradient clipping);
124 : * persist_end_order tracks whether end_order has already been extended */
125 : bool persist_end_order = false;
126 : unsigned int old_end_order = end_order;
127 :
128 41519 : for (auto &spec : pool) {
129 :
130 : auto details = std::get_if<SourceDetails>(&spec.details);
131 29069 : if (!details || details->lifespan == TensorLifespan::UNMANAGED ||
132 : details->exec_order.empty()) {
133 21109 : continue;
134 : }
135 27243 : details->token = 0;
136 :
137 : /**
138 : * 1. create the validity ranges for all the requested tensors.
139 : * validity_start/validity_end should be a value in the exec order of the
140 : * given tensor or a value out of range so as to not request memory for
141 : * this tensor
142 : */
143 27243 : unsigned int validity_start = end_order + 1;
144 111101 : for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) {
145 83858 : if (details->exec_order[idx] >= start_order)
146 83858 : validity_start = std::min(validity_start, details->exec_order[idx]);
147 : /** Clamp execution orders that exceed the backwarding end order; e.g.,
148 : * for the input layer, backwarding is never reached even though an
149 : * execution order is assigned.
150 : * */
151 83858 : if (details->exec_order[idx] > old_end_order &&
152 : details->exec_order[idx] != PERSIST_END_ORDER) {
153 29800 : details->exec_order[idx] = PERSIST_END_ORDER - 1;
154 : }
155 : }
156 27243 : unsigned int validity_end = validity_start;
157 111093 : for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) {
158 83858 : if (details->exec_order[idx] == PERSIST_END_ORDER) {
159 8 : if (!persist_end_order) {
160 : end_order = end_order + 1;
161 : persist_end_order = true;
162 : }
163 8 : validity_end = end_order;
164 8 : details->exec_order[idx] = validity_end;
165 8 : break;
166 : }
167 :
168 83850 : if (details->exec_order[idx] <= end_order) {
169 54050 : validity_end = std::max(validity_end, details->exec_order[idx]);
170 : }
171 : }
172 : /**
173 : * use lifespan to update the validity.
174 : * if the lifespan is long term, the tensor must stay valid for the
175 : * complete duration.
176 : */
177 27243 : if (isTensorLongTerm(details->lifespan)) {
178 5011 : validity_start = start_order;
179 5011 : validity_end = end_order;
180 : }
181 :
182 : /** 2. for each tensor request if it is in the provided range */
183 27243 : if (validity_end < start_order || validity_start > end_order) {
184 8149 : continue;
185 : }
186 :
187 : /**
188 : * 3. requestMemory for all the tensors and set their tokens
189 : * @note +1 is to make the validity_end exclusive in the interval range
190 : */
191 19094 : details->token = mem_pool->requestMemory(
192 : spec.tensor->getMemoryBytes(), validity_start, validity_end + 1,
193 19094 : details->exec_order, details->lifespan, spec.is_weight_grad);
194 : #ifdef DEBUG
195 : if (details->token == 0)
196 : throw std::runtime_error("Received invalid token from memory pool");
197 : #endif
198 :
199 19094 : bytes_requested += spec.tensor->getMemoryBytes();
200 : }
201 :
202 : /** 4. plan the layout of the memory pool. */
203 1316 : if (bytes_requested > 0) {
204 1309 : double efficiency = mem_pool->planLayout(planner);
205 2618 : ml_logd("Memory layout efficiency = %lf", efficiency);
206 : }
207 1316 : }
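/**
 * A sketch of the usual finalize/allocate/deallocate sequence on a TensorPool
 * instance named pool, assuming BasicPlanner (basic_planner.h) is available
 * as the MemoryPlanner implementation and that exec orders 0..3 cover the
 * requested tensors.
 *
 * @code
 *   BasicPlanner planner;
 *   pool.finalize(planner, 0, 3); // plan memory for validity range [0, 3]
 *   pool.allocate(false);         // attach planned memory to managed tensors
 *   // ... run the iterations that use the pooled tensors ...
 *   pool.deallocate();            // release memory, data pointers are reset
 * @endcode
 */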
208 :
209 : /**
210 : * @brief Set the batch size for the inputs/outputs of the layers
211 : */
212 3056 : void TensorPool::setBatchSize(const std::string &name, unsigned int batch) {
213 3056 : if (name_map.find(name) == name_map.end())
214 1 : throw std::invalid_argument("Requested tensor not found");
215 :
216 3055 : pool[name_map[name]].tensor->updateBatch(batch);
217 3054 : }
218 :
219 : /**
220 : * @brief Allocate memory for all the managed tensors
221 : */
222 1314 : void TensorPool::allocate(bool init) {
223 1314 : if (minMemoryRequirement() == 0)
224 : return;
225 1308 : mem_pool->allocate();
226 :
227 : /** set the pointers using the token for all the tensors */
228 41505 : for (auto &spec : pool) {
229 : auto details = std::get_if<SourceDetails>(&spec.details);
230 29063 : if (!details || details->token == 0) {
231 21105 : continue;
232 : }
233 19092 : spec.tensor->setData(mem_pool->getMemory(details->token), 0, init);
234 :
235 19092 : syncDependents(spec);
236 : }
237 :
238 1308 : if (cache_loader) {
239 0 : cache_loader->init();
240 : }
241 : }
242 :
243 : /**
244 : * @brief Deallocate memory for all the managed tensors
245 : */
246 4031 : void TensorPool::deallocate() {
247 4031 : if (cache_loader)
248 0 : cache_loader->finish();
249 :
250 4031 : mem_pool->deallocate();
251 :
252 : /** nullify the data pointers for the tensors */
253 89233 : for (auto &spec : pool) {
254 170404 : spec.tensor->setData(nullptr);
255 : }
256 4031 : }
257 :
258 : const std::vector<unsigned int> &
259 13559 : TensorPool::getExecutionOrder(const std::string &name) {
260 13559 : return std::get<SourceDetails>(getSourceSpec(name).details).exec_order;
261 : }
262 :
263 : /**
264 : * @brief Expand the lifespan of the tensor with the given name
265 : *
266 : */
267 : TensorPool::RequestSpec &
268 0 : TensorPool::expandLifespan(const std::string &name,
269 : const std::vector<unsigned> &exec_order,
270 : TensorLifespan lifespan) {
271 0 : auto &spec = getSourceSpec(name);
272 0 : expandLifespan(spec, exec_order, lifespan);
273 0 : return spec;
274 : }
275 :
276 13109 : void TensorPool::expandLifespan(RequestSpec &spec,
277 : const std::vector<unsigned int> &exec_order,
278 : TensorLifespan lifespan) {
279 : auto &details = std::get<SourceDetails>(spec.details);
280 13110 : NNTR_THROW_IF((details.lifespan != TensorLifespan::UNMANAGED &&
281 : lifespan == TensorLifespan::UNMANAGED),
282 : std::invalid_argument)
283 : << "Extending to lifespan to unmanaged is not possible for name: "
284 1 : << spec.tensor->getName();
285 :
286 13108 : if (details.lifespan != TensorLifespan::UNMANAGED) {
287 : /// update only if the existing lifespan is not unmanaged
288 11613 : details.lifespan =
289 : enum_class_or<TensorLifespan>(details.lifespan, lifespan);
290 : }
291 13108 : details.exec_order.insert(details.exec_order.end(), exec_order.begin(),
292 : exec_order.end());
293 13108 : }
294 :
295 34428 : void TensorPool::syncDependents(const RequestSpec &spec) {
296 : /// @note syncing dependents of dependents is invalid and will throw.
297 : auto &dependents = std::get<SourceDetails>(spec.details).dependents;
298 53265 : for (auto &dep : dependents) {
299 18837 : auto &dep_spec = pool.at(dep);
300 18837 : auto offset = std::get<DependentDetails>(dep_spec.details).offset;
301 :
302 37674 : dep_spec.tensor->setData(spec.tensor->getMemoryData(),
303 18837 : spec.tensor->getOffset() + offset);
304 : }
305 34428 : }
306 :
307 34141 : Tensor *TensorPool::registerRequestSpec(RequestSpec &&spec) {
308 34141 : auto &name = spec.tensor->getName();
309 34141 : if (name_map.find(name) != name_map.end())
310 5 : throw std::invalid_argument("Cannot request tensor with same name");
311 :
312 34136 : if (spec.tensor->empty())
313 2 : throw std::invalid_argument("Cannot request tensor with size 0");
314 :
315 34134 : if (name.empty())
316 2 : throw std::invalid_argument("Cannot request tensor with empty name");
317 :
318 34132 : pool.push_back(std::move(spec));
319 34132 : name_map[name] = pool.size() - 1;
320 :
321 34132 : return pool.back().tensor.get();
322 : }
323 :
324 42012 : TensorPool::RequestSpec &TensorPool::getSourceSpec(const std::string &name) {
325 42002 : RequestSpec *rs = &pool.at(name_map.at(name));
326 : while (auto dep_details = std::get_if<DependentDetails>(&rs->details)) {
327 3939 : rs = &pool.at(dep_details->parent_idx);
328 3939 : }
329 :
330 42002 : return *rs;
331 : }
332 :
333 15336 : void TensorPool::fillPlaceholder(const std::string &name, const Tensor &t) {
334 15336 : auto &spec = getSourceSpec(name);
335 : auto &details = std::get<SourceDetails>(spec.details);
336 15336 : NNTR_THROW_IF(details.lifespan != TensorLifespan::UNMANAGED,
337 : std::invalid_argument)
338 : << "Cannot set external tensor for non-zero lifespan for " << name;
339 :
340 16911 : NNTR_THROW_IF(t.size() == 0 && t.getData(), std::invalid_argument)
341 : << "Error: setting invalid external tensor size 0 for " << name;
342 :
343 15336 : NNTR_THROW_IF(t.size() != 0 && t.size() < spec.tensor->size(),
344 : std::invalid_argument)
345 : << "Error: setting external tensor of smaller size for "
346 0 : << spec.tensor->getName() << "(maybe view of " << name << ")";
347 :
348 15336 : spec.tensor->setData(t.getMemoryData(), t.getOffset());
349 15336 : syncDependents(spec);
350 15336 : }
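/**
 * A sketch of the placeholder()/fillPlaceholder() pairing for externally
 * owned data such as network inputs, given a TensorPool instance named pool;
 * the name, shape and the Tensor(dim, alloc_now) construction are
 * illustrative assumptions. The placeholder is UNMANAGED, so the pool never
 * allocates it and the external tensor must outlive its use.
 *
 * @code
 *   Tensor *in = pool.placeholder("input0", TensorDim(1, 3, 32, 32));
 *   Tensor user_data(TensorDim(1, 3, 32, 32), true); // externally owned
 *   user_data.setValue(0.5f);
 *   pool.fillPlaceholder("input0", user_data); // in now shares user_data
 * @endcode
 */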
351 :
352 1439 : Tensor *TensorPool::extend(const std::string &name, const TensorDim &dim,
353 : const std::vector<unsigned int> &exec_order,
354 : TensorLifespan lifespan) {
355 1441 : NNTR_THROW_IF(!tensorExist(name), std::invalid_argument)
356 : << " cannot extend tensor which does not exist, name: " << name;
357 1437 : auto &spec = getSourceSpec(name);
358 1437 : NNTR_THROW_IF(dim != spec.tensor->getDim(), std::invalid_argument)
359 : << "Cannot extend tensor with different dimension";
360 1437 : spec.is_weight_grad = false;
361 1437 : expandLifespan(spec, exec_order, lifespan);
362 1437 : return getTensor(name);
363 : }
364 :
365 3847 : Tensor *TensorPool::requestOrExtend(const std::string &name,
366 : const TensorDim &dim,
367 : const std::vector<unsigned int> &exec_order,
368 : TensorLifespan lifespan,
369 : const Initializer &init) {
370 3848 : NNTR_THROW_IF(lifespan == TensorLifespan::UNMANAGED, std::invalid_argument)
371 : << "unmanaged life span is not supported";
372 :
373 3846 : if (tensorExist(name)) {
374 : Tensor *t = getTensor(name);
375 1436 : NNTR_THROW_IF(t->getDim() != dim, std::invalid_argument)
376 : << "tensor dimension mismatch for requestOrExtend name: " << name;
377 1435 : NNTR_THROW_IF(t->getInitializer() != init, std::invalid_argument)
378 : << "tensor initializer mismatch for requestOrExtend name: " << name;
379 1433 : return extend(name, dim, exec_order, lifespan);
380 : } else {
381 2411 : return request(name, dim, exec_order, lifespan, init);
382 : }
383 : }
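/**
 * A sketch of requestOrExtend() for a tensor shared across layers, given a
 * TensorPool instance named pool; names, shapes and exec orders are
 * illustrative assumptions. The second call must match the first call's
 * dimension and initializer and merely extends the exec orders and lifespan
 * of the existing tensor.
 *
 * @code
 *   Tensor *w0 = pool.requestOrExtend("shared:w", TensorDim(1, 1, 8, 8), {0},
 *                                     TensorLifespan::MAX_LIFESPAN,
 *                                     Initializer::ZEROS);
 *   Tensor *w1 = pool.requestOrExtend("shared:w", TensorDim(1, 1, 8, 8), {5},
 *                                     TensorLifespan::MAX_LIFESPAN,
 *                                     Initializer::ZEROS);
 *   // w0 == w1: both refer to the same pooled tensor
 * @endcode
 */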
384 :
385 0 : void TensorPool::reidentifySource(const std::string &dest,
386 : const std::string &new_src,
387 : unsigned int offset) {
388 : /// @todo add test
389 : /// source tensor of dest tensor becomes a view of new_src
390 0 : auto &old_spec = getSourceSpec(dest);
391 : auto &old_details = std::get<SourceDetails>(old_spec.details);
392 :
393 : /// 1. extend new_src with old src
394 0 : auto &new_spec = getSourceSpec(new_src);
395 0 : expandLifespan(new_spec, old_details.exec_order, old_details.lifespan);
396 0 : auto &new_dependents = std::get<SourceDetails>(new_spec.details).dependents;
397 0 : new_dependents.insert(new_dependents.end(), old_details.dependents.begin(),
398 : old_details.dependents.end());
399 :
400 : /// 2. calculate base offset from the new_src
401 0 : auto new_parent_idx = name_map.at(new_src);
402 : unsigned base_offset = std::visit(
403 : [](const auto &s) {
404 : using T = std::decay_t<decltype(s)>;
405 : if constexpr (std::is_same_v<T, SourceDetails>) {
406 : return 0u;
407 : } else if constexpr (std::is_same_v<T, DependentDetails>) {
408 0 : return s.offset;
409 : }
410 : return 0u;
411 : },
412 0 : pool[new_parent_idx].details);
413 0 : base_offset += offset;
414 :
415 : /// 3. transform parent idx/offset of old src's dependents based on the offset
416 0 : for (auto &dep : old_details.dependents) {
417 0 : auto &dep_spec = pool.at(dep);
418 : auto &details = std::get<DependentDetails>(dep_spec.details);
419 0 : details.offset += base_offset;
420 0 : details.parent_idx = new_parent_idx;
421 : }
422 :
423 : /// 4. replace old details to dependent srcs
424 0 : old_spec.details = DependentDetails{new_parent_idx, base_offset};
425 0 : }
426 :
427 5285 : bool TensorPool::tensorExist(const std::string &name) {
428 : /// @todo consider using a helper function to check, e.g., something like
429 : /// getTensor()
430 5285 : return name_map.count(name);
431 : }
432 :
433 : /**
434 : * @brief Check if the lifespan leads to long term validity
435 : *
436 : */
437 27243 : bool TensorPool::isTensorLongTerm(const TensorLifespan &lifespan) {
438 27243 : switch (lifespan) {
439 : case TensorLifespan::EPOCH_LIFESPAN:
440 : [[fallthrough]];
441 : case TensorLifespan::FORWARD_INFER_LIFESPAN:
442 : [[fallthrough]];
443 : case TensorLifespan::MAX_LIFESPAN:
444 : return true;
445 22232 : case TensorLifespan::FORWARD_FUNC_LIFESPAN:
446 : [[fallthrough]];
447 : case TensorLifespan::BACKWARD_FUNC_LIFESPAN:
448 : [[fallthrough]];
449 : case TensorLifespan::ITERATION_LIFESPAN:
450 : [[fallthrough]];
451 : case TensorLifespan::UNMANAGED:
452 : [[fallthrough]];
453 : default:
454 22232 : return false;
455 : }
456 : }
457 :
458 10800 : void TensorPool::flushCache() {
459 10800 : if (auto pool = dynamic_cast<CachePool *>(mem_pool.get()))
460 0 : pool->flush();
461 10800 : }
462 :
463 188358 : void TensorPool::flushCacheExcept(unsigned int order) {
464 188358 : if (auto pool = dynamic_cast<CachePool *>(mem_pool.get()))
465 0 : pool->flushExcept(order);
466 188358 : }
467 :
468 0 : void TensorPool::loadCacheExec(unsigned int order) {
469 0 : if (dynamic_cast<CachePool *>(mem_pool.get()))
470 0 : cache_loader->loadAllinOrder(order);
471 0 : }
472 :
473 0 : int TensorPool::loadCacheExecAsync(
474 : unsigned int order, TaskExecutor::CompleteCallback complete_callback) {
475 :
476 0 : if (dynamic_cast<CachePool *>(mem_pool.get()))
477 0 : return cache_loader->loadAllinOrder(order);
478 : else
479 : return 0;
480 : }
481 :
482 0 : bool TensorPool::checkLoadComplete(unsigned int order) {
483 0 : if (dynamic_cast<CachePool *>(mem_pool.get()))
484 0 : return cache_loader->checkAllLoadComplete(order);
485 : else
486 : return true;
487 : }
488 :
489 0 : int TensorPool::flushCacheExecAsync(
490 : unsigned int order, TaskExecutor::CompleteCallback complete_callback) {
491 0 : if (dynamic_cast<CachePool *>(mem_pool.get()))
492 0 : return cache_loader->unloadAllinOrder(order);
493 : else
494 : return 0;
495 : }
496 :
497 0 : void TensorPool::loadCacheCancel(int id) {
498 0 : if (dynamic_cast<CachePool *>(mem_pool.get()) == nullptr)
499 : return;
500 :
501 0 : cache_loader->cancelAsync(id);
502 : }
503 :
504 0 : unsigned int TensorPool::inActive(unsigned int order) {
505 0 : return cache_loader->inActive(order);
506 : }
507 :
508 : } // namespace nntrainer