LCOV - code coverage report
Current view: top level - nntrainer/tensor - optimized_v2_planner.cpp (source / functions) Coverage Total Hit
Test: coverage_filtered.info Lines: 0.0 % 68 0
Test Date: 2025-12-14 20:38:17 Functions: 0.0 % 1 0

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * Copyright (C) 2022 Jijoong Moon <jijoong.moon@samsung.com>
       4              :  *
       5              :  * @file   optimized_v2_planner.cpp
       6              :  * @date   29 December 2022
       7              :  * @see    https://github.com/nnstreamer/nntrainer
       8              :  * @author Parichay Kapoor <pk.kapoor@samsung.com>
       9              :  * @author Jijoong Moon <jijoong.moon@samsung.com>
      10              :  * @bug    No known bugs except for NYI items
      11              :  * @brief  This is Optimized V2 Memory Planner
      12              :  *
      13              :  */
      14              : 
      15              : #include <algorithm>
      16              : #include <memory>
      17              : #include <nntrainer_error.h>
      18              : #include <nntrainer_log.h>
      19              : #include <stdexcept>
      20              : #include <vector>
      21              : 
      22              : #include <optimized_v2_planner.h>
      23              : 
      24              : namespace nntrainer {
      25              : 
      26              : /**
      27              :  * @brief Memory Request data structure clubbing all the requests
      28              :  *
      29              :  */
      30              : struct MemoryRequest {
      31              :   unsigned int start; /**< start of the validity (inclusive) */
      32              :   unsigned int end;   /**< end of the validity (exclusive) */
      33              :   unsigned int loc;   /**< index/location of the this request */
      34              :   size_t size;        /**< size of the request */
      35              :   size_t offset;      /**< offset for this request */
      36              : 
      37              :   /**
      38              :    * @brief Constructor for the Memory Request
      39              :    *
      40              :    */
      41              :   MemoryRequest(size_t s, const std::pair<unsigned int, unsigned int> &valid,
      42            0 :                 unsigned int idx) :
      43            0 :     start(valid.first), end(valid.second), loc(idx), size(s), offset(0) {}
      44              : };
      45              : 
      46              : /**
      47              :  * @brief Memory Request data structure clubbing for the weight gradient
      48              :  * requests
      49              :  */
      50            0 : struct WGradMemoryRequest {
      51              :   MemoryRequest *mem_req;
      52              :   std::vector<std::pair<unsigned int, unsigned int>> start_end;
      53              : 
      54            0 :   WGradMemoryRequest(MemoryRequest *req) : mem_req(req) {}
      55              : };
      56              : 
      57              : /**
      58              :  * @brief check if validate interval is overlapping in a very naive way.
      59              :  *
      60              :  * @param memory_validity validity
      61              :  * @param memory_size  size
      62              :  * @param memory_offset  offset
      63              :  * @param memory_req  request
      64              :  */
      65              : [[maybe_unused]] static void validateIntervalOverlap(
      66              :   const std::vector<std::pair<unsigned int, unsigned int>> &memory_validity,
      67              :   const std::vector<size_t> &memory_size,
      68              :   const std::vector<size_t> &memory_offset, size_t memory_req) {
      69              :   auto bits = std::make_unique<bool[]>(memory_req);
      70              : 
      71              :   for (size_t i = 0; i < memory_req; ++i) {
      72              :     bits[i] = 0;
      73              :   }
      74              : 
      75              :   auto exec_start =
      76              :     std::min_element(memory_validity.begin(), memory_validity.end(),
      77              :                      [](auto &a, auto &b) { return a.first < b.first; });
      78              : 
      79              :   auto exec_end =
      80              :     std::max_element(memory_validity.begin(), memory_validity.end(),
      81              :                      [](auto &a, auto &b) { return a.second < b.second; });
      82              : 
      83              :   auto set = [&](int offset, size_t size, int idx) {
      84              :     for (unsigned int i = offset; i < size; ++i) {
      85              :       NNTR_THROW_IF(bits[i], std::invalid_argument)
      86              :         << " bits taken at i: " << i << " offset: " << offset
      87              :         << " size: " << size << " idx: " << idx;
      88              :       bits[i] = 1;
      89              :     }
      90              :   };
      91              : 
      92              :   auto unset = [&](int offset, size_t size, int idx) {
      93              :     for (unsigned int i = offset; i < size; ++i) {
      94              :       NNTR_THROW_IF(!bits[i], std::invalid_argument)
      95              :         << "double freeing bits at i: " << i << " offset: " << offset
      96              :         << " size: " << size << " idx: " << idx;
      97              :       bits[i] = 0;
      98              :     }
      99              :   };
     100              : 
     101              :   for (unsigned int exec = exec_start->first; exec <= exec_end->second;
     102              :        ++exec) {
     103              : 
     104              :     for (unsigned int idx = 0; idx < memory_validity.size(); ++idx) {
     105              :       auto &validity = memory_validity.at(idx);
     106              :       auto &sz = memory_size.at(idx);
     107              :       auto &offset = memory_offset.at(idx);
     108              :       if (validity.first == exec) {
     109              :         set(offset, sz, idx);
     110              :       }
     111              :       if (validity.second == exec) {
     112              :         unset(offset, sz, idx);
     113              :       }
     114              :     }
     115              :   }
     116              :   // check if there is any dangling memory
     117              :   set(0, memory_req, memory_validity.size());
     118              : }
     119              : 
     120              : /**
     121              :  * @copydoc MemoryPlanner::planLayout(
     122              :  * const std::vector<size_t> &memory_size,
     123              :  * const std::vector<std::pair<unsigned int, unsigned int>> &memory_validity,
     124              :  * std::vector<size_t> &memory_offset,
     125              :  * std::vector<bool> &memory_is_wgrad);
     126              :  *
     127              :  * @details The optimized v1 memory planner assigns memory to the requests whose
     128              :  * validity starts first.
     129              :  * The requested memories are sorted based on the ascending order of the start
     130              :  * timestamps, and descending order using the end timestamps. The
     131              :  * sorted memories are given increasing offset based on the memory size.
     132              :  * At the end of each timestamp, invalid memories are freed, and offset updated
     133              :  * for reuse. This planner allocates overlapping memory for all the required
     134              :  * memories.
     135              :  *
     136              :  */
     137            0 : size_t OptimizedV2Planner::planLayout(
     138              :   const std::vector<size_t> &memory_size,
     139              :   const std::vector<std::pair<unsigned int, unsigned int>> &memory_validity,
     140              :   std::vector<size_t> &memory_offset, std::vector<bool> &memory_is_wgrad,
     141              :   size_t n_wgrad) const {
     142              : 
     143              :   std::vector<MemoryRequest> wgrad_requests;
     144            0 :   wgrad_requests.reserve(n_wgrad);
     145              : 
     146              :   /** create memory requests structure array for easier management */
     147              :   std::vector<MemoryRequest> requests;
     148            0 :   requests.reserve(memory_size.size() - n_wgrad);
     149            0 :   if (n_wgrad) {
     150            0 :     for (unsigned int idx = 0; idx < memory_size.size(); idx++) {
     151            0 :       if (!memory_is_wgrad[idx]) {
     152            0 :         requests.emplace_back(memory_size[idx], memory_validity[idx], idx);
     153              :       } else {
     154            0 :         wgrad_requests.emplace_back(memory_size[idx], memory_validity[idx],
     155              :                                     idx);
     156              :       }
     157              :     }
     158              :   } else {
     159            0 :     for (unsigned int idx = 0; idx < memory_size.size(); idx++) {
     160            0 :       requests.emplace_back(memory_size[idx], memory_validity[idx], idx);
     161              :     }
     162              :   }
     163              : 
     164              :   /**
     165              :    * sort the memory requests with ascending order of start time first, and
     166              :    * then end time
     167              :    */
     168            0 :   std::sort(requests.begin(), requests.end(),
     169              :             [](auto const &v1, auto const &v2) -> int {
     170            0 :               if (v1.start == v2.start)
     171            0 :                 return v1.end < v2.end;
     172            0 :               return v1.start < v2.start;
     173              :               /** TODO: try this */
     174              :               //   if (v1.end == v2.end)
     175              :               //     return v1.start < v2.start;
     176              :               //   return v1.end > v2.end;
     177              :             });
     178              : 
     179              :   /** all the memories in use sorted by their assigned offset and size */
     180              :   std::vector<MemoryRequest *> sorted_req;
     181              : 
     182              :   /** iterate over the sorted requests and start allocation of the requests */
     183            0 :   memory_offset.resize(memory_size.size());
     184            0 :   size_t memory_req = 0;
     185            0 :   for (auto &req : requests) {
     186              :     /** remove expired memories and update offset */
     187            0 :     while (!sorted_req.empty() && sorted_req.back()->end <= req.start)
     188              :       sorted_req.pop_back();
     189              : 
     190              :     /** if there exists an expired memory with same size (not at the edge),
     191              :      * reuse it */
     192              :     bool replace_and_fill = false;
     193            0 :     for (int idx = sorted_req.size() - 1; idx >= 0; idx--) {
     194            0 :       auto const &sr = sorted_req[idx];
     195              :       /** TODO: reuse if memory size not exactly match */
     196            0 :       if (sr->end <= req.start && sr->size == req.size) {
     197            0 :         req.offset = sr->offset;
     198            0 :         memory_offset[req.loc] = req.offset;
     199            0 :         sorted_req[idx] = &req;
     200              :         replace_and_fill = true;
     201              :         break;
     202              :       }
     203              :     }
     204            0 :     if (replace_and_fill) {
     205            0 :       continue;
     206              :     }
     207              : 
     208              :     size_t offset = 0;
     209            0 :     if (!sorted_req.empty())
     210            0 :       offset = sorted_req.back()->offset + sorted_req.back()->size;
     211              : 
     212              :     /** assign offset to the new request and push to queue */
     213            0 :     req.offset = offset;
     214            0 :     memory_offset[req.loc] = offset;
     215            0 :     memory_req = std::max(memory_req, req.offset + req.size);
     216            0 :     sorted_req.push_back(&req);
     217              :   }
     218              : 
     219            0 :   if (wgrad_requests.size()) {
     220              :     /** TODO: We donot need to start from memeory_req. We might find proper
     221              :      * offset considering execution order */
     222            0 :     size_t last_offset = memory_req;
     223              : 
     224              :     /* sort the memory request with ascending order of size */
     225            0 :     std::sort(
     226              :       wgrad_requests.begin(), wgrad_requests.end(),
     227            0 :       [](auto const &v1, auto const &v2) -> int { return v1.size > v2.size; });
     228              : 
     229              :     std::vector<WGradMemoryRequest> wgrad_sorted_req;
     230              : 
     231              :     bool replace_and_fill = false;
     232              : #ifdef DEBUG
     233              :     unsigned int new_grad_cnt = 0;
     234              :     unsigned int reused_grad_cnt = 0;
     235              :     size_t new_grad_size = 0;
     236              :     size_t reused_grad_size = 0;
     237              : #endif
     238            0 :     for (auto &req : wgrad_requests) {
     239            0 :       for (unsigned int idx = 0; idx < wgrad_sorted_req.size(); idx++) {
     240              :         const auto &sr = wgrad_sorted_req[idx];
     241              :         bool merge = true;
     242            0 :         if (sr.mem_req->size >= req.size) {
     243            0 :           for (auto &interval : sr.start_end) {
     244            0 :             if ((interval.first < req.start && interval.first < req.end &&
     245            0 :                  req.end < interval.second) ||
     246            0 :                 (req.start > interval.first && req.start < interval.second &&
     247            0 :                  req.end > interval.second) ||
     248            0 :                 (req.start == interval.first && req.end == interval.second)) {
     249              :               merge = false;
     250              :               break;
     251              :             }
     252              :           }
     253              :         }
     254              : 
     255            0 :         if (merge) {
     256            0 :           req.offset = sr.mem_req->offset;
     257            0 :           memory_offset[req.loc] = req.offset;
     258              :           replace_and_fill = true;
     259            0 :           wgrad_sorted_req[idx].start_end.push_back(
     260            0 :             std::make_pair(req.start, req.end));
     261              : #ifdef DEBUG
     262              :           reused_grad_size += req.size;
     263              :           reused_grad_cnt++;
     264              : #endif
     265              :           break;
     266              :         } else {
     267              :           replace_and_fill = false;
     268              :         }
     269              :       }
     270            0 :       if (replace_and_fill) {
     271            0 :         continue;
     272              :       }
     273              : 
     274              :       size_t offset = last_offset;
     275            0 :       if (!wgrad_sorted_req.empty())
     276            0 :         offset = wgrad_sorted_req.back().mem_req->offset +
     277            0 :                  wgrad_sorted_req.back().mem_req->size;
     278              : 
     279            0 :       req.offset = offset;
     280            0 :       memory_offset[req.loc] = offset;
     281            0 :       memory_req = std::max(memory_req, req.offset + req.size);
     282            0 :       wgrad_sorted_req.push_back(WGradMemoryRequest(&req));
     283            0 :       wgrad_sorted_req.back().start_end.push_back(
     284            0 :         std::make_pair(req.start, req.end));
     285              : #ifdef DEBUG
     286              :       new_grad_cnt++;
     287              :       new_grad_size += req.size;
     288              : #endif
     289              :     }
     290            0 :   }
     291              : 
     292              :   //   validateIntervalOverlap(memory_validity, memory_size, memory_offset,
     293              :   //   memory_req);
     294              : 
     295            0 :   return memory_req;
     296            0 : }
     297              : 
     298              : } // namespace nntrainer
        

Generated by: LCOV version 2.0-1