LCOV - code coverage report
Current view: top level - nntrainer/tensor - optimized_v3_planner.cpp (source / functions) Coverage Total Hit
Test: coverage_filtered.info Lines: 0.0 % 39 0
Test Date: 2025-12-14 20:38:17 Functions: 0.0 % 2 0

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * Copyright (C) 2023 Jijoong Moon <jijoong.moon@samsung.com>
       4              :  *
       5              :  * @file   optimized_v3_planner.cpp
       6              :  * @date   2 January 2023
       7              :  * @see    https://github.com/nnstreamer/nntrainer
       8              :  * @author Jijoong Moon <jijoong.moon@samsung.com>
       9              :  * @bug    No known bugs except for NYI items
      10              :  * @brief  This is Optimized V3 Memory Planner
      11              :  *
      12              :  */
      13              : 
      14              : #include <algorithm>
      15              : #include <memory>
      16              : #include <nntrainer_error.h>
      17              : #include <stdexcept>
      18              : #include <vector>
      19              : 
      20              : #include <optimized_v3_planner.h>
      21              : 
      22              : namespace nntrainer {
      23              : 
      24              : /**
      25              :  * @brief Memory Request data structure clubbing all the requests
      26              :  *
      27              :  */
      28              : struct MemoryRequest {
      29              :   unsigned int start; /**< start of the validity (inclusive) */
      30              :   unsigned int end;   /**< end of the validity (exclusive) */
      31              :   unsigned int loc;   /**< index/location of the this request */
      32              :   size_t size;        /**< size of the request */
      33              :   size_t offset;      /**< offset for this request */
      34              : 
      35              :   /**
      36              :    * @brief Constructor for the Memory Request
      37              :    *
      38              :    */
      39              :   MemoryRequest(size_t s, const std::pair<unsigned int, unsigned int> &valid,
      40            0 :                 unsigned int idx) :
      41            0 :     start(valid.first), end(valid.second), loc(idx), size(s), offset(0) {}
      42              : };
      43              : 
      44            0 : static size_t computeSpace(unsigned int exec_order,
      45              :                            std::vector<MemoryRequest *> &sorted_req,
      46              :                            std::vector<std::pair<size_t, size_t>> &vacant) {
      47              :   size_t bottom = 0;
      48              :   size_t max_offset = 0;
      49              : 
      50            0 :   std::sort(sorted_req.begin(), sorted_req.end(),
      51              :             [](auto const &v1, auto const &v2) -> int {
      52            0 :               return v1->offset < v2->offset;
      53              :               /** TODO: try this */
      54              :               //   if (v1.end == v2.end)
      55              :               //     return v1.start < v2.start;
      56              :               //   return v1.end > v2.end;
      57              :             });
      58              : 
      59            0 :   for (unsigned idx = 0; idx < sorted_req.size(); idx++) {
      60              :     auto const &sr = sorted_req[idx];
      61            0 :     size_t top = sr->offset + sr->size;
      62              : 
      63            0 :     if (max_offset < top)
      64              :       max_offset = top;
      65              : 
      66            0 :     if (sr->offset > bottom) {
      67            0 :       vacant.push_back(std::make_pair(bottom, sr->offset));
      68              :     }
      69              :     bottom = top;
      70              :   }
      71              : 
      72            0 :   return max_offset;
      73              : }
      74              : 
      75              : /**
      76              :  * @brief check if validate interval is overlapping in a very naive way.
      77              :  *
      78              :  * @param memory_validity validity
      79              :  * @param memory_size  size
      80              :  * @param memory_offset  offset
      81              :  * @param memory_req  request
      82              :  */
      83              : [[maybe_unused]] static void validateIntervalOverlap(
      84              :   const std::vector<std::pair<unsigned int, unsigned int>> &memory_validity,
      85              :   const std::vector<size_t> &memory_size,
      86              :   const std::vector<size_t> &memory_offset, size_t memory_req) {
      87              :   auto bits = std::make_unique<bool[]>(memory_req);
      88              : 
      89              :   for (size_t i = 0; i < memory_req; ++i) {
      90              :     bits[i] = 0;
      91              :   }
      92              : 
      93              :   auto exec_start =
      94              :     std::min_element(memory_validity.begin(), memory_validity.end(),
      95              :                      [](auto &a, auto &b) { return a.first < b.first; });
      96              : 
      97              :   auto exec_end =
      98              :     std::max_element(memory_validity.begin(), memory_validity.end(),
      99              :                      [](auto &a, auto &b) { return a.second < b.second; });
     100              : 
     101              :   auto set = [&](int offset, size_t size, int idx) {
     102              :     for (unsigned int i = offset; i < size; ++i) {
     103              :       NNTR_THROW_IF(bits[i], std::invalid_argument)
     104              :         << " bits taken at i: " << i << " offset: " << offset
     105              :         << " size: " << size << " idx: " << idx;
     106              :       bits[i] = 1;
     107              :     }
     108              :   };
     109              : 
     110              :   auto unset = [&](int offset, size_t size, int idx) {
     111              :     for (unsigned int i = offset; i < size; ++i) {
     112              :       NNTR_THROW_IF(!bits[i], std::invalid_argument)
     113              :         << "double freeing bits at i: " << i << " offset: " << offset
     114              :         << " size: " << size << " idx: " << idx;
     115              :       bits[i] = 0;
     116              :     }
     117              :   };
     118              : 
     119              :   for (unsigned int exec = exec_start->first; exec <= exec_end->second;
     120              :        ++exec) {
     121              : 
     122              :     for (unsigned int idx = 0; idx < memory_validity.size(); ++idx) {
     123              :       auto &validity = memory_validity.at(idx);
     124              :       auto &sz = memory_size.at(idx);
     125              :       auto &offset = memory_offset.at(idx);
     126              :       if (validity.first == exec) {
     127              :         set(offset, sz, idx);
     128              :       }
     129              :       if (validity.second == exec) {
     130              :         unset(offset, sz, idx);
     131              :       }
     132              :     }
     133              :   }
     134              :   // check if there is any dangling memory
     135              :   set(0, memory_req, memory_validity.size());
     136              : }
     137              : 
     138              : /**
     139              :  * @copydoc MemoryPlanner::planLayout(
     140              :  * const std::vector<size_t> &memory_size,
     141              :  * const std::vector<std::pair<unsigned int, unsigned int>> &memory_validity,
     142              :  * std::vector<size_t> &memory_offset,
     143              :  * std::vector<bool> &memory_is_wgrad);
     144              :  *
     145              :  * @details The optimized v1 memory planner assigns memory to the requests whose
     146              :  * validity starts first.
     147              :  * The requested memories are sorted based on the ascending order of the start
     148              :  * timestamps, and descending order using the end timestamps. The
     149              :  * sorted memories are given increasing offset based on the memory size.
     150              :  * At the end of each timestamp, invalid memories are freed, and offset updated
     151              :  * for reuse. This planner allocates overlapping memory for all the required
     152              :  * memories.
     153              :  *
     154              :  */
     155            0 : size_t OptimizedV3Planner::planLayout(
     156              :   const std::vector<size_t> &memory_size,
     157              :   const std::vector<std::pair<unsigned int, unsigned int>> &memory_validity,
     158              :   std::vector<size_t> &memory_offset, std::vector<bool> &memory_is_wgrad,
     159              :   size_t n_wgrad) const {
     160              : 
     161              :   /** create memory requests structure array for easier management */
     162              :   std::vector<MemoryRequest> requests;
     163            0 :   requests.reserve(memory_size.size());
     164            0 :   for (unsigned int idx = 0; idx < memory_size.size(); idx++) {
     165            0 :     requests.emplace_back(memory_size[idx], memory_validity[idx], idx);
     166              :   }
     167              : 
     168              :   /**
     169              :    * sort the memory requests with ascending order of start time first, and
     170              :    * then end time
     171              :    */
     172            0 :   std::sort(requests.begin(), requests.end(),
     173              :             [](auto const &v1, auto const &v2) -> int {
     174            0 :               if (v1.start == v2.start)
     175            0 :                 return v1.end < v2.end;
     176            0 :               return v1.start < v2.start;
     177              :               /** TODO: try this */
     178              :               //   if (v1.end == v2.end)
     179              :               //     return v1.start < v2.start;
     180              :               //   return v1.end > v2.end;
     181              :             });
     182              : 
     183              :   /** all the memories in use sorted by their assigned offset and size */
     184              :   std::vector<MemoryRequest *> sorted_req;
     185              : 
     186              :   /** iterate over the sorted requests and start allocation of the requests */
     187            0 :   memory_offset.resize(memory_size.size());
     188            0 :   size_t memory_req = 0;
     189            0 :   for (auto &req : requests) {
     190              :     sorted_req.erase(
     191            0 :       std::remove_if(sorted_req.begin(), sorted_req.end(),
     192            0 :                      [req](auto elem) { return elem->end <= req.start; }),
     193              :       sorted_req.end());
     194              : 
     195              :     bool replace_and_fill = false;
     196              :     std::vector<std::pair<size_t, size_t>> vacant;
     197              : 
     198            0 :     size_t max_offset = computeSpace(req.start, sorted_req, vacant);
     199              : 
     200            0 :     for (unsigned int idx = 0; idx < vacant.size(); idx++) {
     201            0 :       if (vacant[idx].second - vacant[idx].first >= req.size) {
     202            0 :         req.offset = vacant[idx].first;
     203            0 :         memory_offset[req.loc] = req.offset;
     204            0 :         sorted_req.push_back(&req);
     205              :         replace_and_fill = true;
     206            0 :         break;
     207              :       }
     208              :     }
     209              :     vacant.clear();
     210              : 
     211            0 :     if (replace_and_fill) {
     212              :       continue;
     213              :     }
     214              : 
     215            0 :     req.offset = max_offset;
     216            0 :     memory_offset[req.loc] = max_offset;
     217            0 :     memory_req = std::max(memory_req, req.offset + req.size);
     218            0 :     sorted_req.push_back(&req);
     219            0 :   }
     220              : 
     221            0 :   return memory_req;
     222            0 : }
     223              : 
     224              : } // namespace nntrainer
        

Generated by: LCOV version 2.0-1