LCOV - coverage_filtered.info - nntrainer/tensor/memory

LCOV - code coverage report

Current view:	top level - nntrainer/tensor - memory_pool.cpp (source / functions)		Coverage	Total	Hit
Test:	coverage_filtered.info	Lines:	65.2 %	135	88
Test Date:	2025-12-14 20:38:17	Functions:	86.7 %	15	13

            Line data    Source code

       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
       4              :  *
       5              :  * @file   memory_pool.cpp
       6              :  * @date   11 August 2021
       7              :  * @see    https://github.com/nnstreamer/nntrainer
       8              :  * @author Parichay Kapoor <pk.kapoor@samsung.com>
       9              :  * @bug    No known bugs except for NYI items
      10              :  * @brief  This is Memory Pool Class
      11              :  */
      12              : 
      13              : #include <cstdlib>
      14              : #include <limits>
      15              : 
      16              : #include <numeric>
      17              : #include <vector>
      18              : 
      19              : #include <map>
      20              : 
      21              : #include <memory_pool.h>
      22              : #include <nntrainer_error.h>
      23              : #include <nntrainer_log.h>
      24              : #include <numeric>
      25              : #include <profiler.h>
      26              : #include <vector>
      27              : 
      28              : #if defined(_WIN32)
      29              : #define GET_SYSTEM_ALIGMENT()                                                  \
      30              :   ([]() -> size_t {                                                            \
      31              :     SYSTEM_INFO sysInfo;                                                       \
      32              :     GetSystemInfo(&sysInfo);                                                   \
      33              :     return sysInfo.dwPageSize;                                                 \
      34              :   })()
      35              : 
      36              : #define ALIGNED_ALLOC(size) _aligned_malloc(size, GET_SYSTEM_ALIGMENT())
      37              : #define ALIGNED_FREE(ptr) _aligned_free(ptr)
      38              : #elif defined(__ANDROID__) && ENABLE_NPU
      39              : #define RPCMEM_HEAP_ID_SYSTEM 25
      40              : #define RPCMEM_DEFAULT_FLAGS 1
      41              : #define ALIGNED_ALLOC(size)                                                    \
      42              :   rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, size)
      43              : #define ALIGNED_FREE(ptr) rpcmem_free(ptr)
      44              : #else
      45              : #define GET_SYSTEM_ALIGMENT()                                                  \
      46              :   ([]() -> size_t { return sysconf(_SC_PAGE_SIZE); })()
      47              : #define ALIGNED_ALLOC(size) std::aligned_alloc(GET_SYSTEM_ALIGMENT(), size)
      48              : #define ALIGNED_FREE(ptr) free(ptr)
      49              : #endif
      50              : 
      51              : namespace nntrainer {
      52              : 
      53              : /**
      54              :  * @brief Request Memory from memory pool
      55              :  * @note start_time is inclusive, but end_time is exclusive
      56              :  */
      57        19094 : unsigned int MemoryPool::requestMemory(size_t bytes, unsigned int start_time,
      58              :                                        unsigned int end_time,
      59              :                                        std::vector<unsigned int> exec_order,
      60              :                                        TensorLifespan lifespan, bool is_wgrad) {
      61        19094 :   if (bytes == 0)
      62            0 :     throw std::invalid_argument("Requesting memory of 0 size");
      63              : 
      64        19094 :   if (mem_pool != nullptr)
      65              :     throw std::invalid_argument(
      66            0 :       "Deallocate memory pool before requesting more memory");
      67              : 
      68        19094 :   if (end_time <= start_time)
      69              :     throw std::invalid_argument(
      70            0 :       "Invalid validity range for the requested memory");
      71              : 
      72        19094 :   memory_size.push_back(bytes);
      73        19094 :   memory_validity.push_back({start_time, end_time});
      74        19094 :   memory_exec_order.push_back(exec_order);
      75        19094 :   memory_is_wgrad.push_back(is_wgrad);
      76        19094 :   if (is_wgrad)
      77         1458 :     n_wgrad++;
      78              : 
      79              :   /** invalidate min_pool_size if already there */
      80        19094 :   min_pool_size = 0;
      81              : 
      82        19094 :   return memory_size.size();
      83              : }
      84              : 
      85              : /**
      86              :  * @brief Planner the layout with memory planner
      87              :  *
      88              :  * @details The efficiency of the planner is calculated as the ratio of the
      89              :  * theoretical minimum memory requirement divided by the memory requirement
      90              :  * given by the memory planner.
      91              :  *
      92              :  * @details planLayout can be called multiple times as this does not perform
      93              :  * any allocation but rather just plans the layout and stores the layout.
      94              :  * Subsequent call to this function will overwrite any existing layout.
      95              :  */
      96         1309 : double MemoryPool::planLayout(const MemoryPlanner &planner) {
      97         1309 :   if (mem_pool != nullptr)
      98              :     /** mem_pool must be deallocated when planLayout is being called */
      99            0 :     throw std::runtime_error("Planning memory layout after allocation");
     100              : 
     101         1309 :   if (memory_size.empty())
     102            0 :     throw std::runtime_error("Planning memory layout for empty pool");
     103              : 
     104              :   /** calculate min_pool_size if not already calculated */
     105         1309 :   if (min_pool_size == 0)
     106         1309 :     min_pool_size = calcMinMemoryRequirement();
     107              : 
     108         2618 :   pool_size = planner.planLayout(memory_size, memory_validity, memory_offset,
     109         1309 :                                  memory_is_wgrad, n_wgrad);
     110         1309 :   if (pool_size < min_pool_size || !validateLayout())
     111            0 :     throw std::runtime_error("Planned layout is not feasible");
     112              : 
     113         1309 :   return double(min_pool_size) / double(pool_size);
     114              : }
     115              : 
     116              : /**
     117              :  * @brief Do the allocation of memory
     118              :  *
     119              :  */
     120         1308 : void MemoryPool::allocate() {
     121         1308 :   if (pool_size == 0)
     122            0 :     throw std::runtime_error("Allocating memory pool with size 0");
     123              : 
     124         1308 :   if (mem_pool != nullptr)
     125            0 :     throw std::runtime_error("Memory pool is already allocated");
     126              : 
     127              : #if defined(__ANDROID__) && ENABLE_NPU
     128              :   int i = 0;
     129              : #define RPCMEM_HEAP_ID_SYSTEM 25
     130              : #define RPCMEM_DEFAULT_FLAGS 1
     131              :   std::map<size_t, void *> offset_ptr;     // offset : ptr
     132              :   std::map<size_t, size_t> allocated_size; // offset : memory size
     133              :   std::map<size_t, std::vector<int>>
     134              :     offset_indices; // offset : list of index which has same offset
     135              : 
     136              :   for (auto &s : memory_offset) {
     137              :     size_t current_size = memory_size.at(i);
     138              :     auto it = offset_ptr.find(s);
     139              :     if (it == offset_ptr.end()) {
     140              :       void *ptr =
     141              :         rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, current_size);
     142              :       memory_ptrs.push_back(ptr);
     143              :       offset_ptr[s] = ptr;
     144              :       allocated_size[s] = current_size;
     145              :       offset_indices[s].push_back(i);
     146              :     } else {
     147              :       void *existing_ptr = it->second;
     148              :       size_t max_size = allocated_size[s];
     149              :       if (max_size < current_size) {
     150              :         void *new_ptr = rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM,
     151              :                                      RPCMEM_DEFAULT_FLAGS, current_size);
     152              : 
     153              :         for (int idx : offset_indices[s]) {
     154              :           memory_ptrs[idx] = new_ptr;
     155              :         }
     156              :         rpcmem_free(existing_ptr);
     157              :         offset_ptr[s] = new_ptr;
     158              :         allocated_size[s] = current_size;
     159              :       }
     160              :       memory_ptrs.push_back(offset_ptr[s]);
     161              :       offset_indices[s].push_back(i);
     162              :     }
     163              :     i++;
     164              :   }
     165              : 
     166              :   mem_pool = calloc(1, 1);
     167              : 
     168              : #else
     169              : 
     170              : #ifdef ENABLE_OPENCL
     171              :   auto *cl_context =
     172              :     static_cast<ClContext *>(Engine::Global().getRegisteredContext("gpu"));
     173              :   mem_pool = cl_context->context_inst_.createSVMRegion(pool_size);
     174              : 
     175              :   // If SVM allocation fails, use calloc()
     176              :   if (mem_pool != nullptr) {
     177              :     svm_allocation = true;
     178              :   }
     179              : #endif
     180              : 
     181              :   if (mem_pool == nullptr)
     182         1308 :     mem_pool = calloc(pool_size, 1);
     183              : 
     184              :   unsigned int idx = 1;
     185        20400 :   for (auto &s : memory_offset) {
     186        19092 :     char *ptr = static_cast<char *>(mem_pool) + memory_offset.at(idx - 1);
     187        19092 :     memory_ptrs.push_back(ptr);
     188        19092 :     idx++;
     189              :   }
     190              : #endif
     191              : 
     192              : #ifdef PROFILE
     193              :   static long long seq = 0;
     194              : 
     195              :   std::string msg("MemoryPool #");
     196              :   msg.append(std::to_string(seq++));
     197              :   PROFILE_MEM_ALLOC(mem_pool, pool_size, msg);
     198              : #endif
     199         1308 : }
     200              : 
     201            0 : void MemoryPool::allocateFSU() {
     202            0 :   if (pool_size == 0)
     203            0 :     throw std::runtime_error("Allocating memory pool with size 0");
     204              : 
     205            0 :   if (mem_pool != nullptr)
     206            0 :     throw std::runtime_error("Memory pool is already allocated");
     207              : 
     208            0 :   int i = 0;
     209              :   std::map<size_t, void *> offset_ptr;     // offset : ptr
     210              :   std::map<size_t, size_t> allocated_size; // offset : memory size
     211              :   std::map<size_t, std::vector<int>>
     212              :     offset_indices; // offset : list of index which has same offset
     213              : 
     214            0 :   for (auto &s : memory_offset) {
     215            0 :     size_t current_size = memory_size.at(i);
     216              :     auto it = offset_ptr.find(s);
     217            0 :     if (it == offset_ptr.end()) {
     218            0 :       void *ptr = ALIGNED_ALLOC(current_size);
     219            0 :       memory_ptrs.push_back(ptr);
     220            0 :       offset_ptr[s] = ptr;
     221            0 :       allocated_size[s] = current_size;
     222            0 :       offset_indices[s].push_back(i);
     223              : 
     224              :     } else {
     225            0 :       void *existing_ptr = it->second;
     226            0 :       size_t max_size = allocated_size[s];
     227            0 :       if (max_size < current_size) {
     228            0 :         void *new_ptr = ALIGNED_ALLOC(current_size);
     229              : 
     230            0 :         for (int idx : offset_indices[s]) {
     231            0 :           memory_ptrs[idx] = new_ptr;
     232              :         }
     233            0 :         ALIGNED_FREE(existing_ptr);
     234            0 :         offset_ptr[s] = new_ptr;
     235            0 :         allocated_size[s] = current_size;
     236              :       }
     237            0 :       memory_ptrs.push_back(offset_ptr[s]);
     238            0 :       offset_indices[s].push_back(i);
     239              :     }
     240            0 :     i++;
     241              :   }
     242              : 
     243            0 :   mem_pool = calloc(1, 1);
     244              : 
     245            0 :   if (mem_pool == nullptr)
     246              :     throw std::runtime_error(
     247            0 :       "Failed to allocate memory: " + std::to_string(pool_size) + "bytes");
     248            0 : }
     249              : 
     250              : /**
     251              :  * @brief Get the allocated memory
     252              :  *
     253              :  */
     254        19092 : std::shared_ptr<MemoryData> MemoryPool::getMemory(unsigned int idx) {
     255        19092 :   if (mem_pool == nullptr)
     256            0 :     throw std::invalid_argument("Getting memory before allocation");
     257              : 
     258        19092 :   auto mem_data = std::make_shared<MemoryData>((void *)memory_ptrs.at(idx - 1));
     259        19092 :   mem_data->setSVM(svm_allocation);
     260        19092 :   return mem_data;
     261              : }
     262              : 
     263              : /**
     264              :  * @brief Free all the allocated memory
     265              :  *
     266              :  */
     267         7166 : void MemoryPool::deallocate() {
     268         7166 :   if (mem_pool != nullptr) {
     269              : #ifdef ENABLE_OPENCL
     270              :     if (svm_allocation) {
     271              :       auto *cl_context =
     272              :         static_cast<ClContext *>(Engine::Global().getRegisteredContext("gpu"));
     273              :       cl_context->context_inst_.releaseSVMRegion(mem_pool);
     274              :     } else {
     275              :       free(mem_pool);
     276              :     }
     277              : #else
     278         1308 :     free(mem_pool);
     279              : #endif
     280              :     memory_size.clear();
     281              :     memory_validity.clear();
     282              :     memory_exec_order.clear();
     283              :     memory_is_wgrad.clear();
     284              : 
     285              : #ifdef PROFILE
     286              :     PROFILE_MEM_DEALLOC(mem_pool);
     287              : #endif
     288              : 
     289              :     memory_ptrs.clear();
     290              :   }
     291         7166 :   mem_pool = nullptr;
     292         7166 : }
     293              : 
     294              : /**
     295              :  * @brief Get the maximum real memory requirement
     296              :  *
     297              :  */
     298            0 : size_t MemoryPool::size() { return pool_size; }
     299              : 
     300              : /**
     301              :  * @brief Get the minimum theoretical memory requirement
     302              :  *
     303              :  */
     304         1317 : size_t MemoryPool::minMemoryRequirement() {
     305         1317 :   if (memory_size.size() && min_pool_size == 0)
     306            0 :     min_pool_size = calcMinMemoryRequirement();
     307              : 
     308         1317 :   return min_pool_size;
     309              : }
     310              : 
     311              : /**
     312              :  * @brief Validate the provided layout so that no two memories to be used at
     313              :  * overlap interval has overlapping memories
     314              :  */
     315         1309 : bool MemoryPool::validateLayout() {
     316         1309 :   if (memory_offset.size() != memory_size.size())
     317              :     return false;
     318              : 
     319         1309 :   if (memory_size.empty())
     320            0 :     return pool_size == 0;
     321              : 
     322         1309 :   return validateOverflow() && validateOverlap();
     323              : }
     324              : 
     325              : /**
     326              :  * @brief Validate the provided layout does not overflow outside the given
     327              :  * size of the memory pool
     328              :  */
     329         1309 : bool MemoryPool::validateOverflow() {
     330        20403 :   for (unsigned int idx = 0; idx < memory_size.size(); idx++)
     331        19094 :     if (memory_offset[idx] + memory_size[idx] > pool_size)
     332              :       return false;
     333              : 
     334              :   return true;
     335              : }
     336              : 
     337              : /**
     338              :  * @brief check if the two given intervals overlap
     339              :  *
     340              :  * @param s1 start of interval 1
     341              :  * @param e1 end of interval 1
     342              :  * @param s2 start of interval 2
     343              :  * @param e2 end of interval 2
     344              :  *
     345              :  * @return true if overlap else false
     346              :  *
     347              :  * @note overlap check assumes are start is inclusive and end is exclusive
     348              :  */
     349              : template <typename T> static bool overlap(T s1, T e1, T s2, T e2) {
     350              : #if DEBUG
     351              :   if (e1 <= s1 || e2 <= s2)
     352              :     throw std::invalid_argument("Invalid range for intervals in MemoryPool");
     353              : #endif
     354              : 
     355        17820 :   return !(e1 <= s2 || e2 <= s1);
     356              : }
     357              : 
     358              : /**
     359              :  * @brief Validate the provided layout so that no two memories to be used at
     360              :  * overlap interval has overlapping memories
     361              :  */
     362         1309 : bool MemoryPool::validateOverlap() {
     363              :   /** get sorted permutation */
     364         1309 :   std::vector<unsigned int> perm = getSortedPermutation();
     365              : 
     366              :   /** iterate over sorted array view and check overlap of memories */
     367              :   size_t len = perm.size();
     368        20403 :   for (unsigned int i = 0; i < len; i++) {
     369        19094 :     unsigned int idx = perm[i];
     370        19094 :     size_t mem_start = memory_offset[idx], mem_size = memory_size[idx];
     371        19094 :     unsigned int valid_start = memory_validity[idx].first,
     372        19094 :                  valid_end = memory_validity[idx].second;
     373        19112 :     for (unsigned int match = idx + 1; match < len; match++) {
     374        17802 :       if (overlap(mem_start, mem_start + mem_size, memory_offset[match],
     375              :                   memory_offset[match] + memory_size[match])) {
     376              :         /**
     377              :          * if the memories given to two requests overlap, then their valid
     378              :          * range should not overlap
     379              :          */
     380           18 :         if (overlap(valid_start, valid_end, memory_validity[match].first,
     381              :                     memory_validity[match].second))
     382              :           return false;
     383              :       } else {
     384              :         /**
     385              :          * as the list memories are sorted by offset, we can safely assume that
     386              :          * memory allocations after idx will not overlap as well
     387              :          */
     388              :         break;
     389              :       }
     390              :     }
     391              :   }
     392              : 
     393              :   return true;
     394         1309 : }
     395              : 
     396              : /**
     397              :  * @brief Get sorted permutation for the memory requests
     398              :  *
     399              :  * @details Performs sorting based on the memory overlap using memory offset
     400              :  * as the start and the memory offset + memory size as the end of the interval.
     401              :  */
     402         1309 : std::vector<unsigned int> MemoryPool::getSortedPermutation() {
     403         1309 :   std::vector<unsigned int> perm(memory_size.size());
     404              :   std::iota(perm.begin(), perm.end(), 0);
     405              :   /** sorted by memory_offset first and then memory_offset + memory_size next */
     406         1309 :   std::sort(perm.begin(), perm.end(), [&](auto const &idx1, auto const &idx2) {
     407        77427 :     if (memory_offset[idx1] == memory_offset[idx2])
     408          102 :       return memory_size[idx1] < memory_size[idx2];
     409              : 
     410        77325 :     return memory_offset[idx1] < memory_offset[idx2];
     411              :   });
     412              : 
     413         1309 :   return perm;
     414              : }
     415              : 
     416              : /**
     417              :  * @brief Calculate the minimum memory requirement for the given memory requests
     418              :  *
     419              :  * @note This will be theoretical minimum memory requirement ensuring that the
     420              :  * memory usages at the same time do not overlap with their validity. This does
     421              :  * not consider about the fragmentation which comes from the actual memory
     422              :  * layout.
     423              :  */
     424         1309 : size_t MemoryPool::calcMinMemoryRequirement() {
     425              :   auto max_interval =
     426              :     *std::max_element(memory_validity.begin(), memory_validity.end(),
     427              :                       [](auto const &val1, auto const &val2) {
     428        17785 :                         return val1.second < val2.second;
     429         1309 :                       });
     430         1309 :   unsigned int last_interval = max_interval.second;
     431              :   /**
     432              :    * as weights stay valid for max duration, ignore this value and get the real
     433              :    * max value
     434              :    */
     435         1309 :   if (last_interval == (std::numeric_limits<unsigned int>::max)()) {
     436              :     max_interval = *std::max_element(
     437              :       memory_validity.begin(), memory_validity.end(),
     438              :       [last_interval](auto const &val1, auto const &val2) {
     439            0 :         return ((val2.second != last_interval) && (val1.second < val2.second));
     440              :       });
     441            0 :     last_interval = max_interval.second;
     442              :     /**
     443              :      * if the second largest number is also numeric_limit, implies that all the
     444              :      * elements are max values. In this case, last_interval is set to 1
     445              :      */
     446            0 :     if (last_interval == (std::numeric_limits<unsigned int>::max)())
     447            0 :       last_interval = 1;
     448              :   }
     449              : 
     450         1309 :   std::vector<size_t> interval_req(last_interval + 1, 0);
     451              :   /**
     452              :    * @note This method fills requirement for each value in the interval. This is
     453              :    * efficient for the current use case as there is going to be atleast 1 new
     454              :    * memory request for each interval because each interval is mapped to a node
     455              :    * in the graph.
     456              :    */
     457        20403 :   for (unsigned int idx = 0; idx < memory_size.size(); idx++) {
     458        19094 :     for (unsigned int interval = memory_validity[idx].first;
     459       479365 :          interval < std::min(memory_validity[idx].second, last_interval);
     460              :          interval++) {
     461       460271 :       interval_req[interval] += memory_size[idx];
     462              :     }
     463              :   }
     464              : 
     465         2618 :   return *std::max_element(interval_req.begin(), interval_req.end());
     466         1309 : }
     467              : 
     468              : /**
     469              :  * @brief Clear the memory pool
     470              :  *
     471              :  */
     472         1316 : void MemoryPool::clear() {
     473         1316 :   if (mem_pool != nullptr)
     474            0 :     throw std::invalid_argument("Cannot clear allocated memory pool");
     475              : 
     476              :   memory_size.clear();
     477              :   memory_validity.clear();
     478              :   memory_offset.clear();
     479              :   file_offset.clear();
     480              :   memory_is_wgrad.clear();
     481              : 
     482         1316 :   pool_size = 0;
     483         1316 :   min_pool_size = 0;
     484         1316 :   n_wgrad = 0;
     485         1316 : }
     486              : 
     487              : /**
     488              :  * @brief Is the memory pool allocated
     489              :  *
     490              :  * @return true if the memory is allocated, else false
     491              :  */
     492         2418 : bool MemoryPool::isAllocated() const { return mem_pool != nullptr; }
     493              : 
     494              : } // namespace nntrainer

Generated by: LCOV version 2.0-1