Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
4 : *
5 : * @file memory_pool.cpp
6 : * @date 11 August 2021
7 : * @see https://github.com/nnstreamer/nntrainer
8 : * @author Parichay Kapoor <pk.kapoor@samsung.com>
9 : * @bug No known bugs except for NYI items
10 : * @brief This is Memory Pool Class
11 : */
12 :
13 : #include <cstdlib>
14 : #include <limits>
15 :
16 : #include <numeric>
17 : #include <vector>
18 :
19 : #include <map>
20 :
21 : #include <memory_pool.h>
22 : #include <nntrainer_error.h>
23 : #include <nntrainer_log.h>
24 : #include <numeric>
25 : #include <profiler.h>
26 : #include <vector>
27 :
28 : #if defined(_WIN32)
29 : #define GET_SYSTEM_ALIGMENT() \
30 : ([]() -> size_t { \
31 : SYSTEM_INFO sysInfo; \
32 : GetSystemInfo(&sysInfo); \
33 : return sysInfo.dwPageSize; \
34 : })()
35 :
36 : #define ALIGNED_ALLOC(size) _aligned_malloc(size, GET_SYSTEM_ALIGMENT())
37 : #define ALIGNED_FREE(ptr) _aligned_free(ptr)
38 : #elif defined(__ANDROID__) && ENABLE_NPU
39 : #define RPCMEM_HEAP_ID_SYSTEM 25
40 : #define RPCMEM_DEFAULT_FLAGS 1
41 : #define ALIGNED_ALLOC(size) \
42 : rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, size)
43 : #define ALIGNED_FREE(ptr) rpcmem_free(ptr)
44 : #else
45 : #define GET_SYSTEM_ALIGMENT() \
46 : ([]() -> size_t { return sysconf(_SC_PAGE_SIZE); })()
47 : #define ALIGNED_ALLOC(size) std::aligned_alloc(GET_SYSTEM_ALIGMENT(), size)
48 : #define ALIGNED_FREE(ptr) free(ptr)
49 : #endif
50 :
51 : namespace nntrainer {
52 :
53 : /**
54 : * @brief Request Memory from memory pool
55 : * @note start_time is inclusive, but end_time is exclusive
56 : */
57 19094 : unsigned int MemoryPool::requestMemory(size_t bytes, unsigned int start_time,
58 : unsigned int end_time,
59 : std::vector<unsigned int> exec_order,
60 : TensorLifespan lifespan, bool is_wgrad) {
61 19094 : if (bytes == 0)
62 0 : throw std::invalid_argument("Requesting memory of 0 size");
63 :
64 19094 : if (mem_pool != nullptr)
65 : throw std::invalid_argument(
66 0 : "Deallocate memory pool before requesting more memory");
67 :
68 19094 : if (end_time <= start_time)
69 : throw std::invalid_argument(
70 0 : "Invalid validity range for the requested memory");
71 :
72 19094 : memory_size.push_back(bytes);
73 19094 : memory_validity.push_back({start_time, end_time});
74 19094 : memory_exec_order.push_back(exec_order);
75 19094 : memory_is_wgrad.push_back(is_wgrad);
76 19094 : if (is_wgrad)
77 1458 : n_wgrad++;
78 :
79 : /** invalidate min_pool_size if already there */
80 19094 : min_pool_size = 0;
81 :
82 19094 : return memory_size.size();
83 : }
84 :
85 : /**
86 : * @brief Planner the layout with memory planner
87 : *
88 : * @details The efficiency of the planner is calculated as the ratio of the
89 : * theoretical minimum memory requirement divided by the memory requirement
90 : * given by the memory planner.
91 : *
92 : * @details planLayout can be called multiple times as this does not perform
93 : * any allocation but rather just plans the layout and stores the layout.
94 : * Subsequent call to this function will overwrite any existing layout.
95 : */
96 1309 : double MemoryPool::planLayout(const MemoryPlanner &planner) {
97 1309 : if (mem_pool != nullptr)
98 : /** mem_pool must be deallocated when planLayout is being called */
99 0 : throw std::runtime_error("Planning memory layout after allocation");
100 :
101 1309 : if (memory_size.empty())
102 0 : throw std::runtime_error("Planning memory layout for empty pool");
103 :
104 : /** calculate min_pool_size if not already calculated */
105 1309 : if (min_pool_size == 0)
106 1309 : min_pool_size = calcMinMemoryRequirement();
107 :
108 2618 : pool_size = planner.planLayout(memory_size, memory_validity, memory_offset,
109 1309 : memory_is_wgrad, n_wgrad);
110 1309 : if (pool_size < min_pool_size || !validateLayout())
111 0 : throw std::runtime_error("Planned layout is not feasible");
112 :
113 1309 : return double(min_pool_size) / double(pool_size);
114 : }
115 :
116 : /**
117 : * @brief Do the allocation of memory
118 : *
119 : */
120 1308 : void MemoryPool::allocate() {
121 1308 : if (pool_size == 0)
122 0 : throw std::runtime_error("Allocating memory pool with size 0");
123 :
124 1308 : if (mem_pool != nullptr)
125 0 : throw std::runtime_error("Memory pool is already allocated");
126 :
127 : #if defined(__ANDROID__) && ENABLE_NPU
128 : int i = 0;
129 : #define RPCMEM_HEAP_ID_SYSTEM 25
130 : #define RPCMEM_DEFAULT_FLAGS 1
131 : std::map<size_t, void *> offset_ptr; // offset : ptr
132 : std::map<size_t, size_t> allocated_size; // offset : memory size
133 : std::map<size_t, std::vector<int>>
134 : offset_indices; // offset : list of index which has same offset
135 :
136 : for (auto &s : memory_offset) {
137 : size_t current_size = memory_size.at(i);
138 : auto it = offset_ptr.find(s);
139 : if (it == offset_ptr.end()) {
140 : void *ptr =
141 : rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, current_size);
142 : memory_ptrs.push_back(ptr);
143 : offset_ptr[s] = ptr;
144 : allocated_size[s] = current_size;
145 : offset_indices[s].push_back(i);
146 : } else {
147 : void *existing_ptr = it->second;
148 : size_t max_size = allocated_size[s];
149 : if (max_size < current_size) {
150 : void *new_ptr = rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM,
151 : RPCMEM_DEFAULT_FLAGS, current_size);
152 :
153 : for (int idx : offset_indices[s]) {
154 : memory_ptrs[idx] = new_ptr;
155 : }
156 : rpcmem_free(existing_ptr);
157 : offset_ptr[s] = new_ptr;
158 : allocated_size[s] = current_size;
159 : }
160 : memory_ptrs.push_back(offset_ptr[s]);
161 : offset_indices[s].push_back(i);
162 : }
163 : i++;
164 : }
165 :
166 : mem_pool = calloc(1, 1);
167 :
168 : #else
169 :
170 : #ifdef ENABLE_OPENCL
171 : auto *cl_context =
172 : static_cast<ClContext *>(Engine::Global().getRegisteredContext("gpu"));
173 : mem_pool = cl_context->context_inst_.createSVMRegion(pool_size);
174 :
175 : // If SVM allocation fails, use calloc()
176 : if (mem_pool != nullptr) {
177 : svm_allocation = true;
178 : }
179 : #endif
180 :
181 : if (mem_pool == nullptr)
182 1308 : mem_pool = calloc(pool_size, 1);
183 :
184 : unsigned int idx = 1;
185 20400 : for (auto &s : memory_offset) {
186 19092 : char *ptr = static_cast<char *>(mem_pool) + memory_offset.at(idx - 1);
187 19092 : memory_ptrs.push_back(ptr);
188 19092 : idx++;
189 : }
190 : #endif
191 :
192 : #ifdef PROFILE
193 : static long long seq = 0;
194 :
195 : std::string msg("MemoryPool #");
196 : msg.append(std::to_string(seq++));
197 : PROFILE_MEM_ALLOC(mem_pool, pool_size, msg);
198 : #endif
199 1308 : }
200 :
201 0 : void MemoryPool::allocateFSU() {
202 0 : if (pool_size == 0)
203 0 : throw std::runtime_error("Allocating memory pool with size 0");
204 :
205 0 : if (mem_pool != nullptr)
206 0 : throw std::runtime_error("Memory pool is already allocated");
207 :
208 0 : int i = 0;
209 : std::map<size_t, void *> offset_ptr; // offset : ptr
210 : std::map<size_t, size_t> allocated_size; // offset : memory size
211 : std::map<size_t, std::vector<int>>
212 : offset_indices; // offset : list of index which has same offset
213 :
214 0 : for (auto &s : memory_offset) {
215 0 : size_t current_size = memory_size.at(i);
216 : auto it = offset_ptr.find(s);
217 0 : if (it == offset_ptr.end()) {
218 0 : void *ptr = ALIGNED_ALLOC(current_size);
219 0 : memory_ptrs.push_back(ptr);
220 0 : offset_ptr[s] = ptr;
221 0 : allocated_size[s] = current_size;
222 0 : offset_indices[s].push_back(i);
223 :
224 : } else {
225 0 : void *existing_ptr = it->second;
226 0 : size_t max_size = allocated_size[s];
227 0 : if (max_size < current_size) {
228 0 : void *new_ptr = ALIGNED_ALLOC(current_size);
229 :
230 0 : for (int idx : offset_indices[s]) {
231 0 : memory_ptrs[idx] = new_ptr;
232 : }
233 0 : ALIGNED_FREE(existing_ptr);
234 0 : offset_ptr[s] = new_ptr;
235 0 : allocated_size[s] = current_size;
236 : }
237 0 : memory_ptrs.push_back(offset_ptr[s]);
238 0 : offset_indices[s].push_back(i);
239 : }
240 0 : i++;
241 : }
242 :
243 0 : mem_pool = calloc(1, 1);
244 :
245 0 : if (mem_pool == nullptr)
246 : throw std::runtime_error(
247 0 : "Failed to allocate memory: " + std::to_string(pool_size) + "bytes");
248 0 : }
249 :
250 : /**
251 : * @brief Get the allocated memory
252 : *
253 : */
254 19092 : std::shared_ptr<MemoryData> MemoryPool::getMemory(unsigned int idx) {
255 19092 : if (mem_pool == nullptr)
256 0 : throw std::invalid_argument("Getting memory before allocation");
257 :
258 19092 : auto mem_data = std::make_shared<MemoryData>((void *)memory_ptrs.at(idx - 1));
259 19092 : mem_data->setSVM(svm_allocation);
260 19092 : return mem_data;
261 : }
262 :
263 : /**
264 : * @brief Free all the allocated memory
265 : *
266 : */
267 7166 : void MemoryPool::deallocate() {
268 7166 : if (mem_pool != nullptr) {
269 : #ifdef ENABLE_OPENCL
270 : if (svm_allocation) {
271 : auto *cl_context =
272 : static_cast<ClContext *>(Engine::Global().getRegisteredContext("gpu"));
273 : cl_context->context_inst_.releaseSVMRegion(mem_pool);
274 : } else {
275 : free(mem_pool);
276 : }
277 : #else
278 1308 : free(mem_pool);
279 : #endif
280 : memory_size.clear();
281 : memory_validity.clear();
282 : memory_exec_order.clear();
283 : memory_is_wgrad.clear();
284 :
285 : #ifdef PROFILE
286 : PROFILE_MEM_DEALLOC(mem_pool);
287 : #endif
288 :
289 : memory_ptrs.clear();
290 : }
291 7166 : mem_pool = nullptr;
292 7166 : }
293 :
294 : /**
295 : * @brief Get the maximum real memory requirement
296 : *
297 : */
298 0 : size_t MemoryPool::size() { return pool_size; }
299 :
300 : /**
301 : * @brief Get the minimum theoretical memory requirement
302 : *
303 : */
304 1317 : size_t MemoryPool::minMemoryRequirement() {
305 1317 : if (memory_size.size() && min_pool_size == 0)
306 0 : min_pool_size = calcMinMemoryRequirement();
307 :
308 1317 : return min_pool_size;
309 : }
310 :
311 : /**
312 : * @brief Validate the provided layout so that no two memories to be used at
313 : * overlap interval has overlapping memories
314 : */
315 1309 : bool MemoryPool::validateLayout() {
316 1309 : if (memory_offset.size() != memory_size.size())
317 : return false;
318 :
319 1309 : if (memory_size.empty())
320 0 : return pool_size == 0;
321 :
322 1309 : return validateOverflow() && validateOverlap();
323 : }
324 :
325 : /**
326 : * @brief Validate the provided layout does not overflow outside the given
327 : * size of the memory pool
328 : */
329 1309 : bool MemoryPool::validateOverflow() {
330 20403 : for (unsigned int idx = 0; idx < memory_size.size(); idx++)
331 19094 : if (memory_offset[idx] + memory_size[idx] > pool_size)
332 : return false;
333 :
334 : return true;
335 : }
336 :
337 : /**
338 : * @brief check if the two given intervals overlap
339 : *
340 : * @param s1 start of interval 1
341 : * @param e1 end of interval 1
342 : * @param s2 start of interval 2
343 : * @param e2 end of interval 2
344 : *
345 : * @return true if overlap else false
346 : *
347 : * @note overlap check assumes are start is inclusive and end is exclusive
348 : */
349 : template <typename T> static bool overlap(T s1, T e1, T s2, T e2) {
350 : #if DEBUG
351 : if (e1 <= s1 || e2 <= s2)
352 : throw std::invalid_argument("Invalid range for intervals in MemoryPool");
353 : #endif
354 :
355 17820 : return !(e1 <= s2 || e2 <= s1);
356 : }
357 :
358 : /**
359 : * @brief Validate the provided layout so that no two memories to be used at
360 : * overlap interval has overlapping memories
361 : */
362 1309 : bool MemoryPool::validateOverlap() {
363 : /** get sorted permutation */
364 1309 : std::vector<unsigned int> perm = getSortedPermutation();
365 :
366 : /** iterate over sorted array view and check overlap of memories */
367 : size_t len = perm.size();
368 20403 : for (unsigned int i = 0; i < len; i++) {
369 19094 : unsigned int idx = perm[i];
370 19094 : size_t mem_start = memory_offset[idx], mem_size = memory_size[idx];
371 19094 : unsigned int valid_start = memory_validity[idx].first,
372 19094 : valid_end = memory_validity[idx].second;
373 19112 : for (unsigned int match = idx + 1; match < len; match++) {
374 17802 : if (overlap(mem_start, mem_start + mem_size, memory_offset[match],
375 : memory_offset[match] + memory_size[match])) {
376 : /**
377 : * if the memories given to two requests overlap, then their valid
378 : * range should not overlap
379 : */
380 18 : if (overlap(valid_start, valid_end, memory_validity[match].first,
381 : memory_validity[match].second))
382 : return false;
383 : } else {
384 : /**
385 : * as the list memories are sorted by offset, we can safely assume that
386 : * memory allocations after idx will not overlap as well
387 : */
388 : break;
389 : }
390 : }
391 : }
392 :
393 : return true;
394 1309 : }
395 :
396 : /**
397 : * @brief Get sorted permutation for the memory requests
398 : *
399 : * @details Performs sorting based on the memory overlap using memory offset
400 : * as the start and the memory offset + memory size as the end of the interval.
401 : */
402 1309 : std::vector<unsigned int> MemoryPool::getSortedPermutation() {
403 1309 : std::vector<unsigned int> perm(memory_size.size());
404 : std::iota(perm.begin(), perm.end(), 0);
405 : /** sorted by memory_offset first and then memory_offset + memory_size next */
406 1309 : std::sort(perm.begin(), perm.end(), [&](auto const &idx1, auto const &idx2) {
407 77427 : if (memory_offset[idx1] == memory_offset[idx2])
408 102 : return memory_size[idx1] < memory_size[idx2];
409 :
410 77325 : return memory_offset[idx1] < memory_offset[idx2];
411 : });
412 :
413 1309 : return perm;
414 : }
415 :
416 : /**
417 : * @brief Calculate the minimum memory requirement for the given memory requests
418 : *
419 : * @note This will be theoretical minimum memory requirement ensuring that the
420 : * memory usages at the same time do not overlap with their validity. This does
421 : * not consider about the fragmentation which comes from the actual memory
422 : * layout.
423 : */
424 1309 : size_t MemoryPool::calcMinMemoryRequirement() {
425 : auto max_interval =
426 : *std::max_element(memory_validity.begin(), memory_validity.end(),
427 : [](auto const &val1, auto const &val2) {
428 17785 : return val1.second < val2.second;
429 1309 : });
430 1309 : unsigned int last_interval = max_interval.second;
431 : /**
432 : * as weights stay valid for max duration, ignore this value and get the real
433 : * max value
434 : */
435 1309 : if (last_interval == (std::numeric_limits<unsigned int>::max)()) {
436 : max_interval = *std::max_element(
437 : memory_validity.begin(), memory_validity.end(),
438 : [last_interval](auto const &val1, auto const &val2) {
439 0 : return ((val2.second != last_interval) && (val1.second < val2.second));
440 : });
441 0 : last_interval = max_interval.second;
442 : /**
443 : * if the second largest number is also numeric_limit, implies that all the
444 : * elements are max values. In this case, last_interval is set to 1
445 : */
446 0 : if (last_interval == (std::numeric_limits<unsigned int>::max)())
447 0 : last_interval = 1;
448 : }
449 :
450 1309 : std::vector<size_t> interval_req(last_interval + 1, 0);
451 : /**
452 : * @note This method fills requirement for each value in the interval. This is
453 : * efficient for the current use case as there is going to be atleast 1 new
454 : * memory request for each interval because each interval is mapped to a node
455 : * in the graph.
456 : */
457 20403 : for (unsigned int idx = 0; idx < memory_size.size(); idx++) {
458 19094 : for (unsigned int interval = memory_validity[idx].first;
459 479365 : interval < std::min(memory_validity[idx].second, last_interval);
460 : interval++) {
461 460271 : interval_req[interval] += memory_size[idx];
462 : }
463 : }
464 :
465 2618 : return *std::max_element(interval_req.begin(), interval_req.end());
466 1309 : }
467 :
468 : /**
469 : * @brief Clear the memory pool
470 : *
471 : */
472 1316 : void MemoryPool::clear() {
473 1316 : if (mem_pool != nullptr)
474 0 : throw std::invalid_argument("Cannot clear allocated memory pool");
475 :
476 : memory_size.clear();
477 : memory_validity.clear();
478 : memory_offset.clear();
479 : file_offset.clear();
480 : memory_is_wgrad.clear();
481 :
482 1316 : pool_size = 0;
483 1316 : min_pool_size = 0;
484 1316 : n_wgrad = 0;
485 1316 : }
486 :
487 : /**
488 : * @brief Is the memory pool allocated
489 : *
490 : * @return true if the memory is allocated, else false
491 : */
492 2418 : bool MemoryPool::isAllocated() const { return mem_pool != nullptr; }
493 :
494 : } // namespace nntrainer
|