Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
4 : *
5 : * @file memory_pool.h
6 : * @date 10 August 2021
7 : * @see https://github.com/nnstreamer/nntrainer
8 : * @author Parichay Kapoor <pk.kapoor@samsung.com>
9 : * @bug No known bugs except for NYI items
10 : * @brief This is Memory Pool Class
11 : *
12 : * @todo Support an external allocator for different backends and alignment
13 : * @todo Support releaseMemory(token) - this need not release actual memory
14 : * until deallocate
15 : * @todo Support maximum memory size for the memory pool as an argument
16 : * @todo support late memory request without optimization
17 : */
18 :
19 : #ifndef __MEMORY_POOL_H__
20 : #define __MEMORY_POOL_H__
21 :
22 : #include <functional>
23 : #include <memory>
24 : #include <vector>
25 :
26 : #include <engine.h>
27 : #include <memory_data.h>
28 : #include <memory_planner.h>
29 : #include <tensor_wrap_specs.h>
30 :
31 : #ifdef ENABLE_OPENCL
32 : #include <cl_context.h>
33 : #endif
34 :
35 : #include <cstdlib>
36 : #include <dynamic_library_loader.h>
37 : #include <functional>
38 : #include <memory>
39 : #include <vector>
40 : #if defined(_WIN32)
41 : #ifndef NOMINMAX
42 : #ifdef max
43 : #undef max
44 : #undef min
45 : #endif
46 : #define NOMINMAX
47 : #endif
48 : #define O_SYNC 0UL
49 : #include <io.h>
50 : #include <sysinfoapi.h>
51 : #include <windows.h>
52 : #else
53 : #include <sys/mman.h>
54 : #include <unistd.h>
55 : #endif
56 :
57 : #include <dynamic_library_loader.h>
58 : #include <engine.h>
59 : #include <iostream>
60 : #include <mem_allocator.h>
61 : #include <set>
62 :
63 : static const std::string func_tag = "[MemoryPool] "; /**< prefix of the error message streamed by the MemoryPool constructor */
64 : typedef void *(*RpcMemAllocFn_t)(int, uint32_t, int); /**< type of the rpcmem_alloc symbol loaded from libcdsprpc.so */
65 : typedef void (*RpcMemFreeFn_t)(void *); /**< type of the rpcmem_free symbol loaded from libcdsprpc.so */
66 :
67 : enum { /* mode flags passed to DynamicLibraryLoader::loadLibrary */
68 : DL_NOW = 0x0001,
69 : DL_LOCAL = 0x0002,
70 : DL_GLOBAL = 0x0004, /* not referenced anywhere in this header */
71 : };
72 :
73 : namespace nntrainer {
74 :
75 : /**
76 : * @class MemoryPool
77 : * @brief Memory Pool provides a common pool for all the tensor memory
78 : */
79 : class MemoryPool {
80 : public:
81 : /**
82 : * @brief MemoryPool default constructor
83 : * @details Android NPU builds resolve rpcmem_alloc/rpcmem_free from libcdsprpc.so; all other builds take the allocators from Engine::Global().
84 : */
85 3135 : explicit MemoryPool() :
86 3135 : mem_pool(nullptr),
87 3135 : pool_size(0),
88 3135 : min_pool_size(0),
89 3135 : n_wgrad(0),
90 3135 : svm_allocation(false) {
91 :
92 : #if defined(__ANDROID__) && ENABLE_NPU
93 : void *handle =
94 : DynamicLibraryLoader::loadLibrary("libcdsprpc.so", DL_NOW | DL_LOCAL);
95 : const char *error_msg = DynamicLibraryLoader::getLastError(); // NOTE(review): error_msg is never read in this constructor
96 : // NOTE(review): handle is passed to loadSymbol below without a nullptr check
97 : rpcmem_alloc =
98 : (RpcMemAllocFn_t)DynamicLibraryLoader::loadSymbol(handle, "rpcmem_alloc");
99 : rpcmem_free =
100 : (RpcMemFreeFn_t)DynamicLibraryLoader::loadSymbol(handle, "rpcmem_free");
101 :
102 : auto close_dl = [handle] { DynamicLibraryLoader::freeLibrary(handle); }; // cleanup callback handed to NNTR_THROW_IF_CLEANUP
103 :
104 : if (rpcmem_alloc == nullptr || rpcmem_free == nullptr) { // NOTE(review): the macro below repeats this exact condition
105 : NNTR_THROW_IF_CLEANUP(rpcmem_alloc == nullptr || rpcmem_free == nullptr,
106 : std::invalid_argument, close_dl)
107 : << func_tag << "open rpc mem failed";
108 : }
109 : #else
110 3135 : allocators = Engine::Global().getAllocators();
111 : #endif
112 3135 : }
113 :
114 : /**
115 : * @brief MemoryPool destructor
116 : * @details Calls deallocate(); because the call is made from the destructor, MemoryPool::deallocate() is the version that runs even if a derived class overrides it.
117 : */
118 8016 : virtual ~MemoryPool() { deallocate(); }
119 :
120 : /**
121 : * @brief Request Memory from memory pool
122 : *
123 : * @param bytes The size of the memory requested in bytes
124 : * @param start_time The start of the validity interval of this memory
125 : * @param end_time The end of the validity interval of this memory
126 : * @param exec_order execution orders of this memory (defaults to empty)
127 : * @param lifespan lifespan of memory (defaults to TensorLifespan::MAX_LIFESPAN)
128 : * @param is_wgrad true if the tensor is a weight gradient (defaults to false)
129 : *
130 : * @return The token to get the pointer for this memory after allocation
131 : * @note start_time is inclusive, but end_time is exclusive
132 : * @note The value of the return token starts from 1.
133 : */
134 : virtual unsigned int requestMemory(
135 : size_t bytes, unsigned int start_time, unsigned int end_time,
136 : std::vector<unsigned int> exec_order = std::vector<unsigned int>(),
137 : TensorLifespan lifespan = TensorLifespan::MAX_LIFESPAN,
138 : bool is_wgrad = false);
139 :
140 : /**
141 : * @brief Plan the layout with memory planner
142 : *
143 : * @param planner The memory planner to be used for finalizing the layout
144 : *
145 : * @return The efficiency of the memory layout with the given memory planner
146 : *
147 : * @details The efficiency of the planner is calculated as the ratio of the
148 : * theoretical minimum memory requirement divided by the memory requirement
149 : * given by the memory planner.
150 : *
151 : * @details planLayout can be called multiple times as this does not perform
152 : * any allocation but rather just plans the layout and stores the layout.
153 : * Subsequent call to this function will overwrite any existing layout.
154 : */
155 : double planLayout(const MemoryPlanner &planner);
156 :
157 : /**
158 : * @brief Do the allocation of memory
159 : *
160 : */
161 : virtual void allocate();
162 :
163 : /**
164 : * @brief Do the allocation of memory for FSU
165 : *
166 : */
167 : virtual void allocateFSU();
168 :
169 : /**
170 : * @brief Get the allocated memory
171 : *
172 : * @param idx The token received from requestMemory
173 : *
174 : * @return The pointer of the memory
175 : *
176 : * @details This function will throw if called before allocation.
177 : */
178 : virtual std::shared_ptr<MemoryData> getMemory(unsigned int idx);
179 :
180 : /**
181 : * @brief Free all the allocated memory
182 : *
183 : */
184 : virtual void deallocate();
185 :
186 : /**
187 : * @brief Get the maximum real memory requirement
188 : *
189 : * @return The real memory requirement with this strategy in bytes
190 : */
191 : size_t size();
192 :
193 : /**
194 : * @brief Get the minimum theoretical memory requirement
195 : *
196 : * @return The theoretical memory requirement with this strategy in bytes
197 : */
198 : size_t minMemoryRequirement();
199 :
200 : /**
201 : * @brief Clear the memory pool
202 : *
203 : */
204 : virtual void clear();
205 :
206 : /**
207 : * @brief Is the memory pool allocated
208 : *
209 : * @return true if the memory is allocated, else false
210 : */
211 : virtual bool isAllocated() const;
212 :
213 : /**
214 : * @brief Get memory ptrs vector from memory pool class.
215 : *
216 : * @return A copy of the memory ptrs vector
217 : */
218 0 : std::vector<void *> getMemoryPtrs() { return memory_ptrs; }
219 :
220 : /**
221 : * @brief Get the memory pool address.
222 : *
223 : * @return MemoryPool address.
224 : */
225 : void *getMemoryPoolAddress() { return mem_pool; }
226 :
227 : /**
228 : * @brief set FSU weight path (this base implementation does nothing)
229 : *
230 : * @param path FSU weight file path
231 : */
232 0 : virtual void setFsuWeightPath(std::string path){};
233 :
234 : /**
235 : * @brief set weight file offset for FSU loading (this base implementation does nothing)
236 : *
237 : * @param offsets weight file offset
238 : */
239 : virtual void
240 0 : setWeightOffset(std::vector<std::pair<size_t, size_t>> offsets){};
241 :
242 : protected:
243 : /**
244 : * @brief Get memory offset (mutable reference to the internal vector)
245 : */
246 : std::vector<size_t> &getMemoryOffset() { return memory_offset; }
247 :
248 : protected: // NOTE(review): repeats the protected: label a few lines above
249 : /**
250 : * @brief Get file offset (mutable reference to the internal vector)
251 : */
252 : std::vector<size_t> &getFileOffset() { return file_offset; }
253 :
254 : /**
255 : * @brief Get memory size (mutable reference to the internal vector)
256 : */
257 : std::vector<size_t> &getMemorySize() { return memory_size; }
258 :
259 : /**
260 : * @brief Get memory execution order (mutable reference to the internal vector)
261 : */
262 : std::vector<std::vector<unsigned int>> &getMemoryExecOrder() {
263 0 : return memory_exec_order;
264 : }
265 :
266 : private:
267 : /**
268 : * @brief Validate the provided layout
269 : */
270 : bool validateLayout();
271 :
272 : /**
273 : * @brief Validate the provided layout does not overflow outside the given
274 : * size of the memory pool
275 : */
276 : bool validateOverflow();
277 :
278 : /**
279 : * @brief Validate the provided layout so that no two memories used in
280 : * overlapping intervals occupy overlapping memory
281 : */
282 : bool validateOverlap();
283 :
284 : /**
285 : * @brief Calculate the minimum memory requirement for the given memory
286 : * requests
287 : *
288 : * @return the minimum memory requirement in bytes
289 : *
290 : * @note This will be theoretical minimum memory requirement ensuring that the
291 : * memory usages at the same time do not overlap with their validity. This
292 : * does not account for the fragmentation which comes from the actual
293 : * memory layout.
294 : */
295 : size_t calcMinMemoryRequirement();
296 :
297 : /**
298 : * @brief Get sorted permutation for the memory requests
299 : *
300 : * @return sorted permutation
301 : *
302 : * @details Performs sorting based on the memory overlap using memory offset
303 : * as the start and the memory offset + memory size as the end of the
304 : * interval.
305 : */
306 : std::vector<unsigned int> getSortedPermutation();
307 :
308 : std::vector<size_t> memory_size; /**< various sizes memory requested */
309 : std::vector<void *> memory_ptrs; /**< various pointers memory requested */
310 :
311 : std::vector<std::pair<unsigned int, unsigned int>>
312 : memory_validity; /**< validity intervals for each requested memory */
313 : std::vector<size_t> memory_offset; /**< offsets for the memory requested */
314 : std::vector<size_t> file_offset; /**< offsets for the bin file */
315 : std::vector<std::vector<unsigned int>>
316 : memory_exec_order; /**< execution order for the requested memory */
317 :
318 : std::vector<bool>
319 : memory_is_wgrad; /**< index for identification of weight gradient */
320 :
321 : void *mem_pool; /**< memory pool allocated at once */
322 :
323 : size_t pool_size; /**< memory requirement for this pool */
324 :
325 : size_t min_pool_size; /**< minimum theoretical memory requirement */
326 :
327 : size_t n_wgrad; /**< presumably the number of weight-gradient requests (starts at 0) - TODO confirm against memory_pool.cpp */
328 :
329 : bool svm_allocation; /**< flag if memory is a shared virtual memory */
330 :
331 : std::unordered_map<std::string, std::shared_ptr<nntrainer::MemAllocator>>
332 : allocators; /**< allocators taken from Engine::Global() in non-NPU builds */
333 : #if defined(__ANDROID__) && ENABLE_NPU
334 : RpcMemAllocFn_t rpcmem_alloc; /**< rpcmem_alloc symbol resolved from libcdsprpc.so */
335 : RpcMemFreeFn_t rpcmem_free; /**< rpcmem_free symbol resolved from libcdsprpc.so */
336 : #endif
337 : };
338 :
339 : } // namespace nntrainer
340 :
341 : #endif /** __MEMORY_POOL_H__ */
|