Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2023 Jijoong Moon <jijoong.moon@samsung.com>
4 : *
5 : * @file optimized_v3_planner.cpp
6 : * @date 2 January 2023
7 : * @see https://github.com/nnstreamer/nntrainer
8 : * @author Jijoong Moon <jijoong.moon@samsung.com>
9 : * @bug No known bugs except for NYI items
10 : * @brief This is Optimized V3 Memory Planner
11 : *
12 : */
13 :
14 : #include <algorithm>
15 : #include <memory>
16 : #include <nntrainer_error.h>
17 : #include <stdexcept>
18 : #include <vector>
19 :
20 : #include <optimized_v3_planner.h>
21 :
22 : namespace nntrainer {
23 :
24 : /**
25 : * @brief Memory Request data structure clubbing all the requests
26 : *
27 : */
28 : struct MemoryRequest {
29 : unsigned int start; /**< start of the validity (inclusive) */
30 : unsigned int end; /**< end of the validity (exclusive) */
31 : unsigned int loc; /**< index/location of the this request */
32 : size_t size; /**< size of the request */
33 : size_t offset; /**< offset for this request */
34 :
35 : /**
36 : * @brief Constructor for the Memory Request
37 : *
38 : */
39 : MemoryRequest(size_t s, const std::pair<unsigned int, unsigned int> &valid,
40 0 : unsigned int idx) :
41 0 : start(valid.first), end(valid.second), loc(idx), size(s), offset(0) {}
42 : };
43 :
44 0 : static size_t computeSpace(unsigned int exec_order,
45 : std::vector<MemoryRequest *> &sorted_req,
46 : std::vector<std::pair<size_t, size_t>> &vacant) {
47 : size_t bottom = 0;
48 : size_t max_offset = 0;
49 :
50 0 : std::sort(sorted_req.begin(), sorted_req.end(),
51 : [](auto const &v1, auto const &v2) -> int {
52 0 : return v1->offset < v2->offset;
53 : /** TODO: try this */
54 : // if (v1.end == v2.end)
55 : // return v1.start < v2.start;
56 : // return v1.end > v2.end;
57 : });
58 :
59 0 : for (unsigned idx = 0; idx < sorted_req.size(); idx++) {
60 : auto const &sr = sorted_req[idx];
61 0 : size_t top = sr->offset + sr->size;
62 :
63 0 : if (max_offset < top)
64 : max_offset = top;
65 :
66 0 : if (sr->offset > bottom) {
67 0 : vacant.push_back(std::make_pair(bottom, sr->offset));
68 : }
69 : bottom = top;
70 : }
71 :
72 0 : return max_offset;
73 : }
74 :
75 : /**
76 : * @brief check if validate interval is overlapping in a very naive way.
77 : *
78 : * @param memory_validity validity
79 : * @param memory_size size
80 : * @param memory_offset offset
81 : * @param memory_req request
82 : */
83 : [[maybe_unused]] static void validateIntervalOverlap(
84 : const std::vector<std::pair<unsigned int, unsigned int>> &memory_validity,
85 : const std::vector<size_t> &memory_size,
86 : const std::vector<size_t> &memory_offset, size_t memory_req) {
87 : auto bits = std::make_unique<bool[]>(memory_req);
88 :
89 : for (size_t i = 0; i < memory_req; ++i) {
90 : bits[i] = 0;
91 : }
92 :
93 : auto exec_start =
94 : std::min_element(memory_validity.begin(), memory_validity.end(),
95 : [](auto &a, auto &b) { return a.first < b.first; });
96 :
97 : auto exec_end =
98 : std::max_element(memory_validity.begin(), memory_validity.end(),
99 : [](auto &a, auto &b) { return a.second < b.second; });
100 :
101 : auto set = [&](int offset, size_t size, int idx) {
102 : for (unsigned int i = offset; i < size; ++i) {
103 : NNTR_THROW_IF(bits[i], std::invalid_argument)
104 : << " bits taken at i: " << i << " offset: " << offset
105 : << " size: " << size << " idx: " << idx;
106 : bits[i] = 1;
107 : }
108 : };
109 :
110 : auto unset = [&](int offset, size_t size, int idx) {
111 : for (unsigned int i = offset; i < size; ++i) {
112 : NNTR_THROW_IF(!bits[i], std::invalid_argument)
113 : << "double freeing bits at i: " << i << " offset: " << offset
114 : << " size: " << size << " idx: " << idx;
115 : bits[i] = 0;
116 : }
117 : };
118 :
119 : for (unsigned int exec = exec_start->first; exec <= exec_end->second;
120 : ++exec) {
121 :
122 : for (unsigned int idx = 0; idx < memory_validity.size(); ++idx) {
123 : auto &validity = memory_validity.at(idx);
124 : auto &sz = memory_size.at(idx);
125 : auto &offset = memory_offset.at(idx);
126 : if (validity.first == exec) {
127 : set(offset, sz, idx);
128 : }
129 : if (validity.second == exec) {
130 : unset(offset, sz, idx);
131 : }
132 : }
133 : }
134 : // check if there is any dangling memory
135 : set(0, memory_req, memory_validity.size());
136 : }
137 :
138 : /**
139 : * @copydoc MemoryPlanner::planLayout(
140 : * const std::vector<size_t> &memory_size,
141 : * const std::vector<std::pair<unsigned int, unsigned int>> &memory_validity,
142 : * std::vector<size_t> &memory_offset,
143 : * std::vector<bool> &memory_is_wgrad);
144 : *
145 : * @details The optimized v1 memory planner assigns memory to the requests whose
146 : * validity starts first.
147 : * The requested memories are sorted based on the ascending order of the start
148 : * timestamps, and descending order using the end timestamps. The
149 : * sorted memories are given increasing offset based on the memory size.
150 : * At the end of each timestamp, invalid memories are freed, and offset updated
151 : * for reuse. This planner allocates overlapping memory for all the required
152 : * memories.
153 : *
154 : */
155 0 : size_t OptimizedV3Planner::planLayout(
156 : const std::vector<size_t> &memory_size,
157 : const std::vector<std::pair<unsigned int, unsigned int>> &memory_validity,
158 : std::vector<size_t> &memory_offset, std::vector<bool> &memory_is_wgrad,
159 : size_t n_wgrad) const {
160 :
161 : /** create memory requests structure array for easier management */
162 : std::vector<MemoryRequest> requests;
163 0 : requests.reserve(memory_size.size());
164 0 : for (unsigned int idx = 0; idx < memory_size.size(); idx++) {
165 0 : requests.emplace_back(memory_size[idx], memory_validity[idx], idx);
166 : }
167 :
168 : /**
169 : * sort the memory requests with ascending order of start time first, and
170 : * then end time
171 : */
172 0 : std::sort(requests.begin(), requests.end(),
173 : [](auto const &v1, auto const &v2) -> int {
174 0 : if (v1.start == v2.start)
175 0 : return v1.end < v2.end;
176 0 : return v1.start < v2.start;
177 : /** TODO: try this */
178 : // if (v1.end == v2.end)
179 : // return v1.start < v2.start;
180 : // return v1.end > v2.end;
181 : });
182 :
183 : /** all the memories in use sorted by their assigned offset and size */
184 : std::vector<MemoryRequest *> sorted_req;
185 :
186 : /** iterate over the sorted requests and start allocation of the requests */
187 0 : memory_offset.resize(memory_size.size());
188 0 : size_t memory_req = 0;
189 0 : for (auto &req : requests) {
190 : sorted_req.erase(
191 0 : std::remove_if(sorted_req.begin(), sorted_req.end(),
192 0 : [req](auto elem) { return elem->end <= req.start; }),
193 : sorted_req.end());
194 :
195 : bool replace_and_fill = false;
196 : std::vector<std::pair<size_t, size_t>> vacant;
197 :
198 0 : size_t max_offset = computeSpace(req.start, sorted_req, vacant);
199 :
200 0 : for (unsigned int idx = 0; idx < vacant.size(); idx++) {
201 0 : if (vacant[idx].second - vacant[idx].first >= req.size) {
202 0 : req.offset = vacant[idx].first;
203 0 : memory_offset[req.loc] = req.offset;
204 0 : sorted_req.push_back(&req);
205 : replace_and_fill = true;
206 0 : break;
207 : }
208 : }
209 : vacant.clear();
210 :
211 0 : if (replace_and_fill) {
212 : continue;
213 : }
214 :
215 0 : req.offset = max_offset;
216 0 : memory_offset[req.loc] = max_offset;
217 0 : memory_req = std::max(memory_req, req.offset + req.size);
218 0 : sorted_req.push_back(&req);
219 0 : }
220 :
221 0 : return memory_req;
222 0 : }
223 :
224 : } // namespace nntrainer
|