Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
4 : *
5 : * @file optimized_v1_planner.cpp
6 : * @date 3 September 2021
7 : * @see https://github.com/nnstreamer/nntrainer
8 : * @author Parichay Kapoor <pk.kapoor@samsung.com>
9 : * @bug No known bugs except for NYI items
10 : * @brief This is Optimized V1 Memory Planner
11 : *
12 : */
13 :
14 : #include <algorithm>
15 : #include <memory>
16 : #include <nntrainer_error.h>
17 : #include <stdexcept>
18 : #include <vector>
19 :
20 : #include <optimized_v1_planner.h>
21 :
22 : namespace nntrainer {
23 :
24 : /**
25 : * @brief Memory Request data structure clubbing all the requests
26 : *
27 : */
28 : struct MemoryRequest {
29 : unsigned int start; /**< start of the validity (inclusive) */
30 : unsigned int end; /**< end of the validity (exclusive) */
31 : unsigned int loc; /**< index/location of the this request */
32 : size_t size; /**< size of the request */
33 : size_t offset; /**< offset for this request */
34 :
35 : /**
36 : * @brief Constructor for the Memory Request
37 : *
38 : */
39 : MemoryRequest(size_t s, const std::pair<unsigned int, unsigned int> &valid,
40 2902 : unsigned int idx) :
41 2902 : start(valid.first), end(valid.second), loc(idx), size(s), offset(0) {}
42 : };
43 :
44 : /**
45 : * @brief check if validate interval is overlapping in a very naive way.
46 : *
47 : * @param memory_validity validity
48 : * @param memory_size size
49 : * @param memory_offset offset
50 : * @param memory_req request
51 : */
52 : [[maybe_unused]] static void validateIntervalOverlap(
53 : const std::vector<std::pair<unsigned int, unsigned int>> &memory_validity,
54 : const std::vector<size_t> &memory_size,
55 : const std::vector<size_t> &memory_offset, size_t memory_req) {
56 : auto bits = std::make_unique<bool[]>(memory_req);
57 :
58 : for (size_t i = 0; i < memory_req; ++i) {
59 : bits[i] = 0;
60 : }
61 :
62 : auto exec_start =
63 : std::min_element(memory_validity.begin(), memory_validity.end(),
64 : [](auto &a, auto &b) { return a.first < b.first; });
65 :
66 : auto exec_end =
67 : std::max_element(memory_validity.begin(), memory_validity.end(),
68 : [](auto &a, auto &b) { return a.second < b.second; });
69 :
70 : auto set = [&](int offset, size_t size, int idx) {
71 : for (unsigned int i = offset; i < size; ++i) {
72 : NNTR_THROW_IF(bits[i], std::invalid_argument)
73 : << " bits taken at i: " << i << " offset: " << offset
74 : << " size: " << size << " idx: " << idx;
75 : bits[i] = 1;
76 : }
77 : };
78 :
79 : auto unset = [&](int offset, size_t size, int idx) {
80 : for (unsigned int i = offset; i < size; ++i) {
81 : NNTR_THROW_IF(!bits[i], std::invalid_argument)
82 : << "double freeing bits at i: " << i << " offset: " << offset
83 : << " size: " << size << " idx: " << idx;
84 : bits[i] = 0;
85 : }
86 : };
87 :
88 : for (unsigned int exec = exec_start->first; exec <= exec_end->second;
89 : ++exec) {
90 :
91 : for (unsigned int idx = 0; idx < memory_validity.size(); ++idx) {
92 : auto &validity = memory_validity.at(idx);
93 : auto &sz = memory_size.at(idx);
94 : auto &offset = memory_offset.at(idx);
95 : if (validity.first == exec) {
96 : set(offset, sz, idx);
97 : }
98 : if (validity.second == exec) {
99 : unset(offset, sz, idx);
100 : }
101 : }
102 : }
103 : // check if there is any dangling memory
104 : set(0, memory_req, memory_validity.size());
105 : }
106 :
107 : /**
108 : * @copydoc MemoryPlanner::planLayout(
109 : * const std::vector<size_t> &memory_size,
110 : * const std::vector<std::pair<unsigned int, unsigned int>> &memory_validity,
111 : * std::vector<size_t> &memory_offset,
112 : * std::vector<bool> &memory_is_wgrad);
113 : *
114 : * @details The optimized v1 memory planner assigns memory to the requests whose
115 : * validity starts first.
116 : * The requested memories are sorted based on the ascending order of the start
117 : * timestamps, and descending order using the end timestamps. The
118 : * sorted memories are given increasing offset based on the memory size.
119 : * At the end of each timestamp, invalid memories are freed, and offset updated
120 : * for reuse. This planner allocates overlapping memory for all the required
121 : * memories.
122 : *
123 : */
124 406 : size_t OptimizedV1Planner::planLayout(
125 : const std::vector<size_t> &memory_size,
126 : const std::vector<std::pair<unsigned int, unsigned int>> &memory_validity,
127 : std::vector<size_t> &memory_offset, std::vector<bool> &memory_is_wgrad,
128 : size_t n_wgrad) const {
129 :
130 : /** create memory requests structure array for easier management */
131 : std::vector<MemoryRequest> requests;
132 406 : requests.reserve(memory_size.size());
133 :
134 3308 : for (unsigned int idx = 0; idx < memory_size.size(); idx++) {
135 2902 : requests.emplace_back(memory_size[idx], memory_validity[idx], idx);
136 : }
137 :
138 : /**
139 : * sort the memory requests with ascending order of start time first, and
140 : * then end time
141 : */
142 406 : std::sort(requests.begin(), requests.end(),
143 : [](auto const &v1, auto const &v2) -> int {
144 6925 : if (v1.start == v2.start)
145 5706 : return v1.end < v2.end;
146 1219 : return v1.start < v2.start;
147 : /** TODO: try this */
148 : // if (v1.end == v2.end)
149 : // return v1.start < v2.start;
150 : // return v1.end > v2.end;
151 : });
152 :
153 : /** all the memories in use sorted by their assigned offset and size */
154 : std::vector<MemoryRequest *> sorted_req;
155 :
156 : /** iterate over the sorted requests and start allocation of the requests */
157 406 : memory_offset.resize(memory_size.size());
158 406 : size_t memory_req = 0;
159 :
160 3308 : for (auto &req : requests) {
161 : /** remove expired memories and update offset */
162 3001 : while (!sorted_req.empty() && sorted_req.back()->end <= req.start)
163 : sorted_req.pop_back();
164 :
165 : /** if there exists an expired memory with same size (not at the edge),
166 : * reuse it */
167 : bool replace_and_fill = false;
168 30218 : for (int idx = sorted_req.size() - 1; idx >= 0; idx--) {
169 27353 : auto const &sr = sorted_req[idx];
170 : /** TODO: reuse if memory size not exactly match */
171 27353 : if (sr->end <= req.start && sr->size == req.size) {
172 37 : req.offset = sr->offset;
173 37 : memory_offset[req.loc] = req.offset;
174 37 : sorted_req[idx] = &req;
175 : replace_and_fill = true;
176 : break;
177 : }
178 : }
179 37 : if (replace_and_fill) {
180 37 : continue;
181 : }
182 :
183 : size_t offset = 0;
184 2865 : if (!sorted_req.empty())
185 2457 : offset = sorted_req.back()->offset + sorted_req.back()->size;
186 :
187 : /** assign offset to the new request and push to queue */
188 2865 : req.offset = offset;
189 2865 : memory_offset[req.loc] = offset;
190 2865 : memory_req = std::max(memory_req, req.offset + req.size);
191 2865 : sorted_req.push_back(&req);
192 : }
193 :
194 : // validateIntervalOverlap(memory_validity, memory_size, memory_offset,
195 : // memory_req);
196 406 : return memory_req;
197 406 : }
198 :
199 : } // namespace nntrainer
|