Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2020 Jihoon Lee <jhoon.it.lee@samsung.com>
4 : *
5 : * @file profiler.cpp
6 : * @date 09 December 2020
7 : * @brief Profiler related codes to be used to benchmark things
8 : * @see https://github.com/nnstreamer/nntrainer
9 : * @author Jihoon Lee <jhoon.it.lee@samsung.com>
10 : * @bug No known bugs except for NYI items
11 : *
12 : */
13 : #include <algorithm>
14 : #include <atomic>
15 : #include <iomanip>
16 : #include <map>
17 : #include <numeric>
18 : #include <set>
19 : #include <sstream>
20 : #include <tuple>
21 :
22 : #include <nntrainer_error.h>
23 : #include <nntrainer_log.h>
24 : #include <profiler.h>
25 :
26 : namespace nntrainer {
27 : namespace profile {
28 :
29 0 : void GenericProfileListener::onNotifyTimeEvent(
30 : PROFILE_EVENT event, const int time_item, const std::string &str,
31 : const std::chrono::microseconds &duration) {
32 : auto time_iter = time_taken.find(time_item);
33 :
34 0 : if (time_iter == time_taken.end()) {
35 0 : reset(time_item, str);
36 : time_iter = time_taken.find(time_item);
37 0 : if (time_iter == time_taken.end()) {
38 0 : throw std::runtime_error("Couldn't find time_iter.");
39 : }
40 : }
41 :
42 : auto &cnt_ = std::get<GenericProfileListener::CNT>(time_iter->second);
43 0 : cnt_++;
44 :
45 0 : if (warmups >= cnt_)
46 : return;
47 :
48 : auto &cur_ = std::get<GenericProfileListener::CUR>(time_iter->second);
49 : auto &min_ = std::get<GenericProfileListener::MIN>(time_iter->second);
50 : auto &max_ = std::get<GenericProfileListener::MAX>(time_iter->second);
51 : auto &sum_ = std::get<GenericProfileListener::SUM>(time_iter->second);
52 :
53 0 : cur_ = duration;
54 0 : min_ = std::min(min_, duration);
55 0 : max_ = std::max(max_, duration);
56 : sum_ += duration;
57 : }
58 :
59 0 : void GenericProfileListener::onNotifyMemoryEvent(
60 : PROFILE_EVENT event, const size_t alloc_current, const size_t alloc_total,
61 : const std::string &str, const std::chrono::microseconds &duration,
62 : const std::string &cache_policy, bool cache_fsu) {
63 :
64 0 : if (event != EVENT_MEM_ANNOTATE) {
65 0 : mem_max = std::max(mem_max, alloc_total);
66 0 : mem_sum += alloc_total;
67 0 : mem_average = mem_sum / ++mem_count;
68 : }
69 :
70 0 : mem_taken.emplace_back(event, alloc_current, alloc_total, str, duration,
71 : cache_policy, cache_fsu);
72 0 : }
73 :
74 0 : void GenericProfileListener::notify(
75 : PROFILE_EVENT event, const std::shared_ptr<ProfileEventData> data) {
76 0 : switch (event) {
77 : case EVENT_TIME_START:
78 : /* ignore start time. we only consider duration of item */
79 : break;
80 : case EVENT_TIME_END:
81 0 : onNotifyTimeEvent(event, data->time_item, data->event_str, data->duration);
82 0 : break;
83 : case EVENT_MEM_ALLOC:
84 : case EVENT_MEM_DEALLOC:
85 : case EVENT_MEM_ANNOTATE:
86 0 : onNotifyMemoryEvent(event, data->alloc_current, data->alloc_total,
87 0 : data->event_str, data->duration, data->cache_policy,
88 0 : data->cache_fsu);
89 0 : break;
90 0 : default:
91 0 : throw std::runtime_error("Invalid PROFILE_EVENT");
92 : break;
93 : }
94 0 : }
95 :
96 0 : void GenericProfileListener::reset(const int time_item,
97 : const std::string &name) {
98 : time_taken[time_item] = std::make_tuple(
99 : std::chrono::microseconds{0}, std::chrono::microseconds::max(),
100 : std::chrono::microseconds::min(), std::chrono::microseconds{0}, int{0});
101 : names[time_item] = name;
102 0 : }
103 :
104 : const std::chrono::microseconds
105 0 : GenericProfileListener::result(const int time_item) {
106 : auto iter = time_taken.find(time_item);
107 :
108 0 : if (iter == time_taken.end() ||
109 0 : std::get<GenericProfileListener::CNT>(iter->second) == 0) {
110 0 : std::stringstream ss;
111 : ss << "time_item has never recorded: " << names[time_item];
112 0 : throw std::invalid_argument("time_item has never recorded");
113 0 : }
114 :
115 0 : return std::get<GenericProfileListener::CUR>(iter->second);
116 : }
117 :
118 0 : void GenericProfileListener::report(std::ostream &out) const {
119 0 : std::vector<unsigned int> column_size = {20, 23, 23, 23, 23, 23, 23, 23};
120 :
121 0 : for (auto &[item, time] : time_taken) {
122 : auto title = names.find(item);
123 0 : if (title == names.end())
124 0 : throw std::runtime_error("Couldn't find name. it's already removed.");
125 0 : column_size[0] =
126 0 : std::max(column_size[0], static_cast<unsigned int>(title->second.size()));
127 : }
128 : auto total_col_size =
129 0 : std::accumulate(column_size.begin(), column_size.end(), 0);
130 :
131 0 : if (warmups != 0)
132 0 : out << "warm up: " << warmups << '\n';
133 :
134 0 : auto end = std::chrono::steady_clock::now();
135 : auto duration =
136 0 : std::chrono::duration_cast<std::chrono::microseconds>(end - start_time);
137 :
138 0 : out << "profiled for " << duration.count() << '\n';
139 :
140 : /// creating header
141 : // clang-format off
142 : out << std::setw(column_size[0]) << "key"
143 : << std::setw(column_size[1]) << "avg"
144 : << std::setw(column_size[2]) << "min"
145 : << std::setw(column_size[3]) << "max"
146 : << std::setw(column_size[4]) << "sum"
147 0 : << std::setw(column_size[5]) << "pct" << '\n';
148 : // clang-format on
149 :
150 : // seperator
151 0 : out << std::string(total_col_size, '=') << '\n';
152 :
153 : std::map<int, std::function<void(std::ostream & out)>> ordered_report;
154 :
155 : /// calculate metrics while skipping warmups
156 0 : for (auto &time : time_taken) {
157 0 : auto func = [&](std::ostream &out_) {
158 0 : auto &cnt_ = std::get<GenericProfileListener::CNT>(time.second);
159 : auto &min_ = std::get<GenericProfileListener::MIN>(time.second);
160 : auto &max_ = std::get<GenericProfileListener::MAX>(time.second);
161 : auto &sum_ = std::get<GenericProfileListener::SUM>(time.second);
162 :
163 0 : auto title = names.find(time.first);
164 0 : if (title == names.end())
165 0 : throw std::runtime_error("Couldn't find name. it's already removed.");
166 :
167 0 : if (warmups >= cnt_) {
168 : out_ << std::left << std::setw(total_col_size) << title->second
169 0 : << "less data then warmup\n";
170 : out_
171 : << std::right; // Restore outputstream adjustflag to standard stream
172 : return;
173 : }
174 :
175 0 : out_.setf(std::ios::fixed);
176 : std::streamsize default_precision = out_.precision(2);
177 : // clang-format off
178 0 : out_ << std::setw(column_size[0]) << title->second
179 0 : << std::setw(column_size[1]) << sum_.count() / (cnt_ - warmups)
180 0 : << std::setw(column_size[2]) << min_.count()
181 0 : << std::setw(column_size[3]) << max_.count()
182 0 : << std::setw(column_size[4]) << sum_.count()
183 0 : << std::setw(column_size[5]) << sum_.count() / (double)duration.count() * 100 << '\n';
184 : // clang-format on
185 0 : out_.precision(default_precision);
186 0 : out_.unsetf(std::ios::fixed);
187 0 : };
188 0 : ordered_report[-time.first] = func;
189 : }
190 :
191 0 : for (auto &entry : ordered_report)
192 : entry.second(out);
193 :
194 : ordered_report.clear();
195 :
196 : column_size.clear();
197 0 : column_size = {30, 20, 20, 60, 10, 20, 10};
198 :
199 0 : out << std::string(total_col_size, '=') << '\n';
200 0 : out << "\n\n";
201 : /// creating header
202 : // clang-format off
203 : out << std::setw(column_size[0]) << "event"
204 : << std::setw(column_size[1]) << "size"
205 : << std::setw(column_size[2]) << "total"
206 : << std::setw(column_size[3]) << "info"
207 : << std::setw(column_size[4]) << "dur"
208 : << std::setw(column_size[5]) << "policy"
209 0 : << std::setw(column_size[6]) << "fsu"
210 : << std::endl;
211 : // clang-format on
212 0 : out << std::string(total_col_size, '=') << std::endl;
213 :
214 : int order = 0;
215 0 : for (auto &mem : mem_taken) {
216 0 : auto func = [&](std::ostream &out_) {
217 0 : auto &event = std::get<PROFILE_EVENT>(mem);
218 : auto &cur = std::get<1>(mem);
219 : auto &total = std::get<2>(mem);
220 : auto &info = std::get<3>(mem);
221 : auto &dur = std::get<std::chrono::microseconds>(mem);
222 : auto &policy = std::get<5>(mem);
223 : auto &fsu = std::get<bool>(mem);
224 :
225 0 : out_.setf(std::ios::fixed);
226 : out_.setf(std::ios::right);
227 : std::streamsize default_precision = out_.precision(2);
228 : // clang-format off
229 0 : if (event == EVENT_MEM_ANNOTATE) {
230 : out_ << info << std::endl;
231 : } else {
232 0 : out_ << std::setw(column_size[0]) << ((event == EVENT_MEM_ALLOC) ? "ALLOC" :
233 0 : (event == EVENT_MEM_DEALLOC) ? "DEALLOC" : "")
234 0 : << std::setw(column_size[1]) << std::to_string(cur)
235 0 : << std::setw(column_size[2]) << std::to_string(total)
236 0 : << std::setw(column_size[3]) << info
237 0 : << std::setw(column_size[4]) << ((event == EVENT_MEM_DEALLOC) ? std::to_string(dur.count()) : "")
238 0 : << std::setw(column_size[5]) << policy
239 0 : << std::setw(column_size[6]) << (fsu ? ((event == EVENT_MEM_ALLOC) ? "IN" :
240 0 : (event == EVENT_MEM_DEALLOC) ? "OUT" : "") : "")
241 : << std::endl;
242 : }
243 : // clang-format on
244 0 : out_.precision(default_precision);
245 0 : out_.unsetf(std::ios::fixed);
246 0 : };
247 0 : ordered_report[order++] = func;
248 : }
249 :
250 0 : for (auto &entry : ordered_report)
251 : entry.second(out);
252 :
253 0 : out << "Max Memory Size = " << mem_max << std::endl;
254 0 : out << "Average Memory Size = " << mem_average << std::endl;
255 0 : }
256 :
257 0 : void Profiler::start(const int item) {
258 : #ifdef DEBUG
259 : /// @todo: consider race condition
260 : if (time_item_times.find(item) != time_item_times.end())
261 : throw std::invalid_argument("profiler has already started");
262 : #endif
263 :
264 : auto name = time_item_names.find(item);
265 0 : if (name == time_item_names.end())
266 0 : throw std::invalid_argument("the item is not registered");
267 :
268 0 : time_item_times[item] = std::chrono::steady_clock::now();
269 :
270 0 : auto data = std::make_shared<ProfileEventData>(item, 0, 0, name->second,
271 0 : std::chrono::microseconds(0));
272 0 : notifyListeners(EVENT_TIME_START, data);
273 0 : }
274 :
275 0 : void Profiler::end(const int item) {
276 : /// @todo: consider race condition
277 0 : auto end = std::chrono::steady_clock::now();
278 :
279 : #ifdef DEBUG
280 : if (time_item_times.find(item) == time_item_times.end())
281 : throw std::invalid_argument("profiler hasn't started with the item");
282 : #endif
283 :
284 : auto name = time_item_names.find(item);
285 0 : if (name == time_item_names.end())
286 0 : throw std::invalid_argument("the item is not registered");
287 :
288 0 : auto start = time_item_times[item];
289 : auto duration =
290 0 : std::chrono::duration_cast<std::chrono::microseconds>(end - start);
291 : auto data =
292 0 : std::make_shared<ProfileEventData>(item, 0, 0, name->second, duration);
293 :
294 0 : notifyListeners(EVENT_TIME_END, data);
295 :
296 : time_item_times.erase(item);
297 0 : }
298 :
299 0 : void Profiler::notifyListeners(PROFILE_EVENT event,
300 : const std::shared_ptr<ProfileEventData> data) {
301 0 : std::lock_guard<std::mutex> lock(listeners_mutex);
302 :
303 0 : for (auto &l : time_item_listeners[data->time_item])
304 0 : l->notify(event, data);
305 :
306 0 : for (auto &l : listeners)
307 0 : l->notify(event, data);
308 0 : }
309 :
310 0 : void Profiler::subscribe(std::shared_ptr<ProfileListener> listener,
311 : const std::set<int> &time_items) {
312 0 : if (listener == nullptr) {
313 0 : throw std::invalid_argument("listener is null!");
314 : }
315 :
316 0 : std::lock_guard<std::mutex> lock(listeners_mutex);
317 0 : if (time_items.empty()) {
318 : listeners.insert(listener);
319 : } else {
320 0 : for (auto item : time_items)
321 : time_item_listeners[item].insert(listener);
322 : }
323 0 : }
324 :
325 0 : void Profiler::unsubscribe(std::shared_ptr<ProfileListener> listener) {
326 0 : std::lock_guard<std::mutex> lock(listeners_mutex);
327 : listeners.erase(listener);
328 :
329 0 : for (auto &[item, listeners] : time_item_listeners) {
330 : auto found = listeners.find(listener);
331 0 : if (found != listeners.end())
332 : listeners.erase(found);
333 : }
334 0 : }
335 :
336 0 : int Profiler::registerTimeItem(const std::string &name) {
337 0 : std::lock_guard<std::mutex> lock_listener(listeners_mutex);
338 0 : std::lock_guard<std::mutex> lock(registr_mutex);
339 :
340 0 : int item = time_item_names.size() + 1;
341 :
342 : time_item_names[item] = name;
343 0 : time_item_listeners[item] = std::set<std::shared_ptr<ProfileListener>>();
344 :
345 0 : ml_logd("[Profiler] Event registered, time item name: %s key: %d",
346 : name.c_str(), item);
347 :
348 0 : return item;
349 : }
350 :
351 0 : void Profiler::alloc(const void *ptr, size_t size, const std::string &str,
352 : const std::string &policy, bool fsu) {
353 0 : std::lock_guard<std::mutex> lock(allocates_mutex);
354 :
355 : #ifdef DEBUG
356 : auto found = allocates.find(ptr);
357 : if (found != allocates.end())
358 : throw std::invalid_argument("memory profiler is already allocated");
359 : #endif
360 :
361 0 : allocates[ptr] = std::tuple<size_t, timepoint, std::string>(
362 0 : size, std::chrono::steady_clock::now(), str);
363 :
364 0 : total_size += size;
365 :
366 : auto data = std::make_shared<ProfileEventData>(
367 0 : 0, size, total_size.load(), str, std::chrono::microseconds(0), policy, fsu);
368 0 : notifyListeners(EVENT_MEM_ALLOC, data);
369 0 : }
370 :
371 0 : void Profiler::dealloc(const void *ptr, const std::string &policy, bool fsu) {
372 0 : std::lock_guard<std::mutex> lock(allocates_mutex);
373 :
374 0 : auto end = std::chrono::steady_clock::now();
375 : auto found = allocates.find(ptr);
376 :
377 0 : if (found == allocates.end())
378 0 : throw std::invalid_argument("memory profiler didn't allocated");
379 :
380 0 : auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
381 : end - std::get<timepoint>(found->second));
382 :
383 0 : auto size = std::get<size_t>(found->second);
384 : total_size -= size;
385 :
386 : auto str = std::get<std::string>(found->second);
387 0 : auto data = std::make_shared<ProfileEventData>(0, size, total_size.load(),
388 : str, duration, policy, fsu);
389 :
390 0 : notifyListeners(EVENT_MEM_DEALLOC, data);
391 :
392 : allocates.erase(found);
393 0 : }
394 :
395 0 : void Profiler::annotate(const std::string &str) {
396 0 : std::lock_guard<std::mutex> lock(allocates_mutex);
397 :
398 0 : auto data = std::make_shared<ProfileEventData>(0, 0, 0, str,
399 0 : std::chrono::microseconds(0));
400 :
401 0 : notifyListeners(EVENT_MEM_ANNOTATE, data);
402 0 : }
403 :
404 : } // namespace profile
405 :
406 : } // namespace nntrainer
|