LCOV - code coverage report
Current view: top level - nntrainer/utils - profiler.cpp (source / functions) Coverage Total Hit
Test: coverage_filtered.info Lines: 0.0 % 166 0
Test Date: 2025-12-14 20:38:17 Functions: 0.0 % 17 0

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * Copyright (C) 2020 Jihoon Lee <jhoon.it.lee@samsung.com>
       4              :  *
       5              :  * @file   profiler.cpp
       6              :  * @date   09 December 2020
       7              :  * @brief  Profiler related codes to be used to benchmark things
       8              :  * @see    https://github.com/nnstreamer/nntrainer
       9              :  * @author Jihoon Lee <jhoon.it.lee@samsung.com>
      10              :  * @bug    No known bugs except for NYI items
      11              :  *
      12              :  */
      13              : #include <algorithm>
      14              : #include <atomic>
      15              : #include <iomanip>
      16              : #include <map>
      17              : #include <numeric>
      18              : #include <set>
      19              : #include <sstream>
      20              : #include <tuple>
      21              : 
      22              : #include <nntrainer_error.h>
      23              : #include <nntrainer_log.h>
      24              : #include <profiler.h>
      25              : 
      26              : namespace nntrainer {
      27              : namespace profile {
      28              : 
      29            0 : void GenericProfileListener::onNotifyTimeEvent(
      30              :   PROFILE_EVENT event, const int time_item, const std::string &str,
      31              :   const std::chrono::microseconds &duration) {
      32              :   auto time_iter = time_taken.find(time_item);
      33              : 
      34            0 :   if (time_iter == time_taken.end()) {
      35            0 :     reset(time_item, str);
      36              :     time_iter = time_taken.find(time_item);
      37            0 :     if (time_iter == time_taken.end()) {
      38            0 :       throw std::runtime_error("Couldn't find time_iter.");
      39              :     }
      40              :   }
      41              : 
      42              :   auto &cnt_ = std::get<GenericProfileListener::CNT>(time_iter->second);
      43            0 :   cnt_++;
      44              : 
      45            0 :   if (warmups >= cnt_)
      46              :     return;
      47              : 
      48              :   auto &cur_ = std::get<GenericProfileListener::CUR>(time_iter->second);
      49              :   auto &min_ = std::get<GenericProfileListener::MIN>(time_iter->second);
      50              :   auto &max_ = std::get<GenericProfileListener::MAX>(time_iter->second);
      51              :   auto &sum_ = std::get<GenericProfileListener::SUM>(time_iter->second);
      52              : 
      53            0 :   cur_ = duration;
      54            0 :   min_ = std::min(min_, duration);
      55            0 :   max_ = std::max(max_, duration);
      56              :   sum_ += duration;
      57              : }
      58              : 
      59            0 : void GenericProfileListener::onNotifyMemoryEvent(
      60              :   PROFILE_EVENT event, const size_t alloc_current, const size_t alloc_total,
      61              :   const std::string &str, const std::chrono::microseconds &duration,
      62              :   const std::string &cache_policy, bool cache_fsu) {
      63              : 
      64            0 :   if (event != EVENT_MEM_ANNOTATE) {
      65            0 :     mem_max = std::max(mem_max, alloc_total);
      66            0 :     mem_sum += alloc_total;
      67            0 :     mem_average = mem_sum / ++mem_count;
      68              :   }
      69              : 
      70            0 :   mem_taken.emplace_back(event, alloc_current, alloc_total, str, duration,
      71              :                          cache_policy, cache_fsu);
      72            0 : }
      73              : 
      74            0 : void GenericProfileListener::notify(
      75              :   PROFILE_EVENT event, const std::shared_ptr<ProfileEventData> data) {
      76            0 :   switch (event) {
      77              :   case EVENT_TIME_START:
      78              :     /* ignore start time. we only consider duration of item */
      79              :     break;
      80              :   case EVENT_TIME_END:
      81            0 :     onNotifyTimeEvent(event, data->time_item, data->event_str, data->duration);
      82            0 :     break;
      83              :   case EVENT_MEM_ALLOC:
      84              :   case EVENT_MEM_DEALLOC:
      85              :   case EVENT_MEM_ANNOTATE:
      86            0 :     onNotifyMemoryEvent(event, data->alloc_current, data->alloc_total,
      87            0 :                         data->event_str, data->duration, data->cache_policy,
      88            0 :                         data->cache_fsu);
      89            0 :     break;
      90            0 :   default:
      91            0 :     throw std::runtime_error("Invalid PROFILE_EVENT");
      92              :     break;
      93              :   }
      94            0 : }
      95              : 
      96            0 : void GenericProfileListener::reset(const int time_item,
      97              :                                    const std::string &name) {
      98              :   time_taken[time_item] = std::make_tuple(
      99              :     std::chrono::microseconds{0}, std::chrono::microseconds::max(),
     100              :     std::chrono::microseconds::min(), std::chrono::microseconds{0}, int{0});
     101              :   names[time_item] = name;
     102            0 : }
     103              : 
     104              : const std::chrono::microseconds
     105            0 : GenericProfileListener::result(const int time_item) {
     106              :   auto iter = time_taken.find(time_item);
     107              : 
     108            0 :   if (iter == time_taken.end() ||
     109            0 :       std::get<GenericProfileListener::CNT>(iter->second) == 0) {
     110            0 :     std::stringstream ss;
     111              :     ss << "time_item has never recorded: " << names[time_item];
     112            0 :     throw std::invalid_argument("time_item has never recorded");
     113            0 :   }
     114              : 
     115            0 :   return std::get<GenericProfileListener::CUR>(iter->second);
     116              : }
     117              : 
     118            0 : void GenericProfileListener::report(std::ostream &out) const {
     119            0 :   std::vector<unsigned int> column_size = {20, 23, 23, 23, 23, 23, 23, 23};
     120              : 
     121            0 :   for (auto &[item, time] : time_taken) {
     122              :     auto title = names.find(item);
     123            0 :     if (title == names.end())
     124            0 :       throw std::runtime_error("Couldn't find name. it's already removed.");
     125            0 :     column_size[0] =
     126            0 :       std::max(column_size[0], static_cast<unsigned int>(title->second.size()));
     127              :   }
     128              :   auto total_col_size =
     129            0 :     std::accumulate(column_size.begin(), column_size.end(), 0);
     130              : 
     131            0 :   if (warmups != 0)
     132            0 :     out << "warm up: " << warmups << '\n';
     133              : 
     134            0 :   auto end = std::chrono::steady_clock::now();
     135              :   auto duration =
     136            0 :     std::chrono::duration_cast<std::chrono::microseconds>(end - start_time);
     137              : 
     138            0 :   out << "profiled for " << duration.count() << '\n';
     139              : 
     140              :   /// creating header
     141              :   // clang-format off
     142              :   out << std::setw(column_size[0]) << "key"
     143              :       << std::setw(column_size[1]) << "avg"
     144              :       << std::setw(column_size[2]) << "min"
     145              :       << std::setw(column_size[3]) << "max"
     146              :       << std::setw(column_size[4]) << "sum"
     147            0 :       << std::setw(column_size[5]) << "pct" << '\n';
     148              :   // clang-format on
     149              : 
     150              :   // seperator
     151            0 :   out << std::string(total_col_size, '=') << '\n';
     152              : 
     153              :   std::map<int, std::function<void(std::ostream & out)>> ordered_report;
     154              : 
     155              :   /// calculate metrics while skipping warmups
     156            0 :   for (auto &time : time_taken) {
     157            0 :     auto func = [&](std::ostream &out_) {
     158            0 :       auto &cnt_ = std::get<GenericProfileListener::CNT>(time.second);
     159              :       auto &min_ = std::get<GenericProfileListener::MIN>(time.second);
     160              :       auto &max_ = std::get<GenericProfileListener::MAX>(time.second);
     161              :       auto &sum_ = std::get<GenericProfileListener::SUM>(time.second);
     162              : 
     163            0 :       auto title = names.find(time.first);
     164            0 :       if (title == names.end())
     165            0 :         throw std::runtime_error("Couldn't find name. it's already removed.");
     166              : 
     167            0 :       if (warmups >= cnt_) {
     168              :         out_ << std::left << std::setw(total_col_size) << title->second
     169            0 :              << "less data then warmup\n";
     170              :         out_
     171              :           << std::right; // Restore outputstream adjustflag to standard stream
     172              :         return;
     173              :       }
     174              : 
     175            0 :       out_.setf(std::ios::fixed);
     176              :       std::streamsize default_precision = out_.precision(2);
     177              :       // clang-format off
     178            0 :       out_ << std::setw(column_size[0]) << title->second
     179            0 :           << std::setw(column_size[1]) << sum_.count() / (cnt_ - warmups)
     180            0 :           << std::setw(column_size[2]) << min_.count()
     181            0 :           << std::setw(column_size[3]) << max_.count()
     182            0 :           << std::setw(column_size[4]) << sum_.count()
     183            0 :           << std::setw(column_size[5]) << sum_.count() / (double)duration.count() * 100 << '\n';
     184              :       // clang-format on
     185            0 :       out_.precision(default_precision);
     186            0 :       out_.unsetf(std::ios::fixed);
     187            0 :     };
     188            0 :     ordered_report[-time.first] = func;
     189              :   }
     190              : 
     191            0 :   for (auto &entry : ordered_report)
     192              :     entry.second(out);
     193              : 
     194              :   ordered_report.clear();
     195              : 
     196              :   column_size.clear();
     197            0 :   column_size = {30, 20, 20, 60, 10, 20, 10};
     198              : 
     199            0 :   out << std::string(total_col_size, '=') << '\n';
     200            0 :   out << "\n\n";
     201              :   /// creating header
     202              :   // clang-format off
     203              :   out << std::setw(column_size[0]) << "event"
     204              :       << std::setw(column_size[1]) << "size"
     205              :       << std::setw(column_size[2]) << "total"
     206              :       << std::setw(column_size[3]) << "info"
     207              :       << std::setw(column_size[4]) << "dur"
     208              :       << std::setw(column_size[5]) << "policy"
     209            0 :       << std::setw(column_size[6]) << "fsu"
     210              :       << std::endl;
     211              :   // clang-format on
     212            0 :   out << std::string(total_col_size, '=') << std::endl;
     213              : 
     214              :   int order = 0;
     215            0 :   for (auto &mem : mem_taken) {
     216            0 :     auto func = [&](std::ostream &out_) {
     217            0 :       auto &event = std::get<PROFILE_EVENT>(mem);
     218              :       auto &cur = std::get<1>(mem);
     219              :       auto &total = std::get<2>(mem);
     220              :       auto &info = std::get<3>(mem);
     221              :       auto &dur = std::get<std::chrono::microseconds>(mem);
     222              :       auto &policy = std::get<5>(mem);
     223              :       auto &fsu = std::get<bool>(mem);
     224              : 
     225            0 :       out_.setf(std::ios::fixed);
     226              :       out_.setf(std::ios::right);
     227              :       std::streamsize default_precision = out_.precision(2);
     228              :       // clang-format off
     229            0 :       if (event == EVENT_MEM_ANNOTATE) {
     230              :         out_ << info << std::endl;
     231              :       } else { 
     232            0 :         out_ << std::setw(column_size[0]) << ((event == EVENT_MEM_ALLOC) ? "ALLOC" :
     233            0 :                                               (event == EVENT_MEM_DEALLOC) ? "DEALLOC" : "")
     234            0 :              << std::setw(column_size[1]) << std::to_string(cur)
     235            0 :              << std::setw(column_size[2]) << std::to_string(total)
     236            0 :              << std::setw(column_size[3]) << info
     237            0 :              << std::setw(column_size[4]) << ((event == EVENT_MEM_DEALLOC) ? std::to_string(dur.count()) : "")
     238            0 :              << std::setw(column_size[5]) << policy
     239            0 :              << std::setw(column_size[6]) << (fsu ? ((event == EVENT_MEM_ALLOC) ? "IN" :
     240            0 :                                                      (event == EVENT_MEM_DEALLOC) ? "OUT" : "") : "")
     241              :              << std::endl;
     242              :       }
     243              :       // clang-format on
     244            0 :       out_.precision(default_precision);
     245            0 :       out_.unsetf(std::ios::fixed);
     246            0 :     };
     247            0 :     ordered_report[order++] = func;
     248              :   }
     249              : 
     250            0 :   for (auto &entry : ordered_report)
     251              :     entry.second(out);
     252              : 
     253            0 :   out << "Max Memory Size = " << mem_max << std::endl;
     254            0 :   out << "Average Memory Size = " << mem_average << std::endl;
     255            0 : }
     256              : 
     257            0 : void Profiler::start(const int item) {
     258              : #ifdef DEBUG
     259              :   /// @todo: consider race condition
     260              :   if (time_item_times.find(item) != time_item_times.end())
     261              :     throw std::invalid_argument("profiler has already started");
     262              : #endif
     263              : 
     264              :   auto name = time_item_names.find(item);
     265            0 :   if (name == time_item_names.end())
     266            0 :     throw std::invalid_argument("the item is not registered");
     267              : 
     268            0 :   time_item_times[item] = std::chrono::steady_clock::now();
     269              : 
     270            0 :   auto data = std::make_shared<ProfileEventData>(item, 0, 0, name->second,
     271            0 :                                                  std::chrono::microseconds(0));
     272            0 :   notifyListeners(EVENT_TIME_START, data);
     273            0 : }
     274              : 
     275            0 : void Profiler::end(const int item) {
     276              :   /// @todo: consider race condition
     277            0 :   auto end = std::chrono::steady_clock::now();
     278              : 
     279              : #ifdef DEBUG
     280              :   if (time_item_times.find(item) == time_item_times.end())
     281              :     throw std::invalid_argument("profiler hasn't started with the item");
     282              : #endif
     283              : 
     284              :   auto name = time_item_names.find(item);
     285            0 :   if (name == time_item_names.end())
     286            0 :     throw std::invalid_argument("the item is not registered");
     287              : 
     288            0 :   auto start = time_item_times[item];
     289              :   auto duration =
     290            0 :     std::chrono::duration_cast<std::chrono::microseconds>(end - start);
     291              :   auto data =
     292            0 :     std::make_shared<ProfileEventData>(item, 0, 0, name->second, duration);
     293              : 
     294            0 :   notifyListeners(EVENT_TIME_END, data);
     295              : 
     296              :   time_item_times.erase(item);
     297            0 : }
     298              : 
     299            0 : void Profiler::notifyListeners(PROFILE_EVENT event,
     300              :                                const std::shared_ptr<ProfileEventData> data) {
     301            0 :   std::lock_guard<std::mutex> lock(listeners_mutex);
     302              : 
     303            0 :   for (auto &l : time_item_listeners[data->time_item])
     304            0 :     l->notify(event, data);
     305              : 
     306            0 :   for (auto &l : listeners)
     307            0 :     l->notify(event, data);
     308            0 : }
     309              : 
     310            0 : void Profiler::subscribe(std::shared_ptr<ProfileListener> listener,
     311              :                          const std::set<int> &time_items) {
     312            0 :   if (listener == nullptr) {
     313            0 :     throw std::invalid_argument("listener is null!");
     314              :   }
     315              : 
     316            0 :   std::lock_guard<std::mutex> lock(listeners_mutex);
     317            0 :   if (time_items.empty()) {
     318              :     listeners.insert(listener);
     319              :   } else {
     320            0 :     for (auto item : time_items)
     321              :       time_item_listeners[item].insert(listener);
     322              :   }
     323            0 : }
     324              : 
     325            0 : void Profiler::unsubscribe(std::shared_ptr<ProfileListener> listener) {
     326            0 :   std::lock_guard<std::mutex> lock(listeners_mutex);
     327              :   listeners.erase(listener);
     328              : 
     329            0 :   for (auto &[item, listeners] : time_item_listeners) {
     330              :     auto found = listeners.find(listener);
     331            0 :     if (found != listeners.end())
     332              :       listeners.erase(found);
     333              :   }
     334            0 : }
     335              : 
     336            0 : int Profiler::registerTimeItem(const std::string &name) {
     337            0 :   std::lock_guard<std::mutex> lock_listener(listeners_mutex);
     338            0 :   std::lock_guard<std::mutex> lock(registr_mutex);
     339              : 
     340            0 :   int item = time_item_names.size() + 1;
     341              : 
     342              :   time_item_names[item] = name;
     343            0 :   time_item_listeners[item] = std::set<std::shared_ptr<ProfileListener>>();
     344              : 
     345            0 :   ml_logd("[Profiler] Event registered, time item name: %s key: %d",
     346              :           name.c_str(), item);
     347              : 
     348            0 :   return item;
     349              : }
     350              : 
     351            0 : void Profiler::alloc(const void *ptr, size_t size, const std::string &str,
     352              :                      const std::string &policy, bool fsu) {
     353            0 :   std::lock_guard<std::mutex> lock(allocates_mutex);
     354              : 
     355              : #ifdef DEBUG
     356              :   auto found = allocates.find(ptr);
     357              :   if (found != allocates.end())
     358              :     throw std::invalid_argument("memory profiler is already allocated");
     359              : #endif
     360              : 
     361            0 :   allocates[ptr] = std::tuple<size_t, timepoint, std::string>(
     362            0 :     size, std::chrono::steady_clock::now(), str);
     363              : 
     364            0 :   total_size += size;
     365              : 
     366              :   auto data = std::make_shared<ProfileEventData>(
     367            0 :     0, size, total_size.load(), str, std::chrono::microseconds(0), policy, fsu);
     368            0 :   notifyListeners(EVENT_MEM_ALLOC, data);
     369            0 : }
     370              : 
     371            0 : void Profiler::dealloc(const void *ptr, const std::string &policy, bool fsu) {
     372            0 :   std::lock_guard<std::mutex> lock(allocates_mutex);
     373              : 
     374            0 :   auto end = std::chrono::steady_clock::now();
     375              :   auto found = allocates.find(ptr);
     376              : 
     377            0 :   if (found == allocates.end())
     378            0 :     throw std::invalid_argument("memory profiler didn't allocated");
     379              : 
     380            0 :   auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
     381              :     end - std::get<timepoint>(found->second));
     382              : 
     383            0 :   auto size = std::get<size_t>(found->second);
     384              :   total_size -= size;
     385              : 
     386              :   auto str = std::get<std::string>(found->second);
     387            0 :   auto data = std::make_shared<ProfileEventData>(0, size, total_size.load(),
     388              :                                                  str, duration, policy, fsu);
     389              : 
     390            0 :   notifyListeners(EVENT_MEM_DEALLOC, data);
     391              : 
     392              :   allocates.erase(found);
     393            0 : }
     394              : 
     395            0 : void Profiler::annotate(const std::string &str) {
     396            0 :   std::lock_guard<std::mutex> lock(allocates_mutex);
     397              : 
     398            0 :   auto data = std::make_shared<ProfileEventData>(0, 0, 0, str,
     399            0 :                                                  std::chrono::microseconds(0));
     400              : 
     401            0 :   notifyListeners(EVENT_MEM_ANNOTATE, data);
     402            0 : }
     403              : 
     404              : } // namespace profile
     405              : 
     406              : } // namespace nntrainer
        

Generated by: LCOV version 2.0-1