LCOV - code coverage report
Current view: top level - nntrainer/tensor/cpu_backend/cblas_interface - cblas_interface.cpp (source / functions) Coverage Total Hit
Test: coverage_filtered.info Lines: 86.7 % 30 26
Test Date: 2026-01-12 20:43:37 Functions: 88.9 % 9 8

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * Copyright (C) 2024 Sungsik Kong <ss.kong@samsung.com>
       4              :  *
       5              :  * @file cblas_interface.cpp
       6              :  * @date   23 April 2024
       7              :  * @see    https://github.com/nntrainer/nntrainer
       8              :  * @author Sungsik Kong <ss.kong@samsung.com>
       9              :  * @bug    No known bugs except for NYI items
      10              :  * @brief  Function interface to use cblas lib from cpu_backend
      11              :  *
      12              :  */
      13              : 
      14              : #include <cblas.h>
      15              : #include <cblas_interface.h>
      16              : 
      17              : namespace nntrainer {
      18              : 
      19              : /**
      20              :  * @brief Set the number of threads used by OpenBLAS.
      21              :  * @param [in] num_threads If < 0, use BLAS_NUM_THREADS when it is defined.
      22              :  *                         Otherwise, pass the value to the OpenBLAS API.
      23              :  */
      24           61 : void __openblas_set_num_threads(int num_threads) {
      25           61 :   if (num_threads < 0) {
      26              :   #ifdef BLAS_NUM_THREADS
      27              :     openblas_set_num_threads(BLAS_NUM_THREADS);
      28              :   #else
      29              :     /// If openblas_set_num_threads is not called, the thread count
      30              :     /// defaults to std::thread::hardware_concurrency().
      31              :     /// That can be too high, especially when the BLAS workload is small
      32              :     /// or when other threads already exist (e.g. the nntrainer thread pool).
      33              :     /// On big.LITTLE cores with thread boost, hardware_concurrency may be a poor choice.
      34              :     /// @todo Make this configurable (4? number of big cores? ...)
      35              :   #endif
      36              :   } else {
      37            0 :     openblas_set_num_threads(num_threads);
      38              :   }
      39           61 : }
      40        15254 : void __cblas_saxpy(const unsigned int N, const float alpha, const float *X,
      41              :                    const unsigned int incX, float *Y, const unsigned int incY) {
      42        15254 :   cblas_saxpy(N, alpha, X, incX, Y, incY);
      43        15254 : }
      44              : 
      45        88188 : void __cblas_sgemv(const unsigned int TStorageOrder, bool TransA,
      46              :                    const unsigned int M, const unsigned int N,
      47              :                    const float alpha, const float *A, const unsigned int lda,
      48              :                    const float *X, const unsigned int incX, const float beta,
      49              :                    float *Y, const unsigned int incY) {
      50        88188 :   CBLAS_TRANSPOSE transA = TransA ? CblasTrans : CblasNoTrans;
      51        88188 :   CBLAS_ORDER order = TStorageOrder ? CblasColMajor : CblasRowMajor;
      52        88188 :   cblas_sgemv(order, transA, M, N, alpha, A, lda, X, incX, beta, Y, incY);
      53        88188 : }
      54              : 
      55          245 : float __cblas_sdot(const unsigned int N, const float *X,
      56              :                    const unsigned int incX, const float *Y,
      57              :                    const unsigned int incY) {
      58          245 :   return cblas_sdot(N, X, incX, Y, incY);
      59              : }
      60              : 
      61            0 : void __cblas_scopy(const unsigned int N, const float *X,
      62              :                    const unsigned int incX, float *Y, const unsigned int incY) {
      63            0 :   cblas_scopy(N, X, incX, Y, incY);
      64            0 : }
      65              : 
      66         2613 : void __cblas_sscal(const unsigned int N, const float alpha, float *X,
      67              :                    const unsigned int incX) {
      68         2613 :   cblas_sscal(N, alpha, X, incX);
      69         2613 : }
      70              : 
      71         1935 : float __cblas_snrm2(const unsigned int N, const float *X,
      72              :                     const unsigned int incX) {
      73         1935 :   return cblas_snrm2(N, X, incX);
      74              : }
      75              : 
      76        16977 : void __cblas_sgemm(const unsigned int TStorageOrder, bool TransA, bool TransB,
      77              :                    const unsigned int M, const unsigned int N,
      78              :                    const unsigned int K, const float alpha, const float *A,
      79              :                    const unsigned int lda, const float *B,
      80              :                    const unsigned int ldb, const float beta, float *C,
      81              :                    const unsigned int ldc) {
      82        16977 :   CBLAS_TRANSPOSE transA = TransA ? CblasTrans : CblasNoTrans;
      83        16977 :   CBLAS_TRANSPOSE transB = TransB ? CblasTrans : CblasNoTrans;
      84        16977 :   CBLAS_ORDER order = TStorageOrder ? CblasColMajor : CblasRowMajor;
      85        16977 :   cblas_sgemm(order, transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C,
      86              :               ldc);
      87        16977 : }
      88              : 
      89            3 : unsigned int __cblas_isamax(const unsigned int N, const float *X,
      90              :                             const unsigned int incX) {
      91            3 :   return cblas_isamax(N, X, incX);
      92              : }
      93              : } // namespace nntrainer
        

Generated by: LCOV version 2.0-1