Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2024 Sungsik Kong <ss.kong@samsung.com>
4 : *
5 : * @file cblas_interface.cpp
6 : * @date 23 April 2024
7 : * @see https://github.com/nntrainer/nntrainer
8 : * @author Sungsik Kong <ss.kong@samsung.com>
9 : * @bug No known bugs except for NYI items
10 : * @brief Function interface to use cblas lib from cpu_backend
11 : *
12 : */
13 :
14 : #include <cblas.h>
15 : #include <cblas_interface.h>
16 :
17 : namespace nntrainer {
18 :
19 : /**
20 : * @brief Set openblas #thread.
21 : * @param [in] num_threads Use BLAS_NUM_THREADS if < 0.
22 : * Otherwise, directly call openblas API.
23 : */
24 61 : void __openblas_set_num_threads(int num_threads) {
25 61 : if (num_threads < 0) {
26 : #ifdef BLAS_NUM_THREADS
27 : openblas_set_num_threads(BLAS_NUM_THREADS);
28 : #else
29 : /// Without openblas_set_num_threads,
30 : /// it's set std::thread::hardware_concurrency()
31 : /// It can be too high especially when the given blas function is small
32 : /// or if there are other threads already created (nntrainer thread pool)
33 : /// With big-little & threadboost, hardware_concurrency might be not good.
34 : /// @todo configure this! (4? num of big cores? ...)
35 : #endif
36 : } else {
37 0 : openblas_set_num_threads(num_threads);
38 : }
39 61 : }
40 15254 : void __cblas_saxpy(const unsigned int N, const float alpha, const float *X,
41 : const unsigned int incX, float *Y, const unsigned int incY) {
42 15254 : cblas_saxpy(N, alpha, X, incX, Y, incY);
43 15254 : }
44 :
45 88188 : void __cblas_sgemv(const unsigned int TStorageOrder, bool TransA,
46 : const unsigned int M, const unsigned int N,
47 : const float alpha, const float *A, const unsigned int lda,
48 : const float *X, const unsigned int incX, const float beta,
49 : float *Y, const unsigned int incY) {
50 88188 : CBLAS_TRANSPOSE transA = TransA ? CblasTrans : CblasNoTrans;
51 88188 : CBLAS_ORDER order = TStorageOrder ? CblasColMajor : CblasRowMajor;
52 88188 : cblas_sgemv(order, transA, M, N, alpha, A, lda, X, incX, beta, Y, incY);
53 88188 : }
54 :
55 245 : float __cblas_sdot(const unsigned int N, const float *X,
56 : const unsigned int incX, const float *Y,
57 : const unsigned int incY) {
58 245 : return cblas_sdot(N, X, incX, Y, incY);
59 : }
60 :
61 0 : void __cblas_scopy(const unsigned int N, const float *X,
62 : const unsigned int incX, float *Y, const unsigned int incY) {
63 0 : cblas_scopy(N, X, incX, Y, incY);
64 0 : }
65 :
66 2613 : void __cblas_sscal(const unsigned int N, const float alpha, float *X,
67 : const unsigned int incX) {
68 2613 : cblas_sscal(N, alpha, X, incX);
69 2613 : }
70 :
71 1935 : float __cblas_snrm2(const unsigned int N, const float *X,
72 : const unsigned int incX) {
73 1935 : return cblas_snrm2(N, X, incX);
74 : }
75 :
76 16977 : void __cblas_sgemm(const unsigned int TStorageOrder, bool TransA, bool TransB,
77 : const unsigned int M, const unsigned int N,
78 : const unsigned int K, const float alpha, const float *A,
79 : const unsigned int lda, const float *B,
80 : const unsigned int ldb, const float beta, float *C,
81 : const unsigned int ldc) {
82 16977 : CBLAS_TRANSPOSE transA = TransA ? CblasTrans : CblasNoTrans;
83 16977 : CBLAS_TRANSPOSE transB = TransB ? CblasTrans : CblasNoTrans;
84 16977 : CBLAS_ORDER order = TStorageOrder ? CblasColMajor : CblasRowMajor;
85 16977 : cblas_sgemm(order, transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C,
86 : ldc);
87 16977 : }
88 :
89 3 : unsigned int __cblas_isamax(const unsigned int N, const float *X,
90 : const unsigned int incX) {
91 3 : return cblas_isamax(N, X, incX);
92 : }
93 : } // namespace nntrainer
|