LCOV - code coverage report
Current view: top level - nntrainer/tensor/cpu_backend/ggml_interface - ggml_interface.cpp (source / functions) Coverage Total Hit
Test: coverage_filtered.info Lines: 57.4 % 54 31
Test Date: 2025-12-14 20:38:17 Functions: 70.6 % 17 12

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : /**
       3              :  * Copyright (C) 2025 Sungsik Kong <ss.kong@samsung.com>
       4              :  *
       5              :  * @file   ggml_interface.cpp
       6              :  * @date   13 August 2025
       7              :  * @see    https://github.com/nnstreamer/nntrainer
       8              :  * @author Sungsik Kong <ss.kong@samsung.com>
       9              :  * @bug    No known bugs except for NYI items
      10              :  * @brief  Function interface to use ggml lib from cpu_backend
      11              :  */
      12              : 
      13              : #include <algorithm>
      14              : #include <cmath>
      15              : #include <ggml_interface.h>
      16              : #include <nntr_ggml_impl.h>
      17              : #include <nntr_ggml_impl_utils.h>
      18              : #include <string>
      19              : #include <thread>
      20              : #include <vector>
      21              : 
      22              : namespace nntrainer {
      23              : 
      24           37 : void __ggml_init() { nntr_ggml_init(); }
      25              : 
      26           31 : size_t __ggml_quantize_q4_0(const float *src, void *dst, int64_t nrow,
      27              :                             int64_t n_per_row, const float *quant_weights) {
      28           31 :   return nntr_quantize_q4_0(src, dst, nrow, n_per_row, quant_weights);
      29              : }
      30              : 
      31            8 : size_t __ggml_quantize_q4_K(const float *src, void *dst, int64_t nrow,
      32              :                             int64_t n_per_row, const float *quant_weights) {
      33            8 :   return nntr_quantize_q4_K(src, dst, nrow, n_per_row, quant_weights);
      34              : }
      35              : 
      36           72 : size_t __ggml_quantize_q6_K(const float *src, void *dst, int64_t nrow,
      37              :                             int64_t n_per_row, const float *quant_weights) {
      38           72 :   return nntr_quantize_q6_K(src, dst, nrow, n_per_row, quant_weights);
      39              : }
      40              : 
      41            0 : size_t __ggml_quantize_q8_0(const float *src, void *dst, int64_t nrow,
      42              :                             int64_t n_per_row, const float *quant_weights) {
      43            0 :   return nntr_quantize_q8_0(src, dst, nrow, n_per_row, quant_weights);
      44              : }
      45              : 
      46           63 : void __ggml_quantize_row_q6_K(const float *src, void *dst, int64_t k) {
      47           63 :   __ggml_quantize_q6_K(src, dst, 1, k, nullptr);
      48           63 : }
      49              : 
      50              : template <>
      51           63 : void __ggml_quantize_row_q8_K(const float *src, void *dst, int64_t k) {
      52           63 :   nntr_quantize_row_q8_K(src, dst, k);
      53           63 : }
      54              : 
      55           49 : void __ggml_dequantize_row_q4_0(const void *x_raw, float *y, int64_t k) {
      56           49 :   nntr_dequantize_row_q4_0(x_raw, y, k);
      57           49 : }
      58              : 
      59            1 : void __ggml_dequantize_row_q4_K(const void *x_raw, float *y, int64_t k) {
      60            1 :   nntr_dequantize_row_q4_K(x_raw, y, k);
      61            1 : }
      62              : 
      63            2 : void __ggml_dequantize_row_q6_K(const void *x, float *y, int64_t k) {
      64            2 :   nntr_dequantize_row_q6_K(x, y, k);
      65            2 : }
      66              : 
      67              : template <>
      68            0 : void __ggml_dequantize_row_q8_K(const void *x, float *y, int64_t k) {
      69            0 :   nntr_dequantize_row_q8_K(x, y, k);
      70            0 : }
      71              : 
      72           63 : float __ggml_vec_dot_q6_K_q8_K(const unsigned int K,
      73              :                                const void *__restrict v_q6_K,
      74              :                                const void *__restrict v_q8_K) {
      75              :   float result;
      76              :   int bs = 1, bx = 1, by = 1,
      77              :       nrc = 1; // unused variables in ggml_vec_dot_q6_K_q8_K
      78           63 :   nntr_vec_dot_q6_K_q8_K(K, &result, bs, v_q6_K, bx, v_q8_K, by, nrc);
      79           63 :   return result;
      80              : }
      81              : 
      82            0 : float __ggml_vec_dot_q6_K_f32(const unsigned int K, const void *v_q6_K,
      83              :                               const float *f) {
      84              :   // Quantization of activations
      85            0 :   int blocks_per_row = (K + QK_K - 1) / QK_K;
      86            0 :   int q8_K_activation_size = sizeof(block_q8_K) * blocks_per_row;
      87            0 :   std::vector<char> v_q8_activation = std::vector<char>(q8_K_activation_size);
      88            0 :   __ggml_quantize_row_q8_K(f, v_q8_activation.data(), K);
      89              : 
      90            0 :   return __ggml_vec_dot_q6_K_q8_K(K, v_q6_K, v_q8_activation.data());
      91            0 : }
      92              : 
      93            0 : float __ggml_vec_dot_q6_K(const unsigned int K, const void *__restrict v_q6_K,
      94              :                           const float *__restrict activation) {
      95              :   float result;
      96              :   int bs = 1, bx = 1, by = 1,
      97              :       nrc = 1; // unused variables in ggml_vec_dot_q6_K_q8_K
      98              : 
      99            0 :   int blocks_per_row = (K + QK_K - 1) / QK_K;
     100            0 :   int q8_K_activation_size = sizeof(block_q8_K) * blocks_per_row;
     101            0 :   std::vector<char> v_q8_activation = std::vector<char>(q8_K_activation_size);
     102            0 :   __ggml_quantize_row_q8_K(activation, v_q8_activation.data(), K);
     103              : 
     104            0 :   nntr_vec_dot_q6_K_q8_K(K, &result, bs, v_q6_K, bx, v_q8_activation.data(), by,
     105              :                          nrc);
     106            0 :   return result;
     107            0 : }
     108              : 
     109            0 : void __ggml_repack_q4_0_to_q4_0_4(void *W, void *repacked_W, size_t data_size,
     110              :                                   const unsigned int M, const unsigned int N) {
     111            0 :   nntr_repack_q4_0_to_q4_0_4_bl(W, 8, repacked_W, data_size, M, N);
     112            0 : }
     113              : 
     114           30 : void __ggml_repack_q4_0_to_q4_0_8(void *W, void *repacked_W, size_t data_size,
     115              :                                   const unsigned int M, const unsigned int N) {
     116           30 :   nntr_repack_q4_0_to_q4_0_8_bl(W, 8, repacked_W, data_size, M, N);
     117           30 : }
     118              : 
     119            7 : void __ggml_repack_q4_K_to_q4_K_8(void *W, void *repacked_W, size_t data_size,
     120              :                                   const unsigned int M, const unsigned int N) {
     121            7 :   nntr_repack_q4_K_to_q4_K_8_bl(W, 8, repacked_W, data_size, M, N);
     122            7 : }
     123              : 
     124              : } // namespace nntrainer
        

Generated by: LCOV version 2.0-1