Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2025 Sungsik Kong <ss.kong@samsung.com>
4 : *
5 : * @file ggml_interface.cpp
6 : * @date 13 August 2025
7 : * @see https://github.com/nnstreamer/nntrainer
8 : * @author Sungsik Kong <ss.kong@samsung.com>
9 : * @bug No known bugs except for NYI items
10 : * @brief Function interface to use ggml lib from cpu_backend
11 : */
12 :
13 : #include <algorithm>
14 : #include <cmath>
15 : #include <ggml_interface.h>
16 : #include <nntr_ggml_impl.h>
17 : #include <nntr_ggml_impl_utils.h>
18 : #include <string>
19 : #include <thread>
20 : #include <vector>
21 :
22 : namespace nntrainer {
23 :
24 37 : void __ggml_init() { nntr_ggml_init(); }
25 :
26 31 : size_t __ggml_quantize_q4_0(const float *src, void *dst, int64_t nrow,
27 : int64_t n_per_row, const float *quant_weights) {
28 31 : return nntr_quantize_q4_0(src, dst, nrow, n_per_row, quant_weights);
29 : }
30 :
31 8 : size_t __ggml_quantize_q4_K(const float *src, void *dst, int64_t nrow,
32 : int64_t n_per_row, const float *quant_weights) {
33 8 : return nntr_quantize_q4_K(src, dst, nrow, n_per_row, quant_weights);
34 : }
35 :
36 72 : size_t __ggml_quantize_q6_K(const float *src, void *dst, int64_t nrow,
37 : int64_t n_per_row, const float *quant_weights) {
38 72 : return nntr_quantize_q6_K(src, dst, nrow, n_per_row, quant_weights);
39 : }
40 :
41 0 : size_t __ggml_quantize_q8_0(const float *src, void *dst, int64_t nrow,
42 : int64_t n_per_row, const float *quant_weights) {
43 0 : return nntr_quantize_q8_0(src, dst, nrow, n_per_row, quant_weights);
44 : }
45 :
46 63 : void __ggml_quantize_row_q6_K(const float *src, void *dst, int64_t k) {
47 63 : __ggml_quantize_q6_K(src, dst, 1, k, nullptr);
48 63 : }
49 :
50 : template <>
51 63 : void __ggml_quantize_row_q8_K(const float *src, void *dst, int64_t k) {
52 63 : nntr_quantize_row_q8_K(src, dst, k);
53 63 : }
54 :
55 49 : void __ggml_dequantize_row_q4_0(const void *x_raw, float *y, int64_t k) {
56 49 : nntr_dequantize_row_q4_0(x_raw, y, k);
57 49 : }
58 :
59 1 : void __ggml_dequantize_row_q4_K(const void *x_raw, float *y, int64_t k) {
60 1 : nntr_dequantize_row_q4_K(x_raw, y, k);
61 1 : }
62 :
63 2 : void __ggml_dequantize_row_q6_K(const void *x, float *y, int64_t k) {
64 2 : nntr_dequantize_row_q6_K(x, y, k);
65 2 : }
66 :
67 : template <>
68 0 : void __ggml_dequantize_row_q8_K(const void *x, float *y, int64_t k) {
69 0 : nntr_dequantize_row_q8_K(x, y, k);
70 0 : }
71 :
72 63 : float __ggml_vec_dot_q6_K_q8_K(const unsigned int K,
73 : const void *__restrict v_q6_K,
74 : const void *__restrict v_q8_K) {
75 : float result;
76 : int bs = 1, bx = 1, by = 1,
77 : nrc = 1; // unused variables in ggml_vec_dot_q6_K_q8_K
78 63 : nntr_vec_dot_q6_K_q8_K(K, &result, bs, v_q6_K, bx, v_q8_K, by, nrc);
79 63 : return result;
80 : }
81 :
82 0 : float __ggml_vec_dot_q6_K_f32(const unsigned int K, const void *v_q6_K,
83 : const float *f) {
84 : // Quantization of activations
85 0 : int blocks_per_row = (K + QK_K - 1) / QK_K;
86 0 : int q8_K_activation_size = sizeof(block_q8_K) * blocks_per_row;
87 0 : std::vector<char> v_q8_activation = std::vector<char>(q8_K_activation_size);
88 0 : __ggml_quantize_row_q8_K(f, v_q8_activation.data(), K);
89 :
90 0 : return __ggml_vec_dot_q6_K_q8_K(K, v_q6_K, v_q8_activation.data());
91 0 : }
92 :
93 0 : float __ggml_vec_dot_q6_K(const unsigned int K, const void *__restrict v_q6_K,
94 : const float *__restrict activation) {
95 : float result;
96 : int bs = 1, bx = 1, by = 1,
97 : nrc = 1; // unused variables in ggml_vec_dot_q6_K_q8_K
98 :
99 0 : int blocks_per_row = (K + QK_K - 1) / QK_K;
100 0 : int q8_K_activation_size = sizeof(block_q8_K) * blocks_per_row;
101 0 : std::vector<char> v_q8_activation = std::vector<char>(q8_K_activation_size);
102 0 : __ggml_quantize_row_q8_K(activation, v_q8_activation.data(), K);
103 :
104 0 : nntr_vec_dot_q6_K_q8_K(K, &result, bs, v_q6_K, bx, v_q8_activation.data(), by,
105 : nrc);
106 0 : return result;
107 0 : }
108 :
109 0 : void __ggml_repack_q4_0_to_q4_0_4(void *W, void *repacked_W, size_t data_size,
110 : const unsigned int M, const unsigned int N) {
111 0 : nntr_repack_q4_0_to_q4_0_4_bl(W, 8, repacked_W, data_size, M, N);
112 0 : }
113 :
114 30 : void __ggml_repack_q4_0_to_q4_0_8(void *W, void *repacked_W, size_t data_size,
115 : const unsigned int M, const unsigned int N) {
116 30 : nntr_repack_q4_0_to_q4_0_8_bl(W, 8, repacked_W, data_size, M, N);
117 30 : }
118 :
119 7 : void __ggml_repack_q4_K_to_q4_K_8(void *W, void *repacked_W, size_t data_size,
120 : const unsigned int M, const unsigned int N) {
121 7 : nntr_repack_q4_K_to_q4_K_8_bl(W, 8, repacked_W, data_size, M, N);
122 7 : }
123 :
124 : } // namespace nntrainer
|