// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2020 Parichay Kapoor <pk.kapoor@samsung.com>
 *
 * @file weight.cpp
 * @date 22 September 2020
 * @see https://github.com/nnstreamer/nntrainer
 * @author Parichay Kapoor <pk.kapoor@samsung.com>
 * @bug No known bugs except for NYI items
 * @brief This is Weight Class for Neural Network
 *
 */

#include <util_func.h>
#include <weight.h>

#include <nntrainer_error.h>

namespace nntrainer {

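/**
 * @brief Construct a Weight from a single dimension shared by the variable
 * and its gradient. When the weight is trainable and its data type is not
 * FP32, an FP32 master copy (named "<name>:fp32") is created alongside the
 * low-precision variable for mixed-precision training.
 */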
Weight::Weight(const TensorDim &dim, const Initializer init,
               const WeightRegularizer reg, const float reg_const,
               const float decay_const, const float max_norm, bool train,
               bool alloc_now_, std::string name, unsigned int axis,
               float loss_scale_, bool is_mixed_) :
  Var_Grad(dim, init, train, alloc_now_, name),
  regularizer(reg),
  regularizer_constant(reg_const),
  decay(decay_const),
  clip_by_global_norm(max_norm),
  output_axis(axis),
  loss_scale(loss_scale_),
  is_mixed(is_mixed_) {
  if (init == Initializer::NONE)
    throw std::invalid_argument("Weight initializer cannot be none");
  if (regularizer == WeightRegularizer::UNKNOWN)
    throw std::invalid_argument("Weight regularizer unknown");

  std::string var32_suffix = ":fp32";
  std::string var32_name = name + var32_suffix;

  /**
   * @note We assume that if the weight data type is not FP32, an FP32 copy
   * of the weight is necessary to maintain accuracy. The master copy could
   * be of another data type; if such a case ever needs to be supported, the
   * code below must be updated.
   *
   * Also, loss_scale is not used inside Weight yet, but is kept for later
   * use.
   */

  if (train && dim.getDataType() != ml::train::TensorDim::DataType::FP32) {
    TensorDim var32_dim(dim);
    var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);

    var32 = std::make_shared<Tensor>(var32_dim, alloc_now_, init, var32_name);
  } else {
    var32 = std::make_shared<Tensor>(var32_name);
  }
}

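/**
 * @brief Construct a Weight whose variable and gradient dimensions are
 * given separately. Behaves like the single-dimension constructor above,
 * except that the gradient may use a different dimension (e.g. a different
 * data type) than the variable.
 */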
Weight::Weight(const TensorDim &dim_v, const TensorDim &dim_g,
               const Initializer init, const WeightRegularizer reg,
               const float reg_const, const float decay_const,
               const float max_norm, bool train, bool alloc_now_,
               std::string name, unsigned int axis, float loss_scale_,
               bool is_mixed_) :
  Var_Grad(dim_v, dim_g, init, train, alloc_now_, name),
  regularizer(reg),
  regularizer_constant(reg_const),
  decay(decay_const),
  clip_by_global_norm(max_norm),
  output_axis(axis),
  loss_scale(loss_scale_),
  is_mixed(is_mixed_) {
  if (init == Initializer::NONE)
    throw std::invalid_argument("Weight initializer cannot be none");
  if (regularizer == WeightRegularizer::UNKNOWN)
    throw std::invalid_argument("Weight regularizer unknown");

  std::string var32_suffix = ":fp32";
  std::string var32_name = name + var32_suffix;

  if (train && dim_v.getDataType() != ml::train::TensorDim::DataType::FP32) {
    TensorDim var32_dim(dim_v);
    var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);
    var32 = std::make_shared<Tensor>(var32_dim, alloc_now_, init, var32_name);
  } else {
    var32 = std::make_shared<Tensor>(var32_name);
  }
}

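/**
 * @brief Construct a Weight wrapping already-created variable, gradient,
 * and FP32 master tensors. Regularization, decay, and clipping are
 * disabled; when the gradient is non-empty and the weight is mixed
 * precision, the FP32 master copy is shared from v32 rather than allocated.
 */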
Weight::Weight(const Tensor &v, const Tensor &g, const Tensor &v32,
               const std::string &n, bool is_dependent,
               unsigned int output_axis_) :
  Var_Grad(v, g, n, is_dependent),
  regularizer(WeightRegularizer::NONE),
  regularizer_constant(1.0f),
  decay(0.0f),
  clip_by_global_norm(0.0f),
  output_axis(output_axis_),
  loss_scale(1.0),
  is_mixed(false),
  var32(std::make_shared<Tensor>(n + ":fp32")) {

  if (!g.empty() && isMixedPrecision()) {
    TensorDim var32_dim(v.getDim());
    var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);
    if (!v32.empty())
      var32 = std::make_shared<Tensor>(
        v32.getSharedDataTensor(var32_dim, 0, false, n + ":fp32"));
  }
}

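/**
 * @brief Construct a Weight that aliases externally owned tensors. The v32
 * pointer is wrapped with a no-op deleter since ownership stays with the
 * caller; a null v32 falls back to an empty placeholder tensor.
 */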
Weight::Weight(Tensor *v, Tensor *g, Tensor *v32, const WeightRegularizer reg,
               const float reg_const, const float decay, bool is_dependent,
               const float max_norm, unsigned int output_axis_,
               float loss_scale_, bool is_mixed_) :
  Var_Grad(v, g, is_dependent),
  regularizer(reg),
  regularizer_constant(reg_const),
  decay(decay),
  clip_by_global_norm(max_norm),
  output_axis(output_axis_),
  loss_scale(loss_scale_),
  is_mixed(is_mixed_),
  var32(std::shared_ptr<Tensor>(v32, [](void *) {})) {
  if (!v32)
    var32 = std::make_shared<Tensor>();
}

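/**
 * @brief Apply an externally computed gradient with the given learning
 * rate. For a mixed-precision weight with an FP32 gradient and a non-FP32
 * variable, the update is accumulated into the FP32 master copy and then
 * quantized back into the variable; otherwise it falls through to the
 * single-argument applyGradient(lr).
 */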
void Weight::applyGradient(double lr, Tensor &updated_grad) {
  if (isMixedPrecision() &&
      updated_grad.getDataType() == ml::train::TensorDim::DataType::FP32 &&
      var->getDataType() != ml::train::TensorDim::DataType::FP32) {
    var32->add_i(updated_grad, -lr);
    quantizeWeight();
    return;
  }

  applyGradient(lr);
}

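/**
 * @brief Copy the FP32 master weight back into the low-precision variable.
 * Quantized integer targets (QINT4/QINT8) are not yet implemented; an FP32
 * variable needs no conversion.
 */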
void Weight::quantizeWeight() {
  if (!isMixedPrecision())
    return;

  Tensor &var = getVariableRef();
  ml::train::TensorDim::DataType type = var.getDataType();
  switch (type) {
  case ml::train::TensorDim::DataType::QINT4:
    // NYI
    break;
  case ml::train::TensorDim::DataType::QINT8:
    // NYI
    break;
  case ml::train::TensorDim::DataType::FP16:
    getVariableRef().copyData(getVariableFP32Ref());
    break;
  case ml::train::TensorDim::DataType::FP32:
    break;
  default:
    break;
  }
}
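
/**
 * @note Illustrative sketch (not compiled) of the mixed-precision update
 * path above, assuming an FP16 weight `w` constructed with is_mixed set
 * and an FP32 gradient produced by the optimizer. The names `w` and
 * `fp32_grad` are hypothetical.
 *
 *   Tensor fp32_grad = ...;           // unscaled FP32 gradient
 *   w.applyGradient(0.01, fp32_grad); // var32 += -0.01 * fp32_grad,
 *                                     // then quantizeWeight() copies the
 *                                     // FP32 master back into the FP16
 *                                     // variable
 */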

} // namespace nntrainer