// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2021 hyeonseok lee <hs89.lee@samsung.com>
 *
 * @file rnncell.cpp
 * @date 29 Oct 2021
 * @brief This is Recurrent Cell Layer Class of Neural Network
 * @see https://github.com/nnstreamer/nntrainer
 * @author hyeonseok lee <hs89.lee@samsung.com>
 * @bug No known bugs except for NYI items
 *
 */

#include <cmath>
#include <common_properties.h>

#include <layer_context.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
#include <rnncell.h>
#include <util_func.h>

namespace nntrainer {

static constexpr size_t SINGLE_INOUT_IDX = 0;

// - weight_ih ( weights of input to hidden )
// - weight_hh ( weights of hidden to hidden )
// - bias_h ( integrated bias of input and hidden, used when integrate_bias )
// - bias_ih ( input bias, used when integrate_bias is disabled )
// - bias_hh ( hidden bias, used when integrate_bias is disabled )
enum RNNCellParams {
  weight_ih,
  weight_hh,
  bias_h,
  bias_ih,
  bias_hh,
  dropout_mask
};

RNNCellLayer::RNNCellLayer() :
  LayerImpl(),
  rnncell_props(props::Unit(), props::IntegrateBias(),
                props::HiddenStateActivation() = ActivationType::ACT_TANH,
                props::DropOutRate()),
  acti_func(ActivationType::ACT_NONE, true),
  epsilon(1e-3f) {
  wt_idx.fill(std::numeric_limits<unsigned>::max());
}

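// finalize validates the two input dimensions, sets the output dimension to
// [ batch, 1, 1, unit ], and requests the weights (plus the dropout mask
// tensor when dropout is enabled).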
void RNNCellLayer::finalize(InitLayerContext &context) {
  const nntrainer::WeightRegularizer weight_regularizer =
    std::get<props::WeightRegularizer>(*layer_impl_props);
  const float weight_regularizer_constant =
    std::get<props::WeightRegularizerConstant>(*layer_impl_props);
  const Initializer weight_initializer =
    std::get<props::WeightInitializer>(*layer_impl_props);
  const Initializer bias_initializer =
    std::get<props::BiasInitializer>(*layer_impl_props);
  auto &weight_decay = std::get<props::WeightDecay>(*layer_impl_props);
  auto &bias_decay = std::get<props::BiasDecay>(*layer_impl_props);
  const bool disable_bias =
    std::get<props::DisableBias>(*layer_impl_props).get();

  NNTR_THROW_IF(std::get<props::Unit>(rnncell_props).empty(),
                std::invalid_argument)
    << "unit property missing for rnncell layer";
  const unsigned int unit = std::get<props::Unit>(rnncell_props).get();
  const bool integrate_bias =
    std::get<props::IntegrateBias>(rnncell_props).get();
  const nntrainer::ActivationType hidden_state_activation_type =
    std::get<props::HiddenStateActivation>(rnncell_props).get();
  const float dropout_rate = std::get<props::DropOutRate>(rnncell_props).get();

  NNTR_THROW_IF(context.getNumInputs() != 2, std::invalid_argument)
    << "RNNCell layer expects 2 inputs (one for the input and the other for "
       "the hidden state) but got " +
         std::to_string(context.getNumInputs()) + " input(s)";

  // input_dim = [ batch, 1, 1, feature_size ]
  const TensorDim &input_dim = context.getInputDimensions()[INOUT_INDEX::INPUT];
  NNTR_THROW_IF(input_dim.channel() != 1 || input_dim.height() != 1,
                std::invalid_argument)
    << "Input must be single time dimension for RNNCell (shape should be "
       "[batch_size, 1, 1, feature_size])";
  // input_hidden_state_dim = [ batch, 1, 1, unit ]
  const TensorDim &input_hidden_state_dim =
    context.getInputDimensions()[INOUT_INDEX::INPUT_HIDDEN_STATE];
  NNTR_THROW_IF(input_hidden_state_dim.channel() != 1 ||
                  input_hidden_state_dim.height() != 1,
                std::invalid_argument)
    << "Input hidden state's dimension should be [batch, 1, 1, unit] for "
       "RNNCell";

  const unsigned int batch_size = input_dim.batch();
  const unsigned int feature_size = input_dim.width();

  // output_hidden_state_dim = [ batch, 1, 1, unit ]
  TensorDim output_hidden_state_dim(batch_size, 1, 1, unit);
  context.setOutputDimensions({output_hidden_state_dim});

  // weight_initializer could be set separately for weight_ih and weight_hh
  // (kernel_initializer and recurrent_initializer in Keras); for now, both
  // are initialized the same way.

  // weight_ih_dim : [ 1, 1, feature_size, unit ]
  const TensorDim weight_ih_dim({feature_size, unit});
  wt_idx[RNNCellParams::weight_ih] = context.requestWeight(
    weight_ih_dim, weight_initializer, weight_regularizer,
    weight_regularizer_constant, weight_decay, "weight_ih", true);
  // weight_hh_dim : [ 1, 1, unit, unit ]
  const TensorDim weight_hh_dim({unit, unit});
  wt_idx[RNNCellParams::weight_hh] = context.requestWeight(
    weight_hh_dim, weight_initializer, weight_regularizer,
    weight_regularizer_constant, weight_decay, "weight_hh", true);
  if (!disable_bias) {
    if (integrate_bias) {
      // bias_h_dim : [ 1, 1, 1, unit ]
      const TensorDim bias_h_dim({unit});
      wt_idx[RNNCellParams::bias_h] = context.requestWeight(
        bias_h_dim, bias_initializer, WeightRegularizer::NONE, 1.0f, bias_decay,
        "bias_h", true);
    } else {
      // bias_ih_dim : [ 1, 1, 1, unit ]
      const TensorDim bias_ih_dim({unit});
      wt_idx[RNNCellParams::bias_ih] = context.requestWeight(
        bias_ih_dim, bias_initializer, WeightRegularizer::NONE, 1.0f,
        bias_decay, "bias_ih", true);
      // bias_hh_dim : [ 1, 1, 1, unit ]
      const TensorDim bias_hh_dim({unit});
      wt_idx[RNNCellParams::bias_hh] = context.requestWeight(
        bias_hh_dim, bias_initializer, WeightRegularizer::NONE, 1.0f,
        bias_decay, "bias_hh", true);
    }
  }

  if (dropout_rate > epsilon) {
    // dropout_mask_dim = [ batch, 1, 1, unit ]
    const TensorDim dropout_mask_dim(batch_size, 1, 1, unit);
    wt_idx[RNNCellParams::dropout_mask] =
      context.requestTensor(dropout_mask_dim, "dropout_mask", Initializer::NONE,
                            false, TensorLifespan::ITERATION_LIFESPAN);
  }

  acti_func.setActiFunc(hidden_state_activation_type);

  if (!acti_func.supportInPlace()) {
    throw exception::not_supported(
      "Out of place activation functions not supported");
  }
}

void RNNCellLayer::setProperty(const std::vector<std::string> &values) {
  const std::vector<std::string> &remain_props =
    loadProperties(values, rnncell_props);
  LayerImpl::setProperty(remain_props);
}

void RNNCellLayer::exportTo(Exporter &exporter,
                            const ml::train::ExportMethods &method) const {
  LayerImpl::exportTo(exporter, method);
  exporter.saveResult(rnncell_props, method, this);
}

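// forwarding computes the rnncell recurrence
//   h_t = act(x_t * weight_ih + h_{t-1} * weight_hh + bias),
// where bias is bias_h (integrated) or bias_ih + bias_hh (separate), and
// applies the dropout mask to h_t during training when dropout is enabled.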
void RNNCellLayer::forwarding(RunLayerContext &context, bool training) {
  const bool disable_bias =
    std::get<props::DisableBias>(*layer_impl_props).get();

  const bool integrate_bias =
    std::get<props::IntegrateBias>(rnncell_props).get();
  const float dropout_rate = std::get<props::DropOutRate>(rnncell_props).get();

  const Tensor &input = context.getInput(INOUT_INDEX::INPUT);
  const Tensor &prev_hidden_state =
    context.getInput(INOUT_INDEX::INPUT_HIDDEN_STATE);
  Tensor &hidden_state = context.getOutput(INOUT_INDEX::OUTPUT_HIDDEN_STATE);

  const Tensor &weight_ih = context.getWeight(wt_idx[RNNCellParams::weight_ih]);
  const Tensor &weight_hh = context.getWeight(wt_idx[RNNCellParams::weight_hh]);
  Tensor empty;
  const Tensor &bias_h = !disable_bias && integrate_bias
                           ? context.getWeight(wt_idx[RNNCellParams::bias_h])
                           : empty;
  const Tensor &bias_ih = !disable_bias && !integrate_bias
                            ? context.getWeight(wt_idx[RNNCellParams::bias_ih])
                            : empty;
  const Tensor &bias_hh = !disable_bias && !integrate_bias
                            ? context.getWeight(wt_idx[RNNCellParams::bias_hh])
                            : empty;

  input.dot(weight_ih, hidden_state);
  prev_hidden_state.dot(weight_hh, hidden_state, false, false, 1.0f);
  if (!disable_bias) {
    if (integrate_bias) {
      hidden_state.add_i(bias_h);
    } else {
      hidden_state.add_i(bias_ih);
      hidden_state.add_i(bias_hh);
    }
  }

  acti_func.run_fn(hidden_state, hidden_state);

  if (dropout_rate > epsilon && training) {
    Tensor &dropout_mask =
      context.getTensor(wt_idx[RNNCellParams::dropout_mask]);
    dropout_mask.dropout_mask(dropout_rate);
    hidden_state.multiply_i(dropout_mask);
  }
}

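// calcDerivative propagates the incoming derivative to both inputs:
//   d_x = d_h' * weight_ih^T, d_h_{t-1} = d_h' * weight_hh^T,
// where d_h' = act'(h_t) (*) d_h, masked by dropout when enabled.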
void RNNCellLayer::calcDerivative(RunLayerContext &context) {
  const float dropout_rate = std::get<props::DropOutRate>(rnncell_props).get();

  Tensor &outgoing_derivative =
    context.getOutgoingDerivative(INOUT_INDEX::INPUT);
  Tensor &d_prev_hidden_state =
    context.getOutgoingDerivative(INOUT_INDEX::INPUT_HIDDEN_STATE);
  const Tensor &hidden_state =
    context.getOutput(INOUT_INDEX::OUTPUT_HIDDEN_STATE);
  const Tensor &d_hidden_state =
    context.getIncomingDerivative(INOUT_INDEX::OUTPUT_HIDDEN_STATE);
  const Tensor &weight_ih = context.getWeight(wt_idx[RNNCellParams::weight_ih]);
  const Tensor &weight_hh = context.getWeight(wt_idx[RNNCellParams::weight_hh]);

  /// @note the calculation of d_hidden_state_ is duplicated in calcGradient.
  /// Needs optimization.
  Tensor d_hidden_state_;
  if (dropout_rate > epsilon) {
    const Tensor &dropout_mask =
      context.getTensor(wt_idx[RNNCellParams::dropout_mask]);
    d_hidden_state.multiply(dropout_mask, d_hidden_state_);
  } else {
    d_hidden_state_.copy(d_hidden_state);
  }

  Tensor hidden_state_;
  hidden_state_.copy(hidden_state);
  acti_func.run_prime_fn(hidden_state_, d_hidden_state_, d_hidden_state_);

  d_hidden_state_.dot(weight_ih, outgoing_derivative, false, true);
  d_hidden_state_.dot(weight_hh, d_prev_hidden_state, false, true);
}

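// calcGradient accumulates the weight and bias gradients:
//   d_weight_ih += x_t^T * d_h', d_weight_hh += h_{t-1}^T * d_h',
// and each bias gradient accumulates d_h' summed over the batch axis.
// Gradients are zeroed on first access since they accumulate across steps.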
void RNNCellLayer::calcGradient(RunLayerContext &context) {
  const bool disable_bias =
    std::get<props::DisableBias>(*layer_impl_props).get();

  const bool integrate_bias =
    std::get<props::IntegrateBias>(rnncell_props).get();
  const float dropout_rate = std::get<props::DropOutRate>(rnncell_props).get();

  const Tensor &input = context.getInput(INOUT_INDEX::INPUT);
  const Tensor &prev_hidden_state =
    context.getInput(INOUT_INDEX::INPUT_HIDDEN_STATE);
  const Tensor &hidden_state =
    context.getOutput(INOUT_INDEX::OUTPUT_HIDDEN_STATE);
  const Tensor &d_hidden_state =
    context.getIncomingDerivative(INOUT_INDEX::OUTPUT_HIDDEN_STATE);

  Tensor &d_weight_ih = context.getWeightGrad(wt_idx[RNNCellParams::weight_ih]);
  Tensor &d_weight_hh = context.getWeightGrad(wt_idx[RNNCellParams::weight_hh]);
  Tensor empty;
  Tensor &d_bias_h = !disable_bias && integrate_bias
                       ? context.getWeightGrad(wt_idx[RNNCellParams::bias_h])
                       : empty;
  Tensor &d_bias_ih = !disable_bias && !integrate_bias
                        ? context.getWeightGrad(wt_idx[RNNCellParams::bias_ih])
                        : empty;
  Tensor &d_bias_hh = !disable_bias && !integrate_bias
                        ? context.getWeightGrad(wt_idx[RNNCellParams::bias_hh])
                        : empty;

  if (context.isGradientFirstAccess(wt_idx[RNNCellParams::weight_ih])) {
    d_weight_ih.setZero();
  }
  if (context.isGradientFirstAccess(wt_idx[RNNCellParams::weight_hh])) {
    d_weight_hh.setZero();
  }
  if (!disable_bias) {
    if (integrate_bias) {
      if (context.isGradientFirstAccess(wt_idx[RNNCellParams::bias_h])) {
        d_bias_h.setZero();
      }
    } else {
      if (context.isGradientFirstAccess(wt_idx[RNNCellParams::bias_ih])) {
        d_bias_ih.setZero();
      }
      if (context.isGradientFirstAccess(wt_idx[RNNCellParams::bias_hh])) {
        d_bias_hh.setZero();
      }
    }
  }

  Tensor d_hidden_state_;
  if (dropout_rate > epsilon) {
    const Tensor &dropout_mask =
      context.getTensor(wt_idx[RNNCellParams::dropout_mask]);
    d_hidden_state.multiply(dropout_mask, d_hidden_state_);
  } else {
    d_hidden_state_.copy(d_hidden_state);
  }

  Tensor hidden_state_;
  hidden_state_.copy(hidden_state);
  acti_func.run_prime_fn(hidden_state_, d_hidden_state_, d_hidden_state_);

  input.dot(d_hidden_state_, d_weight_ih, true, false, 1.0);
  prev_hidden_state.dot(d_hidden_state_, d_weight_hh, true, false, 1.0);
  if (!disable_bias) {
    if (integrate_bias) {
      d_hidden_state_.sum(0, d_bias_h, 1.0, 1.0);
    } else {
      d_hidden_state_.sum(0, d_bias_ih, 1.0, 1.0);
      d_hidden_state_.sum(0, d_bias_hh, 1.0, 1.0);
    }
  }
}

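// Only the dropout mask tensor depends on the batch size; the weights are
// batch-independent, so nothing else needs updating here.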
void RNNCellLayer::setBatch(RunLayerContext &context, unsigned int batch) {
  const float dropout_rate = std::get<props::DropOutRate>(rnncell_props).get();
  if (dropout_rate > epsilon) {
    context.updateTensor(wt_idx[RNNCellParams::dropout_mask], batch);
  }
}

} // namespace nntrainer