// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2021 Jijoong Moon <jijoong.moon@samsung.com>
 *
 * @file lstm.h
 * @date 31 March 2021
 * @brief This is Long Short-Term Memory Layer Class of Neural Network
 * @see https://github.com/nnstreamer/nntrainer
 * @author Jijoong Moon <jijoong.moon@samsung.com>
 * @bug No known bugs except for NYI items
 *
 */

#ifndef __LSTM_H__
#define __LSTM_H__
#ifdef __cplusplus

#include <acti_func.h>
#include <common_properties.h>
#include <lstmcell_core.h>

namespace nntrainer {

/**
 * @class LSTMLayer
 * @brief LSTM (Long Short-Term Memory) layer
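 *
 * Usage example (a minimal sketch, not verbatim; assumes the ml::train
 * layer factory and the "lstm" type key declared below):
 * @code
 *   auto lstm = ml::train::createLayer("lstm", {"unit=32"});
 * @endcode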
 */
class LSTMLayer : public LSTMCore {
public:
  /**
   * @brief Constructor of LSTMLayer
   */
  LSTMLayer();

  /**
   * @brief Destructor of LSTMLayer
   */
  ~LSTMLayer() = default;

  /**
   * @brief Move constructor.
   * @param[in] rhs LSTMLayer to be moved.
   */
  LSTMLayer(LSTMLayer &&rhs) noexcept;

  /**
   * @brief Move assignment operator.
   * @param[in] rhs LSTMLayer to be moved.
   */
  LSTMLayer &operator=(LSTMLayer &&rhs);

  /**
   * @copydoc Layer::finalize(InitLayerContext &context)
   */
  void finalize(InitLayerContext &context) override;

  /**
   * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
   */
  void forwarding(RunLayerContext &context, bool training) override;

  /**
   * @copydoc Layer::calcDerivative(RunLayerContext &context)
   */
  void calcDerivative(RunLayerContext &context) override;

  /**
   * @copydoc Layer::calcGradient(RunLayerContext &context)
   */
  void calcGradient(RunLayerContext &context) override;

  /**
   * @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
   * method)
   */
  void exportTo(Exporter &exporter,
                const ml::train::ExportMethods &method) const override;

  /**
   * @copydoc Layer::getType()
   */
  const std::string getType() const override { return LSTMLayer::type; }

  /**
   * @copydoc Layer::supportBackwarding()
   */
  bool supportBackwarding() const override { return true; }

  /**
   * @copydoc Layer::setProperty(const PropertyType type, const std::string
   * &value)
   */
  void setProperty(const std::vector<std::string> &values) override;

  /**
   * @copydoc Layer::setBatch(RunLayerContext &context, unsigned int batch)
   */
  void setBatch(RunLayerContext &context, unsigned int batch) override;

  static constexpr const char *type = "lstm";

private:
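  /** number of gates per LSTM unit: input, forget, memory cell (g), output */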
  static constexpr unsigned int NUM_GATE = 4;

  /** common properties like Unit, IntegrateBias, HiddenStateActivation and
   * RecurrentActivation are in lstmcore_props */

  /**
   * ReturnSequences: option for return sequence
   * Bidirectional: option for bidirectional
   * DropOutRate: dropout rate
   * MaxTimestep: maximum timestep for lstm
   * (illustrative setProperty() strings follow the declaration below)
   */
  std::tuple<props::ReturnSequences, props::Bidirectional, props::DropOutRate,
             props::MaxTimestep>
    lstm_props;
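
  /* Illustrative setProperty() strings for the properties above (assumed
   * snake_case keys, for orientation only):
   *   {"return_sequences=true", "bidirectional=false", "dropout=0.2",
   *    "max_timestep=10"}
   */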
  std::array<unsigned int, 17> wt_idx; /**< indices of the weights */

  /**
   * @brief run lstm forwarding for batch_first input; a sketch of the
   * recurrence follows this declaration
   *
   * @param NUM_GATE number of gates, which is 4 for lstm
   * @param batch_size batch size
   * @param feature_size feature size
   * @param disable_bias whether to disable bias or not
   * @param unit number of output neurons
   * @param integrate_bias integrate bias_ih, bias_hh to bias_h
   * @param acti_func activation function for memory cell, cell state
   * @param recurrent_acti_func activation function for input/output/forget
   * gate
   * @param enable_dropout whether to apply dropout
   * @param dropout_rate dropout rate
   * @param max_timestep maximum timestep for lstm
   * @param reverse indicate forward/backward direction for input in
   * bidirectional lstm
   * @param input_ input
   * @param weight_ih weight for input to hidden
   * @param weight_hh weight for hidden to hidden
   * @param bias_h bias for input and hidden
   * @param bias_ih bias for input
   * @param bias_hh bias for hidden
   * @param hidden_state_ hidden state
   * @param cell_state_ cell state
   * @param ifgo_ input gate, forget gate, memory cell, output gate
   * @param mask_ dropout mask
   */
  void forwardingBatchFirstLSTM(
    unsigned int NUM_GATE, const unsigned int batch_size,
    const unsigned int feature_size, const bool disable_bias,
    const unsigned int unit, const bool integrate_bias, ActiFunc &acti_func,
    ActiFunc &recurrent_acti_func, const bool enable_dropout,
    const float dropout_rate, const unsigned int max_timestep,
    const bool reverse, const Tensor &input_, const Tensor &weight_ih,
    const Tensor &weight_hh, const Tensor &bias_h, const Tensor &bias_ih,
    const Tensor &bias_hh, Tensor &hidden_state_, Tensor &cell_state_,
    Tensor &ifgo_, const Tensor &mask_);
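
  /*
   * For reference, the per-timestep recurrence computed above follows the
   * standard LSTM formulation (a sketch in the parameter names of the doc
   * comment, not verbatim code; ifgo_ is laid out as [i, f, g, o]):
   *
   *   ifgo_t = x_t . weight_ih + h_{t-1} . weight_hh + bias
   *            (bias = bias_h if integrate_bias, else bias_ih + bias_hh)
   *   i_t = recurrent_acti_func(ifgo_t[0])   // input gate
   *   f_t = recurrent_acti_func(ifgo_t[1])   // forget gate
   *   g_t = acti_func(ifgo_t[2])             // memory cell candidate
   *   o_t = recurrent_acti_func(ifgo_t[3])   // output gate
   *   c_t = f_t * c_{t-1} + i_t * g_t        // * is elementwise
   *   h_t = o_t * acti_func(c_t)
   */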

  /**
   * @brief calculate lstm gradient for batch_first input; a sketch of the
   * backward recurrence follows this declaration
   *
   * @param NUM_GATE number of gates, which is 4 for lstm
   * @param batch_size batch size
   * @param feature_size feature size
   * @param disable_bias whether to disable bias or not
   * @param unit number of output neurons
   * @param integrate_bias integrate bias_ih, bias_hh to bias_h
   * @param acti_func activation function for memory cell, cell state
   * @param recurrent_acti_func activation function for input/output/forget
   * gate
   * @param return_sequences return sequences
   * @param bidirectional bidirectional lstm
   * @param enable_dropout whether to apply dropout
   * @param dropout_rate dropout rate
   * @param max_timestep maximum timestep for lstm
   * @param reverse indicate forward/backward direction for input in
   * bidirectional lstm
   * @param input_ input
   * @param incoming_derivative derivative for output which is incoming
   * derivative
   * @param d_weight_ih weight_ih(weight for input to hidden) gradient
   * @param weight_hh weight for hidden to hidden
   * @param d_weight_hh weight_hh(weight for hidden to hidden) gradient
   * @param d_bias_h bias_h(bias for input and hidden) gradient
   * @param d_bias_ih bias_ih(bias for input) gradient
   * @param d_bias_hh bias_hh(bias for hidden) gradient
   * @param hidden_state_ hidden state
   * @param d_hidden_state_ hidden state gradient
   * @param cell_state_ cell state
   * @param d_cell_state_ cell state gradient
   * @param ifgo_ input gate, forget gate, memory cell, output gate
   * @param d_ifgo_ gradient for input gate, forget gate, memory cell, output
   * gate
   * @param mask_ dropout mask
   */
  void calcGradientBatchFirstLSTM(
    unsigned int NUM_GATE, const unsigned int batch_size,
    const unsigned int feature_size, const bool disable_bias,
    const unsigned int unit, const bool integrate_bias, ActiFunc &acti_func,
    ActiFunc &recurrent_acti_func, const bool return_sequences,
    const bool bidirectional, const bool enable_dropout,
    const float dropout_rate, const unsigned int max_timestep,
    const bool reverse, const Tensor &input_, const Tensor &incoming_derivative,
    Tensor &d_weight_ih, const Tensor &weight_hh, Tensor &d_weight_hh,
    Tensor &d_bias_h, Tensor &d_bias_ih, Tensor &d_bias_hh,
    const Tensor &hidden_state_, Tensor &d_hidden_state_,
    const Tensor &cell_state_, Tensor &d_cell_state_, const Tensor &ifgo_,
    Tensor &d_ifgo_, const Tensor &mask_);
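
  /*
   * Backward pass sketch (standard LSTM gradients, stated in the same
   * notation as the forward recurrence above; not verbatim code):
   *
   *   d_h_t  = incoming_derivative (+ recurrent term from step t+1)
   *   d_o_t  = d_h_t * acti_func(c_t)
   *   d_c_t += d_h_t * o_t * acti_func'(c_t)
   *   d_i_t  = d_c_t * g_t,     d_g_t = d_c_t * i_t
   *   d_f_t  = d_c_t * c_{t-1}, d_c_{t-1} = d_c_t * f_t
   *   d_ifgo_t = gate gradients pushed through the activation derivatives
   *   d_weight_ih += x_t^T . d_ifgo_t
   *   d_weight_hh += h_{t-1}^T . d_ifgo_t
   *   d_bias      += d_ifgo_t (summed over the batch)
   */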
};
} // namespace nntrainer

#endif /* __cplusplus */
#endif /* __LSTM_H__ */