Line data Source code
1 : /**
2 : * Copyright (C) 2019 Samsung Electronics Co., Ltd. All Rights Reserved.
3 : *
4 : * Licensed under the Apache License, Version 2.0 (the "License");
5 : * you may not use this file except in compliance with the License.
6 : * You may obtain a copy of the License at
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : * Unless required by applicable law or agreed to in writing, software
9 : * distributed under the License is distributed on an "AS IS" BASIS,
10 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 : * See the License for the specific language governing permissions and
12 : * limitations under the License.
13 : *
14 : * @file layer_devel.h
15 : * @date 10 June 2021
16 : * @brief This is the Layer base class of the Neural Network
17 : * @see https://github.com/nnstreamer/nntrainer
18 : * @author Jijoong Moon <jijoong.moon@samsung.com>
19 : * @bug No known bugs except for NYI items
20 : *
21 : */
22 : #ifndef __LAYER_DEVEL_H__
23 : #define __LAYER_DEVEL_H__
24 : #ifdef __cplusplus
25 :
26 : #include <memory>
27 : #include <string>
28 : #include <vector>
29 :
30 : #include <base_properties.h>
31 : #include <common.h>
32 : #include <layer_context.h>
33 : #include <tensor_dim.h>
34 :
35 : namespace ml::train {
36 : class Layer;
37 : }
38 :
39 : namespace nntrainer {
40 :
41 : class InitLayerContext;
42 : class Exporter;
43 :
44 : /**
45 : * @brief Enum class for the various types of inplace modes supported by layer
46 : *
47 : */
48 : enum class InPlaceType {
49 : NONE, /**< layer is not inplace */
50 : RESTRICTING, /**< layer is in-place and does place restriction on layers
51 : ahead of it to be in-place */
52 : NON_RESTRICTING /**< layer is in-place and does NOT place restriction on the
53 : layers ahead of it to be in-place */
54 : };
55 :
56 : /**
57 : * @brief Enum class for the direction of in-place operation
58 : *
59 : * @details When the in-place option is enabled and the layer is a binary
60 : * operation (i.e., it has two inputs), this enumeration specifies the direction
61 : * of the in-place operation. For instance, if a layer is in-place with the
62 : * direction set to LEFT, the output of the layer is written directly into its
63 : * 'input[0]' tensor, i.e., the memory shared with the preceding layer on that
64 : * side. Conversely, if the direction is set to RIGHT, the output is written
65 : * directly into its 'input[1]' tensor.
66 : */
67 : enum class InPlaceDirection {
68 : NONE, /**< default. It will be set to LEFT or RIGHT only when the type of the
69 : operation layer is binary and the is_inplace setting is true */
70 : LEFT, /**< left side of the layer is in-place */
71 : RIGHT, /**< right side of the layer is in-place */
72 : };
73 :
74 : /**
75 : * @class Layer Base class for layers
76 : * @brief Base class for all layers
77 : *
78 : * @details nntrainer::Layer does not inherit ml::train::Layer; the inheritance
79 : * was omitted to disallow static_cast between nntrainer::Layer and ml::train::Layer objects.
80 : */
81 2281 : class Layer {
82 :
83 : public:
84 : /**
85 : * @brief Property Enumeration
86 : * 0. input shape : string
87 : * 1. normalization : bool
88 : * 2. standardization : bool
89 : * 3. activation : string (type)
90 : * 4. epsilon : float
91 : * 5. weight_regularizer : string (type)
92 : * 6. weight_regularizer_constant : float
93 : * 7. unit : int
94 : * 8. weight_initializer : string (type)
95 : * 9. bias initializer : string (type)
96 : * 10. filter_size : int
97 : * 11. kernel_size : ( n , m )
98 : * 12. stride : ( n, m )
99 : * 13. padding : ( n, m )
100 : * 14. pool_size : ( n,m )
101 : * 15. pooling : max, average, global_max, global_average
102 : * 16. flatten : bool
103 : * 17. name : string (type)
104 : * 18. momentum : float,
105 : * 19. moving_mean_initializer : string (type),
106 : * 20. moving_variance_initializer : string (type),
107 : * 21. gamma_initializer : string (type),
108 : * 22. beta_initializer : string (type)
109 : * 23. modelfile : model file for loading config for backbone layer
110 : * 24. input_layers : string (type)
111 : * 25. output_layers : string (type)
112 : * 26. trainable :
113 : * 27. flip_direction
114 : * 28. random_translate
115 : * 29. in_dim : int ( input dimension for embedding layer )
116 : * 30. out_dim : int ( output dimension for embedding layer )
117 : * 31. recurrent_activation : string (type) - lstm
118 : * 32. distribute : bool
119 : * 33. axis : string (type)
120 : * 34. return_sequences : bool (type) - lstm
121 : * 35. hidden_state_activation : string (type) - lstm
122 : * 36. dropout : bool
123 : */
124 : enum class PropertyType {
125 : input_shape = 0,
126 : normalization = 1,
127 : standardization = 2,
128 : activation = 3,
129 : epsilon = 4,
130 : weight_regularizer = 5,
131 : weight_regularizer_constant = 6,
132 : unit = 7,
133 : weight_initializer = 8,
134 : bias_initializer = 9,
135 : filters = 10,
136 : kernel_size = 11,
137 : stride = 12,
138 : padding = 13,
139 : pool_size = 14,
140 : pooling = 15,
141 : flatten = 16,
142 : name = 17,
143 : momentum = 18,
144 : moving_mean_initializer = 19,
145 : moving_variance_initializer = 20,
146 : gamma_initializer = 21,
147 : beta_initializer = 22,
148 : modelfile = 23, /** model file for loading config for backbone layer */
149 : input_layers = 24,
150 : output_layers = 25,
151 : trainable = 26,
152 : flip_direction = 27,
153 : random_translate = 28,
154 : in_dim = 29,
155 : out_dim = 30,
156 : recurrent_activation = 31,
157 : distribute = 32,
158 : axis = 33,
159 : return_sequences = 34,
160 : hidden_state_activation = 35,
161 : dropout = 36,
162 : num_inputs = 37,
163 : unknown
164 : };
165 :
166 : /**
167 : * @brief Destructor of Layer Class
168 : */
169 : virtual ~Layer() = default;
170 :
171 : /**
172 : * @brief Get the layer type
173 : * @return const std::string type representation
174 : */
175 : virtual const std::string getType() const = 0;
176 :
177 : /**
178 : * @brief Finalize creating the layer
179 : * @param context Context of the layer
180 : *
181 : * @details Input dimensions will be provided in the context. This
182 : * function must set output dimensions in the given context. Further, the
183 : * context can be used to request weights for the layer, and any extra tensors
184 : * required for the operation of the layer.
185 : * @note After calling this, it is not allowed to
186 : * change properties.
187 : * @note No memory allocation must be performed in the initialization
188 : * step. Any tensor memory required must be requested from the context; it
189 : * will be made available during execution of the layer with the context.
190 : */
191 : virtual void finalize(InitLayerContext &context) = 0;
192 :
193 : /**
194 : * @brief Initialize the layer
195 : */
196 0 : virtual void initialize(RunLayerContext &context){};
197 :
198 : /**
199 : * @brief Forward Propagation of a layer
200 : * @param context Context of the layer
201 : * @param training true if training, false if inference
202 : *
203 : * @note Output must be set in the output tensors.
204 : * @details context provides access to the weights (if any), inputs,
205 : * outputs, and tensors (if any) for the layer. Input and output dimensions
206 : * can be accessed from the input/output tensors themselves.
207 : */
208 : virtual void forwarding(RunLayerContext &context, bool training) = 0;
209 :
210 : /**
211 : * @brief Incremental forward Propagation of a layer
212 : * @param context Context of the layer
213 : * @param from start step
214 : * @param to end step
215 : * @param training true if training, false if inference
216 : *
217 : * @note Output must be set in the output tensors.
218 : * @details context provides access to the weights (if any), inputs,
219 : * outputs, and tensors (if any) for the layer. Input and output dimensions
220 : * can be accessed from the input/output tensors themselves.
221 : */
222 15 : virtual void incremental_forwarding(RunLayerContext &context,
223 : unsigned int from, unsigned int to,
224 : bool training) {
225 15 : forwarding(context, training);
226 15 : };
227 :
228 : /**
229 : * @brief Calculate the derivative to be passed to the previous layer
230 : * @param context Context of the layer
231 : * @note Returned derivatives must be set in the input gradient tensors.
232 : * @details context provides access to the weights (if any), inputs,
233 : * outputs, and tensors (if any) for the layer. Input and output dimensions
234 : * can be accessed from the input/output tensors themselves.
235 : */
236 : virtual void calcDerivative(RunLayerContext &context) = 0;
237 :
238 : /**
239 : * @brief Calculate the gradients of the weights of the layer
240 : * @details context provides access to the weights (if any), inputs,
241 : * outputs, and tensors (if any) for the layer. Input and output dimensions
242 : * can be accessed from the input/output tensors themselves.
243 : * @note Gradients must be set in weight gradient tensors.
244 : */
245 16 : virtual void calcGradient(RunLayerContext &context) {}
246 :
247 : /**
248 : * @brief set Property of layer
249 : * @param values values of property
250 : * @throw std::invalid_argument invalid parameter.
251 : */
252 : virtual void setProperty(const std::vector<std::string> &values) = 0;
253 :
254 : /**
255 : * @brief Get property value of layer
256 : * @param key Property key to retrieve
257 : * @return Property value as string (empty string if not found)
258 : */
259 0 : virtual std::string getProperty(const std::string &key) { return ""; }
260 :
261 : /**
262 : * @brief this function helps exporting the layer in a predefined format,
263 : * while working around an issue caused by templated function type erasure
264 : *
265 : * @param exporter exporter that contains exporting logic
266 : * @param method enum value to identify how it should be exported to
267 : */
268 219 : virtual void exportTo(Exporter &exporter,
269 219 : const ml::train::ExportMethods &method) const {}
270 :
271 : /**
272 : * @brief Set the batch for the layer
273 : * @param context Context of the layer
274 : * @param batch Batch value to be set
275 : * @details Update the run context based on the updated batch size if required
276 : */
277 1994 : virtual void setBatch(RunLayerContext &context, unsigned int batch) {}
278 :
279 : /**
280 : * @brief Update the tensor dimensions of layer by input dimensions
281 : * @param context Context of the layer
282 : * @param input_dimensions input dimensions of layer
283 : * @details Update the dimensions of inputs, outputs, weights and tensors
284 : * based on the input dimensions
285 : */
286 : virtual void
287 0 : updateTensorsByInputDimensions(RunLayerContext &context,
288 : std::vector<TensorDim> input_dimensions) {
289 : throw std::invalid_argument("updateTensorsByInputDimensions() is currently "
290 0 : "not supported for layer type " +
291 0 : getType());
292 : }
293 :
294 : /**
295 : * @brief Check whether the current layer supports in-place execution
296 : *
297 : * @return true if in-place, else false
298 : * @details all layers default to out-of-place execution; a layer opts in
299 : * by setting its is_inplace member
300 : */
301 3352 : virtual bool supportInPlace() const { return is_inplace; }
302 :
303 : /**
304 : * @brief Get the inplace direction for the tensor operation layer
305 : *
306 : * @return InPlaceDirection
307 : */
308 2390 : virtual InPlaceDirection getInPlaceDirection() {
309 2390 : return InPlaceDirection::NONE;
310 : };
311 :
312 : /**
313 : * @brief Initialize the in-place settings of the layer
314 : * @details If it is a layer that supports in-place, the default in-place type
315 : * is NON_RESTRICTING, but if there is a RESTRICTING type among the input
316 : * layers, it is set to NONE in network_graph.cpp.
317 : * Layers with exceptional behavior such as No-Operation layers should
318 : * override this function.
319 : * @return InPlaceType
320 : */
321 1616 : virtual InPlaceType initializeInPlace() {
322 1616 : if (!supportInPlace())
323 : return InPlaceType::NONE;
324 : else
325 280 : return InPlaceType::NON_RESTRICTING;
326 : }
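As a hedged sketch (not drawn from any existing layer), a derived layer whose in-place execution constrains the layers ahead of it could set is_inplace and override this hook to report the restricting type:

  InPlaceType initializeInPlace() override {
    if (!supportInPlace())
      return InPlaceType::NONE;
    /* unlike the NON_RESTRICTING default, place a restriction on the in-place
       decision of the layers ahead of this one (cf. InPlaceType::RESTRICTING) */
    return InPlaceType::RESTRICTING;
  }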
327 :
328 : /**
329 : * @brief check if this layer requires label to be passed
330 : * @note requireLabel() == true means, for now, that the layer is an endpoint
331 : * of the graph (numOutlayers == 0). The label will be fed to the gradient of
332 : * hidden if requireLabel() is true
333 : * @return true if requires a label when training, else false
334 : */
335 34139 : virtual bool requireLabel() const { return false; }
336 :
337 : /**
338 : * @brief check if this layer supports backwarding
339 : * @note supporting backwarding primarily means that the layer can process the
340 : * derivatives and return the gradients back to the previous layer.
341 : * @return true if supports backwarding, else false
342 : */
343 : virtual bool supportBackwarding() const = 0;
344 :
345 : /**
346 : * @brief save layer Weight & Bias data to file
347 : * @param file output file stream
348 : * @param run_context run context for the layer
349 : * @param opt_var whether to save optimizer variables
350 : * @param mode Execution mode
351 : * @param trainable whether there are trainable weights
352 : * @param definedWeightDataType current data type of the layer
353 : */
354 3134 : virtual void save(std::ofstream &file, RunLayerContext &run_context,
355 : bool opt_var, ml::train::ExecutionMode mode, bool trainable,
356 : TensorDim::DataType definedWeightDataType) const {
357 :
358 3134 : if (opt_var) {
359 109 : for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
360 42 : if (run_context.isGradientFirstAccess(i) && trainable) {
361 : // @note save optimizer variables
362 42 : if (run_context.weightHasGradient(i)) {
363 126 : for (unsigned int j = 0; j < run_context.getNumWeightOptVar(i);
364 : ++j) {
365 84 : run_context.getWeightOptVar(i, j).save(file);
366 : }
367 : }
368 : }
369 : }
370 : } else {
371 : // @note shared weights are only saved at the first access
372 5109 : for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
373 2042 : if (run_context.isGradientFirstAccess(i)) {
374 2042 : run_context.getWeight(i).save(file);
375 : }
376 : }
377 : }
378 3134 : }
379 :
380 : /**
381 : * @brief read layer Weight & Bias data from file
382 : * @param file input file stream
383 : * @param run_context run context for the layer
384 : * @param opt_var whether to read optimizer variables
385 : * @param mode execution mode
386 : * @param trainable whether there are trainable weights
387 : * @param defineWeightDataType required weight tensor type from the network
388 : * @param fsu fsu flag
389 : *
390 : */
391 0 : virtual void read(std::ifstream &file, RunLayerContext &run_context,
392 : bool opt_var, ml::train::ExecutionMode mode, bool trainable,
393 : TensorDim::DataType defineWeightDataType, bool fsu,
394 : size_t start_offset = 0, bool read_from_offset = false,
395 : int file_fd = -1) {
396 0 : if (fsu) {
397 0 : for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
398 0 : if (run_context.getWeight(i).getDataType() ==
399 : TensorDim::DataType::BCQ) {
400 0 : run_context.getWeight(i).readFSU();
401 : }
402 : }
403 : } else {
404 0 : if (opt_var) {
405 0 : for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
406 0 : if (run_context.isGradientLastAccess(i) && trainable) {
407 : /// @note read optimizer variables
408 0 : for (unsigned int j = 0; j < run_context.getNumWeightOptVar(i);
409 : ++j) {
410 0 : run_context.getWeightOptVar(i, j).read(file, start_offset);
411 : }
412 : }
413 : }
414 : } else {
415 0 : for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
416 : /// @note shared weights are only read at the first access
417 0 : if (run_context.isGradientFirstAccess(i)) {
418 0 : run_context.getWeight(i).read(file, start_offset, read_from_offset,
419 : file_fd);
420 0 : if (run_context.isMixedPrecision(i) && trainable &&
421 0 : !run_context.getWeightFP32(i).empty()) {
422 0 : run_context.getWeightFP32(i).copyData(run_context.getWeight(i));
423 : }
424 : }
425 : }
426 : }
427 : }
428 0 : }
429 :
430 : /**
431 : * @brief read layer Weight & Bias data from a read source
432 : * @param src read source to read weights from
433 : * @param run_context run context for the layer
434 : * @param opt_var whether to read optimizer variables
435 : * @param mode execution mode
436 : * @param trainable whether there are trainable weights
437 : * @param defineWeightDataType required weight tensor type from the network
438 : * @param fsu fsu flag
439 : *
440 : */
441 0 : virtual void read(ReadSource src, RunLayerContext &run_context, bool opt_var,
442 : ml::train::ExecutionMode mode, bool trainable,
443 : TensorDim::DataType defineWeightDataType, bool fsu,
444 : size_t start_offset = 0, bool read_from_offset = false) {
445 0 : if (fsu) {
446 0 : for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
447 0 : if (run_context.getWeight(i).getDataType() ==
448 : TensorDim::DataType::BCQ) {
449 0 : run_context.getWeight(i).readFSU();
450 : }
451 : }
452 : } else {
453 0 : if (opt_var) {
454 0 : for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
455 0 : if (run_context.isGradientLastAccess(i) && trainable) {
456 : /// @note read optimizer variables
457 0 : for (unsigned int j = 0; j < run_context.getNumWeightOptVar(i);
458 : ++j) {
459 0 : run_context.getWeightOptVar(i, j).read(src, start_offset);
460 : }
461 : }
462 : }
463 : } else {
464 0 : for (unsigned int i = 0; i < run_context.getNumWeights(); ++i) {
465 : /// @note shared weights are only read at the first access
466 0 : if (run_context.isGradientFirstAccess(i)) {
467 0 : run_context.getWeight(i).read(src, start_offset, read_from_offset);
468 0 : if (run_context.isMixedPrecision(i) && trainable &&
469 0 : !run_context.getWeightFP32(i).empty()) {
470 0 : run_context.getWeightFP32(i).copyData(run_context.getWeight(i));
471 : }
472 : }
473 : }
474 : }
475 : }
476 0 : }
477 :
478 : protected:
479 : bool is_inplace = false; /**< whether this layer is in-place or not */
480 : };
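To make the contract above concrete, here is a minimal, hypothetical layer implementing only the pure virtual functions. It is a sketch, not part of this header: the class name is invented, and the context accessors used (InitLayerContext::getInputDimensions()/setOutputDimensions(), RunLayerContext::getInput()/getOutput(), Tensor::copyData()) are assumed from the rest of nntrainer.

class PassThroughLayer final : public Layer {
public:
  const std::string getType() const override { return "pass_through"; }

  void finalize(InitLayerContext &context) override {
    /* identity operation: output dimensions equal input dimensions and no
       weights or extra tensors are requested */
    context.setOutputDimensions(context.getInputDimensions());
  }

  void forwarding(RunLayerContext &context, bool training) override {
    /* copy the input tensor into the output tensor */
    context.getOutput(0).copyData(context.getInput(0));
  }

  void calcDerivative(RunLayerContext &context) override {
    /* intentionally empty: this sketch does not support backwarding */
  }

  void setProperty(const std::vector<std::string> &values) override {
    /* this sketch exposes no layer-specific properties */
    if (!values.empty())
      throw std::invalid_argument("pass_through layer has no properties");
  }

  bool supportBackwarding() const override { return false; }
};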
481 :
482 : /// @todo Decide where to put and how to implement(#986)
483 : // /**
484 : // * @brief Overriding output stream for layers and its derived classes
485 : // */
486 : // template <typename T, typename std::enable_if_t<
487 : // std::is_base_of<Layer, T>::value, T> * = nullptr>
488 : // std::ostream &operator<<(std::ostream &out, T &l) {
489 : // // l.printPreset(out, Layer::PrintPreset::PRINT_SUMMARY);
490 : // return out;
491 : // }
492 :
493 : using CreateLayerFunc = nntrainer::Layer *(*)();
494 : using DestroyLayerFunc = void (*)(nntrainer::Layer *);
495 :
496 : /**
497 : * @brief General Layer Factory function to register Layer
498 : *
499 : * @param props property representation
500 : * @return std::unique_ptr<nntrainer::Layer> created object
501 : */
502 : template <typename T,
503 : std::enable_if_t<std::is_base_of<Layer, T>::value, T> * = nullptr>
504 7305 : std::unique_ptr<Layer> createLayer(const std::vector<std::string> &props = {}) {
505 6755 : std::unique_ptr<Layer> ptr = std::make_unique<T>();
506 7305 : ptr->setProperty(props);
507 7305 : return ptr;
508 : }
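A small usage sketch, reusing the hypothetical PassThroughLayer above: each string in props is forwarded to setProperty(), conventionally in "key=value" form, so a property-less layer is created with the default argument.

std::unique_ptr<nntrainer::Layer> layer =
  nntrainer::createLayer<PassThroughLayer>();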
509 :
510 : /**
511 : * @brief Layer Pluggable struct that enables pluggable layer
512 : *
513 : */
514 : typedef struct {
515 : CreateLayerFunc createfunc; /**< create layer function */
516 : DestroyLayerFunc destroyfunc; /**< destroy function */
517 : } LayerPluggable;
518 :
519 : /**
520 : * @brief pluggable layer must have this structure defined
521 : */
522 : extern "C" LayerPluggable ml_train_layer_pluggable;
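A pluggable layer shared object would define this symbol in its own translation unit, roughly as follows (a sketch; the create/destroy function names and the layer class are the hypothetical ones from above):

nntrainer::Layer *create_pass_through_layer() {
  return new PassThroughLayer();
}

void destroy_pass_through_layer(nntrainer::Layer *layer) {
  delete layer;
}

extern "C" {
nntrainer::LayerPluggable ml_train_layer_pluggable{create_pass_through_layer,
                                                   destroy_pass_through_layer};
}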
523 :
524 : } // namespace nntrainer
525 :
526 : #endif /* __cplusplus */
527 : #endif /* __LAYER_DEVEL_H__ */