Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2020 Parichay Kapoor <pk.kapoor@samsung.com>
4 : *
5 : * @file preprocess_layer.cpp
6 : * @date 31 December 2020
7 : * @see https://github.com/nnstreamer/nntrainer
8 : * @author Parichay Kapoor <pk.kapoor@samsung.com>
9 : * @bug No known bugs except for NYI items
10 : * @brief This is Preprocess Translate Layer Class for Neural Network
11 : *
12 : * @todo Add support without opencv
13 : */
14 :
15 : #include <random>
16 :
17 : #include <layer_context.h>
18 : #include <nntrainer_error.h>
19 : #include <nntrainer_log.h>
20 : #include <node_exporter.h>
21 : #include <preprocess_translate_layer.h>
22 : #include <util_func.h>
23 :
24 : #if defined(ENABLE_DATA_AUGMENTATION_OPENCV)
25 : #include <opencv2/highgui/highgui.hpp>
26 : #include <opencv2/imgproc/imgproc.hpp>
27 : #endif
28 :
29 : namespace nntrainer {
30 0 : PreprocessTranslateLayer::PreprocessTranslateLayer() :
31 : Layer(),
32 0 : epsilon(1e-5f),
33 0 : preprocess_translate_props(props::RandomTranslate()) {}
34 :
35 0 : void PreprocessTranslateLayer::finalize(InitLayerContext &context) {
36 0 : context.setOutputDimensions(context.getInputDimensions());
37 0 : const TensorDim input_dim_0 = context.getInputDimensions()[0];
38 : float random_translate =
39 0 : std::get<props::RandomTranslate>(preprocess_translate_props);
40 :
41 : // rng.seed(getSeed());
42 : rng.seed(0);
43 :
44 : // Made for 3 channel input
45 0 : if (random_translate > epsilon) {
46 0 : NNTR_THROW_IF(input_dim_0.channel() > 3, std::invalid_argument)
47 : << "Preprocess translate layer not supported for over 3 channels";
48 0 : translate_dist = std::uniform_real_distribution<float>(-random_translate,
49 : random_translate);
50 :
51 : #if defined(ENABLE_DATA_AUGMENTATION_OPENCV)
52 : affine_transform_mat = cv::Mat::zeros(2, 3, CV_32FC1);
53 : affine_transform_mat.at<float>(0, 0) = 1;
54 : affine_transform_mat.at<float>(1, 1) = 1;
55 :
56 : input_mat =
57 : cv::Mat::zeros(input_dim_0.height(), input_dim_0.width(), CV_32FC3);
58 : output_mat =
59 : cv::Mat::zeros(input_dim_0.height(), input_dim_0.width(), CV_32FC3);
60 : #else
61 : throw exception::not_supported(
62 0 : "Preprocess translate layer is not supported without opencv");
63 : #endif
64 : }
65 0 : }
66 :
67 0 : void PreprocessTranslateLayer::setProperty(
68 : const std::vector<std::string> &values) {
69 0 : auto remain_props = loadProperties(values, preprocess_translate_props);
70 0 : NNTR_THROW_IF(!remain_props.empty(), std::invalid_argument)
71 0 : << "[PreprocessTranslateLayer] Unknown Layer Properties count " +
72 0 : std::to_string(values.size());
73 0 : }
74 :
75 0 : void PreprocessTranslateLayer::forwarding(RunLayerContext &context,
76 : bool training) {
77 0 : if (!training) {
78 0 : for (unsigned int idx = 0; idx < context.getNumInputs(); idx++) {
79 : /** TODO: tell the graph to not include this when not training */
80 0 : context.getOutput(idx) = context.getInput(idx);
81 : }
82 :
83 : return;
84 : }
85 :
86 : float random_translate =
87 0 : std::get<props::RandomTranslate>(preprocess_translate_props);
88 0 : for (unsigned int idx = 0; idx < context.getNumInputs(); idx++) {
89 0 : Tensor &hidden_ = context.getOutput(idx);
90 0 : Tensor &input_ = context.getInput(idx);
91 0 : const TensorDim input_dim = input_.getDim();
92 :
93 0 : if (random_translate < epsilon) {
94 0 : hidden_ = input_;
95 : continue;
96 : }
97 :
98 : #if defined(ENABLE_DATA_AUGMENTATION_OPENCV)
99 : for (unsigned int b = 0; b < input_dim.batch(); b++) {
100 :
101 : /** random translation */
102 : float translate_x = translate_dist(rng) * input_dim.width();
103 : float translate_y = translate_dist(rng) * input_dim.height();
104 : affine_transform_mat.at<cv::Vec2f>(0, 0)[2] = translate_x;
105 : affine_transform_mat.at<cv::Vec2f>(1, 0)[2] = translate_y;
106 :
107 : for (unsigned int c = 0; c < input_dim.channel(); c++)
108 : for (unsigned int h = 0; h < input_dim.height(); h++)
109 : for (unsigned int w = 0; w < input_dim.width(); w++)
110 : input_mat.at<cv::Vec3f>(h, w)[c] = input_.getValue(b, c, h, w);
111 :
112 : cv::warpAffine(input_mat, output_mat, affine_transform_mat,
113 : output_mat.size(), cv::WARP_INVERSE_MAP,
114 : cv::BORDER_REFLECT);
115 :
116 : for (unsigned int c = 0; c < input_dim.channel(); c++)
117 : for (unsigned int h = 0; h < input_dim.height(); h++)
118 : for (unsigned int w = 0; w < input_dim.width(); w++)
119 : input_.setValue(b, c, h, w, output_mat.at<cv::Vec3f>(h, w)[c]);
120 : }
121 :
122 : hidden_ = input_;
123 : #else
124 : throw exception::not_supported(
125 0 : "Preprocess translate layer is not supported without opencv");
126 : #endif
127 : }
128 : }
129 :
130 0 : void PreprocessTranslateLayer::calcDerivative(RunLayerContext &context) {
131 : throw exception::not_supported(
132 0 : "calcDerivative for preprocess layer is not supported");
133 : }
134 :
135 0 : void PreprocessTranslateLayer::exportTo(
136 : Exporter &exporter, const ml::train::ExportMethods &method) const {
137 0 : exporter.saveResult(preprocess_translate_props, method, this);
138 0 : }
139 :
140 : } /* namespace nntrainer */
|