// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2020 Jijoong Moon <jijoong.moon@samsung.com>
 *
 * @file concat_layer.cpp
 * @date 27 Oct 2020
 * @see https://github.com/nnstreamer/nntrainer
 * @author Jijoong Moon <jijoong.moon@samsung.com>
 * @author Donghyeon Jeong <dhyeon.jeong@samsung.com>
 * @bug No known bugs except for NYI items
 * @brief This is Concat Layer Class for Neural Network
 *
 * @todo merge concat and split layer to a common implementation
 */

#include <cstring>
#include <vector>

#include <concat_layer.h>
#include <layer_context.h>
#include <nntr_threads.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
#include <tensor_dim.h>
#include <util_func.h>

namespace nntrainer {
ConcatLayer::ConcatLayer() : Layer(), leading_helper_dim(1) {}

static constexpr size_t SINGLE_INOUT_IDX = 0;

void ConcatLayer::finalize(InitLayerContext &context) {
  auto &concat_dimension_prop = std::get<props::ConcatDimension>(concat_props);
  /** for backward compatibility, the default concat dimension is channel */
  /// @todo this is a hacky way to force the concat dimension to width when the
  /// channel dimension is already taken; the recurrent realizer
  /// (return_sequence) exploits the concat layer but has no control over which
  /// axis to stack along
  unsigned int concat_dimension =
    context.getInputDimensions().front().channel() > 1 ? 3 : 1;
  if (!concat_dimension_prop.empty())
    concat_dimension = concat_dimension_prop.get();

  /**
   * The concat is done only along the concat dimension.
   * For example, consider 2 inputs a, b with dimensions [b,c,h,w] each
   * 1. concat_dimension = 1, output_dim = [b,c_a+c_b,h,w]
   * 2. concat_dimension = 2, output_dim = [b,c,h_a+h_b,w]
   * 3. concat_dimension = 3, output_dim = [b,c,h,w_a+w_b]
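   *
   * A minimal standalone sketch of this shape rule (illustrative only; the
   * name concat_shape is hypothetical and a plain std::array stands in for
   * TensorDim):
   * @code
   * #include <array>
   * #include <cassert>
   *
   * // concatenate two 4-D shapes along `axis`; all other axes must match
   * std::array<unsigned, 4> concat_shape(std::array<unsigned, 4> a,
   *                                      const std::array<unsigned, 4> &b,
   *                                      unsigned axis) {
   *   for (unsigned i = 0; i < 4; ++i)
   *     if (i != axis)
   *       assert(a[i] == b[i]);
   *   a[axis] += b[axis];
   *   return a;
   * }
   * // concat_shape({2,3,4,5}, {2,3,4,7}, 3) -> {2,3,4,12}
   * @endcode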
   */
  auto const &input_dims = context.getInputDimensions();
  const TensorDim &input_dim_0 = input_dims[SINGLE_INOUT_IDX];
  unsigned int concat_dim_val = input_dim_0.getTensorDim(concat_dimension);

  for (unsigned int idx = 1; idx < input_dims.size(); ++idx) {
    const TensorDim &dim = input_dims[idx];

    for (unsigned int i = 0; i < ml::train::TensorDim::getNumDim(); ++i) {
      if (i == concat_dimension)
        continue;
      NNTR_THROW_IF(input_dim_0[i] != dim[i], std::runtime_error)
        << "Error: concat layer requires same shape from all input layers "
           "along non-concat dimension";
    }
    concat_dim_val += dim[concat_dimension];
  }

  TensorDim output_dim = input_dim_0;
  output_dim.setTensorDim(concat_dimension, concat_dim_val);

  context.setOutputDimensions({output_dim});

  /**
   * The following helper shapes facilitate efficient concatenation and split
   * of the data.
   *
   * The helper shapes are created by consolidating all the dimensions before
   * the concat dimension into the first axis and all the remaining dimensions
   * into the last axis.
   *
   * @note This is possible since the data starting from the concat dimension
   * to the end is always contiguous.
   *
   * @example the following shows how the helper dimension will look with given
   * inputs and concat dimension.
   *
   *         | cat_dim 1 | cat_dim 2 | cat_dim 3
   * --------|-----------|-----------|-----------
   * input0  | 2:1:2:3   | 1:2:1:3   | 1:2:2:3
   * input1  | 2:3:2:3   | 1:2:3:3   | 1:2:2:1
   * --------|-----------|-----------|-----------
   * helper0 | 2:1:1:6   | 2:1:1:3   | 4:1:1:3
   * helper1 | 2:1:1:18  | 2:1:1:9   | 4:1:1:1
   *
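   * A standalone sketch of how these helper shapes are derived (illustrative
   * only; the name helper_shape is hypothetical and `dims` is a plain
   * 4-element array rather than a TensorDim):
   * @code
   * #include <array>
   *
   * // consolidate a 4-D shape into [leading, 1, 1, trailing] around `axis`:
   * // `leading` multiplies the dims before the concat axis and `trailing`
   * // the dims from the concat axis to the end
   * std::array<unsigned, 4> helper_shape(const std::array<unsigned, 4> &dims,
   *                                      unsigned axis) {
   *   unsigned leading = 1, trailing = 1;
   *   for (unsigned i = 0; i < axis; ++i)
   *     leading *= dims[i];
   *   for (unsigned i = axis; i < 4; ++i)
   *     trailing *= dims[i];
   *   return {leading, 1, 1, trailing};
   * }
   * // helper_shape({1,2,1,3}, 2) -> {2,1,1,3}, matching helper0 at cat_dim 2
   * @endcode
   *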
   */
  /// Setup output_reshape_helper (how output should be reshaped)
  output_reshape_helper.channel(1);
  output_reshape_helper.height(1);
  output_reshape_helper.width(1);
  for (unsigned int axis = concat_dimension;
       axis < ml::train::TensorDim::getNumDim(); ++axis) {
    output_reshape_helper.width(output_reshape_helper.width() *
                                output_dim.getTensorDim(axis));
  }

  /// Setup input_reshape_helper (how inputs should be reshaped)
  input_reshape_helper.resize(input_dims.size());

  for (unsigned int idx = 0; idx < input_reshape_helper.size(); idx++) {
    input_reshape_helper[idx].channel(1);
    input_reshape_helper[idx].height(1);
    input_reshape_helper[idx].width(1);

    for (unsigned int axis = concat_dimension;
         axis < ml::train::TensorDim::getNumDim(); ++axis) {
      input_reshape_helper[idx].width(input_reshape_helper[idx].width() *
                                      input_dims[idx].getTensorDim(axis));
    }
  }

  leading_helper_dim = 1;
  for (unsigned int idx = 1; idx < concat_dimension; ++idx) {
    leading_helper_dim *= output_dim.getTensorDim(idx);
  }

  setBatch(input_dims[SINGLE_INOUT_IDX].batch());
}

void ConcatLayer::forwarding(RunLayerContext &context, bool training) {
  /**
   * Forwarding in ConcatLayer works as follows
   *
   *    in1          in2         in3             output
   * |---0---|  |----3----|  |--6--|      |---0---||----3----||--6--|
   * |---1---|  |----4----|  |--7--|  =>  |---1---||----4----||--7--|
   * |---2---|  |----5----|  |--8--|      |---2---||----5----||--8--|
   *
   * @note For each input tensor, it iterates over the batches and copies the
   * entire width to the corresponding output position. In the diagram above,
   * each row is a batch and each column a width; the number on each block
   * indicates the order in which it is copied to the output.
   *
   * @todo avoid the copy by creating the inputs as shared tensors of the
   * output so that this layer can run in-place as well
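   *
   * A minimal sketch of this copy scheme on plain buffers (illustrative only;
   * the name concat_rows is hypothetical, while the real code below maps
   * Tensors over the reshaped helper dimensions):
   * @code
   * #include <cstring>
   * #include <vector>
   *
   * // concatenate `inputs` (each rows x widths[i]) row by row into `out`
   * // (rows x total_width, where total_width is the sum of all widths[i])
   * void concat_rows(const std::vector<const float *> &inputs,
   *                  const std::vector<std::size_t> &widths, std::size_t rows,
   *                  float *out, std::size_t total_width) {
   *   std::size_t offset = 0;
   *   for (std::size_t i = 0; i < inputs.size(); ++i) {
   *     for (std::size_t r = 0; r < rows; ++r)
   *       std::memcpy(out + r * total_width + offset,
   *                   inputs[i] + r * widths[i], widths[i] * sizeof(float));
   *     offset += widths[i];
   *   }
   * }
   * @endcode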
   */
  Tensor &output = context.getOutput(SINGLE_INOUT_IDX);

  const TensorDim out_dim = output.getDim();
  output.reshape(output_reshape_helper);
  unsigned int output_width_offset = 0;
  TensorDim::TensorType tensor_type = output.getTensorType();

  for (unsigned int idx = 0; idx < context.getNumInputs(); idx++) {
    Tensor &input = context.getInput(idx);
    const TensorDim in_dim = input.getDim();
    auto const &irh = input_reshape_helper[idx];
    input.reshape(irh);
    unsigned int data_copy_size = irh.width();

    /** loop over the dimensions before the concat dimension */
    if (in_dim.getDataType() == TensorDim::DataType::FP32) {
      /** copy contiguous tensor data (reshaped width) */
      for (unsigned int batch = 0; batch < output.batch(); batch++) {
        Tensor dest_tensor = Tensor::Map<float>(
          output.getAddress<float>(batch, 0, 0, output_width_offset),
          data_copy_size * sizeof(float),
          {1, 1, 1, data_copy_size, tensor_type});
        const Tensor source_tensor =
          Tensor::Map<float>(input.getAddress<float>(batch, 0, 0, 0),
                             data_copy_size * sizeof(float),
                             {1, 1, 1, data_copy_size, tensor_type});
        dest_tensor.copy(source_tensor);
      }
    } else if (in_dim.getDataType() == TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
      /** copy contiguous tensor data (reshaped width) */
      for (unsigned int batch = 0; batch < output.batch(); batch++) {
        Tensor dest_tensor = Tensor::Map<_FP16>(
          output.getAddress<_FP16>(batch, 0, 0, output_width_offset),
          data_copy_size * sizeof(_FP16),
          {1, 1, 1, data_copy_size, tensor_type});
        const Tensor source_tensor =
          Tensor::Map<_FP16>(input.getAddress<_FP16>(batch, 0, 0, 0),
                             data_copy_size * sizeof(_FP16),
                             {1, 1, 1, data_copy_size, tensor_type});
        dest_tensor.copy(source_tensor);
      }
#else
      throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
    }

    output_width_offset += irh.width();
    input.reshape(in_dim);
  }

  output.reshape(out_dim);
}

void ConcatLayer::incremental_forwarding(RunLayerContext &context,
                                         unsigned int from, unsigned int to,
                                         bool training) {
  /**
   * @todo avoid the copy by creating the inputs as shared tensors of the
   * output so that this layer can run in-place as well
   */
  Tensor &output = context.getOutput(SINGLE_INOUT_IDX);

  const TensorDim out_dim = output.getDim();
  output.reshape(output_reshape_helper);
  unsigned int output_height_offset = 0;
  unsigned int data_copy_size = output_reshape_helper.width();

  // @todo: this implementation only works when the axis is 3 (width); extend
  // it to cover the other axes
  unsigned int batch_channel = out_dim.batch() * out_dim.channel();

  for (unsigned int idx = 0; idx < context.getNumInputs(); idx++) {
    Tensor &input = context.getInput(idx);
    const TensorDim in_dim = input.getDim();
    auto const &irh = input_reshape_helper[idx];
    input.reshape(irh);

    /** loop over the dimensions before the concat dimension */
    for (unsigned int batch = batch_channel * from; batch < batch_channel * to;
         batch++) {
      /** loop over the concat dimension itself */
      for (unsigned int count = 0; count < irh.height(); count++) {
        Tensor dest_tensor = Tensor::Map(
          output.getAddress(batch, 0, output_height_offset + count, 0),
          data_copy_size * sizeof(float), {1, 1, 1, data_copy_size});
        const Tensor source_tensor = Tensor::Map(
          input.getAddress(batch, 0, count, 0), data_copy_size * sizeof(float),
          {1, 1, 1, data_copy_size});
        dest_tensor.copy(source_tensor);
      }
    }

    input.reshape(in_dim);
    output_height_offset += irh.height();
  }

  output.reshape(out_dim);
}

void ConcatLayer::calcDerivative(RunLayerContext &context) {
  /**
   * calcDerivative in ConcatLayer works as follows
   *
   *            output                      in1          in2         in3
   * |---0---||----3----||--6--|      |---0---|  |----3----|  |--6--|
   * |---1---||----4----||--7--|  =>  |---1---|  |----4----|  |--7--|
   * |---2---||----5----||--8--|      |---2---|  |----5----|  |--8--|
   *
   * @note For each input tensor, it iterates over the batches and copies the
   * entire input width from the output tensor to the corresponding input. In
   * the diagram above, each row is a batch and each column a width; the number
   * on each block indicates the order in which it is copied to the inputs.
   *
   * @todo avoid the copy by creating the inputs as shared tensors of the
   * output so that this layer can run in-place as well
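   *
   * A minimal sketch of this inverse split on plain buffers (illustrative
   * only; the name split_rows is hypothetical, while the real code below maps
   * the reshaped derivative tensors):
   * @code
   * #include <cstring>
   * #include <vector>
   *
   * // split `out` (rows x total_width) row by row back into `inputs`
   * // (each rows x widths[i], where total_width is the sum of all widths[i])
   * void split_rows(const float *out, std::size_t total_width,
   *                 std::size_t rows, const std::vector<float *> &inputs,
   *                 const std::vector<std::size_t> &widths) {
   *   std::size_t offset = 0;
   *   for (std::size_t i = 0; i < inputs.size(); ++i) {
   *     for (std::size_t r = 0; r < rows; ++r)
   *       std::memcpy(inputs[i] + r * widths[i],
   *                   out + r * total_width + offset,
   *                   widths[i] * sizeof(float));
   *     offset += widths[i];
   *   }
   * }
   * @endcode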
   */
  Tensor output = context.getIncomingDerivative(SINGLE_INOUT_IDX);

  output.reshape(output_reshape_helper);
  unsigned int output_width_offset = 0;
  TensorDim::TensorType tensor_type = output.getTensorType();

  for (unsigned int idx = 0; idx < context.getNumInputs(); idx++) {
    Tensor &input = context.getOutgoingDerivative(idx);
    const TensorDim in_dim = input.getDim();
    auto const &irh = input_reshape_helper[idx];
    input.reshape(irh);
    unsigned int data_copy_size = irh.width();

    if (in_dim.getDataType() == TensorDim::DataType::FP32) {
      /** loop over the dimensions before the concat dimension */
      for (unsigned int batch = 0; batch < output.batch(); batch++) {
        /** copy contiguous data (reshaped width size) in a tensor */
        const Tensor source_tensor = Tensor::Map<float>(
          output.getAddress<float>(batch, 0, 0, output_width_offset),
          data_copy_size * sizeof(float),
          {1, 1, 1, data_copy_size, tensor_type});
        Tensor dest_tensor =
          Tensor::Map<float>(input.getAddress<float>(batch, 0, 0, 0),
                             data_copy_size * sizeof(float),
                             {1, 1, 1, data_copy_size, tensor_type});
        dest_tensor.copy(source_tensor);
      }
    } else if (in_dim.getDataType() == TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
      /** loop over the dimensions before the concat dimension */
      for (unsigned int batch = 0; batch < output.batch(); batch++) {
        /** copy contiguous data (reshaped width size) in a tensor */
        const Tensor source_tensor = Tensor::Map<_FP16>(
          output.getAddress<_FP16>(batch, 0, 0, output_width_offset),
          data_copy_size * sizeof(_FP16),
          {1, 1, 1, data_copy_size, tensor_type});
        Tensor dest_tensor =
          Tensor::Map<_FP16>(input.getAddress<_FP16>(batch, 0, 0, 0),
                             data_copy_size * sizeof(_FP16),
                             {1, 1, 1, data_copy_size, tensor_type});
        dest_tensor.copy(source_tensor);
      }
#else
      throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
    }

    input.reshape(in_dim);
    output_width_offset += irh.width();
  }
}

void ConcatLayer::setProperty(const std::vector<std::string> &values) {
  auto remain_props = loadProperties(values, concat_props);
  NNTR_THROW_IF(!remain_props.empty(), std::invalid_argument)
    << "[ConcatLayer] Unknown layer properties, count: " +
         std::to_string(values.size());
}

void ConcatLayer::exportTo(Exporter &exporter,
                           const ml::train::ExportMethods &method) const {
  Layer::exportTo(exporter, method);
  exporter.saveResult(concat_props, method, this);
}

} /* namespace nntrainer */