// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2020 Jijoong Moon <jijoong.moon@samsung.com>
 *
 * @file pooling2d_layer.cpp
 * @date 12 June 2020
 * @see https://github.com/nnstreamer/nntrainer
 * @author Jijoong Moon <jijoong.moon@samsung.com>
 * @author Donghak Park <donghak.park@samsung.com>
 * @author Jiho Chu <jiho.chu@samsung.com>
 * @bug No known bugs except for NYI items
 * @brief This is the 2-dimensional pooling layer class for neural networks
 *
 */

#include <cstring>
#include <limits>

#include <common_properties.h>
#include <layer_context.h>
#include <nntr_threads.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
#include <pooling2d_layer.h>
#include <util_func.h>

namespace nntrainer {

static constexpr size_t SINGLE_INOUT_IDX = 0;

/**
 * @brief Helper type defining the pooling handler function signature
 */
template <typename T> struct PoolFunc {
  typedef std::function<T(const T *, int, int, int)> Type;
};
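
// A PoolFunc<T>::Type callable receives (channel-sliced input data, channel
// index, patch start height, patch start width) and returns the pooled value
// for that patch; see the pool_fn_* lambdas in Pooling2DLayer::pooling2d().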

Pooling2DLayer::Pooling2DLayer(
  const std::array<unsigned int, POOLING2D_DIM * 2> &padding_) :
  Layer(),
  padding(padding_),
  pooling2d_props(props::PoolingType(), std::vector<props::PoolSize>(),
                  std::array<props::Stride, POOLING2D_DIM>(),
                  props::Padding2D()),
  pool_helper_idx(0) {}

void Pooling2DLayer::finalize(InitLayerContext &context) {
  NNTR_THROW_IF(context.getNumInputs() != 1, std::invalid_argument)
    << "[Pooling2D] pooling layer only takes one input";

  const TensorDim &in_dim = context.getInputDimensions()[SINGLE_INOUT_IDX];
  TensorDim out_dim;

  auto &pool_size = std::get<std::vector<props::PoolSize>>(pooling2d_props);
  NNTR_THROW_IF(!(pool_size.empty() || pool_size.size() == 2),
                std::invalid_argument)
    << "[Pooling2D] the number of pool sizes must be either 0 or 2";
  /** @todo update default value of pooling stride to be pool size */
  auto &stride =
    std::get<std::array<props::Stride, POOLING2D_DIM>>(pooling2d_props);
  auto &pooling_type = std::get<props::PoolingType>(pooling2d_props).get();

  if (pooling_type == props::PoolingTypeInfo::Enum::global_max ||
      pooling_type == props::PoolingTypeInfo::Enum::global_average) {
    if (!pool_size.empty()) {
      ml_logw(
        "[Pooling2D] global_max and global_average do not accept pool size");
      pool_size.clear();
    }
    pool_size.emplace_back(props::PoolSize(in_dim.height()));
    pool_size.emplace_back(props::PoolSize(in_dim.width()));
  }

  padding = std::get<props::Padding2D>(pooling2d_props)
              .compute(in_dim, {pool_size[0], pool_size[1]},
                       {stride[0], stride[1]}, {1, 1});

  auto [pt, pb, pl, pr] = padding;

  if (pooling_type == props::PoolingTypeInfo::Enum::global_max ||
      pooling_type == props::PoolingTypeInfo::Enum::global_average) {
    NNTR_THROW_IF(pt + pb + pl + pr != 0, std::invalid_argument)
      << "[Pooling2D] global_max and global_average do not accept padding";

    NNTR_THROW_IF(static_cast<int>(stride[0]) != 1 ||
                    static_cast<int>(stride[1]) != 1,
                  std::invalid_argument)
      << "[Pooling2D] global_max and global_average do not accept stride";
  }

  unsigned int eff_in_height = in_dim.height() + pt + pb;
  unsigned int eff_in_width = in_dim.width() + pl + pr;

  NNTR_THROW_IF(eff_in_height < pool_size[0] || eff_in_width < pool_size[1],
                std::invalid_argument)
    << "[Pooling2D] Failed to initialize: input size + padding is smaller "
       "than the effective kernel";

  unsigned int IM = std::numeric_limits<int>::max();

  NNTR_THROW_IF(eff_in_height - pt - pool_size[0] > IM ||
                  eff_in_width - pl - pool_size[1] > IM,
                std::invalid_argument)
    << "[Pooling2D] Failed to initialize: calculated patch end is over "
       "INT_MAX";

  out_dim.batch(in_dim.batch());
  out_dim.channel(in_dim.channel());
  out_dim.height((eff_in_height - pool_size[0]) / stride[0] + 1);
  out_dim.width((eff_in_width - pool_size[1]) / stride[1] + 1);
  out_dim.setDataType(in_dim.getDataType());
  context.setOutputDimensions({out_dim});
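
  // e.g.) with a 5x5 input, 2x2 pool, stride {1, 1} and no padding, each
  // output map is (5 - 2) / 1 + 1 = 4 elements wide and 4 elements tall.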

  /**
   * In case of max pooling, the helper stores the index pointing to the
   * first max item of each patch. In case of average pooling, the helper
   * stores the effective average counter, i.e. the number of elements
   * actually counted into the patch when calculating the average.
   * // clang-format off
   * e.g.) pooling over the patch below, where x marks padding
   * x x x
   * x 3 3
   * x 3 3
   * = 12 / 4 = 3
   * // clang-format on
   */
  if (pooling_type == props::PoolingTypeInfo::Enum::global_max) {
    auto helper_dim = in_dim;
    helper_dim.setDataType(ml::train::TensorDim::DataType::FP32);
    pool_helper_idx =
      context.requestTensor(helper_dim, "helper_idx", Initializer::NONE, false,
                            TensorLifespan::ITERATION_LIFESPAN);
    pool_helper_size.resize(helper_dim.batch() * helper_dim.channel());
  } else {
    auto helper_dim = out_dim;
    helper_dim.setDataType(ml::train::TensorDim::DataType::FP32);
    pool_helper_idx =
      context.requestTensor(helper_dim, "helper_idx", Initializer::NONE, false,
                            TensorLifespan::ITERATION_LIFESPAN);
  }
}

void Pooling2DLayer::forwarding(RunLayerContext &context, bool training) {
  Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
  Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
  Tensor &pool_helper = context.getTensor(pool_helper_idx);

  const TensorDim &in_dim = input_.getDim();

  auto forwarding_job = [&](unsigned int s, unsigned int e, unsigned int pid,
                            void *user_data) {
    for (unsigned int b = s; b < e; ++b) {
      Tensor in_sub = input_.getBatchSlice(b, 1);
      Tensor result = hidden_.getBatchSlice(b, 1);
      Tensor helper = pool_helper.getBatchSlice(b, 1);
      pooling2d(in_sub, training, result, helper, b);
    }
  };
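
  // Each sample of the batch is pooled independently: run the jobs through
  // ParallelBatch when more than one worker is available, otherwise execute
  // the job inline on this thread.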
  auto workers = ParallelBatch(forwarding_job, in_dim.batch(), nullptr);

  if (workers.getNumWorkers() > 1) {
    workers.run();
  } else {
    forwarding_job(0, in_dim.batch(), 0, nullptr);
  }
}

void Pooling2DLayer::calcDerivative(RunLayerContext &context) {
  auto &pool_size = std::get<std::vector<props::PoolSize>>(pooling2d_props);
  auto &stride =
    std::get<std::array<props::Stride, POOLING2D_DIM>>(pooling2d_props);
  auto &pooling_type = std::get<props::PoolingType>(pooling2d_props).get();

  const Tensor &deriv = context.getIncomingDerivative(SINGLE_INOUT_IDX);
  Tensor &result = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
  Tensor &pool_helper = context.getTensor(pool_helper_idx);

  const TensorDim &in_dim = result.getDim();
  unsigned int batch = in_dim.batch();
  unsigned int channel = in_dim.channel();
  int height = in_dim.height();
  int width = in_dim.width();

  auto pt = padding[0];
  auto pl = padding[2];
  unsigned int p_height = pool_size[0];
  unsigned int p_width = pool_size[1];

  unsigned int J, K;

  result.setZero();

  unsigned int out_map_size = deriv.height() * deriv.width();
  unsigned int in_map_size = height * width;
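
  // Max pooling backward: route each output gradient to the single input
  // position whose index was recorded in pool_helper during forwarding.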
  auto apply_max = [&]<typename T>(T *result_data) {
    const int *iter = pool_helper.getData<int>();
    const T *deriv_data = deriv.getData<T>();
    for (unsigned int b = 0; b < batch; ++b) {
      for (unsigned int c = 0; c < channel; ++c) {
        for (unsigned int i = 0; i < out_map_size; ++i) {
          /// pool_helper == -1 means the max idx was at the padding, so no
          /// need to update
          if (*iter != -1) {
            result_data[*iter] += *deriv_data;
          }
          iter++;
          deriv_data++;
        }
        result_data += in_map_size;
      }
    }
  };
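
  // Average pooling backward: spread each output gradient uniformly over the
  // patch, dividing by the effective element count (*iter) that forwarding
  // stored in pool_helper for that patch.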
  auto apply_average = [&]<typename T>(T *result_data) {
    int height_stride_end = height - p_height + pt;
    int width_stride_end = width - p_width + pl;
    const int *iter = pool_helper.getData<int>();
    for (unsigned int b = 0; b < batch; ++b) {
      for (unsigned int i = 0; i < channel; ++i) {
        J = 0;
        for (int j = -(int)pt; j <= height_stride_end; j += stride[0]) {
          K = 0;
          for (int k = -(int)pl; k <= width_stride_end; k += stride[1]) {
            T del = deriv.getValue<T>(b, i, J, K) / *iter;
            int patch_height_end =
              std::min(static_cast<int>(j + p_height), height);
            int patch_width_end =
              std::min(static_cast<int>(k + p_width), width);
            int start_h = std::max(0, j);
            int start_w = std::max(0, k);
            for (int h = start_h; h < patch_height_end; ++h) {
              for (int w = start_w; w < patch_width_end; ++w) {
                result.setValue(b, i, h, w,
                                result.getValue<T>(b, i, h, w) + del);
              }
            }
            iter++;
            K++;
          }
          J++;
        }
      }
    }
  };
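
  // Global max pooling backward: forwarding may record several indices that
  // tie for the maximum, so the gradient is split evenly among all of them.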
  auto apply_global_max = [&]<typename T>(T *result_data) {
    const T *deriv_data = deriv.getData<T>();
    for (unsigned int b = 0; b < batch; b++) {
      for (unsigned int c = 0; c < channel; c++) {
        const int *iter =
          pool_helper.getData<int>() + pool_helper.getIndex(b, c, 0, 0);
        unsigned int helper_size = pool_helper_size[b * channel + c];
        T der = *deriv_data / static_cast<T>(helper_size);

        for (unsigned int idx = 0; idx < helper_size; idx++)
          result_data[iter[idx]] += der;
        deriv_data++;
        result_data += in_map_size;
      }
    }
  };

  auto in_data_type = in_dim.getDataType();

  if (in_data_type == ml::train::TensorDim::DataType::FP32) {
    switch (pooling_type) {
    case props::PoolingTypeInfo::Enum::max:
      apply_max(result.getData<float>());
      break;
    case props::PoolingTypeInfo::Enum::global_average:
    case props::PoolingTypeInfo::Enum::average:
      apply_average(result.getData<float>());
      break;
    case props::PoolingTypeInfo::Enum::global_max:
      apply_global_max(result.getData<float>());
      break;
    default:
      throw std::runtime_error("Error: Unknown Pooling Type");
    }
  }
#ifdef ENABLE_FP16
  else if (in_data_type == ml::train::TensorDim::DataType::FP16) {
    switch (pooling_type) {
    case props::PoolingTypeInfo::Enum::max:
      apply_max(result.getData<_FP16>());
      break;
    case props::PoolingTypeInfo::Enum::global_average:
    case props::PoolingTypeInfo::Enum::average:
      apply_average(result.getData<_FP16>());
      break;
    case props::PoolingTypeInfo::Enum::global_max:
      apply_global_max(result.getData<_FP16>());
      break;
    default:
      throw std::runtime_error("Error: Unknown Pooling Type");
    }
  }
#endif
  else {
    throw std::runtime_error("Unsupported datatype");
  }
}

void Pooling2DLayer::exportTo(Exporter &exporter,
                              const ml::train::ExportMethods &method) const {
  exporter.saveResult(pooling2d_props, method, this);
}

void Pooling2DLayer::setProperty(const std::vector<std::string> &values) {
  auto remain_props = loadProperties(values, pooling2d_props);
  NNTR_THROW_IF(!remain_props.empty(), std::invalid_argument)
    << "[Pooling2dLayer] Unknown Layer Properties count " +
         std::to_string(values.size());
}
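
/*
 * For illustration only: a minimal configuration sketch, assuming the
 * property keys declared in common_properties.h ("pooling", "pool_size",
 * "stride", "padding"). A 2x2 max pooling layer might be configured as:
 *
 *   layer->setProperty(
 *     {"pooling=max", "pool_size=2,2", "stride=2,2", "padding=valid"});
 *
 * Keys that remain unparsed are rejected by the NNTR_THROW_IF above.
 */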

void Pooling2DLayer::pooling2d(Tensor &in, bool training, Tensor &output,
                               Tensor &pool_helper, int batch_idx) {

  auto &pool_size = std::get<std::vector<props::PoolSize>>(pooling2d_props);
  auto &stride =
    std::get<std::array<props::Stride, POOLING2D_DIM>>(pooling2d_props);
  auto &pooling_type = std::get<props::PoolingType>(pooling2d_props).get();

  unsigned int channel = in.channel();
  auto [pt, pb, pl, pr] = padding;

  int in_height = in.height();
  int in_width = in.width();
  unsigned int height = in_height + pt + pb;
  unsigned int width = in_width + pl + pr;
  unsigned int patch_height = pool_size[0];
  unsigned int patch_width = pool_size[1];

  NNTR_THROW_IF(output.empty(), std::invalid_argument)
    << "[Pooling2D] output is uninitialized, this is not supported";

  /**
   * @brief pooling function
   * @param in_c channel-sliced input data
   * @param channel_idx index of the channel being pooled
   * @param start_h height index pointing to the start of the patch
   * @param start_w width index pointing to the start of the patch
   * @return result value of pooling
   */
  PoolFunc<float>::Type pool_fn_fp32;
#ifdef ENABLE_FP16
  PoolFunc<_FP16>::Type pool_fn_fp16;
#endif

  unsigned int max_idx_count = 0;

  auto pool_fn_max = [&]<typename T>(const T *in_data, int channel_idx,
                                     int start_h, int start_w) {
    int end_h = start_h + patch_height;
    int end_w = start_w + patch_width;

    T max_val = std::numeric_limits<T>::lowest();

    int cur_max_idx = -1;
    int eff_end_h = std::min(end_h, in_height);
    int eff_end_w = std::min(end_w, in_width);
    start_w = std::max(0, start_w);
    for (int h = std::max(0, start_h); h < eff_end_h; ++h) {
      for (int w = start_w; w < eff_end_w; ++w) {
        int cur_idx = h * in_width + w;
        T val = in_data[cur_idx];
        if (max_val < val) {
          max_val = val;
          if (training) {
            cur_max_idx = cur_idx;
          }
        }
      }
    }

    if (training) {
      pool_helper.setValueInt(max_idx_count++, cur_max_idx);
    }

    return max_val;
  };
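
  // Global max pooling records every index that ties for the running maximum
  // into the helper tensor and keeps the tie count per (batch, channel) in
  // pool_helper_size, so calcDerivative can split the gradient among ties.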
  auto pool_fn_global_max = [&, this]<typename T>(const T *in_data,
                                                  int channel_idx, int start_h,
                                                  int start_w) {
    int end_h = start_h + patch_height;
    int end_w = start_w + patch_width;

    T max_val = std::numeric_limits<T>::lowest();
    int *helper_data = pool_helper.getData<int>();
    helper_data += channel_idx * in_height * in_width;

    for (int h = start_h; h < end_h; ++h) {
      for (int w = start_w; w < end_w; ++w) {
        int cur_idx = h * in_width + w;
        T val = in_data[cur_idx];
        if (max_val < val) {
          max_val = val;
          max_idx_count = 0;
        }

        if (training && max_val == val) {
          *(helper_data + max_idx_count++) = cur_idx;
        }
      }
    }

    pool_helper_size[batch_idx * in.channel() + channel_idx] = max_idx_count;
    return max_val;
  };
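
  // Average pooling sums only elements inside the input map; a patch that
  // overlaps the padding divides by the number of real elements (cnt) rather
  // than by patch_height * patch_width, and cnt is stored in the helper for
  // the backward pass.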
  auto pool_fn_average = [&]<typename T>(const T *in_data, int channel_idx,
                                         int start_h, int start_w) {
    int end_h = start_h + patch_height;
    int end_w = start_w + patch_width;
    T total = static_cast<T>(0.0f);

    int eff_end_h = std::min(end_h, in_height);
    int eff_end_w = std::min(end_w, in_width);
    int eff_start_h = std::max(0, start_h);
    int eff_start_w = std::max(0, start_w);

    int cnt = (eff_end_h - eff_start_h) * (eff_end_w - eff_start_w);
    for (int h = eff_start_h; h < eff_end_h; ++h) {
      for (int w = eff_start_w; w < eff_end_w; ++w) {
        T val = in_data[h * in_width + w];
        total += val;
      }
    }

    if (training) {
      pool_helper.setValueInt(max_idx_count++, cnt);
    }
    return total / cnt;
  };

  switch (pooling_type) {
  case props::PoolingTypeInfo::Enum::max:
    pool_fn_fp32 = pool_fn_max;
#ifdef ENABLE_FP16
    pool_fn_fp16 = pool_fn_max;
#endif
    break;
  case props::PoolingTypeInfo::Enum::global_max:
    pool_fn_fp32 = pool_fn_global_max;
#ifdef ENABLE_FP16
    pool_fn_fp16 = pool_fn_global_max;
#endif
    break;
  case props::PoolingTypeInfo::Enum::global_average:
  case props::PoolingTypeInfo::Enum::average:
    pool_fn_fp32 = pool_fn_average;
#ifdef ENABLE_FP16
    pool_fn_fp16 = pool_fn_average;
#endif
    break;
  case props::PoolingTypeInfo::Enum::unknown:
  default:
    throw std::invalid_argument("unknown pooling type given");
  }
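
  // Slide the patch over the (virtually) padded input: j and k iterate over
  // patch start coordinates, where negative values address the padding area.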
  if (in.getDataType() == ml::train::TensorDim::DataType::FP32) {
    const float *in_data = in.getData<float>();
    float *out_data = output.getData<float>();

    unsigned int map_size = in_height * in_width;

    int height_stride_end = height - patch_height - pt;
    int width_stride_end = width - patch_width - pl;
    for (unsigned int i = 0; i < channel; ++i) {
      const float *in_data_channel_sliced = in_data + i * map_size;
      for (int j = -(int)pt; j <= height_stride_end; j += stride[0]) {
        for (int k = -(int)pl; k <= width_stride_end; k += stride[1]) {
          float pool_value = pool_fn_fp32(in_data_channel_sliced, i, j, k);
          *out_data = pool_value;
          out_data++;
        }
      }
    }
  }
#ifdef ENABLE_FP16
  else if (in.getDataType() == ml::train::TensorDim::DataType::FP16) {
    const _FP16 *in_data = in.getData<_FP16>();
    _FP16 *out_data = output.getData<_FP16>();

    unsigned int map_size = in_height * in_width;

    int height_stride_end = height - patch_height - pt;
    int width_stride_end = width - patch_width - pl;
    for (unsigned int i = 0; i < channel; ++i) {
      const _FP16 *in_data_channel_sliced = in_data + i * map_size;
      for (int j = -(int)pt; j <= height_stride_end; j += stride[0]) {
        for (int k = -(int)pl; k <= width_stride_end; k += stride[1]) {
          _FP16 pool_value = pool_fn_fp16(in_data_channel_sliced, i, j, k);
          *out_data = pool_value;
          out_data++;
        }
      }
    }
  }
#endif
  else {
    throw std::runtime_error("Unsupported datatype");
  }
}

void Pooling2DLayer::setBatch(RunLayerContext &context, unsigned int batch) {
  context.updateTensor(pool_helper_idx, batch);
  props::PoolingTypeInfo::Enum pooling_type =
    std::get<props::PoolingType>(pooling2d_props).get();
  if (pooling_type == props::PoolingTypeInfo::Enum::global_max)
    pool_helper_size.resize(batch * context.getInput(0).channel());
}

} /* namespace nntrainer */