Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2022 Hyeonseok Lee <hs89.lee@samsung.com>
4 : *
5 : * @file positional_encoding_layer.cpp
6 : * @date 16 August 2022
7 : * @brief This file contains the positional encoding layer in transformer
8 : * @see https://github.com/nnstreamer/nntrainer
9 : * https://arxiv.org/abs/1706.03762
10 : * @author Hyeonseok Lee <hs89.lee@samsung.com>
11 : * @bug No known bugs except for NYI items
12 : *
13 : */
14 :
15 : #include <math.h>
16 : #include <regex>
17 :
18 : #include <positional_encoding_layer.h>
19 : #include <tensor_dim.h>
20 :
21 : namespace nntrainer {
22 :
/** @brief Index of the only input and the only output of this layer. */
static constexpr size_t SINGLE_INOUT_IDX{0};

/**
 * @brief Slots of weight_idx, one per tensor requested by the layer.
 *
 * Kept as an unscoped enum because the enumerators are used directly as
 * array subscripts.
 */
enum PositionalEncodingParams {
  positional_encoding = 0,
};
28 :
29 24 : PositionalEncodingLayer::PositionalEncodingLayer() :
30 24 : isPEcalculated(false), positional_encoding_props(props::MaxTimestep()) {
31 : weight_idx.fill(std::numeric_limits<unsigned>::max());
32 24 : }
33 :
34 48 : PositionalEncodingLayer::~PositionalEncodingLayer() {}
35 :
36 10 : void PositionalEncodingLayer::finalize(InitLayerContext &context) {
37 : unsigned int max_token_size =
38 10 : std::get<props::MaxTimestep>(positional_encoding_props);
39 :
40 10 : std::vector<ml::train::TensorDim> input_dims = context.getInputDimensions();
41 10 : context.setOutputDimensions(input_dims);
42 :
43 10 : unsigned int model_dim = input_dims[SINGLE_INOUT_IDX].width();
44 :
45 : ml::train::TensorDim pe_dim(
46 10 : {max_token_size, model_dim},
47 10 : {context.getFormat(), context.getWeightDataType()});
48 10 : weight_idx[PositionalEncodingParams::positional_encoding] =
49 10 : context.requestTensor(pe_dim, "positional_encoding",
50 : nntrainer::Initializer::NONE, false,
51 : nntrainer::TensorLifespan::MAX_LIFESPAN);
52 10 : }
53 :
54 15 : void PositionalEncodingLayer::forwarding(RunLayerContext &context,
55 : bool training) {
56 15 : const nntrainer::Tensor &input = context.getInput(SINGLE_INOUT_IDX);
57 15 : nntrainer::Tensor &output = context.getOutput(SINGLE_INOUT_IDX);
58 :
59 15 : nntrainer::Tensor &pe = context.getTensor(
60 : weight_idx[PositionalEncodingParams::positional_encoding]);
61 :
62 15 : TensorDim input_dim = input.getDim();
63 15 : TensorDim pe_partial_dim({input_dim.height(), input_dim.width()},
64 30 : context.getTensor(0).getTensorType());
65 15 : nntrainer::Tensor pe_partial = pe.getSharedDataTensor(pe_partial_dim, 0);
66 :
67 15 : if (!isPEcalculated) {
68 4 : calculatePositionalEncoding(context);
69 : }
70 :
71 15 : input.add(pe_partial, output);
72 15 : }
73 :
74 2 : void PositionalEncodingLayer::calcDerivative(RunLayerContext &context) {
75 : const nntrainer::Tensor &incoming_derivative =
76 2 : context.getIncomingDerivative(SINGLE_INOUT_IDX);
77 : nntrainer::Tensor &outgoing_derivative =
78 2 : context.getOutgoingDerivative(SINGLE_INOUT_IDX);
79 :
80 2 : outgoing_derivative.copyData(incoming_derivative);
81 2 : }
82 :
83 4 : void PositionalEncodingLayer::calculatePositionalEncoding(
84 : nntrainer::RunLayerContext &context) {
85 : unsigned int max_token_size =
86 4 : std::get<props::MaxTimestep>(positional_encoding_props);
87 :
88 4 : unsigned int model_dim = context.getInput(SINGLE_INOUT_IDX).getDim().width();
89 :
90 4 : nntrainer::Tensor &pe = context.getTensor(
91 : weight_idx[PositionalEncodingParams::positional_encoding]);
92 :
93 : float value;
94 38 : for (unsigned int i = 0; i < max_token_size; ++i) {
95 238 : for (unsigned int j = 0; j < model_dim; ++j) {
96 204 : unsigned int jj = (j >> 1) << 1;
97 204 : value = i / powf(10000.0f, jj / (float)model_dim);
98 204 : if (j & 1) {
99 102 : value = cosf(value);
100 : } else {
101 102 : value = sinf(value);
102 : }
103 204 : pe.setValue(0, 0, i, j, value);
104 : }
105 : }
106 :
107 4 : isPEcalculated = true;
108 4 : }
109 :
110 59 : void PositionalEncodingLayer::setProperty(
111 : const std::vector<std::string> &values) {
112 59 : auto remain_props = loadProperties(values, positional_encoding_props);
113 58 : NNTR_THROW_IF(!remain_props.empty(), std::invalid_argument)
114 2 : << "[positional encoding layer] Unknown Layer Properties count " +
115 4 : std::to_string(values.size());
116 58 : }
117 :
118 2 : void PositionalEncodingLayer::exportTo(
119 : Exporter &exporter, const ml::train::ExportMethods &method) const {
120 2 : exporter.saveResult(positional_encoding_props, method, this);
121 2 : }
122 :
123 : } /* namespace nntrainer */
|