// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2020 Jijoong Moon <jijoong.moon@samsung.com>
 *
 * @file embedding.cpp
 * @date 04 March 2021
 * @brief This is Embedding Layer Class of Neural Network
 * @see https://github.com/nnstreamer/nntrainer
 * @author Jijoong Moon <jijoong.moon@samsung.com>
 * @bug No known bugs except for NYI items
 *
 */

#include <embedding.h>
#include <layer_context.h>
#include <lazy_tensor.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
#include <util_func.h>

#include <algorithm>  // std::transform
#include <functional> // std::plus
#include <iostream>
#include <limits> // std::numeric_limits

namespace nntrainer {

static constexpr size_t SINGLE_INOUT_IDX = 0;

enum EmbeddingParams { weight };

EmbeddingLayer::EmbeddingLayer() :
  LayerImpl(),
  embedding_props(props::InDim(), props::OutDim()),
  weight_idx(std::numeric_limits<unsigned>::max()) {}

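/**
 * Shape contract (as inferred from the implementation below): the input
 * carries FP32 word indices with the sequence along the width axis and a
 * channel size of 1, the output becomes (batch, 1, sequence_length,
 * out_dim), and the lookup table is a single (1, 1, in_dim, out_dim)
 * weight whose i-th row is the embedding vector for word index i.
 */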
void EmbeddingLayer::finalize(InitLayerContext &context) {
  NNTR_THROW_IF(context.getNumInputs() != 1, std::invalid_argument)
    << "Embedding layer takes only one input";

  const TensorDim &input_dim = context.getInputDimensions()[SINGLE_INOUT_IDX];
  NNTR_THROW_IF(input_dim.channel() != 1, std::invalid_argument)
    << "Embedding layer takes an input with channel size 1 only";

  NNTR_THROW_IF(input_dim.getDataType() != TensorDim::DataType::FP32,
                std::invalid_argument)
    << "Embedding layer takes only FP32 input data";

  auto &weight_regularizer =
    std::get<props::WeightRegularizer>(*layer_impl_props);
  auto &weight_regularizer_constant =
    std::get<props::WeightRegularizerConstant>(*layer_impl_props);
  auto &weight_initializer =
    std::get<props::WeightInitializer>(*layer_impl_props);
  auto &weight_decay = std::get<props::WeightDecay>(*layer_impl_props);

  unsigned int in_dim = std::get<props::InDim>(embedding_props);
  unsigned int out_dim = std::get<props::OutDim>(embedding_props);

  /// the input width (sequence length) becomes the output height, and each
  /// index expands to an embedding vector of out_dim elements
  TensorDim output_dim = input_dim;

  output_dim.height(input_dim.width());
  output_dim.width(out_dim);
  output_dim.setTensorType(
    {context.getFormat(), context.getActivationDataType()});
  context.setOutputDimensions({output_dim});

  /// the lookup table itself: a single (in_dim x out_dim) weight matrix
  TensorDim dim = output_dim;

  dim.setTensorType({context.getFormat(), context.getWeightDataType()});

  dim.height(in_dim);
  dim.width(out_dim);
  dim.batch(1);

  weight_idx = context.requestWeight(
    dim, weight_initializer, weight_regularizer, weight_regularizer_constant,
    weight_decay, "Embedding", true);
}

void EmbeddingLayer::setProperty(const std::vector<std::string> &values) {
  auto remain_props = loadProperties(values, embedding_props);
  LayerImpl::setProperty(remain_props);
}

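/**
 * Forwarding is a pure table lookup: for each index in the input sequence,
 * the matching row of the weight matrix is copied into the output slot.
 * Conceptually this equals multiplying a one-hot encoding of the input by
 * the weight matrix, without ever materializing the one-hot tensor.
 * For example (hypothetical numbers), with in_dim = 10, out_dim = 4, and an
 * input row [3, 7], the output is a 2 x 4 slice made of weight rows 3 and 7.
 */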
void EmbeddingLayer::forwarding(RunLayerContext &context, bool training) {
  /// @todo get input and output dimension from input_ and hidden itself
  unsigned int in_dim = std::get<props::InDim>(embedding_props);
  unsigned int out_dim = std::get<props::OutDim>(embedding_props);

  Tensor &weight = context.getWeight(weight_idx);
  Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
  Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
  TensorDim out_tensor_dim =
    TensorDim({1, 1, 1, out_dim}, hidden_.getTensorType());

  for (unsigned int b = 0; b < input_.batch(); ++b) {
    float *in_data =
      input_.getAddress<float>(b * input_.getDim().getFeatureLen());

    Tensor batchsliced_hidden = hidden_.getBatchSlice(b, 1);
    for (unsigned int i = 0; i < input_.width(); ++i) {
      unsigned int embed_idx = static_cast<unsigned int>(in_data[i]);
      if (embed_idx >= in_dim) {
        throw std::invalid_argument(
          "input word index must be smaller than in_dim");
      }

      /// copy the embed_idx-th row of the lookup table into the i-th output
      /// slot; shared data tensors avoid allocating new memory
      Tensor cur_weight =
        weight.getSharedDataTensor(out_tensor_dim, out_dim * embed_idx);
      Tensor out_tensor =
        batchsliced_hidden.getSharedDataTensor(out_tensor_dim, out_dim * i);
      out_tensor.copyData(cur_weight);
    }
  }
}

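/**
 * Incremental forwarding embeds only the tokens in [from, to); this appears
 * intended for autoregressive decoding, where earlier positions were already
 * embedded. When resuming (from != 0), only a single-token step is accepted,
 * and the window is remapped to [0, 1) within the current input.
 */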
void EmbeddingLayer::incremental_forwarding(RunLayerContext &context,
                                            unsigned int from, unsigned int to,
                                            bool training) {

  /// @todo get input and output dimension from input_ and hidden itself
  unsigned int in_dim = std::get<props::InDim>(embedding_props);
  unsigned int out_dim = std::get<props::OutDim>(embedding_props);

  if (from) {
    NNTR_THROW_IF(to - from != 1, std::invalid_argument)
      << "incremental step size is not 1";
    from = 0;
    to = 1;
  }

  Tensor &weight = context.getWeight(weight_idx);
  Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
  Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);

  TensorDim out_tensor_dim =
    TensorDim({1, 1, 1, out_dim}, hidden_.getTensorType());

  for (unsigned int b = 0; b < input_.batch(); ++b) {
    float *in_data =
      input_.getAddress<float>(b * input_.getDim().getFeatureLen());

    Tensor batchsliced_hidden = hidden_.getBatchSlice(b, 1);
    for (unsigned int i = from; i < to; ++i) {
      unsigned int embed_idx = static_cast<unsigned int>(in_data[i]);
      if (embed_idx >= in_dim) {
        throw std::invalid_argument(
          "input word index must be smaller than in_dim");
      }

      Tensor cur_weight =
        weight.getSharedDataTensor(out_tensor_dim, out_dim * embed_idx);

      Tensor out_tensor = batchsliced_hidden.getSharedDataTensor(
        out_tensor_dim, out_dim * (i - from));

      out_tensor.copyData(cur_weight);
    }
  }
}

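/// No gradient flows back through the input: it holds discrete word
/// indices, so a derivative w.r.t. the input is not meaningful here.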
void EmbeddingLayer::calcDerivative(RunLayerContext &context) {
  throw exception::not_supported(
    "calcDerivative for Embedding layer is not supported");
}

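/**
 * The weight gradient is a scatter-add: for each input index, the matching
 * out_dim-wide slice of the incoming derivative is accumulated into the
 * weight-gradient row selected by that index. Rows of unused indices stay
 * zero, which is why this gradient tensor is effectively sparse.
 */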
void EmbeddingLayer::calcGradient(RunLayerContext &context) {
  unsigned int out_dim = std::get<props::OutDim>(embedding_props);

  Tensor &djdw = context.getWeightGrad(weight_idx);
  const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
  Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);

  djdw.setZero();

  // TODO:
  // This computes the gradient in the dense layout that the current optimizer
  // implementation expects. To accelerate it, we need a better scheme, such
  // as updating the weight rows directly by index.

  /// @todo
  /// The current nntrainer gradient Tensor shape is identical to its weight
  /// shape, which yields a sparse Tensor here because only the rows selected
  /// by the input indices are touched. Since those indices are known before
  /// the Tensor is accessed, this can be optimized by skipping the untouched
  /// rows. Also tracked as an issue.

  for (unsigned int b = 0; b < input_.batch(); ++b) {
    float *in_data =
      input_.getAddress<float>(b * input_.getDim().getFeatureLen());

    if (djdw.getDataType() == TensorDim::DataType::FP32) {
      for (unsigned int i = 0; i < input_.width(); ++i) {
        unsigned int embed_idx = static_cast<unsigned int>(in_data[i]);
        // Assume padding uses index 0 and word indices start from 1; the
        // commented-out check below would skip padding entries.
        // if (embed_idx == 0)
        //   continue;

        float *djdw_data = djdw.getAddress<float>(embed_idx * out_dim);
        const float *grad_data = derivative_.getAddress<float>(
          b * derivative_.getDim().getFeatureLen() + i * out_dim);

        // accumulate the incoming derivative into the selected weight row
        std::transform(djdw_data, djdw_data + out_dim, grad_data, djdw_data,
                       std::plus<float>());
      }
    } else if (djdw.getDataType() == TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
      for (unsigned int i = 0; i < input_.width(); ++i) {
        unsigned int embed_idx = static_cast<unsigned int>(in_data[i]);
        // Assume padding uses index 0 and word indices start from 1; the
        // commented-out check below would skip padding entries.
        // if (embed_idx == 0)
        //   continue;

        _FP16 *djdw_data = djdw.getAddress<_FP16>(embed_idx * out_dim);
        const _FP16 *grad_data = derivative_.getAddress<_FP16>(
          b * derivative_.getDim().getFeatureLen() + i * out_dim);

        std::transform(djdw_data, djdw_data + out_dim, grad_data, djdw_data,
                       std::plus<_FP16>());
      }
#else
      throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
    }
  }
}

void EmbeddingLayer::exportTo(Exporter &exporter,
                              const ml::train::ExportMethods &method) const {
  LayerImpl::exportTo(exporter, method);
  exporter.saveResult(embedding_props, method, this);
}

} // namespace nntrainer