// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2020 Parichay Kapoor <pk.kapoor@samsung.com>
 *
 * @file dynamic_training_optimization.cpp
 * @date 5 January 2021
 * @see https://github.com/nnstreamer/nntrainer
 * @author Parichay Kapoor <pk.kapoor@samsung.com>
 * @bug No known bugs except for NYI items
 * @brief This is Dynamic Training Optimization for Neural Networks
 *
 */

#include <cmath>
#include <functional>
#include <numeric>
#include <random>
#include <vector>

#include <dynamic_training_optimization.h>
#include <optimizer_wrapped.h>
#include <tensor.h>
#include <util_func.h>
#include <weight.h>

namespace nntrainer {
DynamicTrainingOptimization::DynamicTrainingOptimization(float threshold_,
                                                         int skip_n_iter) :
  threshold(threshold_),
  enabled(false),
  epsilon(1e-7f),
  skip_n_iterations(skip_n_iter) {
  reduce_op = reduceByNorm;
  calc_ratio_op = ratioUsingDerivative;
  // rng.seed(getSeed());
  rng.seed(0);
  dist = std::uniform_real_distribution<float>(0.0, 1.0);
}

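/*
 * Note on the defaults set above: update magnitudes are summarized with the
 * RMS-style reduction (reduceByNorm) and the update ratio is estimated from
 * the incoming derivative (ratioUsingDerivative), so the weight's own
 * gradient is not needed for the skip decision. The RNG is seeded with a
 * constant, which makes the stochastic skip decisions reproducible across
 * runs; re-enabling getSeed() (commented out above) would randomize them.
 */
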
/**
 * @brief Check whether the update should be applied to the given weights
 * @note Returns true if the update should be applied, false if it can be
 * skipped
 */
bool DynamicTrainingOptimization::checkIfApply(
  const std::vector<Weight> &weights, const std::shared_ptr<Var_Grad> &input,
  const std::shared_ptr<Var_Grad> &output,
  const std::shared_ptr<OptimizerWrapped> &opt, int iteration) {
  if (!enabled || iteration < skip_n_iterations)
    return true;

  std::vector<bool> apply;
  apply.reserve(weights.size());

  for (auto const &weight : weights)
    apply.push_back(checkIfApply(weight, input, output, opt, iteration));

  return std::accumulate(apply.begin(), apply.end(), true,
                         std::logical_and<bool>());
}

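/*
 * The per-weight decisions are folded with logical AND, so this overload
 * reports "apply" only when every individual weight's check says apply; a
 * single skip vote skips the update for the whole set. While the
 * optimization is disabled, or during the first skip_n_iterations
 * iterations, updates are always applied.
 */
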
/**
 * @brief Check whether the update should be applied to the given weight
 * @note Returns true if the update should be applied, false if it can be
 * skipped
 */
bool DynamicTrainingOptimization::checkIfApply(
  const Weight &weight, const std::shared_ptr<Var_Grad> &input,
  const std::shared_ptr<Var_Grad> &output,
  const std::shared_ptr<OptimizerWrapped> &opt, int iteration) {
  if (iteration < skip_n_iterations)
    return true;

  if (!weight.hasGradient() || weight.getGradientRef().empty())
    return true;

  float reduced_ratio = calc_ratio_op(weight, input, output, reduce_op);

  return checkIfApply(reduced_ratio, (float)opt->getLearningRate(iteration));
}

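/*
 * Weights without a gradient, or whose gradient tensor is still empty,
 * always report "apply" so they never force the update to be skipped. For
 * the remaining weights the decision reduces to the scalar test further
 * below, using the learning rate the wrapped optimizer reports for this
 * iteration.
 */
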
/**
 * @brief Calculate the ratio of the update to the weight using the derivative
 */
float DynamicTrainingOptimization::ratioUsingDerivative(
  const Weight &weight, const std::shared_ptr<Var_Grad> &input,
  const std::shared_ptr<Var_Grad> &output,
  std::function<float(Tensor const &)> reduce_op) {
  float reduced_derivative = reduce_op(output->getGradientRef());
  float reduced_input = reduce_op(input->getVariableRef());
  float reduced_weight = reduce_op(weight.getVariableRef());
  float reduced_grad = reduced_derivative * reduced_input;

  return reduced_grad / reduced_weight;
}

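/*
 * Rationale (illustrative): for a densely connected layer the weight
 * gradient is an outer-product-like combination of the layer input and the
 * incoming derivative, so its magnitude can be approximated by the product
 * of the two reduced values without materializing the gradient tensor
 * itself. Dividing by the reduced weight gives a rough relative update size.
 * A reduced weight of zero would make the ratio undefined; the epsilon
 * member set in the constructor is a plausible guard for that case but is
 * not used in this function.
 */
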
/**
 * @brief Calculate the ratio of the update to the weight using the gradient
 */
float DynamicTrainingOptimization::ratioUsingGradient(
  const Weight &weight, const std::shared_ptr<Var_Grad> &input,
  const std::shared_ptr<Var_Grad> &output,
  std::function<float(Tensor const &)> reduce_op) {
  Tensor ratio = weight.getGradientRef().divide(weight.getVariableRef());
  return reduce_op(ratio);
}

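/*
 * Alternative ratio operator: unlike ratioUsingDerivative, this variant
 * reads the weight's gradient, so it can only be used once that gradient has
 * already been computed. The element-wise divide also means weights
 * containing zero entries would produce non-finite values in the ratio
 * tensor before reduction.
 */
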
/**
 * @brief Check if the update should be applied or skipped
 * @note Returns true if the update should be applied, false if it can be
 * skipped
 */
bool DynamicTrainingOptimization::checkIfApply(float reduced_ratio,
                                               float learning_rate) {
  /**
   * Scale the reduced update ratio by learning_rate / threshold. If the
   * scaled ratio is 1 or more, the update is always applied; otherwise it is
   * applied with probability equal to the scaled ratio.
   */
  if (dist(rng) < reduced_ratio * learning_rate / threshold)
    return true;

  return false;
}

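/*
 * Illustrative numbers (not taken from the source): with threshold = 1e-3,
 * learning_rate = 1e-2 and reduced_ratio = 0.05, the scaled ratio is
 * 0.05 * 1e-2 / 1e-3 = 0.5, so the update is applied on roughly half of the
 * iterations; a reduced_ratio of 0.1 or more would make it unconditional.
 */
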
/**
 * @brief Operation to decide if update should be skipped
 * @note Calculate the max (l-infinity) norm of the tensor
 */
float DynamicTrainingOptimization::reduceByMax(Tensor const &ratio) {
  return ratio.max_abs();
}

/**
 * @brief Operation to decide if update should be skipped
 * @note Calculate the l2 norm of the tensor normalized by the square root of
 * its size (the RMS of its elements)
 */
float DynamicTrainingOptimization::reduceByNorm(Tensor const &ratio) {
  float l2norm = ratio.l2norm();
  return l2norm / std::sqrt(ratio.size());
}

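/*
 * Dividing by sqrt(size) turns the l2 norm into the RMS of the elements,
 * which keeps the reduced value on a comparable scale across tensors of
 * different sizes; presumably this is what allows a single threshold to be
 * shared across weights of different shapes.
 */
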
} /* namespace nntrainer */