Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : /**
3 : * Copyright (C) 2021 Jihoon Lee <jhoon.it.lee@samsung.com>
4 : *
5 : * @file random_data_producers.cpp
6 : * @date 09 July 2021
7 : * @brief This file contains various random data producers
8 : * @see https://github.com/nnstreamer/nntrainer
9 : * @author Jihoon Lee <jhoon.it.lee@samsung.com>
10 : * @bug No known bugs except for NYI items
11 : *
12 : */
13 : #include <random_data_producers.h>
14 :
15 : #include <base_properties.h>
16 : #include <node_exporter.h>
17 : #include <util_func.h>
18 :
19 : namespace nntrainer {
20 :
21 : /**
22 : * @brief Props containing min value
23 : *
24 : */
25 79 : class PropsMin : public Property<float> {
26 : public:
27 : /**
28 : * @brief Construct a new props min object with a default value
29 : *
30 : * @param value default value
31 : */
32 84 : PropsMin(float value = 0.0f) : nntrainer::Property<float>(value) {}
33 : static constexpr const char *key = "min"; /**< unique key to access */
34 : using prop_tag = float_prop_tag; /**< property type */
35 : };
36 :
37 : /**
38 : * @brief Props containing max value
39 : *
40 : */
41 158 : class PropsMax : public Property<float> {
42 : public:
43 : /**
44 : * @brief Construct a new props max object with a default value
45 : *
46 : * @param value default value
47 : */
48 84 : PropsMax(float value = 1.0f) : nntrainer::Property<float>(value) {}
49 : static constexpr const char *key = "max"; /**< unique key to access */
50 : using prop_tag = float_prop_tag; /**< property type */
51 : };
52 :
53 : /**
54 : * @brief Props containing number of samples
55 : * A random data producer has theoretical size. number of samples is used to set
56 : * theoretical size of the random data producer's data size
57 : *
58 : */
59 : class PropsNumSamples : public Property<unsigned int> {
60 : public:
61 : /**
62 : * @brief Construct a new props data size object with a default value
63 : *
64 : * @param value default value
65 : */
66 84 : PropsNumSamples(unsigned int value = 512) :
67 84 : nntrainer::Property<unsigned int>(value) {}
68 : static constexpr const char *key = "num_samples"; /**< unique key to access */
69 : using prop_tag = uint_prop_tag; /**< property type */
70 : };
71 :
72 84 : RandomDataOneHotProducer::RandomDataOneHotProducer() :
73 168 : rd_one_hot_props(new Props()) {}
74 :
75 168 : RandomDataOneHotProducer::~RandomDataOneHotProducer() {}
76 :
77 0 : const std::string RandomDataOneHotProducer::getType() const {
78 0 : return RandomDataOneHotProducer::type;
79 : }
80 :
81 0 : bool RandomDataOneHotProducer::isMultiThreadSafe() const {
82 : /// @todo make this true, it is needed to test multiple worker scenario
83 0 : return false;
84 : }
85 :
86 84 : void RandomDataOneHotProducer::setProperty(
87 : const std::vector<std::string> &properties) {
88 84 : auto left = loadProperties(properties, *rd_one_hot_props);
89 84 : NNTR_THROW_IF(!left.empty(), std::invalid_argument)
90 : << "There are unparsed properties, size: " << left.size();
91 84 : }
92 :
93 : DataProducer::Generator
94 79 : RandomDataOneHotProducer::finalize(const std::vector<TensorDim> &input_dims,
95 : const std::vector<TensorDim> &label_dims,
96 : void *user_data) {
97 : /** check if the given producer is ready to finalize */
98 79 : nntrainer::PropsMin min_;
99 79 : nntrainer::PropsMax max_;
100 : std::tie(min_, max_, std::ignore) = *rd_one_hot_props;
101 :
102 : /// @todo expand this to non onehot case
103 227 : NNTR_THROW_IF(std::any_of(label_dims.begin(), label_dims.end(),
104 : [](const TensorDim &dim) {
105 : return dim.channel() != 1 || dim.height() != 1;
106 : }),
107 : std::invalid_argument)
108 : << "Label dimension containing channel or height not allowed";
109 :
110 79 : NNTR_THROW_IF(min_.get() > max_.get(), std::invalid_argument)
111 : << "Min value is bigger then max value, min: " << min_.get()
112 2 : << "max: " << max_.get();
113 :
114 : /// @todo move this to higher order component
115 79 : NNTR_THROW_IF(size(input_dims, label_dims) == 0, std::invalid_argument)
116 : << "size is zero, data producer does not provide anything";
117 :
118 : /** prepare states for the generator */
119 : std::vector<std::uniform_int_distribution<unsigned int>> label_chooser_;
120 77 : label_chooser_.reserve(label_dims.size());
121 77 : std::transform(label_dims.begin(), label_dims.end(),
122 : std::back_inserter(label_chooser_),
123 : [](const TensorDim &label_dim) {
124 145 : return std::uniform_int_distribution<unsigned int>(
125 145 : 0, label_dim.width() - 1);
126 : });
127 :
128 : std::mt19937 rng;
129 : rng.seed(0);
130 77 : auto sz = size(input_dims, input_dims);
131 :
132 : /** DataProducer::Generator */
133 77 : return [rng, sz, min_ = min_.get(), max_ = max_.get(),
134 158 : label_chooser = std::move(label_chooser_)](
135 : unsigned int idx, std::vector<Tensor> &inputs,
136 : std::vector<Tensor> &labels) mutable -> bool {
137 15685 : auto populate_input = [&](Tensor &t) { t.setRandUniform(min_, max_); };
138 :
139 : auto populate_label =
140 15685 : [&](Tensor &t, std::uniform_int_distribution<unsigned int> &label_dist_) {
141 15685 : t.setZero();
142 15685 : t.setValue(0, 0, 0, label_dist_(rng), 1);
143 15685 : return t;
144 7860 : };
145 :
146 : std::for_each(inputs.begin(), inputs.end(), populate_input);
147 7860 : std::transform(labels.begin(), labels.end(), label_chooser.begin(),
148 : labels.begin(), populate_label);
149 :
150 7860 : return idx == sz - 1;
151 154 : };
152 77 : }
153 :
154 : unsigned int
155 159 : RandomDataOneHotProducer::size(const std::vector<TensorDim> &input_dims,
156 : const std::vector<TensorDim> &label_dims) const {
157 159 : return std::get<PropsNumSamples>(*rd_one_hot_props).get();
158 : }
159 : } // namespace nntrainer
|