Line data Source code
1 :
2 : // SPDX-License-Identifier: Apache-2.0
3 : /**
4 : * Copyright (C) 2021 Jihoon Lee <jhoon.it.lee@samsung.com>
5 : *
6 : * @file raw_file_data_producer.cpp
7 : * @date 12 July 2021
8 : * @brief This file contains raw file data producers, reading from a file
9 : * @see https://github.com/nnstreamer/nntrainer
10 : * @author Jihoon Lee <jhoon.it.lee@samsung.com>
11 : * @bug No known bugs except for NYI items
12 : *
13 : */
14 :
15 : #include <raw_file_data_producer.h>
16 :
17 : #include <memory>
18 : #include <numeric>
19 : #include <random>
20 : #include <vector>
21 :
22 : #include <common_properties.h>
23 : #include <nntrainer_error.h>
24 : #include <node_exporter.h>
25 : #include <util_func.h>
26 :
27 : namespace nntrainer {
28 :
29 9 : RawFileDataProducer::RawFileDataProducer() : raw_file_props(new PropTypes()) {}
30 :
31 211 : RawFileDataProducer::RawFileDataProducer(const std::string &path) :
32 422 : raw_file_props(new PropTypes(props::FilePath(path))) {}
33 654 : RawFileDataProducer::~RawFileDataProducer() {}
34 :
35 4 : const std::string RawFileDataProducer::getType() const {
36 4 : return RawFileDataProducer::type;
37 : }
38 :
39 232 : void RawFileDataProducer::setProperty(
40 : const std::vector<std::string> &properties) {
41 232 : auto left = loadProperties(properties, *raw_file_props);
42 232 : NNTR_THROW_IF(!left.empty(), std::invalid_argument)
43 : << "There is unparsed properties, size: " << left.size();
44 232 : }
45 :
46 : DataProducer::Generator
47 53 : RawFileDataProducer::finalize(const std::vector<TensorDim> &input_dims,
48 : const std::vector<TensorDim> &label_dims,
49 : void *user_data) {
50 53 : auto sz = size(input_dims, label_dims);
51 : auto path_prop = std::get<props::FilePath>(*raw_file_props);
52 :
53 : auto size_accumulator = [](const size_t &a, const TensorDim &b) {
54 106 : return a + b.getFeatureLen();
55 : };
56 :
57 : auto sample_size = std::accumulate(input_dims.begin(), input_dims.end(),
58 : (size_t)0, size_accumulator);
59 : sample_size = std::accumulate(label_dims.begin(), label_dims.end(),
60 : sample_size, size_accumulator);
61 :
62 : /// as we are passing the reference of file, this means created lamabda is
63 : /// tightly couple with the file, this is not desirable but working fine for
64 : /// now...
65 53 : file = std::ifstream(path_prop.get(), std::ios::binary);
66 53 : return [sample_size, sz, this](unsigned int idx, std::vector<Tensor> &inputs,
67 : std::vector<Tensor> &labels) {
68 5329 : NNTR_THROW_IF(idx >= sz, std::range_error)
69 : << "given index is out of bound, index: " << idx << " size: " << sz;
70 5329 : std::streamoff offset = static_cast<std::streamoff>(idx) *
71 5329 : static_cast<std::streamoff>(sample_size) *
72 : RawFileDataProducer::pixel_size;
73 5329 : file.seekg(offset, std::ios_base::beg);
74 10658 : for (auto &input : inputs) {
75 5329 : input.read(file);
76 : }
77 10658 : for (auto &label : labels) {
78 5329 : label.read(file);
79 : }
80 :
81 5329 : return idx == sz - 1;
82 106 : };
83 : }
84 :
85 : unsigned int
86 100 : RawFileDataProducer::size(const std::vector<TensorDim> &input_dims,
87 : const std::vector<TensorDim> &label_dims) const {
88 : auto size_accumulator = [](const size_t &a, const TensorDim &b) {
89 200 : return a + b.getFeatureLen();
90 : };
91 :
92 : auto sample_size = std::accumulate(input_dims.begin(), input_dims.end(),
93 : (size_t)0, size_accumulator);
94 : sample_size = std::accumulate(label_dims.begin(), label_dims.end(),
95 : sample_size, size_accumulator);
96 100 : NNTR_THROW_IF(sample_size == 0, std::invalid_argument)
97 : << "The feature size of input_dims and label_dims are zeros";
98 :
99 : auto path_prop = std::get<props::FilePath>(*raw_file_props);
100 100 : auto file_size = path_prop.file_size();
101 :
102 : /// checking alignment is a good way to make check if a file is valid,
103 : /// unfortunately, our test dataset does not have this property
104 : /// (trainingSet.dat, valSet.dat, testSet.dat) after checking, we can
105 : /// uncomment below line.
106 : // NNTR_THROW_IF((file_size % sample_size * RawFileDataProducer::pixel_size !=
107 : // 0),
108 : // std::invalid_argument)
109 : // << " Given file does not align with the given sample size, sample size: "
110 : // << sample_size << " file_size: " << file_size;
111 :
112 100 : return static_cast<unsigned int>(file_size) /
113 100 : (sample_size * RawFileDataProducer::pixel_size);
114 : }
115 :
116 2 : void RawFileDataProducer::exportTo(
117 : Exporter &exporter, const ml::train::ExportMethods &method) const {
118 2 : exporter.saveResult(*raw_file_props, method, this);
119 2 : }
120 : } // namespace nntrainer
|