LCOV - code coverage report
Current view: top level - nntrainer/dataset - raw_file_data_producer.cpp (source / functions) Coverage Total Hit
Test: coverage_filtered.info Lines: 100.0 % 34 34
Test Date: 2025-12-14 20:38:17 Functions: 100.0 % 10 10

            Line data    Source code
       1              : 
       2              : // SPDX-License-Identifier: Apache-2.0
       3              : /**
       4              :  * Copyright (C) 2021 Jihoon Lee <jhoon.it.lee@samsung.com>
       5              :  *
       6              :  * @file   raw_file_data_producer.cpp
       7              :  * @date   12 July 2021
       8              :  * @brief  This file contains raw file data producers, reading from a file
       9              :  * @see    https://github.com/nnstreamer/nntrainer
      10              :  * @author Jihoon Lee <jhoon.it.lee@samsung.com>
      11              :  * @bug    No known bugs except for NYI items
      12              :  *
      13              :  */
      14              : 
      15              : #include <raw_file_data_producer.h>
      16              : 
      17              : #include <memory>
      18              : #include <numeric>
      19              : #include <random>
      20              : #include <vector>
      21              : 
      22              : #include <common_properties.h>
      23              : #include <nntrainer_error.h>
      24              : #include <node_exporter.h>
      25              : #include <util_func.h>
      26              : 
      27              : namespace nntrainer {
      28              : 
      29            9 : RawFileDataProducer::RawFileDataProducer() : raw_file_props(new PropTypes()) {}
      30              : 
      31          211 : RawFileDataProducer::RawFileDataProducer(const std::string &path) :
      32          422 :   raw_file_props(new PropTypes(props::FilePath(path))) {}
      33          654 : RawFileDataProducer::~RawFileDataProducer() {}
      34              : 
      35            4 : const std::string RawFileDataProducer::getType() const {
      36            4 :   return RawFileDataProducer::type;
      37              : }
      38              : 
      39          232 : void RawFileDataProducer::setProperty(
      40              :   const std::vector<std::string> &properties) {
      41          232 :   auto left = loadProperties(properties, *raw_file_props);
      42          232 :   NNTR_THROW_IF(!left.empty(), std::invalid_argument)
      43              :     << "There is unparsed properties, size: " << left.size();
      44          232 : }
      45              : 
      46              : DataProducer::Generator
      47           53 : RawFileDataProducer::finalize(const std::vector<TensorDim> &input_dims,
      48              :                               const std::vector<TensorDim> &label_dims,
      49              :                               void *user_data) {
      50           53 :   auto sz = size(input_dims, label_dims);
      51              :   auto path_prop = std::get<props::FilePath>(*raw_file_props);
      52              : 
      53              :   auto size_accumulator = [](const size_t &a, const TensorDim &b) {
      54          106 :     return a + b.getFeatureLen();
      55              :   };
      56              : 
      57              :   auto sample_size = std::accumulate(input_dims.begin(), input_dims.end(),
      58              :                                      (size_t)0, size_accumulator);
      59              :   sample_size = std::accumulate(label_dims.begin(), label_dims.end(),
      60              :                                 sample_size, size_accumulator);
      61              : 
      62              :   /// as we are passing the reference of file, this means created lamabda is
      63              :   /// tightly couple with the file, this is not desirable but working fine for
      64              :   /// now...
      65           53 :   file = std::ifstream(path_prop.get(), std::ios::binary);
      66           53 :   return [sample_size, sz, this](unsigned int idx, std::vector<Tensor> &inputs,
      67              :                                  std::vector<Tensor> &labels) {
      68         5329 :     NNTR_THROW_IF(idx >= sz, std::range_error)
      69              :       << "given index is out of bound, index: " << idx << " size: " << sz;
      70         5329 :     std::streamoff offset = static_cast<std::streamoff>(idx) *
      71         5329 :                             static_cast<std::streamoff>(sample_size) *
      72              :                             RawFileDataProducer::pixel_size;
      73         5329 :     file.seekg(offset, std::ios_base::beg);
      74        10658 :     for (auto &input : inputs) {
      75         5329 :       input.read(file);
      76              :     }
      77        10658 :     for (auto &label : labels) {
      78         5329 :       label.read(file);
      79              :     }
      80              : 
      81         5329 :     return idx == sz - 1;
      82          106 :   };
      83              : }
      84              : 
      85              : unsigned int
      86          100 : RawFileDataProducer::size(const std::vector<TensorDim> &input_dims,
      87              :                           const std::vector<TensorDim> &label_dims) const {
      88              :   auto size_accumulator = [](const size_t &a, const TensorDim &b) {
      89          200 :     return a + b.getFeatureLen();
      90              :   };
      91              : 
      92              :   auto sample_size = std::accumulate(input_dims.begin(), input_dims.end(),
      93              :                                      (size_t)0, size_accumulator);
      94              :   sample_size = std::accumulate(label_dims.begin(), label_dims.end(),
      95              :                                 sample_size, size_accumulator);
      96          100 :   NNTR_THROW_IF(sample_size == 0, std::invalid_argument)
      97              :     << "The feature size of input_dims and label_dims are zeros";
      98              : 
      99              :   auto path_prop = std::get<props::FilePath>(*raw_file_props);
     100          100 :   auto file_size = path_prop.file_size();
     101              : 
     102              :   /// checking alignment is a good way to make check if a file is valid,
     103              :   /// unfortunately, our test dataset does not have this property
     104              :   /// (trainingSet.dat, valSet.dat, testSet.dat) after checking, we can
     105              :   /// uncomment below line.
     106              :   // NNTR_THROW_IF((file_size % sample_size * RawFileDataProducer::pixel_size !=
     107              :   // 0),
     108              :   //               std::invalid_argument)
     109              :   //   << " Given file does not align with the given sample size, sample size: "
     110              :   //   << sample_size << " file_size: " << file_size;
     111              : 
     112          100 :   return static_cast<unsigned int>(file_size) /
     113          100 :          (sample_size * RawFileDataProducer::pixel_size);
     114              : }
     115              : 
     116            2 : void RawFileDataProducer::exportTo(
     117              :   Exporter &exporter, const ml::train::ExportMethods &method) const {
     118            2 :   exporter.saveResult(*raw_file_props, method, this);
     119            2 : }
     120              : } // namespace nntrainer
        

Generated by: LCOV version 2.0-1