LCOV - code coverage report
Current view: top level - nntrainer/utils - fp16.cpp (source / functions) Coverage Total Hit
Test: coverage_filtered.info Lines: 100.0 % 30 30
Test Date: 2025-12-14 20:38:17 Functions: 100.0 % 4 4

            Line data    Source code
       1              : // SPDX-License-Identifier: The MIT License (MIT)
       2              : /**
       3              :  * Copyright (c) 2017 Facebook Inc.
       4              :  * Copyright (c) 2017 Georgia Institute of Technology
       5              :  * Copyright 2019 Google LLC
       6              :  */
       7              : /**
       8              :  * @file   fp16.cpp
       9              :  * @date   03 Nov 2023
      10              :  * @brief  This is a collection of FP16 and FP32 conversion functions
      11              :  * @see    https://github.com/nnstreamer/nntrainer
      12              :  * @author Marat Dukhan <maratek@gmail.com>
      13              :  * @bug    No known bugs except for NYI items
      14              :  *
      15              :  */
      16              : 
      17              : #include <fp16.h>
      18              : 
      19              : namespace nntrainer {
      20              : 
      21    229450704 : float fp32_from_bits(uint32_t w) {
      22              : #if defined(__OPENCL_VERSION__)
      23              :   return as_float(w);
      24              : #elif defined(__CUDA_ARCH__)
      25              :   return __uint_as_float((unsigned int)w);
      26              : #elif defined(__INTEL_COMPILER)
      27              :   return _castu32_f32(w);
      28              : /// @todo resolve unknown identifier error
      29              : // #elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
      30              : //   return _CopyFloatFromInt32((__int32)w);
      31              : #else
      32              :   union {
      33              :     uint32_t as_bits;
      34              :     float as_value;
      35              :   } fp32 = {w};
      36    229450704 :   return fp32.as_value;
      37              : #endif
      38              : }
      39              : 
      40     59492001 : uint32_t fp32_to_bits(float f) {
      41              : #if defined(__OPENCL_VERSION__)
      42              :   return as_uint(f);
      43              : #elif defined(__CUDA_ARCH__)
      44              :   return (uint32_t)__float_as_uint(f);
      45              : #elif defined(__INTEL_COMPILER)
      46              :   return _castf32_u32(f);
      47              : /// @todo resolve unknown identifier error
      48              : // #elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
      49              : //   return (uint32_t)_CopyInt32FromFloat(f);
      50              : #else
      51              :   union {
      52              :     float as_value;
      53              :     uint32_t as_bits;
      54     59492001 :   } fp32 = {f};
      55     59492001 :   return fp32.as_bits;
      56              : #endif
      57              : }
      58              : 
      59      1703460 : uint16_t compute_fp32_to_fp16(float f) {
      60              : #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) ||              \
      61              :   defined(__GNUC__) && !defined(__STRICT_ANSI__)
      62              :   const float scale_to_inf = 0x1.0p+112f;
      63              :   const float scale_to_zero = 0x1.0p-110f;
      64              : #else
      65      1703460 :   const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
      66      1703460 :   const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
      67              : #endif
      68      1703460 :   float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
      69              : 
      70      1703460 :   const uint32_t w = fp32_to_bits(f);
      71      1703460 :   const uint32_t shl1_w = w + w;
      72              :   const uint32_t sign = w & UINT32_C(0x80000000);
      73      1703460 :   uint32_t bias = shl1_w & UINT32_C(0xFF000000);
      74      1703460 :   if (bias < UINT32_C(0x71000000)) {
      75              :     bias = UINT32_C(0x71000000);
      76              :   }
      77              : 
      78      1703460 :   base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
      79      1703460 :   const uint32_t bits = fp32_to_bits(base);
      80      1703460 :   const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
      81      1703460 :   const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
      82      1703460 :   const uint32_t nonsign = exp_bits + mantissa_bits;
      83      1703460 :   return (sign >> 16) |
      84      1703460 :          (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
      85              : }
      86              : 
      87     56085081 : float compute_fp16_to_fp32(uint16_t h) {
      88     56085081 :   const uint32_t w = (uint32_t)h << 16;
      89     56085081 :   const uint32_t sign = w & UINT32_C(0x80000000);
      90     56085081 :   const uint32_t two_w = w + w;
      91              :   const uint32_t exp_offset = UINT32_C(0xE0) << 23;
      92              : #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) ||              \
      93              :   defined(__GNUC__) && !defined(__STRICT_ANSI__)
      94              :   const float exp_scale = 0x1.0p-112f;
      95              : #else
      96     56085081 :   const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
      97              : #endif
      98              :   const float normalized_value =
      99     56085081 :     fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
     100              : 
     101              :   const uint32_t magic_mask = UINT32_C(126) << 23;
     102              :   const float magic_bias = 0.5f;
     103              :   const float denormalized_value =
     104     56085081 :     fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
     105              : 
     106              :   const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
     107              :   const uint32_t result =
     108     56085081 :     sign | (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value)
     109     56085052 :                                         : fp32_to_bits(normalized_value));
     110     56085081 :   return fp32_from_bits(result);
     111              : }
     112              : 
     113              : } /* namespace nntrainer */
        

Generated by: LCOV version 2.0-1