Last active
January 24, 2019 09:58
-
-
Save eruffaldi/33a27040ead19e2944a2432e0644d698 to your computer and use it in GitHub Desktop.
posit_one.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// cppPosit 7b7b5ecde436a65923d679d91ddb1b29b711af27 | |
/** | |
* Emanuele Ruffaldi (C) 2017-2018 | |
* | |
* My personal bit hip pop using BMI extensions and a bit of constexpr! | |
*/ | |
#if defined(__SDSVHLS__) && !defined(FPGAHLS) | |
#define FPGAHLS | |
#endif | |
#if defined(__llvm__) && __clang_major__ > 3 | |
#define HAS_bextr_u64 | |
#endif | |
#include <stdint.h>
#include <bitset>
#include <type_traits>
// CPU detection x86 | |
#if defined(__x86_64) || defined(_M_X64) || defined(_M_IX86) || defined(__i386__) | |
#define __is_x86_any__ | |
#endif | |
#ifdef __is_x86_any__ | |
#ifdef _MSC_VER | |
#include <intrin.h> | |
#include <immintrin.h> | |
#else | |
#include <x86intrin.h> | |
#endif | |
#endif | |
#ifndef CONSTEXPR14 | |
#if __cplusplus >= 201402L | |
#define CONSTEXPR14 constexpr | |
#else | |
#define CONSTEXPR14 | |
#endif | |
#endif | |
#if !defined(FPGAHLS) && !defined(_MSC_VER) | |
#define CLZCONSTEXPR constexpr | |
#else | |
#define CLZCONSTEXPR | |
#endif | |
// C version
// BIT_MASK(TYPE, COUNT): value of type TYPE whose COUNT least-significant bits are set.
// The whole expansion is parenthesized so it can be used inside larger expressions, and a
// COUNT of 0 yields 0 without evaluating a shift by the full type width.
// Note: COUNT is evaluated more than once; do not pass expressions with side effects.
#define BIT_MASK(__TYPE__, __ONE_COUNT__) \
    ((__TYPE__)(((__ONE_COUNT__) != 0) ? (((__TYPE__)-1) >> ((sizeof(__TYPE__) * 8) - (__ONE_COUNT__))) : 0))
/// Returns a value of type R whose `onecount` least-significant bits are set.
///
/// - onecount == 0 yields 0.
/// - onecount >= bit width of R yields all ones.
///
/// Both boundary cases are handled explicitly so that no shift by the full type width
/// (undefined behaviour, and a hard error inside constant expressions) is ever evaluated.
/// Written as a single return statement so it remains a valid C++11 constexpr function.
template <typename R>
constexpr R bitmask(unsigned int const onecount)
{
    return onecount == 0
               ? static_cast<R>(0)
               : (onecount >= sizeof(R) * 8
                      ? static_cast<R>(~static_cast<R>(0))
                      : static_cast<R>((static_cast<R>(1) << onecount) - 1));
}
/// Returns its argument by value: the result type is the argument type with any
/// reference removed, so the call expression is always a prvalue.
template <typename Arg>
constexpr typename std::remove_reference<Arg>::type makeprval(Arg &&t)
{
    return t;
}
/// Applies the noexcept operator to a makeprval() call on `e`.
/// NOTE(review): judging by the name and by its use in bitset_getT, this is meant to
/// detect whether `e` is a constant expression; whether noexcept() reports that depends
/// on the compiler - confirm on the toolchains in use.
#define isprvalconstexpr(e) noexcept(makeprval(e))
#ifdef _MSC_VER | |
// note this is not constexpr due to _BitScanReverse | |
static uint32_t __inline __builtin_clz(uint32_t x) | |
{ | |
unsigned long r = 0; | |
_BitScanReverse(&r, x); | |
return (31-r); | |
} | |
#endif | |
// 64-bit count-leading-zeros built from the 32-bit __builtin_clz (stand-in for __builtin_clzll).
// When the upper 32 bits are all zero the answer is 32 plus the count of the lower half,
// otherwise it is the count of the upper half. Must not be called with v == 0.
CLZCONSTEXPR inline uint64_t __builtin_clz64(uint64_t v)
{
    return (v >> 32) == 0 ? 32 + __builtin_clz(static_cast<uint32_t>(v))
                          : __builtin_clz(static_cast<uint32_t>(v >> 32));
}
// findbitleftmostC: number of leading zero bits of `input`, counted within the width of
// the argument type (64, 32, 16 or 8 bits). These overloads are the constexpr-capable
// variants (subject to CLZCONSTEXPR). Never call any of them with input == 0.

// 64-bit overload: delegates to __builtin_clz64.
CLZCONSTEXPR inline int findbitleftmostC(uint64_t input)
{
    return static_cast<int>(__builtin_clz64(input));
}
// 32-bit overload: direct use of the 32-bit builtin.
CLZCONSTEXPR inline int findbitleftmostC(uint32_t input)
{
    return static_cast<int>(__builtin_clz(input));
}
// 16-bit overload: widen to 32 bits, then discount the 16 extra leading zeros.
CLZCONSTEXPR inline int findbitleftmostC(uint16_t input)
{
    return static_cast<int>(__builtin_clz(static_cast<uint32_t>(input)) - 16);
}
// 8-bit overload: widen to 32 bits, then discount the 24 extra leading zeros.
CLZCONSTEXPR inline int findbitleftmostC(uint8_t input)
{
    return static_cast<int>(__builtin_clz(static_cast<uint32_t>(input)) - 24);
}
#if 0 | |
#if !defined(__arm__) && !defined(FPGAHLS) | |
// TODO: make this detection more reliable
inline int findbitleftmost(uint8_t input) | |
{ | |
return __lzcnt16(input) - 8; | |
} | |
inline int findbitleftmost(uint16_t input) | |
{ | |
return __lzcnt16(input); | |
} | |
inline int findbitleftmost(uint32_t input) | |
{ | |
return __lzcnt32(input); | |
} | |
/* CSIM | |
inline int findbitleftmost(uint64_t input) | |
{ | |
return __lzcnt64(input); | |
} | |
*/ | |
// detect constexpr for X so we can speedup | |
#define findbitleftmost(X) (isprvalconstexpr(X) ? findbitleftmostC(X) : findbitleftmost(X)) | |
#endif | |
#endif | |
// indices are with 0 on the right | |
template <class T, class Y, int offset, int size> | |
constexpr T bitset_part(T input, Y value) | |
{ | |
return (input & ~bitmask<T>(size)) | ((value & bitmask<T>(size)) << offset); | |
} | |
template <class T, class Y> | |
CONSTEXPR14 T bitset_part(T input, Y value, int offset, int size) | |
{ | |
auto M = bitmask<T>(size); | |
return (input & ~M) | ((value & M) << offset); | |
} | |
template <class T, int offset, int size> | |
constexpr T bitset_get(T input) | |
{ | |
return (input >> offset) & bitmask<T>(size); | |
} | |
template <class T> | |
CONSTEXPR14 T bitset_get(T input, int offset, int size) | |
{ | |
auto M = bitmask<T>(size); | |
return (input >> offset) & M; | |
} | |
#if defined(__is_x86_any__) && !defined(FPGAHLS) && defined(__BMI__) | |
/* CSIM | |
inline uint64_t bitset_gethw(uint64_t input, int offset, int size) | |
{ | |
return _bextr_u64(input, offset, size); | |
} | |
*/ | |
inline uint16_t bitset_gethw(uint16_t input, int offset, int size) | |
{ | |
return _bextr_u32(input, offset, size); | |
} | |
inline uint32_t bitset_gethw(uint32_t input, int offset, int size) | |
{ | |
return _bextr_u32(input, offset, size); | |
} | |
//#ifdef HAS_bextr_u64 | |
template <int offset, int size> | |
uint64_t bitset_gethwT(uint64_t input) | |
{ | |
return _bextr_u64(input, offset, size); | |
} | |
//#endif | |
template <int offset, int size> | |
uint32_t bitset_gethwT(uint32_t input) | |
{ | |
return _bextr_u32(input, offset, size); | |
} | |
template <int offset, int size> | |
uint16_t bitset_gethwT(uint16_t input) | |
{ | |
return _bextr_u32(input, offset, size); | |
} | |
template <int offset, int size> | |
uint8_t bitset_gethwT(uint8_t input) | |
{ | |
return _bextr_u32(input, offset, size); | |
} | |
// with template
// bitset_getT(X, A, B): extract the B-bit field of X starting at bit A, where A and B are
// used as template arguments. Expands to a conditional on isprvalconstexpr(X) that picks
// bitset_gethwT<A, B> when it is true and the portable bitset_get<decltype(X), A, B>
// otherwise. X is expanded more than once.
#define bitset_getT(X, A, B) (isprvalconstexpr(X) ? bitset_gethwT<A, B>(X) : bitset_get<decltype(X), A, B>(X))
// with flexible arguments
// bitset_get(X, A, B): function-like macro that takes over call syntax `bitset_get(...)`
// from this point on. It picks bitset_gethw when isprvalconstexpr holds for X, A and B,
// and otherwise calls the bitset_get function template (the inner name is not expanded
// again by the preprocessor).
#define bitset_get(X, A, B) (isprvalconstexpr(X) && isprvalconstexpr(A) && isprvalconstexpr(B) ? bitset_gethw(X, A, B) : bitset_get(X, A, B))
#else
// Fallback when the intrinsic section above is not compiled: always use the portable template.
#define bitset_getT(X, A, B) (bitset_get<decltype(X), A, B>(X))
#endif | |
/// Function object returning the N leftmost (most significant) bits of a T, right aligned.
/// The value is converted to the unsigned counterpart of T before the field is extracted
/// with bitset_getT, and the result is converted back to T.
template <class T, int N>
struct bitset_leftmost_get_const
{
    constexpr T operator()(T X) const
    {
        return static_cast<T>(
            bitset_getT(static_cast<typename std::make_unsigned<T>::type>(X), sizeof(X) * 8 - N, N));
    }
};
/// N == 0: there is nothing to extract, the result is always 0.
template <class T>
struct bitset_leftmost_get_const<T, 0>
{
    constexpr T operator()(T) const
    {
        return 0;
    }
};
// bitset_leftmost_getT(X, N): the N leftmost bits of X, via bitset_getT.
// N is parenthesized so that an expression argument (e.g. `a + b`) is subtracted as a whole.
#define bitset_leftmost_getT(X, N) bitset_getT(X, sizeof(X) * 8 - (N), (N))
/// Builds a std::bitset with one bit per bit of T, initialised from the value x.
template <class T>
constexpr std::bitset<sizeof(T) * 8> bitset_all(const T x)
{
    using all_bits_t = std::bitset<sizeof(T) * 8>;
    return all_bits_t(x);
}
/// Builds an N-bit std::bitset from x shifted right by (bit width of T - N),
/// i.e. from the N most significant bits of x.
template <class T, int N>
constexpr std::bitset<N> bitset_msb(const T x)
{
    using top_bits_t = std::bitset<N>;
    return top_bits_t(x >> (sizeof(T) * 8 - N));
}
/// Builds an N-bit std::bitset from x; std::bitset keeps only the N least significant bits.
template <class T, int N>
constexpr std::bitset<N> bitset_lsb(const T x)
{
    using low_bits_t = std::bitset<N>;
    return low_bits_t(x);
}
/// Absolute value of a signed integer, implemented with a comparison:
/// non-negative values are returned unchanged, negative values are negated.
template <class T>
constexpr T pcabs(T x)
{
    return !(x < 0) ? x : -x;
}
///
/// Absolute value of an integer without a branch on the value.
///
/// The computation is carried out on the unsigned counterpart of T: `sign` is all ones
/// when x is negative and zero otherwise, and (x + sign) ^ sign negates x in two's
/// complement exactly when x is negative. Working in unsigned arithmetic avoids both the
/// implementation-defined right shift of a negative value and signed overflow; the most
/// negative value of T maps to itself instead of triggering undefined behaviour.
/// Unsigned T is returned unchanged.
template <class T> // ,typename std::enable_if<std::is_integral<T>::value ,int>::type* = nullptr>
CONSTEXPR14 T pabs(T x)
{
    using U = typename std::make_unsigned<T>::type;
    const U sign = static_cast<U>(-static_cast<U>(x < 0));
    return static_cast<T>(static_cast<U>((static_cast<U>(x) + sign) ^ sign));
}
// Implementation detail of cast_msb: the primary template is intentionally empty and is
// selected through the last (bool) parameter, which tells whether abits > bbits.
template <class A, int abits, class B, int bbits, bool abits_gt_bbits>
struct cast_msb_
{
};
// Source wider than destination (#A > #B): drop the (abits - bbits) lowest bits.
template <class A, int abits, class B, int bbits>
struct cast_msb_<A, abits, B, bbits, true>
{
    constexpr B operator()(A value) const
    {
        return static_cast<B>(value >> (abits - bbits));
    }
};
// Destination at least as wide as the source (#B >= #A): convert first, then shift the
// value up by (bbits - abits).
template <class A, int abits, class B, int bbits>
struct cast_msb_<A, abits, B, bbits, false>
{
    constexpr B operator()(A value) const
    {
        return static_cast<B>(static_cast<B>(value) << (bbits - abits));
    }
};
/// MSB-aligned cast from an abits-wide value held in A to a bbits-wide value held in B.
/// Both A and B must be unsigned.
template <class A, int abits, class B, int bbits>
struct cast_msb : public cast_msb_<A, abits, B, bbits, (abits > bbits)>
{
    static_assert(std::is_unsigned<A>::value, "required unsigned A");
    static_assert(std::is_unsigned<B>::value, "required unsigned B");
};
// Implementation detail of cast_right_to_left: empty primary template, selected through
// the last (bool) parameter, which tells whether abits > bbits.
template <class A, int abits, class B, int bbits, bool abits_gt_bbits>
struct cast_right_to_left_
{
};
// #A > #B: take the abits rightmost bits of the input, then drop the (abits - bbits) lowest.
template <class A, int abits, class B, int bbits>
struct cast_right_to_left_<A, abits, B, bbits, true>
{
    constexpr B operator()(A value) const
    {
        return static_cast<B>(bitset_getT(value, 0, abits) >> (abits - bbits));
    }
};
// #B >= #A: take the abits rightmost bits of the input, convert, then shift up by (bbits - abits).
template <class A, int abits, class B, int bbits>
struct cast_right_to_left_<A, abits, B, bbits, false>
{
    constexpr B operator()(A value) const
    {
        return static_cast<B>(static_cast<B>(bitset_getT(value, 0, abits)) << (bbits - abits));
    }
};
/// Cast of a right-aligned abits-wide field held in A to a bbits-wide value held in B.
/// Both A and B must be unsigned.
template <class A, int abits, class B, int bbits>
struct cast_right_to_left : public cast_right_to_left_<A, abits, B, bbits, (abits > bbits)>
{
    static_assert(std::is_unsigned<A>::value, "required unsigned A");
    static_assert(std::is_unsigned<B>::value, "required unsigned B");
};
/// Describes a fixed-point (or plain integer, when F == 0) format.
///
/// @tparam T signed integral type holding the value
/// @tparam N total number of bits of the format; must be positive and fit in T
/// @tparam F number of fraction bits, 0 <= F <= N
template <class T, int N, int F>
struct fixedtrait
{
    static_assert(N > 0, "fixedtrait total bits should be positive");
    // The holder must provide at least N bits (the original check was inverted).
    static_assert(static_cast<int>(sizeof(T) * 8) >= N, "fixedtrait holding type is too small");
    static_assert(F <= N && F >= 0, "fraction bits should not exceed N and not be negative");
    static_assert(std::is_integral<T>::value && std::is_signed<T>::value, "only for signed integrals");
    using value_t = T;
    static constexpr int totalbits = N;
    static constexpr int fraction_bits = F;
};
#if 0 | |
#if __cplusplus >= 201402L | |
template <class T> | |
struct fixedtrait<T,sizeof(T)*8,0> | |
{ | |
static_assert(std::is_integral<T>::value && std::is_signed<T>::value,"only for signed integrals"); | |
using value_t = T; | |
static constexpr int totalbits = sizeof(T)*8; | |
static constexpr int fraction_bits = 0; | |
}; | |
#endif | |
#endif | |
#include <limits> | |
#include <cstdint> | |
#if defined(__SDSVHLS__) && !defined(FPGAHLS) | |
#define FPGAHLS | |
#endif | |
// Wrapper giving a distinct C++ type to custom float formats that are stored in a plain
// integer holder T. The raw bits live in `what`.
template <class T>
struct valuewrap
{
    T what; // raw stored bits

    // Default construction stores 0.
    constexpr valuewrap() : what(0) {}
    // Wraps the given raw bits (intentionally implicit).
    constexpr valuewrap(T w) : what(w) {}
};

// Tag type stored in 16 bits; inherits valuewrap's constructors.
struct halffloat : public valuewrap<uint16_t>
{
    using valuewrap<uint16_t>::valuewrap;
};

// Second, distinct tag type stored in 16 bits; inherits valuewrap's constructors.
struct halffloatalt : public valuewrap<uint16_t>
{
    using valuewrap<uint16_t>::valuewrap;
};

// Tag type stored in 8 bits; inherits valuewrap's constructors.
struct microfloat : public valuewrap<uint8_t>
{
    using valuewrap<uint8_t>::valuewrap;
};
/// holder_T is an unsigned integer capable of storing 1+exp_bits+frac_bits exactly | |
/// value_T is the struct or native type used for this | |
template <int exp_bits, int frac_bits, class value_T, class holder_T, bool with_denorm_ = true> | |
struct any_floattrait | |
{ | |
using value_t = value_T; | |
using holder_t = holder_T; | |
static_assert(exp_bits+1+frac_bits == sizeof(holder_t)*8,"holding size"); | |
//static_assert<!std::is_signed<holder_t> >; | |
static constexpr int data_bits = exp_bits+frac_bits+1; | |
static constexpr int exponent_bits = exp_bits; | |
static constexpr int fraction_bits = frac_bits; | |
static constexpr int exponent_bias = (1<<(exp_bits-1))-1; | |
static constexpr int exponent_max = (1<<(exp_bits))-2; | |
static constexpr int with_denorm = with_denorm_; | |
static constexpr uint32_t exponent_mask = (1<<exponent_bits)-1; | |
enum : holder_t { | |
signbit = ((holder_t)(1))<<(data_bits-1), | |
pinfinity_h = bitmask<holder_t>(exp_bits) << frac_bits, // 0 1[e] 0[f] | |
ninfinity_h = pinfinity_h | signbit, // 1 1[e] 0[f] | |
// many nan ar possible, we pick the one with | |
nan_h = bitmask<holder_t>(exp_bits+1) << (frac_bits-1), // 0 1[e] 1 0[f-1] | |
one_h = bitmask<holder_t>(exp_bits-1) << (frac_bits), // 0 0 1[e-1] 0[f] | |
afterone_h = one_h+1, // 0 0 1[e-1] 0[f-1] 1 | |
max_h = (bitmask<holder_t>(exp_bits-1) << (frac_bits+1)) | bitmask<holder_t>(frac_bits), // 0 1[e-1] 0 1[f] | |
lowest_h = max_h | signbit, | |
min_h = ((holder_t)(1)) << frac_bits, // 0 0[e-1] 1 0[f] | |
two_h = ((holder_t)(1)) << (exp_bits-1+frac_bits) // 0 1 0[e-1+f] | |
}; | |
}; | |
// pulp8 alternative
// 8-bit format: 1 sign, 5 exponent and 2 fraction bits, carried by microfloat.
using microfloat_trait = any_floattrait<5,2,microfloat,uint8_t>;
// PULP 8E,7M vs classic 5E,10P
// 16-bit format: 1 sign, 8 exponent and 7 fraction bits, carried by halffloatalt.
using half_traitalt = any_floattrait<8,7,halffloatalt,uint16_t>;
// Intel bfloat16 as 8,7 without denormals
// Same layout and value type as half_traitalt, with with_denorm set to false.
using bfloat16_trait = any_floattrait<8,7,halffloatalt,uint16_t, false>;
// https://en.wikipedia.org/wiki/16-bit | |
struct half_trait // : public any_floattrait<5,10,halffloat,uint16_t> | |
{ | |
using value_t = halffloat; | |
using holder_t = uint16_t; | |
static constexpr holder_t ninfinity_h = 0xFC00; | |
static constexpr holder_t pinfinity_h = 0x7C00; | |
static constexpr holder_t nan_h = 0x7E00; | |
static constexpr holder_t one_h = 0x3C00; // one next is just + 1 | |
static constexpr holder_t two_h = 0x4000; | |
static constexpr holder_t max_h = 0x7bff; | |
static constexpr holder_t min_h = 0x0400; | |
static constexpr holder_t lowest_h = 0xfbff; // -max_h | |
static constexpr int with_denorm = true; | |
// max subnormal 0 00000 1111111111 2−24 ≈ 6.09756 × 10−5 | |
// min subnormal 0 00000 0000000001 5.96046 × 10−8 | |
static constexpr int data_bits = 16; // can be derived from value_t | |
static constexpr int exponent_bits = 5; | |
static constexpr int fraction_bits = 10; // can be derived from data_bits and exponent_bits | |
static constexpr int exponent_bias = 15; | |
static constexpr int exponent_max = 30; // can be derived from bias and bits | |
static constexpr holder_t signbit = ((holder_t)(1))<<(data_bits-1); | |
static constexpr uint32_t exponent_mask = (1<<exponent_bits)-1; // TODO make it from exponent_bits | |
}; | |
// https://en.wikipedia.org/wiki/Single-precision_floating-point_format
// Trait for the 32-bit float (8 exponent bits, 23 fraction bits) held in a uint32_t.
// The native value type and the float-valued constants are omitted when FPGAHLS is defined.
struct single_trait
{
#ifndef FPGAHLS
    using value_t = float;
#endif
    using holder_t = uint32_t;

    // Layout of the format.
    static constexpr int data_bits = 32; // can be derived from value_t
    static constexpr int exponent_bits = 8;
    static constexpr int fraction_bits = 23; // can be derived from data_bits and exponent_bits
    static constexpr int exponent_bias = 127;
    static constexpr int exponent_max = 254; // can be derived from bias and bits
    static constexpr int with_denorm = true;
    static constexpr holder_t signbit = static_cast<holder_t>(1) << (data_bits - 1);
    static constexpr uint32_t exponent_mask = (1 << exponent_bits) - 1; // TODO make it from exponent_bits

#ifndef FPGAHLS
    // Native constants.
    static constexpr value_t zero = 0.0f;
    static constexpr value_t pinfinity = std::numeric_limits<value_t>::infinity();
    static constexpr value_t ninfinity = -std::numeric_limits<value_t>::infinity();
#endif

    // Notable bit patterns.
    static constexpr holder_t pinfinity_h = 0x7F800000;
    static constexpr holder_t ninfinity_h = 0xFF800000;
    static constexpr holder_t nan_h = 0x7FC00000;
    static constexpr holder_t one_h = 0x3F800000;
    static constexpr holder_t two_h = 0x40000000;
    static constexpr holder_t min_h = 0x00800000;
    static constexpr holder_t max_h = 0x7F7FFFFF;
    static constexpr holder_t lowest_h = 0xFF7FFFFF; // -max_h
    static constexpr holder_t denorm_min_h = 1; // just 1 after 0
    static constexpr holder_t epsilon_h = 0x34000000; // trickier
};
// https://en.wikipedia.org/wiki/Double-precision_floating-point_format
// Trait for the 64-bit float (11 exponent bits, 52 fraction bits) held in a uint64_t.
// The native value type and the double-valued constants are omitted when FPGAHLS is defined.
struct double_trait
{
#ifndef FPGAHLS
    using value_t = double;
#endif
    using holder_t = uint64_t;

    // Layout of the format.
    static constexpr int data_bits = 64; // can be derived from value_t
    static constexpr int exponent_bits = 11;
    static constexpr int fraction_bits = 52; // can be derived from data_bits and exponent_bits
    static constexpr int exponent_bias = 1023;
    static constexpr int exponent_max = 2046;
    static constexpr int with_denorm = true;
    static constexpr holder_t signbit = static_cast<holder_t>(1) << (data_bits - 1);
    static constexpr uint32_t exponent_mask = (static_cast<holder_t>(1) << exponent_bits) - 1; // 7FF

#ifndef FPGAHLS
    // Native constants.
    static constexpr value_t zero = 0.0;
    static constexpr value_t pinfinity = std::numeric_limits<value_t>::infinity();
    static constexpr value_t ninfinity = -std::numeric_limits<value_t>::infinity();
#endif

    // Notable bit patterns.
    static constexpr holder_t pinfinity_h = 0x7FF0000000000000ULL;
    static constexpr holder_t ninfinity_h = 0xFFF0000000000000ULL;
    static constexpr holder_t nan_h = 0x7FF8000000000000ULL;
    static constexpr holder_t one_h = 0x3FF0000000000000ULL;
    static constexpr holder_t two_h = 0x4000000000000000ULL;
    static constexpr holder_t min_h = 0x0010000000000000ULL; // TODO
    static constexpr holder_t max_h = 0x7FEFFFFFFFFFFFFFULL; // TODO
    static constexpr holder_t lowest_h = 0xFFEFFFFFFFFFFFFFULL; // TODO
};
#ifdef FLT128_MAX | |
// https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format
// https://gcc.gnu.org/onlinedocs/gcc/Floating-Types.html
// Trait for the 128-bit float (1 sign, 15 exponent, 112 fraction bits) held in an
// unsigned __int128.
//
// C++ has no 128-bit integer literals, so every bit pattern is assembled from its upper
// 64 bits (shifted into place) and, where needed, its lower 64 bits. The patterns follow
// the same construction as the other traits in this file:
//   infinity = exponent all ones, fraction zero; nan = infinity plus the top fraction bit;
//   one = exponent equal to the bias; two = bias + 1; max = exponent_max with the fraction
//   all ones; min = exponent 1 with fraction zero; lowest = max with the sign bit set.
struct float128_trait
{
#ifndef FPGAHLS
    using value_t = __float128;
#endif
    using holder_t = unsigned __int128;
#ifndef FPGAHLS
    static constexpr value_t zero = 0.0;
    // NOTE(review): these rely on std::numeric_limits being specialized for __float128 by
    // the standard library in use - confirm for the target toolchain.
    static constexpr value_t ninfinity = -std::numeric_limits<value_t>::infinity();
    static constexpr value_t pinfinity = std::numeric_limits<value_t>::infinity();
#endif
    static constexpr holder_t ninfinity_h = static_cast<holder_t>(0xFFFF000000000000ULL) << 64;
    static constexpr holder_t pinfinity_h = static_cast<holder_t>(0x7FFF000000000000ULL) << 64;
    static constexpr holder_t nan_h = static_cast<holder_t>(0x7FFF800000000000ULL) << 64;
    static constexpr holder_t one_h = static_cast<holder_t>(0x3FFF000000000000ULL) << 64;
    static constexpr holder_t two_h = static_cast<holder_t>(0x4000000000000000ULL) << 64;
    static constexpr holder_t max_h =
        (static_cast<holder_t>(0x7FFEFFFFFFFFFFFFULL) << 64) | static_cast<holder_t>(0xFFFFFFFFFFFFFFFFULL);
    static constexpr holder_t lowest_h =
        (static_cast<holder_t>(0xFFFEFFFFFFFFFFFFULL) << 64) | static_cast<holder_t>(0xFFFFFFFFFFFFFFFFULL); // -max_h
    static constexpr holder_t min_h = static_cast<holder_t>(0x0001000000000000ULL) << 64;
    static constexpr int with_denorm = true;
    static constexpr int data_bits = 128; // can be derived from value_t
    static constexpr int exponent_bits = 15;
    static constexpr int fraction_bits = 112; // can be derived from data_bits and exponent_bits
    static constexpr int exponent_bias = 16383;
    static constexpr int exponent_max = 16383 * 2;
    // The shift is performed in holder_t: shifting an int by 127 would overflow.
    static constexpr holder_t signbit = static_cast<holder_t>(1) << (data_bits - 1);
    static constexpr uint32_t exponent_mask = (1 << exponent_bits) - 1; // TODO make it from exponent_bits
};
#endif | |
/** | |
Casting between different arbitrary float formats requires:
- exponent:
new exponent = old exponent - old bias + new bias
- fraction (assumed to be left aligned):
nothing to do: keep it left aligned, only the new format's fraction bits are used
See: | |
template <class FT,class ET> | |
template <class Trait> | |
CONSTEXPR14 typename Trait::holder_t Unpacked<FT,ET>::pack_xfloati() const | |
template <class Trait, typename = typename std::enable_if<!std::is_integral<typename Trait::value_t>::value>::type> | |
explicit CONSTEXPR14 Unpacked(typename Trait::holder_t i) { unpack_xfloat<Trait>(i); } | |
*/ | |
template <class T> | |
struct float2trait | |
{}; | |
template <> | |
struct float2trait<float> | |
{ | |
using type = float; | |
using trait = single_trait; | |
}; | |
template <> | |
struct float2trait<double> | |
{ | |
using type = double; | |
using trait = double_trait; | |
}; | |
template <> | |
struct float2trait<halffloat> | |
{ | |
using type = halffloat; | |
using trait = half_trait; | |
}; | |
template <> | |
struct float2trait<halffloatalt> | |
{ | |
using type = halffloatalt; | |
using trait = half_traitalt; | |
}; | |
#ifdef FLT128_MAX | |
template <> | |
struct float2trait<__float128> | |
{ | |
using type = __float128; | |
using trait = float128_trait; | |
}; | |
#endif | |
#if 0 | |
template <class Trait> | |
struct limithelper | |
{ | |
using T=Trait; | |
static constexpr bool is_specialized = true; | |
static constexpr T min() noexcept { return T::min(); } | |
static constexpr T max() noexcept { return T::max(); } | |
static constexpr T lowest() noexcept { return T::lowest (); } | |
//static constexpr int digits = 0; number of digits (in radix base) in the mantissa | |
//static constexpr int digits10 = 0; | |
static constexpr bool is_signed = true; | |
static constexpr bool is_integer = false; | |
static constexpr bool is_exact = false; | |
static constexpr int radix = 2; | |
static constexpr T epsilon() noexcept { return T::one_next()-T::one(); } | |
//static constexpr T round_error() noexcept { return T(); } | |
// this is also the maximum integer | |
static constexpr int min_exponent = PT::minexponent(); | |
// static constexpr int min_exponent10 = 0; | |
static constexpr int max_exponent = PT::maxexponent(); | |
//static constexpr int max_exponent10 = 0; | |
static constexpr bool has_infinity = true; | |
static constexpr bool has_quiet_NaN = withnan; | |
static constexpr bool has_signaling_NaN = false; | |
//static constexpr float_denorm_style has_denorm = denorm_absent; | |
static constexpr bool has_denorm_loss = false; | |
static constexpr T infinity() noexcept { return T::infinity(); } | |
static constexpr T quiet_NaN() noexcept { return T::nan(); } | |
//static constexpr T signaling_NaN() noexcept { return T(); } | |
static constexpr T denorm_min() noexcept { return T::min(); } | |
static constexpr bool is_iec559 = false; | |
static constexpr bool is_bounded = false; | |
static constexpr bool is_modulo = false; | |
static constexpr bool traps = false; | |
static constexpr bool tinyness_before = false; | |
}; | |
#endif | |
#if 0 | |
namespace std | |
{ | |
template <> | |
struct limits<halffloat>: public limithelper<half_trait> | |
{ | |
}; | |
template <> | |
struct limits<halffloatalt>: public limithelper<half_traitalt> | |
{ | |
}; | |
template <> | |
struct limits<microfloat>: public limithelper<microfloat_trait> | |
{ | |
}; | |
} | |
#endif | |
/** | |
* Various Integer Type Helpers
* | |
* Emanuele Ruffaldi 2017 | |
*/ | |
#include <cstdint> | |
#include <inttypes.h> | |
#if (defined(__llvm__) && __clang_major__ > 3) || (defined(__GNUC__) && __GNUC__ >= 4) | |
#define HAS128T | |
#endif | |
#ifdef HAS128T | |
#define int128_t __int128_t | |
#endif | |
/// Selects the larger (by sizeof) of two types; A wins when both have the same size.
template <class A, class B>
using largest_type = typename std::conditional<(sizeof(B) > sizeof(A)), B, A>::type;
/// Maps a size in bits to the smallest signed integer type of this file's table that has
/// at least that many bits.
/// Equivalent to: http://www.boost.org/doc/libs/1_48_0/libs/integer/doc/html/boost_integer/integer.html#boost_integer.integer.sized
namespace detail_least
{
    // Category -> type table. The category is the number of size thresholds
    // (8, 16, 32, 64, 128) that the requested bit count does not exceed, so 5 selects the
    // 8-bit type and 1 the 128-bit type. Categories without a specialization have no
    // `least` member.
    template <int Category>
    struct int_least_helper
    {
    };
#ifdef HAS128T
    template <>
    struct int_least_helper<1>
    {
        using least = int128_t;
    };
#endif
    template <>
    struct int_least_helper<2>
    {
        using least = int64_t;
    };
    template <>
    struct int_least_helper<3>
    {
        using least = int32_t;
    };
    template <>
    struct int_least_helper<4>
    {
        using least = int16_t;
    };
    template <>
    struct int_least_helper<5>
    {
        using least = int8_t;
    };
}
/// int_least_bits<N>::type is the signed integer type selected for N bits.
template <unsigned int N>
struct int_least_bits
{
    using type = typename detail_least::int_least_helper<
        (N <= 8) + (N <= 16) + (N <= 32) + (N <= 64) + (N <= 128)>::least;
};
/// Helper working around the fact that int8_t and uint8_t are printed as characters by
/// iostream: printableinttype<T>::type is the type to convert to before streaming.
/// By default a type maps to itself.
template <class T>
struct printableinttype
{
    using type = T;
};
/// const-qualified types map to the const-qualified printable type of the underlying type.
template <class T>
struct printableinttype<const T>
{
    using typex = typename printableinttype<T>::type;
    using type = const typex;
};
/// uint8_t is widened to uint16_t.
template <>
struct printableinttype<uint8_t>
{
    using type = uint16_t;
};
/// int8_t is widened to int16_t.
template <>
struct printableinttype<int8_t>
{
    using type = int16_t;
};
/// nextinttype<T>::type is the integer type with twice the width of T and the same
/// signedness. The primary template is empty: types without a specialization have no
/// `type` member. The 64 -> 128 bit steps exist only when HAS128T is defined.
template <class T>
struct nextinttype
{
};
/// Shared body of the nextinttype specializations.
template <class Wider>
struct nextinttype_is
{
    using type = Wider;
};
// unsigned chain
template <>
struct nextinttype<uint8_t> : nextinttype_is<uint16_t>
{
};
template <>
struct nextinttype<uint16_t> : nextinttype_is<uint32_t>
{
};
template <>
struct nextinttype<uint32_t> : nextinttype_is<uint64_t>
{
};
#ifdef HAS128T
template <>
struct nextinttype<uint64_t> : nextinttype_is<__uint128_t>
{
};
#endif
// signed chain
template <>
struct nextinttype<int8_t> : nextinttype_is<int16_t>
{
};
template <>
struct nextinttype<int16_t> : nextinttype_is<int32_t>
{
};
template <>
struct nextinttype<int32_t> : nextinttype_is<int64_t>
{
};
#ifdef HAS128T
template <>
struct nextinttype<int64_t> : nextinttype_is<__int128_t>
{
};
#endif
/** | |
* Emanuele Ruffaldi (C) 2017-2018 | |
* | |
* cppPosit project | |
* generalized soft float in unpacked form
* | |
*/ | |
#include <cstdint> | |
#ifndef FPGAHLS | |
#include <iostream> | |
#endif | |
#include <inttypes.h> | |
#include <math.h> | |
#include <bitset> | |
#include <limits> | |
#include <ratio> | |
#if !defined(FPGAHLS) && defined(HAS128T) | |
/// Streams a 128-bit signed integer as a decimal number (standard streams provide no
/// overload for __int128_t).
///
/// The digits are produced from the unsigned magnitude, so the most negative value is
/// handled without overflow. The text is written as a single string, so field width and
/// fill apply to the whole number; numeric base flags (hex/oct) are not honoured.
///
/// @param ons destination stream
/// @param x value to print
/// @return ons, to allow chaining
inline std::ostream &operator<<(std::ostream &ons, __int128_t x)
{
    char text[41]; // up to 39 digits, an optional sign and the terminator
    char *cursor = text + sizeof(text);
    *--cursor = '\0';
    const bool negative = x < 0;
    // two's complement negation in unsigned arithmetic yields the magnitude
    __uint128_t magnitude = negative ? (~static_cast<__uint128_t>(x) + 1) : static_cast<__uint128_t>(x);
    do
    {
        *--cursor = static_cast<char>('0' + static_cast<int>(magnitude % 10));
        magnitude /= 10;
    } while (magnitude != 0);
    if (negative)
    {
        *--cursor = '-';
    }
    ons << cursor;
    return ons;
}
#endif | |
/// Limits v to the range [lo, hi]: returns lo when v < lo, hi when v > hi, and v otherwise.
/// The result is a reference to one of the three arguments.
template <class T>
constexpr const T &clamp(const T &v, const T &lo, const T &hi)
{
    return (v < lo) ? lo : ((v > hi) ? hi : v);
}
/// Integer division rounded towards negative infinity (floor of a / b).
///
/// C++ integer division truncates towards zero, so the truncated quotient is one too
/// large exactly when the division is inexact and the remainder and the divisor have
/// opposite signs. This also gives the correct result for negative divisors.
/// b must not be 0.
template <class T>
constexpr T FLOORDIV(T a, T b)
{
    return static_cast<T>((a / b) - (((a % b) != 0) && (((a % b) < 0) != (b < 0))));
}
template <class FT = uint64_t, class ET = int32_t> | |
struct Unpacked | |
{ | |
static_assert(std::is_unsigned<FT>::value, | |
"Unpacked requires unsigned fractiont type"); | |
static_assert(std::is_signed<ET>::value, | |
"Unpacked requires signed exponent type"); | |
using POSIT_LUTYPE = FT; | |
enum | |
{ | |
FT_bits = sizeof(FT) * 8 | |
}; | |
enum : FT | |
{ | |
FT_leftmost_bit = (((FT)1) << (FT_bits - 1)) | |
}; | |
#ifndef UnpackedDualSel | |
#define UnpackedDualSel(a, b) ((a) + (b)*4) | |
#endif | |
enum Type | |
{ | |
Regular, | |
Infinity, | |
NaN, | |
Zero | |
}; /// signed infinity and nan require the extra X bit | |
Type type = Regular; | |
bool negativeSign = false; | |
ET exponent = 0; // with sign | |
FT fraction = 0; // this can be 52bit for holding double. | |
struct single_tag | |
{ | |
}; | |
explicit CONSTEXPR14 Unpacked(single_tag, uint32_t p) | |
{ | |
unpack_xfloati<single_trait>(p); | |
} | |
explicit constexpr Unpacked() {} | |
// assume regular | |
CONSTEXPR14 Unpacked normalized() const | |
{ | |
if (fraction == 0) | |
{ | |
return Unpacked(Zero, false); | |
} | |
else | |
{ | |
int k = findbitleftmostC(fraction); | |
return Unpacked(exponent - k, fraction << (k + 1), negativeSign); | |
} | |
} | |
explicit CONSTEXPR14 Unpacked(halffloat p) { unpack_half(p); } | |
explicit CONSTEXPR14 Unpacked(int i) { unpack_int(i); } | |
explicit CONSTEXPR14 Unpacked(Type t, bool anegativeSign = false) | |
: type(t), negativeSign(anegativeSign){}; | |
template <class Trait, typename = typename std::enable_if<std::is_integral< | |
typename Trait::value_t>::value>::type> | |
explicit CONSTEXPR14 Unpacked(typename Trait::value_t i) | |
{ | |
unpack_xfixed<Trait>(i); | |
} | |
template <class Trait, typename = typename std::enable_if<!std::is_integral< | |
typename Trait::value_t>::value>::type> | |
explicit CONSTEXPR14 Unpacked(typename Trait::holder_t i) | |
{ | |
unpack_xfloat<Trait>(i); | |
} | |
// expect 1.xxxxxx otherwise make it 0.xxxxxxxxx | |
explicit CONSTEXPR14 Unpacked(ET aexponent, FT afraction, bool anegativeSign) | |
: type(Regular), | |
negativeSign(anegativeSign), | |
exponent(aexponent), | |
fraction(afraction) {} | |
#ifndef FPGAHLS | |
explicit CONSTEXPR14 Unpacked(float p) | |
{ | |
unpack_float(p); | |
} | |
explicit CONSTEXPR14 Unpacked(double p) { unpack_double(p); } | |
CONSTEXPR14 Unpacked &unpack_float(float f) | |
{ | |
return unpack_xfloat<single_trait>(f); | |
} | |
CONSTEXPR14 Unpacked &unpack_double(double d) | |
{ | |
return unpack_xfloat<double_trait>(d); | |
} | |
constexpr operator float() const { return pack_xfloat<single_trait>(); } | |
constexpr operator double() const { return pack_xfloat<double_trait>(); } | |
#endif | |
CONSTEXPR14 Unpacked &unpack_half(halffloat d) | |
{ | |
return unpack_xfloat<half_trait>(d); | |
} | |
CONSTEXPR14 Unpacked &unpack_int(int i) | |
{ | |
return unpack_xfixed<fixedtrait<int, sizeof(int) * 8, 0>>(i); | |
} | |
constexpr operator halffloat() const { return pack_xfloat<half_trait>(); } | |
constexpr operator int() const | |
{ | |
return pack_xfixed<fixedtrait<int, sizeof(int) * 8, 0>>(); | |
} | |
template <class Trait> | |
CONSTEXPR14 typename Trait::holder_t pack_xfloati() const; | |
template <class Trait> | |
CONSTEXPR14 typename Trait::value_t pack_xfixed() const; | |
template <class Trait> | |
typename Trait::value_t pack_xfloat() const | |
{ | |
union { | |
typename Trait::holder_t i; | |
typename Trait::value_t f; | |
} uu; | |
uu.i = pack_xfloati<Trait>(); | |
return uu.f; | |
} | |
template <class T> | |
constexpr T pack_float() const | |
{ | |
return pack_xfloat<typename float2trait<T>::trait>(); | |
} | |
constexpr bool isInfinity() const { return type == Infinity; } | |
constexpr bool isRegular() const { return type == Regular; } | |
constexpr bool isNaN() const { return type == NaN; } | |
constexpr bool isZero() const { return type == Zero; } | |
constexpr bool isPositive() const { return !negativeSign; } | |
static constexpr Unpacked infinity() { return Unpacked(Infinity); } | |
static constexpr Unpacked pinfinity() { return Unpacked(Infinity, false); } | |
static constexpr Unpacked ninfinity() { return Unpacked(Infinity, true); } | |
static constexpr Unpacked nan() { return Unpacked(NaN); } | |
static constexpr Unpacked one() { return Unpacked(0, 0, false); } | |
static constexpr Unpacked zero() { return Unpacked(Zero); } | |
template <class Trait> | |
static constexpr Unpacked make_fixed(typename Trait::value_t x) | |
{ | |
return Unpacked().unpack_xfixed<Trait>(x); | |
} | |
template <class Trait> | |
static constexpr Unpacked make_floati(typename Trait::holder_t x) | |
{ | |
return Unpacked().unpack_xfloati<Trait>(x); | |
} | |
template <class Trait> | |
static constexpr Unpacked make_float(typename Trait::value_t x) | |
{ | |
return Unpacked().unpack_xfloat<Trait>(x); | |
} | |
constexpr bool operator==(const Unpacked &u) const | |
{ | |
// nan != nan ALWAYS | |
return type == NaN || u.type == NaN | |
? false | |
: negativeSign == u.negativeSign && type == u.type && | |
(type == Regular | |
? (exponent == u.exponent && fraction == u.fraction) | |
: true); | |
} | |
constexpr bool operator!=(const Unpacked &u) const | |
{ | |
// nan != nan ALWAYS | |
return type == NaN || u.type == NaN ? true : (*this == u); | |
} | |
constexpr Unpacked operator-() const | |
{ | |
return Unpacked(exponent, fraction, !negativeSign); | |
} | |
CONSTEXPR14 Unpacked inv() const | |
{ | |
switch (type) | |
{ | |
case Regular: | |
if (fraction == 0) | |
{ | |
// std::cout << "[exponent inversion " << std::dec << " exponent" << | |
// exponent << "] becomes " << -exponent << std::endl; | |
return Unpacked(-exponent, 0, negativeSign); | |
} | |
else | |
{ | |
// one == 0,0,false | |
// TODO FIX SIGN/INFINITY/NAN | |
// put hidden 1. in mantiss | |
POSIT_LUTYPE afrac = FT_leftmost_bit; | |
POSIT_LUTYPE bfrac = FT_leftmost_bit | (fraction >> 1); | |
// std::cout << "inversion " << std::hex << bfrac << " exponent" << | |
// exponent << std::endl; | |
auto exp = -exponent; | |
if (afrac < bfrac) | |
{ | |
exp--; | |
bfrac >>= 1; | |
} | |
return Unpacked( | |
exp, (((typename nextinttype<FT>::type)afrac) << FT_bits) / bfrac, | |
negativeSign); | |
// return one()/(*this); | |
} | |
break; | |
case Infinity: | |
return zero(); | |
case Zero: | |
return infinity(); | |
case NaN: | |
default: | |
return *this; | |
} | |
} | |
/// unpacks a value stored as fixed or integer. Value and holder match | |
template <class Trait> | |
CONSTEXPR14 Unpacked &unpack_xfixed(typename Trait::value_t value); | |
/// unpacks a floating point value as expressed by its holding type (uint32 | |
/// for single) | |
template <class Trait> | |
CONSTEXPR14 Unpacked &unpack_xfloati(typename Trait::holder_t value); | |
/// unpacks a floating point value by its value type (single) | |
template <class Trait> | |
Unpacked &unpack_xfloat(typename Trait::value_t value) // CANNOT be | |
// constexpr, except | |
// using the expensive | |
// float2bits | |
{ | |
union { | |
typename Trait::holder_t i; | |
typename Trait::value_t f; | |
} uu; | |
uu.f = value; | |
return unpack_xfloati<Trait>(uu.i); | |
} | |
/// Difference a - b, computed as the sum of a and the negation of b.
CONSTEXPR14 friend Unpacked operator-(Unpacked a, Unpacked b)
{
  const Unpacked negated = -b;
  return a + negated;
}
/// In-place sum: replaces *this with *this + a and returns *this.
CONSTEXPR14 Unpacked &operator+=(const Unpacked &a)
{
  return *this = *this + a;
}
/// In-place difference: adds the negation of a to *this and returns *this.
CONSTEXPR14 Unpacked &operator-=(const Unpacked &a)
{
  return *this += -a;
}
/// Sum of two unpacked values.
///
/// Special values:
///  - NaN in either operand gives NaN (previously `a + NaN` returned `a`)
///  - x + 0 = x, inf + finite = inf, inf + inf = inf when the signs agree,
///    NaN otherwise
///
/// Regular + Regular: both mantissas get the hidden bit made explicit and are
/// moved right by one extra position (1.xxxx => 0.1xxxx) so that the carry of
/// the sum fits; the operand with the smaller exponent is then shifted right
/// by the exponent difference and the result is handed to normalized().
// TODO overflow?
CONSTEXPR14 friend Unpacked operator+(Unpacked a, Unpacked b)
{
  if (a.isNaN())
    return a;
  if (b.isNaN())
    return b;
  switch (UnpackedDualSel(a.type, b.type))
  {
  case UnpackedDualSel(Regular, Regular):
  {
    // width of the working type: a shift by this amount or more is undefined
    // in C++, so such a shift is replaced by its mathematical result (0)
    constexpr int lubits = sizeof(POSIT_LUTYPE) * 8;
    auto dir = a.exponent - b.exponent;
    const ET exp = (dir < 0 ? b.exponent : a.exponent) + 1;
    // move right means increment exponent
    // 1.xxxx => 0.1xxxxxx
    // 1.yyyy => 0.1yyyyyy
    POSIT_LUTYPE afrac1 =
        (FT_leftmost_bit >> 1) |
        (a.fraction >> 2); // denormalized and shifted right
    POSIT_LUTYPE bfrac1 = (FT_leftmost_bit >> 1) | (b.fraction >> 2);
    // align the operand with the smaller exponent
    POSIT_LUTYPE afrac =
        dir < 0 ? (-dir < lubits ? (afrac1 >> -dir) : 0) : afrac1;
    POSIT_LUTYPE bfrac =
        dir < 0 ? bfrac1 : (dir < lubits ? (bfrac1 >> dir) : 0);
    // if dir==0 then:
    //   0.1xxxxx
    //   0.1yyyyy
    //   1.zzzzzz
    //
    // but also
    //   0.1xxxx
    //   0.0001yyyy
    //   0.1zzzz
    //
    // mode 0: same sign, magnitudes are added
    // mode +1/-1: opposite signs, the smaller magnitude is subtracted from
    // the larger one and the sign of the larger one wins
    int mode =
        a.negativeSign == b.negativeSign ? 0 : afrac > bfrac ? 1 : -1;
    bool osign = mode >= 0 ? a.negativeSign : b.negativeSign;
    POSIT_LUTYPE frac = mode == 0
                            ? afrac + bfrac
                            : mode > 0 ? afrac - bfrac : bfrac - afrac;
    return Unpacked(exp, frac, osign).normalized(); // pass denormalized
  }
  case UnpackedDualSel(Regular, Zero):
  case UnpackedDualSel(Zero, Zero):
  case UnpackedDualSel(Infinity, Zero):
  case UnpackedDualSel(Infinity, Regular):
    return a;
  case UnpackedDualSel(Zero, Regular):
  case UnpackedDualSel(Zero, Infinity):
  case UnpackedDualSel(Regular, Infinity):
    return b;
  default: // case UnpackedDualSel(Infinity,Infinity):
    return (a.negativeSign == b.negativeSign) ? a : nan();
  }
}
/// Product of two unpacked values.
///
/// Special values:
///  - NaN in either operand gives NaN (previously `a * NaN` returned `a`)
///  - 0 * regular = 0, 0 * 0 = 0, 0 * inf = NaN
///  - inf * inf, inf * regular and regular * inf give a signed infinity
///
/// Regular * Regular: the mantissas (hidden bit explicit) are multiplied in
/// the next wider integer type; the product of two values in [1,2) lies in
/// [1,4), so the exponent is incremented when the top bit of the product is
/// set, otherwise the product is shifted left by one.
// https://www.edwardrosten.com/code/fp_template.html
// https://github.com/Melown/half
// TODO overflow?
CONSTEXPR14 friend Unpacked operator*(const Unpacked &a, const Unpacked &b)
{
  if (a.isNaN())
    return a;
  if (b.isNaN())
    return b;
  switch (UnpackedDualSel(a.type, b.type))
  {
  case UnpackedDualSel(Regular, Regular):
  {
    POSIT_LUTYPE afrac = FT_leftmost_bit | (a.fraction >> 1);
    POSIT_LUTYPE bfrac = FT_leftmost_bit | (b.fraction >> 1);
    auto frac =
        ((((typename nextinttype<FT>::type)afrac) * bfrac) >> FT_bits);
#ifdef FPGAHLS
#pragma HLS RESOURCE variable = frac core = Mul_LUT
#endif
    // q: the product is below 2.0 (top bit clear) and needs one left shift
    bool q = (frac & FT_leftmost_bit) == 0;
    auto rfrac = q ? (frac << 1) : frac;
    auto exp = a.exponent + b.exponent + (q ? 0 : 1);
    // the final << 1 moves the hidden bit out of the stored fraction
    return Unpacked(exp, rfrac << 1, a.negativeSign ^ b.negativeSign);
  }
  case UnpackedDualSel(Regular, Zero):
  case UnpackedDualSel(Zero, Regular):
  case UnpackedDualSel(Zero, Zero):
    return zero();
  case UnpackedDualSel(Infinity, Zero):
  case UnpackedDualSel(Zero, Infinity):
    return nan();
  default: // case UnpackedDualSel(Infinity,Infinity):
    // inf inf or inf reg or reg inf
    return (a.negativeSign ^ b.negativeSign) ? ninfinity() : pinfinity();
  }
}
/** | |
* Division Truth Table | |
*/ | |
// TODO overflow? | |
CONSTEXPR14 friend Unpacked operator/(const Unpacked &a, const Unpacked &b) | |
{ | |
if (a.isNaN() || b.isNaN()) | |
return a; | |
// 9 more cases | |
switch (UnpackedDualSel(a.type, b.type)) | |
{ | |
case UnpackedDualSel(Regular, Regular): | |
{ | |
POSIT_LUTYPE afrac = FT_leftmost_bit | (a.fraction >> 1); | |
POSIT_LUTYPE bfrac1 = FT_leftmost_bit | (b.fraction >> 1); | |
auto exp = a.exponent - b.exponent + (afrac < bfrac1 ? -1 : 0); | |
POSIT_LUTYPE bfrac = afrac < bfrac1 ? (bfrac1 >> 1) : bfrac1; | |
/* | |
if (afrac < bfrac) { | |
exp--; | |
bfrac >>= 1; | |
} | |
*/ | |
return Unpacked( | |
exp, (((typename nextinttype<FT>::type)afrac) << FT_bits) / bfrac, | |
a.negativeSign ^ b.negativeSign); | |
} | |
case UnpackedDualSel(Zero, Zero): | |
case UnpackedDualSel(Infinity, Infinity): | |
return nan(); | |
case UnpackedDualSel(Zero, Infinity): | |
return zero(); | |
case UnpackedDualSel(Zero, Regular): | |
case UnpackedDualSel(Infinity, Zero): | |
return a; | |
case UnpackedDualSel(Regular, Zero): | |
return Unpacked(Unpacked::Infinity, a.negativeSign); | |
default: // case UnpackedDualSel(Infinity,Regular): | |
return (a.negativeSign ^ b.negativeSign) ? ninfinity() : pinfinity(); | |
} | |
} | |
#ifndef FPGAHLS | |
friend std::ostream &operator<<(std::ostream &ons, Unpacked const &o) | |
{ | |
switch (o.type) | |
{ | |
case Unpacked::Regular: | |
ons << "up(" << (o.negativeSign ? "-" : "+") | |
<< " exp (dec) = " << std::dec | |
<< typename printableinttype<const ET>::type(o.exponent) | |
<< " fraction (hex) = " << std::hex | |
<< typename printableinttype<const FT>::type(o.fraction) | |
<< " (bin) = " << std::dec | |
<< (std::bitset<sizeof(o.fraction) * 8>(o.fraction)) << ")"; | |
break; | |
case Unpacked::Infinity: | |
ons << (o.negativeSign ? "up(-infinity)" : "up(+infinity)"); | |
break; | |
case Unpacked::NaN: | |
ons << "up(nan)"; | |
break; | |
case Unpacked::Zero: | |
ons << "up(0)"; | |
break; | |
} | |
return ons; | |
} | |
#endif | |
}; | |
template <class FT, class ET> | |
template <class Trait> | |
CONSTEXPR14 Unpacked<FT, ET> &Unpacked<FT, ET>::unpack_xfixed( | |
typename Trait::value_t nx) | |
{ | |
// TODO: handle infinity or nan in Trait | |
if (nx != 0) | |
{ | |
using UT = typename std::make_unsigned<typename Trait::value_t>::type; | |
type = Regular; | |
negativeSign = nx < 0; | |
UT x = pcabs(nx); | |
const int p = Trait::totalbits - findbitleftmostC(x) - 1; // 31->0,0->31 | |
exponent = (p - Trait::fraction_bits); | |
UT ux = p == 0 ? 0 : (x << (Trait::totalbits - p)); | |
// UT x : 0[N-p-1] 1 ?[p] | |
// corner cases: | |
// x: 1 ?[N-1] | |
// x: 0[N-1] 1 | |
// UT ux: ?[p] 0[N-p] | |
// FT f: ?[min(p,size(FT)] 0[size(FT)-min(p,size(FT)] | |
// take all p bits rightmost of x and make them leftmost | |
fraction = cast_right_to_left<UT, Trait::totalbits, FT, FT_bits>()(ux); | |
} | |
else | |
{ | |
exponent = 0; | |
fraction = 0; | |
type = Zero; | |
negativeSign = false; | |
} | |
return *this; | |
} | |
// https://www.h-schmidt.net/FloatConverter/IEEE754.html | |
template <class FT, class ET> | |
template <class Trait> | |
CONSTEXPR14 Unpacked<FT, ET> &Unpacked<FT, ET>::unpack_xfloati( | |
typename Trait::holder_t value) | |
{ | |
ET rawexp = bitset_getT(value, Trait::fraction_bits, Trait::exponent_bits); | |
type = Regular; | |
negativeSign = | |
value & (((typename Trait::holder_t)1) << (Trait::data_bits - 1)); | |
exponent = rawexp - Trait::exponent_bias; // ((un.u >> Trait::fraction_bits) | |
// & Trait::exponent_mask) | |
// std::cout << "un.u is " << std::hex <<un.u << " for " << value << | |
// std::endl; std::cout << std::dec << "float trait: fraction bits " << | |
// Trait::fraction_bits << " exponent bits " << Trait::exponent_bits << " bias | |
// " << Trait::exponent_bias << " mask " << std::hex << Trait::exponent_mask<< | |
// std::endl; std::cout << std::hex << "exponent output " << std::hex << | |
// exponent << " " << std::dec << exponent << " fraction " << std::hex << | |
// fraction << std::endl; | |
// fractional part is LSB of the holder_t and of length | |
fraction = cast_right_to_left<typename Trait::holder_t, Trait::fraction_bits, | |
FT, FT_bits>()(value); | |
// if(FT_bits < Trait::fraction_bits) | |
// fraction = bitset_getT(value,0,Trait::fraction_bits) >> | |
//(Trait::fraction_bits-FT_bits); else fraction = | |
//((POSIT_LUTYPE)bitset_getT(value,0,Trait::fraction_bits)) << | |
//(FT_bits-Trait::fraction_bits); | |
// stored exponent: 0, x, exponent_mask === 0, any, infinity | |
// biased: -max, -max+1, ..., max, max+1 === 0, min, ..., max, infinity | |
if (rawexp == ((1 << Trait::exponent_bits) - 1)) // AKA 128 for single | |
{ | |
if (fraction == 0) | |
{ | |
type = Infinity; | |
} | |
else | |
{ | |
type = NaN; // don't care which | |
} | |
} | |
else if (rawexp == 0) | |
{ | |
// normalized | |
if (!Trait::with_denorm || fraction == 0) | |
{ | |
type = Zero; | |
negativeSign = false; // don't care signed zero | |
} | |
// denormalized | |
else | |
{ | |
int k = findbitleftmostC(fraction); | |
exponent -= k; | |
fraction <<= (k + 1); | |
//std::cout << typeid(Trait).name() << "unpacking: denormalized (rawexp=0,fraction=" << (int)tmp << ") unpacked as (exp=" << exponent << ",fraction=" << (int)fraction << ")"<< std::endl; | |
} | |
} | |
return *this; | |
} | |
/// Converts a left aligned fraction of abits bits held in AT into a fraction
/// of bbits bits held in BT. This primary template is intentionally empty:
/// the work is done by the two partial specializations selected by
/// abits_gt_bbits. msb is the mask of the most significant bit of AT.
template <int abits, class AT, int bbits, class BT, bool abits_gt_bbits, AT msb>
struct fraction_bit_extract {};
/// specialization when abits >= bbits | |
template <int abits, class AT, int bbits, class BT, AT msb> | |
struct fraction_bit_extract<abits, AT, bbits, BT, true, msb> | |
{ | |
static constexpr BT packdenorm(AT fraction) | |
{ | |
// expand the fractiona part | |
return (msb | (fraction >> 1)) >> (abits - bbits); | |
} | |
static constexpr BT pack(AT fraction) | |
{ | |
return bitset_getT(fraction, abits - bbits, bbits); | |
} | |
}; | |
/// specialization when bbits >= abits | |
template <int abits, class AT, int bbits, class BT, AT msb> | |
struct fraction_bit_extract<abits, AT, bbits, BT, false, msb> | |
{ | |
static constexpr BT packdenorm(AT fraction) | |
{ | |
return ((BT)(msb | (fraction >> 1)) << (bbits - abits)); | |
} | |
static constexpr BT pack(AT fraction) | |
{ | |
return ((BT)fraction) << (bbits - abits); | |
} | |
}; | |
/** | |
* Convert (s,2**E,F) to int | |
*/ | |
template <class FT, class ET> | |
template <class Trait> | |
CONSTEXPR14 typename Trait::value_t Unpacked<FT, ET>::pack_xfixed() const | |
{ | |
switch (type) | |
{ | |
case Infinity: | |
return 0; | |
case Zero: | |
return 0; | |
case NaN: | |
return 0; | |
default: | |
break; | |
} | |
constexpr int intbits = Trait::totalbits - Trait::fraction_bits; | |
if (exponent >= intbits) | |
{ | |
return negativeSign ? std::numeric_limits<typename Trait::value_t>::lowest() | |
: std::numeric_limits<typename Trait::value_t>::max(); | |
} | |
else if (exponent < -Trait::fraction_bits) | |
{ | |
return 0; | |
} | |
else | |
{ | |
using ST = typename Trait::value_t; | |
using UT = typename std::make_unsigned<ST>::type; | |
// fraction 1.xxxxx from left aligned over FT bytes to UT bytes still left | |
// aligned over Trait::totalbits | |
UT f = fraction_bit_extract<FT_bits, FT, Trait::totalbits, UT, | |
(FT_bits > Trait::totalbits), | |
FT_leftmost_bit>::pack(fraction); | |
// add the 1 bit for the current exponent | |
// f[totalbits] -> 0[intbits-exponent+1] 1 | |
// f[exponent+Trait::fraction_bits-1] | |
// | |
// extrema: e.g. for totalbits=32, whatever fraction | |
// - exponent==-Trait::fraction_bits ==> 1 | 0 | |
// - exponent==intbits-1 ==> 0x8000000 | (F >> 1) | |
ST r = (ST(1) << (exponent + Trait::fraction_bits)) | | |
(ST)(f >> (intbits - exponent)); | |
return negativeSign ? -r : r; | |
} | |
} | |
template <class FT, class ET> | |
template <class Trait> | |
CONSTEXPR14 typename Trait::holder_t Unpacked<FT, ET>::pack_xfloati() const | |
{ | |
switch (type) | |
{ | |
case Infinity: | |
return negativeSign ? Trait::ninfinity_h : Trait::pinfinity_h; | |
case Zero: | |
return 0; | |
case NaN: | |
return Trait::nan_h; | |
; // it will cast to double TODO: it will cast to value_t | |
default: | |
break; | |
} | |
largest_type<ET, typename int_least_bits<Trait::exponent_bits>::type> fexp = | |
exponent; | |
fexp += Trait::exponent_bias; | |
// left aligned | |
typename Trait::holder_t fexpbits = 0; | |
typename Trait::holder_t ffracbits = 0; | |
if (fexp > Trait::exponent_max) // this is RAW exponent max | |
{ | |
return negativeSign ? Trait::lowest_h : Trait::max_h; | |
// overflow, set as MAX | |
// fexpbits = ((typename Trait::holder_t)Trait::exponent_max) << | |
// (Trait::fraction_bits); // AKA 254 and 23 ffracbits = -1; | |
} | |
else if (fexp < 1) | |
{ | |
if (Trait::with_denorm) | |
return 0; | |
else | |
{ | |
// denormalized numbers are and happen when raw exponent is below 1 | |
// 0.fractionbits | |
// | |
// 2^exponent * fraction ==> 0.rawfraction | |
// | |
// approach: | |
// FT_bits >= Trait::fraction_bits | |
// FT_leftmost_bit| (fraction>>) | |
// FT_bits < Trait::fraction_bits | |
ffracbits = fraction_bit_extract<FT_bits, FT, Trait::fraction_bits, | |
typename Trait::holder_t, | |
(FT_bits > Trait::fraction_bits), | |
FT_leftmost_bit>::packdenorm(fraction); | |
// use denormalization | |
ffracbits >>= -fexp; | |
//std::cout << typeid(Trait).name()<< "pack - denormalized (exp=" << exponent << ",fraction=" << (int)fraction << ") means rawexp=" << fexp << " results in fraction=" << (int)ffracbits << "\n"; | |
} | |
} | |
else // normal | |
{ | |
fexpbits = ((typename Trait::holder_t)(fexp & Trait::exponent_mask)) | |
<< (Trait::fraction_bits); | |
ffracbits = fraction_bit_extract< | |
FT_bits, FT, Trait::fraction_bits, typename Trait::holder_t, | |
(FT_bits > Trait::fraction_bits), FT_leftmost_bit>::pack(fraction); | |
} | |
typename Trait::holder_t value = | |
ffracbits | fexpbits | (negativeSign ? Trait::signbit : 0); | |
// don't underflow to zero? | |
if (value != 0 && (value << 1) == 0) | |
{ | |
value++; | |
} | |
return value; | |
} | |
template <class SrcTrait, class DstTrait, class FT> | |
constexpr typename DstTrait::holder_t convertfloats( | |
typename SrcTrait::holder_t src) | |
{ | |
return Unpacked<typename largest_type<typename SrcTrait::holder_t, | |
typename DstTrait::holder_t>::type, | |
int>::template make_floati<SrcTrait>(src) | |
.template pack_xfloati<DstTrait>(); | |
} | |
/** | |
* Emanuele Ruffaldi (C) 2017 | |
* Templated C++ Posit | |
Test | |
using X=Posit<int32_t,4,0,uint32_t>; | |
X::PT::decode_posit_rs(1) | |
*/ | |
#ifndef FPGAHLS | |
/// Reinterprets the 32 bits of i as a float (union based type punning).
inline float uint32_to_float(uint32_t i)
{
  union Pun {
    uint32_t bits;
    float real;
  };
  Pun pun;
  pun.bits = i;
  return pun.real;
}
#endif | |
/// Selects which special values a posit format carries.
enum class PositSpec
{
  WithNan,   ///< the top element is NaN, no infinities
  WithInf,   ///< classic posit: the top element is the single infinity
  WithNanInf ///< the top element is NaN, its two neighbours are +-infinity
};
template <class T, int totalbits, int esbits, PositSpec positspec_ > | |
struct PositTrait | |
{ | |
static_assert(std::is_signed<T>::value,"required signed T"); | |
static_assert(sizeof(T)*8 >= totalbits,"required enough storage T for provided bits SREF"); | |
static_assert(esbits <= totalbits-3,"esbits should be at most N-3 for the cases [x01,E] and [x10,E]"); | |
using POSIT_STYPE = typename std::make_signed<T>::type; | |
using POSIT_UTYPE = typename std::make_unsigned<T>::type; | |
static constexpr PositSpec positspec = positspec_; | |
static constexpr bool withnan = positspec_ != PositSpec::WithInf; | |
using exponenttype = typename std::conditional<(totalbits+esbits >= sizeof(T)*8),typename nextinttype<T>::type,T>::type; | |
//enum : POSIT_UTYPE { | |
static constexpr POSIT_UTYPE POSIT_MAXREGIME_BITS = totalbits-1; | |
static constexpr POSIT_UTYPE POIST_ONEHELPER = 1; | |
static constexpr POSIT_UTYPE POSIT_HOLDER_SIZE = sizeof(T)*8; | |
static constexpr POSIT_UTYPE POSIT_SIZE = totalbits; | |
static constexpr POSIT_UTYPE POSIT_ESP_SIZE = esbits; | |
static constexpr POSIT_UTYPE POSIT_MSB = POIST_ONEHELPER<<(totalbits-1); | |
static constexpr POSIT_UTYPE POSIT_HOLDER_MSB = POIST_ONEHELPER<<(POSIT_HOLDER_SIZE-1); | |
static constexpr POSIT_UTYPE POSIT_MASK = ((POSIT_MSB-1)|(POSIT_MSB)); | |
static constexpr POSIT_UTYPE POSIT_ESP_MASK = (POIST_ONEHELPER<< esbits)-1; | |
//POSIT_HOLDER_MSB = 1U<<(POSIT_HOLDER_SIZE-1), | |
//POSIT_HOLDER_MASK = ((POSIT_HOLDER_SIZE-1)|(POSIT_HOLDER_SIZE)), | |
static constexpr POSIT_UTYPE POSIT_EXTRA_BITS = POSIT_HOLDER_SIZE-totalbits; | |
static constexpr POSIT_UTYPE POSIT_SIGNBIT = (POIST_ONEHELPER<<(totalbits-1)); // bit | |
static constexpr POSIT_UTYPE POSIT_INVERTBIT = (POIST_ONEHELPER<<(totalbits-2)); | |
static constexpr POSIT_STYPE POSIT_REG_SCALE = 1<<esbits; | |
// these are portable ways for representing 10000000 and the two adjacents numbers in | |
// the posit circle | |
static constexpr POSIT_STYPE _POSIT_TOP = (POSIT_STYPE)((POSIT_UTYPE(~0) << (totalbits-1))); | |
static constexpr POSIT_STYPE _POSIT_TOPRIGHT = (POSIT_STYPE)((POIST_ONEHELPER<< (totalbits-1))-1); | |
static constexpr POSIT_STYPE _POSIT_TOPLEFT = (POSIT_STYPE)((POSIT_UTYPE(~0) << (totalbits-1)))+1; | |
// Without Nan (classic Posit): there only one Infinity | |
// With NaN: the top element is NaN and then its adjacents correspond to +- Infinity | |
static constexpr POSIT_STYPE POSIT_PINF = positspec_ == PositSpec::WithNanInf ? _POSIT_TOPRIGHT: _POSIT_TOP ; // 1[sign] 000000 or N-1 111 bits | |
static constexpr POSIT_STYPE POSIT_NINF = positspec_ == PositSpec::WithNanInf ? _POSIT_TOPLEFT: _POSIT_TOP; | |
static constexpr POSIT_STYPE POSIT_NAN = _POSIT_TOP; // infinity in withnan=false otherwise it is truly nan | |
static constexpr POSIT_STYPE POSIT_ONE = POSIT_INVERTBIT; // fine due to position of invert bit | |
static constexpr POSIT_STYPE POSIT_MONE = -POSIT_ONE ; // minus one | |
// Two | |
static constexpr POSIT_STYPE POSIT_TWO = (POSIT_INVERTBIT | (POSIT_INVERTBIT>>(1+esbits))); | |
// 1/2 | |
// 00 1[esbits+1] 0[N-2-esbitis-1] | |
static constexpr POSIT_STYPE POSIT_HALF = POSIT_STYPE( (POSIT_UTYPE(-1) >> (totalbits-esbits-1))) << (totalbits-3-esbits); | |
// max value below Infinity | |
// 1[holder-total] 1 0[total-1] | |
static constexpr POSIT_STYPE POSIT_MAXPOS = _POSIT_TOPRIGHT - (positspec_ == PositSpec::WithNanInf ? 1:0); | |
// min value above -Infinity | |
// 0[holder-total] 0 1[total-1] | |
static constexpr POSIT_STYPE POSIT_MINNEG = _POSIT_TOPLEFT + (positspec_ == PositSpec::WithNanInf? 1:0); | |
// minimal number above zero | |
static constexpr POSIT_STYPE POSIT_AFTER0 = 1; // right to 0 | |
static constexpr POSIT_STYPE POSIT_BEFORE0 = -POSIT_AFTER0; // left to 0 | |
//static constexpr exponenttype maxexponent = withnan_ ? POSIT_REG_SCALE * (POSIT_SIZE - 3) : POSIT_REG_SCALE * (POSIT_SIZE - 2); // sign+1st rs | |
//static constexpr exponenttype minexponent = (-((exponenttype)POSIT_REG_SCALE) * (POSIT_SIZE - 2)) // sign+1st rs | |
static constexpr exponenttype maxexponent() { return positspec_ == PositSpec::WithNanInf ? POSIT_REG_SCALE * (totalbits - 3) : POSIT_REG_SCALE * (totalbits - 2); } | |
static constexpr exponenttype minexponent() { return (-((exponenttype)POSIT_REG_SCALE) * (totalbits - 2)) ; } | |
//enum : exponenttype{ | |
//}; | |
//static constexpr POSIT_UTYPE LMASK(POSIT_UTYPE bits, POSIT_UTYPE size) | |
//{ return ((bits) & (POSIT_MASK << (POSIT_SIZE - (size)))); } | |
// pars is T_left | |
static CONSTEXPR14 std::pair<int,int> decode_posit_rs(T pars) | |
{ | |
const bool x = (pars & POSIT_HOLDER_MSB) != 0; // marker bit for > 1 | |
int aindex = x ? (~pars == 0 ? POSIT_MAXREGIME_BITS : findbitleftmostC((POSIT_UTYPE)~pars)) : (pars == 0 ? POSIT_MAXREGIME_BITS : findbitleftmostC((POSIT_UTYPE)pars)); // index is LAST with !x | |
int index = aindex; // aindex > POSIT_SIZE ? POSIT_SIZE : aindex; | |
int reg = x ? index-1 : -index; | |
int rs = POSIT_MAXREGIME_BITS < index+1 ? POSIT_MAXREGIME_BITS : index+1; //std::min((int)POSIT_MAXREGIME_BITS,index+1); | |
/** | |
if(x) | |
{ | |
pars = ~pars; | |
if(!pars) | |
{ | |
return { POSIT_MAXREGIME_BITS-1, POSIT_MAXREGIME_BITS }; | |
} | |
else | |
{ | |
int index = findbitleftmostC((POSIT_UTYPE)pars)); | |
return { index-1, index+1 }; | |
} | |
} | |
else | |
{ | |
if(!pars) | |
{ | |
return { -(int)POSIT_MAXREGIME_BITS, POSIT_MAXREGIME_BITS }; | |
} | |
else | |
{ | |
int index = findbitleftmostC((POSIT_UTYPE)pars)); | |
return { -index, index+1 }; | |
} | |
} | |
*/ | |
//std::cout << "decode posit " << std::hex << std::bitset<sizeof(T)*8>(pars) << " 1first " << x << " index " << index << "(aindex " << aindex << ") rs " << rs << "reg " << reg << std::endl; | |
return {reg,rs}; | |
} | |
static constexpr std::pair<POSIT_STYPE,POSIT_UTYPE> split_reg_exp(exponenttype eexponent) | |
{ | |
// FLOORDIV(a,b) ((a) / (b) - ((a) % (b) < 0)) | |
// int reg = FLOORDIV(up.exp, POW2(es)); | |
// POSIT_UTYPE exp = up.exp - POW2(es) * reg; | |
return {eexponent >> POSIT_ESP_SIZE, eexponent & POSIT_ESP_MASK }; | |
} | |
/// compiler note: it generate simply: shl and or | |
static constexpr exponenttype join_reg_exp(POSIT_STYPE reg, POSIT_UTYPE exp) | |
{ | |
return (((exponenttype)reg) * (1<<POSIT_ESP_SIZE))|exp; | |
} | |
}; | |
//template <class T, int totalbits, int esbits, PositSpec positspec_ > | |
//constexpr typename PositTrait<T,totalbits,esbits,withnan_>::exponenttype PositTrait<T,totalbits,esbits,withnan_>::minexponent; | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
class Posit; | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
CONSTEXPR14 auto unpack_posit(const Posit<T,totalbits,esbits,FT,positspec> & p) -> typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT ; | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> pack_posit(const typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT & x); | |
/**
 * Minimal unpacked representation of the Posit: sign, regime, exponent field
 * and fraction kept apart.
 * UT is UnpackedT (provides the Type enumeration)
 * PT is the Trait (provides POSIT_STYPE and POSIT_UTYPE)
 */
template <class UT, class PT>
struct UnpackedLow_t
{
  using Type = typename UT::Type;

  Type type;
  bool negativeSign;                  // for Regular and Infinity if applicable
  typename PT::POSIT_STYPE regime;    // decoded with sign
  typename PT::POSIT_UTYPE exp;       // decoded
  typename PT::POSIT_UTYPE fraction;  // fraction left aligned without 1.

  /// special value of the given type, positive, all numeric fields zero
  constexpr UnpackedLow_t(Type t)
      : type(t), negativeSign(false), regime(0), exp(0), fraction(0) {}

  /// special value of the given type with explicit sign
  constexpr UnpackedLow_t(Type t, bool anegativeSign)
      : type(t), negativeSign(anegativeSign), regime(0), exp(0), fraction(0) {}

  /// regular value from its components
  constexpr UnpackedLow_t(bool n, typename PT::POSIT_STYPE r,
                          typename PT::POSIT_UTYPE e,
                          typename PT::POSIT_UTYPE f)
      : type(UT::Regular), negativeSign(n), regime(r), exp(e), fraction(f) {}
};
/** | |
* Stores the data in the MSB totalbits of T | |
* Uses esbits bits | |
* | |
* Notes: | |
* \tparam T is the holding type that has to be signed due to complement 2 sign method | |
* \tparam totalbits is the significant bits of posit stored in T right aligned. Due to the 2 complement scheme the MSB bits are extension of the sign | |
* \tparam esbits is the size of the exponent | |
* \tparam FT is the unisgned type holding the fraction with the 1 explicity specified | |
* | |
*/ | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
class Posit | |
{ | |
public: | |
using PT=PositTrait<T,totalbits,esbits,positspec>; | |
static_assert(std::is_unsigned<FT>::value,"required unsigned FT"); | |
enum { vtotalbits = totalbits, vesbits = esbits}; | |
struct DeepInit{}; | |
static constexpr bool withnan = positspec != PositSpec::WithInf; | |
using value_t=T; | |
using fraction_t=FT; | |
using UnpackedT=Unpacked<FT,typename PT::exponenttype>; | |
using UnpackedLow = UnpackedLow_t<UnpackedT,PT>; | |
using exponenttype = typename PT::exponenttype; | |
T v; // index in the N2 space | |
struct PositMul | |
{ | |
constexpr PositMul(Posit av, Posit bv) : a(av),b(bv) {} | |
Posit a,b; | |
constexpr Posit asPosit() const { return pack_posit<T,totalbits,esbits,FT,positspec>(a.unpack()*b.unpack()); } | |
constexpr operator Posit() const { return asPosit(); } | |
constexpr operator UnpackedT() const { return asPosit(); } | |
#ifndef FPGAHLS | |
constexpr operator float() const { return asPosit(); } | |
constexpr operator double() const { return asPosit(); } | |
#endif | |
constexpr operator int() const { return asPosit(); } | |
// pa.a*pa.b+pb.a*pb.b => | |
friend constexpr Posit operator+(const PositMul & pa, const PositMul & pb) | |
{ | |
return pack_posit<T,totalbits,esbits,FT,positspec>(pa.a.unpack()*pa.b.unpack()+pb.a.unpack()*pb.b.unpack()); | |
} | |
// missing operators | |
// & | |
// - | |
}; | |
CONSTEXPR14 Posit half() const; | |
CONSTEXPR14 Posit twice() const; | |
CONSTEXPR14 UnpackedLow unpack_low() const; | |
static CONSTEXPR14 Posit pack_low(UnpackedLow); | |
static CONSTEXPR14 UnpackedT unpacked_low2full(UnpackedLow x); | |
static CONSTEXPR14 UnpackedLow unpacked_full2low(UnpackedT tx); | |
/// diagnostics with full details | |
struct info | |
{ | |
bool infinity = false; | |
bool nan = false; | |
int k = 0; | |
bool sign = false; // true if negative | |
double regime = 0; | |
int rs = 0; | |
int es = 0; | |
int fs = 0; | |
int exp = 0; | |
int exponent = 0; | |
FT ifraction = 0; | |
double fraction = 0; | |
double value = 0; | |
}; | |
info analyze(); | |
friend constexpr bool operator == (const Posit & a, const Posit & u) { return withnan && (a.isNaN()||u.isNaN())?false :a.v == u.v; } | |
friend constexpr bool operator != (const Posit & a, const Posit & u) { return !(a == u); } | |
friend constexpr bool operator < (const Posit & a, const Posit & u) { return withnan && (a.isNaN()||u.isNaN())?false :a.v < u.v;; } | |
friend constexpr bool operator <= (const Posit & a, const Posit & u) { return withnan && (a.isNaN()||u.isNaN())?false :a.v <= u.v; } | |
friend constexpr bool operator > (const Posit & a, const Posit & u) { return withnan && (a.isNaN()||u.isNaN())?false :a.v > u.v; } | |
friend constexpr bool operator >= (const Posit & a, const Posit & u) { return withnan && (a.isNaN()||u.isNaN())?false :a.v >= u.v; } | |
static constexpr Posit ldexp(const Posit & u, int exp); // exponent product | |
using single_tag = typename UnpackedT::single_tag; | |
constexpr Posit() : v(0) {} | |
CONSTEXPR14 explicit Posit(single_tag t, uint32_t p) { v = pack_posit<T,totalbits,esbits,FT,positspec>(UnpackedT(t,p)).v; } | |
/// construct passing the holding type x | |
CONSTEXPR14 explicit Posit(DeepInit, T x) : v(x) {} | |
/// construct from decomposed (s, R,E,F) | |
CONSTEXPR14 explicit Posit(UnpackedLow u) : v(pack_low(u).v) {} | |
/// construct from fully unpacked floating (s,e,F) | |
CONSTEXPR14 explicit Posit(UnpackedT u) : v(pack_posit<T,totalbits,esbits,FT,positspec>(u).v) {} | |
#ifndef FPGAHLS | |
CONSTEXPR14 explicit Posit(float f): Posit(UnpackedT(f)) {} | |
CONSTEXPR14 explicit Posit(double d): Posit(UnpackedT(d)) {} | |
#endif | |
CONSTEXPR14 Posit(int i): Posit(UnpackedT(i)) {} | |
constexpr UnpackedT unpack() const { return unpack_posit<T,totalbits,esbits,FT,positspec>(*this); } | |
/// absolute value | |
/// TODO: use (v ^ mask) - mask OR (x+mask)^nasj | |
/// where int const mask = v >> sizeof(int) * CHAR_BIT - 1; | |
constexpr Posit abs() const { return Posit(DeepInit(),pcabs(v)); } // could be >= infinity because infinity is sign symmetric | |
/// negation | |
constexpr Posit neg() const { return Posit(DeepInit(),-v); }; | |
/// 1/x | |
CONSTEXPR14 Posit inv() const; | |
// SFINAE optionally: template<typename U = T, class = typename std::enable_if<withnan, U>::type> | |
constexpr bool hasNaN() const { return positspec != PositSpec::WithInf; } | |
constexpr bool isNaN() const { return positspec != PositSpec::WithInf && v == PT::POSIT_NAN; } | |
constexpr bool isnegative() const { return v < 0; } //(v &POSIT_SIGNBIT) != 0; } | |
constexpr bool isinfinity() const { return positspec != PositSpec::WithNan && (v == PT::POSIT_PINF || v == PT::POSIT_NINF); } | |
constexpr bool iszero() const { return v == 0; } | |
constexpr bool isone() const { return v == PT::POSIT_ONE; } | |
constexpr Posit prev() const { return Posit(DeepInit(),v > PT::POSIT_MAXPOS || v <= PT::POSIT_MINNEG ? v : v-1); } | |
constexpr Posit next() const { return Posit(DeepInit(),v <= PT::POSIT_MINNEG || v > PT::POSIT_MAXPOS ? v : v+1); } | |
//TBDconstexpr bool isNaN() const; | |
//TBD constexpr bool isexact() const { return (v&1) == 0; } | |
//TBD constexpr bool isfractional() const { return v > 0 && (abs().v < (N2>>2)); } // (0 < x < 1) or (-1 < x < 0) == (-1,1) removing 0 | |
//TBD constexpr bool isstrictlynegative() const { return v > (N2>>1); } // -inf < x < 0 | |
// Level 1: unpacked | |
// Level 0: something using posit specialties | |
friend constexpr PositMul operator*(const Posit & a, const Posit & b) | |
{ | |
return PositMul(a,b); | |
} | |
friend constexpr Posit operator+(const Posit & a, const PositMul & b) | |
{ | |
return fma(b.a,b.b,a); | |
} | |
friend constexpr Posit operator+(const PositMul & a, const Posit & b) | |
{ | |
return fma(a.a,a.b,b); | |
} | |
friend constexpr Posit fma(const Posit & a, const Posit & b, const Posit & c) | |
{ | |
return pack_posit<T,totalbits,esbits,FT,positspec>(a.unpack()*b.unpack()+c.unpack()); | |
} | |
CONSTEXPR14 Posit & operator*= (const Posit & b) | |
{ | |
*this = pack_posit<T,totalbits,esbits,FT,positspec>(unpack()*b.unpack()); | |
return *this; | |
} | |
friend constexpr Posit operator+(const Posit & a, const Posit & b) | |
{ | |
return a.iszero() ? b : b.iszero() ? a: pack_posit<T,totalbits,esbits,FT,positspec>(a.unpack()+b.unpack()); | |
} | |
Posit& operator+=(const Posit &a) { Posit r = *this+a; v = r.v; return *this; } | |
static constexpr Posit zero() { return Posit(DeepInit(),0); } | |
static constexpr Posit inf() { return Posit(DeepInit(),PT::POSIT_PINF); } | |
static constexpr Posit pinf() { return Posit(DeepInit(),PT::POSIT_PINF); } | |
static constexpr Posit ninf() { return Posit(DeepInit(),PT::POSIT_NINF); } | |
static constexpr Posit max() { return Posit(DeepInit(),PT::POSIT_MAXPOS); } | |
static constexpr Posit min() { return Posit(DeepInit(),PT::POSIT_AFTER0); } | |
static constexpr Posit lowest() { return Posit(DeepInit(),PT::POSIT_MINNEG); } | |
// SFINAE optionally: template<typename U = T, class = typename std::enable_if<withnan, U>::type> | |
static constexpr Posit nan() { return Posit(DeepInit(),PT::POSIT_NAN); } | |
static constexpr Posit infinity() { return Posit(DeepInit(),PT::POSIT_PINF); } | |
static constexpr Posit one() { return Posit(DeepInit(),PT::POSIT_ONE); } | |
static constexpr Posit two() { return Posit(DeepInit(),PT::POSIT_TWO); } | |
static constexpr Posit mone() { return Posit(DeepInit(),PT::POSIT_MONE); } | |
static constexpr Posit onehalf() { return Posit(DeepInit(),PT::POSIT_HALF); } | |
// custom operators | |
constexpr Posit operator-() const { return neg(); } | |
constexpr Posit operator~() const { return inv(); } | |
friend constexpr Posit operator-(const Posit & a, const Posit & b) { return a + (-b); } | |
friend constexpr Posit operator/(const Posit & a, const Posit & b) { return pack_posit< T,totalbits,esbits,FT,positspec> (a.unpack()/b.unpack()); } | |
/* | |
void setBits(POSIT_UTYPE bits) | |
{ | |
if(bits & POSIT_SIGNBIT) | |
{ | |
v = ((~0) << POSIT_SIZE) | bits; | |
} | |
else | |
{ | |
v = bits; | |
} | |
} | |
*/ | |
// slowproduct | |
// slowsum | |
// exactvalue | |
// iostream | |
// sqrt | |
// exp | |
// conversion | |
// max | |
/// Unpacks the posit and returns the 32-bit pattern produced by pack_xfloati<single_trait>().
constexpr uint32_t as_float_bin() const { return unpack().template pack_xfloati<single_trait>(); } | |
/// Implicit conversion to the unpacked representation returned by unpack().
constexpr operator UnpackedT() const { return unpack(); } | |
// The floating point conversions are compiled out when FPGAHLS is defined.
#ifndef FPGAHLS | |
/// Implicit conversion to float; the value is obtained by converting the result of unpack().
constexpr operator float() const { return unpack(); } | |
/// Implicit conversion to double; the value is obtained by converting the result of unpack().
constexpr operator double() const { return unpack(); } | |
#endif | |
/// Implicit conversion to int; the value is obtained by converting the result of unpack().
constexpr operator int() const { return unpack(); } | |
/// Approximates 1/(exp(-x)+1) directly on the raw value: XOR with PT::POSIT_SIGNBIT, then shift right by two.
/// TODO: infinity check + rounding of the result
/// NOTE(review): v is compared against 0 elsewhere, so it looks signed; after integer promotion the
/// shift may then be arithmetic and sign-extend — confirm against the type of PT::POSIT_SIGNBIT.
constexpr Posit pseudosigmoid() const { return Posit(DeepInit(),(v ^ PT::POSIT_SIGNBIT) >> 2); }; | |
/// Approximates ln(1+exp(x)) with the same sign-bit XOR, shifted right by one instead of two.
/// NOTE(review): the same signed-shift caveat as pseudosigmoid() applies here.
constexpr Posit pseudosoftplus() const { return Posit(DeepInit(),(v ^ PT::POSIT_SIGNBIT) >> 1); }; | |
/// returns true if in [0,1] | |
constexpr bool isUnitRange() const { return v >= 0 && v <= PT::POSIT_ONE; }; | |
/// unitary range 1-x | |
constexpr Posit urOneMinus() const { return Posit(DeepInit(),PT::POSIT_INVERTBIT-v); } | |
/// unitary range x(1-x) | |
constexpr Posit urDeltaPs() const { return (*this)*urOneMinus(); } | |
struct FullWriter | |
{ | |
FullWriter(T x): ax(x) {} | |
T ax; | |
}; | |
FullWriter describe() const { return FullWriter(v); } | |
#ifndef FPGAHLS | |
friend std::ostream & operator << (std::ostream &ons, const FullWriter & w) | |
{ | |
return ons; | |
} | |
#endif | |
}; | |
#if 0 | |
// Disabled draft of an out-of-class Posit(int) constructor; the surrounding #if 0 keeps it out of the build.
// NOTE(review): as written it refers to `rawexp` and `PP`, neither of which is declared in this block,
// and it uses `return <value>;` inside a constructor, so it would not compile if simply re-enabled.
// The phases mirror pack_low(): split the exponent into regime/exponent, then assemble the bit fields.
//template <class T,int totalbits, int esbits, class FT, PositSpec positspec, class Trait> | |
template <class T, int totalbits, int esbits, class FT, PositSpec positspec> | |
CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec>::Posit(int xvalue) | |
{ | |
using Trait=PT; | |
using POSIT_UTYPE = typename PT::POSIT_UTYPE; | |
using POSIT_STYPE = typename PT::POSIT_STYPE; | |
using UT=Unpacked<FT,typename PT::exponenttype>; | |
if(xvalue == 0) | |
{ | |
v = 0; | |
} | |
else | |
{ | |
bool negativeSign = xvalue < 0; | |
int value = xvalue < 0 ? -xvalue: xvalue; | |
auto exponentF = rawexp - Trait::exponent_bias; // ((un.u >> Trait::fraction_bits) & Trait::exponent_mask) | |
auto fractionF = cast_right_to_left<typename Trait::holder_t,Trait::fraction_bits,FT,UT::FT_bits>()(value); | |
if(rawexp == ((1 << Trait::exponent_bits)-1)) | |
{ | |
if(fractionF == 0) | |
{ | |
return PP(typename PP::DeepInit(),negativeSign ? PT::POSIT_NINF : PT::POSIT_PINF); | |
} | |
else | |
{ | |
return PP(typename PP::DeepInit(),PT::POSIT_NAN); | |
} | |
} | |
else if (rawexp == 0) | |
{ | |
if(fractionF == 0) | |
{ | |
negativeSign = false; | |
return PP::zero(); | |
} | |
else | |
{ | |
int k = findbitleftmostC(fractionF); | |
exponentF -= k; | |
fractionF <<= k+1; // plus normalization | |
} | |
} | |
// Phase 3: compute low as regime (Unpacked_Low) | |
auto eexponent = clamp<decltype(exponentF)>(exponentF,PT::minexponent,PT::maxexponent); // no overflow | |
auto rr = PT::split_reg_exp(exponentF); | |
auto fraction = cast_msb<FT,sizeof(FT)*8,typename PT::POSIT_UTYPE,sizeof(typename PT::POSIT_UTYPE)*8>()(fractionF); | |
auto reg = rr.first; | |
auto exp = rr.second; | |
// Phase 4: UnpackedLow to Posit | |
auto rs = std::max(-reg + 1, reg + 2); | |
auto es = std::min((int)(totalbits-rs-1),(int)esbits); | |
POSIT_UTYPE regbits = reg < 0 ? (PT::POSIT_HOLDER_MSB >> -reg) : (PT::POSIT_MASK << (PT::POSIT_HOLDER_SIZE-(reg+1))); // reg+1 bits on the left | |
POSIT_UTYPE eexp = msb_exp<POSIT_UTYPE,PT::POSIT_HOLDER_SIZE,esbits,(esbits == 00)>()(exp); | |
POSIT_STYPE p = ((fraction >> (rs+es+1)) | (eexp >> (rs+1)) | (regbits>>1)) >> (sizeof(PP)*8-totalbits); | |
return PP(typename PP::DeepInit(),negativeSign ? -p : p); | |
} | |
} | |
#endif | |
#ifndef FPGAHLS | |
template <class T, int totalbits, int esbits, class FT, PositSpec positspec> | |
std::ostream & operator << (std::ostream & ons, Posit<T,totalbits,esbits,FT,positspec> const & o) | |
{ | |
ons << o.unpack(); | |
return ons; | |
} | |
#endif | |
/// Level 1: -exponent of unpacked | |
/// Level 0: flip bits of rs | |
template <class T, int totalbits, int esbits, class FT, PositSpec positspec> | |
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::inv() const -> Posit | |
{ | |
auto u = unpack_low(); | |
if(u.fraction == 0) | |
{ | |
// 2^(R scale + E) with E unsigned | |
// becomes -R scale + (scale-E) | |
if(u.exp == 0) | |
u.regime = -u.regime; | |
else | |
{ | |
u.regime = -(u.regime+1); | |
u.exp = PT::POSIT_REG_SCALE-u.exp; // NOP if esbits == 0 | |
} | |
return pack_low(u); | |
} | |
else | |
{ | |
return pack_posit< T,totalbits,esbits,FT,positspec> (unpacked_low2full(u).inv()); | |
} | |
} | |
template <class T, int totalbits, int esbits, class FT, PositSpec positspec> | |
constexpr Posit<T,totalbits,esbits,FT,positspec> neg(Posit<T,totalbits,esbits,FT,positspec> x) { return -x; } | |
template <class T, int totalbits, int esbits, class FT, PositSpec positspec> | |
constexpr Posit<T,totalbits,esbits,FT,positspec> inv(Posit<T,totalbits,esbits,FT,positspec> x) { return ~x; } | |
/// Functor that moves an ebits-wide exponent to the most significant bits of an
/// hbits-wide holder of type T. The boolean selects the implementation; the primary
/// template is intentionally empty and only the two specialisations are callable.
template <class T, int hbits,int ebits, bool zeroes>
struct msb_exp
{
};

/// zeroes == true: the argument is ignored and the result is always 0.
template <class T, int hbits,int ebits>
struct msb_exp<T,hbits,ebits,true>
{
    constexpr T operator()(T) const { return static_cast<T>(0); }
};

/// zeroes == false: shifts the exponent left by (hbits - ebits).
template <class T, int hbits,int ebits>
struct msb_exp<T,hbits,ebits,false>
{
    constexpr T operator()(T exp) const { return static_cast<T>(exp << (hbits-ebits)); }
};
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::unpack_low() const -> UnpackedLow | |
{ | |
using PT=PositTrait<T,totalbits,esbits,positspec>; | |
using POSIT_UTYPE = typename PT::POSIT_UTYPE; | |
//using POSIT_STYPE = typename PT::POSIT_STYPE; | |
if(isinfinity()) // infinity | |
{ | |
return UnpackedLow(UnpackedT::Infinity, v < 0); | |
} | |
else if(isNaN()) | |
{ | |
return UnpackedLow(UnpackedT::NaN); | |
} | |
else if(v == 0) | |
return UnpackedLow(UnpackedT::Zero); | |
else | |
{ | |
//constexpr int POSIT_RS_MAX = PT::POSIT_SIZE-1-esbits; | |
//r.type = UnpackedT::Regular; | |
bool negativeSign = (v & PT::POSIT_SIGNBIT) != 0; | |
//std::cout << "unpacking " << std::bitset<sizeof(T)*8>(pa) << " abs " << std::bitset<sizeof(T)*8>(pcabs(pa)) << " r.negativeSign? " << r.negativeSign << std::endl; | |
T pa = negativeSign ? -v : v; | |
// std::cout << "after " << std::hex << pa << std::endl; | |
POSIT_UTYPE pars1 = pa << (PT::POSIT_EXTRA_BITS+1); // MSB: RS ES FS MSB | |
auto q = PT::decode_posit_rs(pars1); | |
int reg = q.first; | |
int rs = q.second; | |
POSIT_UTYPE pars2 = pars1 << rs; // MSB: ES FS | |
POSIT_UTYPE exp = bitset_leftmost_get_const<T,esbits>()(pars2); // bitset_leftmost_getT(pars,esbits); | |
POSIT_UTYPE pars = pars2 << esbits; // MSB: FS left aligned in T | |
return UnpackedLow(negativeSign,reg,exp,pars); | |
//r.fraction = pars; | |
//std::cout << "fraction is " << std::bitset<sizeof(FT)*8>(r.fraction) << " with rs bits " << rs << " for reg " << reg << std::endl; | |
//r.exp = exp; | |
//r.regime = reg; | |
} | |
} | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::pack_low(UnpackedLow x) -> Posit | |
{ | |
using PP=Posit<T,totalbits,esbits,FT,positspec>; | |
using PT=typename Posit<T,totalbits,esbits,FT,positspec>::PT; | |
using POSIT_UTYPE = typename PT::POSIT_UTYPE; | |
using POSIT_STYPE = typename PT::POSIT_STYPE; | |
switch(x.type) | |
{ | |
case UnpackedT::Infinity: | |
// if infinity is missing return nan | |
return positspec != PositSpec::WithNan ? (x.negativeSign ? PP::ninf(): PP::pinf()): PP::nan(); | |
case UnpackedT::Zero: | |
return PP(typename PP::DeepInit(),0); | |
case UnpackedT::NaN: | |
// if nan is missing return infinity | |
return positspec != PositSpec::WithInf ? PP::nan() : PP::pinf(); | |
default: | |
break; | |
} | |
auto exp = x.exp; | |
auto reg = x.regime; | |
// for reg>=0: 1 0[reg+1] => size is reg+2 | |
// for reg <0: 0[-reg] 0 => size is reg+1 | |
auto rs = -reg+1 > reg+2 ? -reg+1:reg+2; //std::max(-reg + 1, reg + 2); MSVC issue | |
auto es = (totalbits-rs-1) < esbits ? (totalbits-rs-1): esbits; //std::min((int)(totalbits-rs-1),(int)esbits); MSVC issue | |
POSIT_UTYPE regbits = reg < 0 ? (PT::POSIT_HOLDER_MSB >> -reg) : (PT::POSIT_MASK << (PT::POSIT_HOLDER_SIZE-(reg+1))); // reg+1 bits on the left | |
POSIT_UTYPE eexp = msb_exp<POSIT_UTYPE,PT::POSIT_HOLDER_SIZE,esbits,(esbits == 00)>()(exp); | |
POSIT_UTYPE fraction = x.fraction; | |
POSIT_STYPE p = ((fraction >> (rs+es+1)) | (eexp >> (rs+1)) | (regbits>>1)) >> (sizeof(T)*8-totalbits); | |
//std::cout << "incoming " << x << std::endl; | |
//std::cout << "fraction before " << std::bitset<sizeof(FT)*8>(x.fraction) << " and " << " after " << std::bitset<sizeof(POSIT_UTYPE)*8>(fraction) << " residual exponent " << exp << " from " << eexponent << " and regime " << reg << std::endl; | |
//std::cout << "output sign " << std::bitset<sizeof(T)*8>(p) << " then " << std::bitset<sizeof(T)*8>(-p) << std::endl; | |
return PP(typename PP::DeepInit(),x.negativeSign ? -p : p); | |
} | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::half() const -> Posit<T,totalbits,esbits,FT,positspec> | |
{ | |
UnpackedLow q = unpack_low(); | |
if(q.type == UnpackedT::Regular) | |
{ | |
// +- 2^(R expmax + E) 1.xyz == +- 2^(exp) 1.xyz | |
// where xyz are decimal digits | |
// 1.xyz / 2 => 0.1xyz ==> just exp-- | |
// | |
// exp-- mean E-- if E s not null | |
// otherwise R-- and exp | |
if(q.exp == 0) | |
{ | |
q.regime--; // will it undrflow? | |
q.exp = PT::POSIT_REG_SCALE-1; // maximum exponent | |
} | |
else | |
{ | |
q.exp--; | |
} | |
return pack_low(q); | |
} | |
else | |
{ | |
return *this; | |
} | |
} | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::twice() const -> Posit<T,totalbits,esbits,FT,positspec> | |
{ | |
UnpackedLow q = unpack_low(); | |
if(q.type == UnpackedT::Regular) | |
{ | |
// +- 2^(R expmax + E) 1.xyz == +- 2^(exp) 1.xyz | |
// where xyz are decimal digits | |
// 1.xyz / 2 => 0.1xyz ==> just exp-- | |
// | |
// exp-- mean E-- if E s not null | |
// otherwise R-- and exp | |
if(q.exp == PT::POSIT_REG_SCALE-1) | |
{ | |
q.regime++; // will it overflo?? | |
q.exp = 0; // maximum exponent | |
} | |
else | |
{ | |
q.exp++; | |
} | |
return pack_low(q); | |
} | |
else | |
{ | |
return *this; | |
} | |
} | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::unpacked_low2full(UnpackedLow q) -> UnpackedT | |
{ | |
using POSIT_UTYPE = typename PT::POSIT_UTYPE; | |
UnpackedT r; | |
r.type = q.type; | |
r.negativeSign = q.negativeSign; | |
if(q.type == UnpackedT::Regular) | |
{ | |
r.fraction = cast_msb<POSIT_UTYPE,PT::POSIT_HOLDER_SIZE,FT,UnpackedT::FT_bits>()(q.fraction); | |
r.exponent = PT::join_reg_exp(q.regime,q.exp); | |
} | |
return r; | |
} | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::unpacked_full2low(UnpackedT x) -> UnpackedLow | |
{ | |
if(x.type == UnpackedT::Regular) | |
{ | |
auto eexponent = clamp<decltype(x.exponent)>(x.exponent,PT::minexponent(),PT::maxexponent()); // no overflow | |
auto rr = PT::split_reg_exp(eexponent); | |
auto frac = cast_msb<FT,sizeof(FT)*8,typename PT::POSIT_UTYPE,sizeof(typename PT::POSIT_UTYPE)*8>()(x.fraction); | |
return UnpackedLow(x.negativeSign,rr.first,rr.second,frac); | |
} | |
else | |
{ | |
return UnpackedLow(x.type,x.negativeSign); | |
} | |
} | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> pack_posit(const typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT & x) | |
{ | |
using PP=Posit<T,totalbits,esbits,FT,positspec>; | |
return PP::pack_low(PP::unpacked_full2low(x)); | |
} | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
auto Posit<T,totalbits,esbits,FT,positspec>::analyze() -> info | |
{ | |
using UT=UnpackedT; | |
using POSIT_UTYPE = typename PT::POSIT_UTYPE; | |
//using POSIT_STYPE = typename PT::POSIT_STYPE; | |
auto pa = v; | |
info i; | |
if(isinfinity()) | |
{ | |
if(positspec == PositSpec::WithNanInf) | |
i.sign = (pa & PT::POSIT_SIGNBIT) != 0; | |
i.infinity = true; | |
return i; | |
} | |
else if(isNaN()) | |
{ | |
i.nan = true; | |
return i; | |
} | |
else if(v == 0) | |
{ | |
return i; | |
} | |
else | |
{ | |
//constexpr int POSIT_RS_MAX = PT::POSIT_SIZE-1-esbits; | |
i.sign = (pa & PT::POSIT_SIGNBIT) != 0; | |
pa = pcabs(pa); | |
POSIT_UTYPE pars = pa << (PT::POSIT_EXTRA_BITS+1); // output MSB: RS ES FS | |
auto q = PT::decode_posit_rs(pars); | |
int reg = q.first; | |
int rs = q.second; | |
pars <<= rs; // MSB: ES FS | |
POSIT_UTYPE exp = bitset_leftmost_getT(pars,esbits); | |
pars <<= esbits; // output MSB: FS left aligned in T | |
//std::cout << std::bitset<PT::POSIT_HOLDER_SIZE>(pars) << std::endl; | |
i.ifraction = sizeof(FT) >= sizeof(T) ? pars << (UT::FT_bits-PT::POSIT_HOLDER_SIZE) : pars >> (PT::POSIT_HOLDER_SIZE-UT::FT_bits); // output: FS left aligned in FT (larger or equal to T) | |
i.exponent = PT::join_reg_exp(reg,exp); | |
i.exp = exp; | |
i.rs = rs; | |
i.k = reg; | |
i.es = totalbits-rs-1 < esbits ? totalbits-rs-1 : esbits; // std::min((int)(totalbits-rs-1),(int)esbits); MSVC issue | |
i.fs = totalbits-rs-i.es-1; | |
return i; | |
} | |
} | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
CONSTEXPR14 auto unpack_posit(const Posit<T,totalbits,esbits,FT,positspec> & p) -> typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT | |
{ | |
using PP=Posit<T,totalbits,esbits,FT,positspec>; | |
return PP::unpacked_low2full(p.unpack_low()); | |
} | |
#ifndef FPGAHLS | |
template <class X> | |
void printinfo(std::ostream & ons, typename X::value_t v) | |
{ | |
using Q= typename printableinttype<typename X::value_t>::type; | |
X x(typename X::DeepInit(),v); // load the posit OK | |
typename X::UnpackedT u(x.unpack()); // unpack it OK | |
X xux(u); // pack | |
typename X::info ii = x.analyze(); | |
if(ii.infinity) | |
ons << (X::PT::positspec == PositSpec::WithNanInf ? (ii.sign ? "posit(-infinity)" : "posit(+infinity)") : "posit(infinity)"); | |
else if(ii.nan) | |
ons << "posit(nan)"; | |
else | |
{ | |
ons << " posit(" << (ii.sign ? "-" : "+") ; | |
ons << " rs/es/fs:" << std::dec << ii.rs << "/" << ii.es << "/" << ii.fs << " "; | |
ons << " k:" << std::dec << (Q)ii.k ; | |
ons << " exp:" << std::dec << (1<<ii.exp); | |
ons << " ifraction:" << std::hex << (Q)ii.ifraction; | |
ons << " binary:" << std::bitset<sizeof(typename X::value_t)*8>(xux.v) << ")"; | |
} | |
} | |
#endif | |
#ifndef FPGAHLS | |
template <class T> | |
struct posit_formatter | |
{ | |
public: | |
posit_formatter(T p): posit(p) {} | |
friend std::ostream & operator << (std::ostream & ons, const posit_formatter & x) | |
{ | |
printinfo<T>(ons,x.posit.v); | |
return ons; | |
} | |
T posit; | |
}; | |
#endif | |
namespace std | |
{ | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
inline CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> abs(Posit<T,totalbits,esbits,FT,positspec> z) | |
{ | |
using PP=Posit<T,totalbits,esbits,FT,positspec>; | |
return PP(PP::DeepInit(),pcabs(z.v)); | |
} | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
inline CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> min(Posit<T,totalbits,esbits,FT,positspec> a, Posit<T,totalbits,esbits,FT,positspec> b) | |
{ | |
return a <= b ? a : b; | |
} | |
template <class T,int totalbits, int esbits, class FT, PositSpec positspec> | |
inline CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> max(Posit<T,totalbits,esbits,FT,positspec> a, Posit<T,totalbits,esbits,FT,positspec> b) | |
{ | |
return a >= b ? a : b; | |
} | |
/// std::numeric_limits specialisation for Posit.
///
/// Extreme values are forwarded to the static factories of the posit type. The
/// availability of infinity and NaN follows the PositSpec in the same way pack_low()
/// does: infinity is absent for WithNan, NaN is absent for WithInf.
template <class B,int totalbits, int esbits, class FT, PositSpec positspec> class numeric_limits<Posit<B,totalbits,esbits,FT,positspec> > {
public:
using T=Posit<B,totalbits,esbits,FT,positspec>;
using PT=typename T::PT;
static constexpr bool is_specialized = true;
/// smallest positive value
static constexpr T min() noexcept { return T::min(); }
/// largest finite value
static constexpr T max() noexcept { return T::max(); }
/// most negative finite value
static constexpr T lowest() noexcept { return T::lowest (); }
//static constexpr int digits = 0; number of digits (in radix base) in the mantissa
//static constexpr int digits10 = 0;
static constexpr bool is_signed = true;
static constexpr bool is_integer = false;
static constexpr bool is_exact = false;
static constexpr int radix = 2;
/// distance between one and the next representable posit
static constexpr T epsilon() noexcept { return T::one().next()-T::one(); }
//static constexpr T round_error() noexcept { return T(); }
static constexpr int min_exponent = PT::minexponent();
// static constexpr int min_exponent10 = 0;
static constexpr int max_exponent = PT::maxexponent();
//static constexpr int max_exponent10 = 0;
// infinity has no encoding when the spec is WithNan (pack_low() returns NaN instead)
static constexpr bool has_infinity = positspec != PositSpec::WithNan;
// NaN has no encoding when the spec is WithInf (pack_low() returns infinity instead)
static constexpr bool has_quiet_NaN = positspec != PositSpec::WithInf;
static constexpr bool has_signaling_NaN = false;
//static constexpr float_denorm_style has_denorm = denorm_absent;
static constexpr bool has_denorm_loss = false;
static constexpr T infinity() noexcept { return T::infinity(); }
static constexpr T quiet_NaN() noexcept { return T::nan(); }
//static constexpr T signaling_NaN() noexcept { return T(); }
/// same value as min()
static constexpr T denorm_min() noexcept { return T::min(); }
static constexpr bool is_iec559 = false;
// the set of representable values is finite, as for every fixed-width arithmetic type
static constexpr bool is_bounded = true;
static constexpr bool is_modulo = false;
static constexpr bool traps = false;
static constexpr bool tinyness_before = false;
//static constexpr float_round_style round_style = round_toward_zero;
/*
round_toward_zero, if it rounds toward zero.
round_to_nearest, if it rounds to the nearest representable value.
round_toward_infinity, if it rounds toward infinity.
round_toward_neg_infinity, if it rounds toward negative infinity.
round_indeterminate, if the rounding style is indeterminable at compile time.
*/
};
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment