eruffaldi · January 24, 2019 09:58
diff --git a/posit_one.h b/posit_one.h
 // cppPosit 7b7b5ecde436a65923d679d91ddb1b29b711af27

 /**
 * Emanuele Ruffaldi (C) 2017-2018
 *
 * My personal bit hip pop using BMI extensions and a bit of constexpr!
 */

 #if defined(__SDSVHLS__) && !defined(FPGAHLS)
 #define FPGAHLS
 #endif

 #if defined(__llvm__) && __clang_major__ > 3
 #define HAS_bextr_u64
 #endif

 #include <stdint.h>
 #include <bitset>
 #include <cstring>
 #include <type_traits>

 // CPU detection x86
 #if defined(__x86_64) || defined(_M_X64) || defined(_M_IX86) || defined(__i386__)
 #define __is_x86_any__
 #endif

 #ifdef __is_x86_any__
 #ifdef _MSC_VER
 #include <intrin.h>
 #include <immintrin.h>
 #else
 #include <x86intrin.h>
 #endif
 #endif

 // CONSTEXPR14 expands to `constexpr` when the translation unit is compiled as
 // C++14 or newer (__cplusplus >= 201402L) and to nothing otherwise.
 // A definition provided before this point is left untouched.
 #ifndef CONSTEXPR14
 #if __cplusplus >= 201402L
 #define CONSTEXPR14 constexpr
 #else
 #define CONSTEXPR14
 #endif
 #endif

 // CLZCONSTEXPR qualifies the count-leading-zeros helpers below: it expands to
 // `constexpr`, except for FPGAHLS and MSVC builds where it expands to nothing.
 #if !defined(FPGAHLS) && !defined(_MSC_VER)
 #define CLZCONSTEXPR constexpr
 #else
 #define CLZCONSTEXPR
 #endif

 // C version
 // BIT_MASK(type, n): value of `type` with the lowest n bits set.
 // The whole expansion is parenthesized so that it composes with surrounding
 // operators (~, *, ==, ...), and n == 0 is answered without shifting by the
 // full width of the type, which would be undefined.
 // Note: __ONE_COUNT__ is evaluated twice; do not pass expressions with side effects.
 #define BIT_MASK(__TYPE__, __ONE_COUNT__) \
 	((__ONE_COUNT__) != 0 ? (__TYPE__)(((__TYPE__)-1) >> ((sizeof(__TYPE__) * 8) - (__ONE_COUNT__))) : (__TYPE__)0)

 /// Returns a value of type R whose lowest `onecount` bits are set.
 ///
 /// onecount == 0 yields 0 and onecount >= (bit width of R) yields all ones.
 /// Both cases are answered without performing a shift by the full width (or by
 /// a wrapped-around amount), which is undefined and is rejected when the
 /// function is evaluated in a constant expression.
 template <typename R>
 constexpr R bitmask(unsigned int const onecount)
 {
 	return onecount == 0
 			   ? static_cast<R>(0)
 			   : (onecount >= sizeof(R) * 8
 					  ? static_cast<R>(-1)
 					  : static_cast<R>(static_cast<R>(-1) >> ((sizeof(R) * 8) - onecount)));
 }

 /// Returns a copy of the argument as a prvalue of the referred-to type
 /// (any reference on T is stripped from the return type).
 template <typename T>
 constexpr auto makeprval(T &&t) -> typename std::remove_reference<T>::type
 {
 	return t;
 }
 
 // isprvalconstexpr(e) evaluates noexcept(makeprval(e)). It is used by the
 // selection macros of this file to choose between two implementations.
 // NOTE(review): the outcome depends on how the compiler treats constant
 // expressions inside noexcept — confirm for the targeted language standard.
 #define isprvalconstexpr(e) noexcept(makeprval(e))

 #ifdef _MSC_VER
 	// MSVC replacement for the GCC/Clang __builtin_clz intrinsic.
 	// note this is not constexpr due to _BitScanReverse
 	// The index written by _BitScanReverse is turned into a leading-zero
 	// count by subtracting it from 31.
 	// NOTE(review): r keeps its initial 0 if _BitScanReverse does not write it
 	// (x == 0), giving 31 — callers are told never to pass 0.
 	static uint32_t __inline __builtin_clz(uint32_t x) 
 	{
        unsigned long r = 0;
        _BitScanReverse(&r, x);
        return (31-r);
    }
 #endif

 // __builtin_clzll
 // Leading-zero count of a 64-bit value built from the 32-bit __builtin_clz:
 // if the upper 32 bits are non-zero they are counted directly, otherwise the
 // result is 32 plus the count of the lower half (v is converted to the
 // narrower parameter type of __builtin_clz).
 // v == 0 ends up calling __builtin_clz with 0; callers must avoid it.
 CLZCONSTEXPR inline uint64_t __builtin_clz64(uint64_t v)
 {
 	return (v >> 32 != 0 ? __builtin_clz(v >> 32) : 32 + __builtin_clz(v));
 }

 // findbitleftmostC: number of zero bits above the leftmost set bit, counted
 // within the width of the argument type. One overload per unsigned width.
 //
 // this is constexpr, others not
 // never call with input==0
 CLZCONSTEXPR inline int findbitleftmostC(uint64_t input)
 {
 	return __builtin_clz64(input);
 }
 
 // this is constexpr, others not
 // never call with input==0
 CLZCONSTEXPR inline int findbitleftmostC(uint32_t input)
 {
 	return __builtin_clz(input);
 }
 
 // this is constexpr, others not
 // never call with input==0
 // The value is widened to 32 bits; the 16 extra leading zeros are removed.
 CLZCONSTEXPR inline int findbitleftmostC(uint16_t input)
 {
 	return __builtin_clz((uint32_t)input) - 16;
 }
 
 // this is constexpr, others not
 // never call with input==0
 // The value is widened to 32 bits; the 24 extra leading zeros are removed.
 CLZCONSTEXPR inline int findbitleftmostC(uint8_t input)
 {
 	return __builtin_clz((uint32_t)input) - 24;
 }

 #if 0
 #if !defined(__arm__) && !defined(FPGAHLS)
 # make a more reliable detection
 inline int findbitleftmost(uint8_t input)
 {
 	return __lzcnt16(input) - 8;
 }

 inline int findbitleftmost(uint16_t input)
 {
 	return __lzcnt16(input);
 }

 inline int findbitleftmost(uint32_t input)
 {
 	return __lzcnt32(input);
 }
 /* CSIM
 inline int findbitleftmost(uint64_t input)
 {
 	return  __lzcnt64(input);
 }
 */

 // detect constexpr for X so we can speedup
 #define findbitleftmost(X) (isprvalconstexpr(X) ? findbitleftmostC(X) : findbitleftmost(X))
 #endif
 #endif

 // indices are with 0 on the right
 /// Returns `input` with the `size`-bit field starting at bit `offset` replaced
 /// by the low `size` bits of `value` (offset and size given at compile time).
 ///
 /// The clearing mask is shifted to the field position: previously the low
 /// `size` bits of `input` were cleared instead of the addressed field, so the
 /// old field content leaked into the result whenever offset != 0.
 template <class T, class Y, int offset, int size>
 constexpr T bitset_part(T input, Y value)
 {
 	return static_cast<T>((input & ~(bitmask<T>(size) << offset)) | ((value & bitmask<T>(size)) << offset));
 }
 
 /// Same operation with `offset` and `size` supplied at run time.
 template <class T, class Y>
 CONSTEXPR14 T bitset_part(T input, Y value, int offset, int size)
 {
 	const T field = bitmask<T>(size); // `size` ones, right aligned
 	return static_cast<T>((input & ~(field << offset)) | ((value & field) << offset));
 }

 /// Extracts the `size`-bit field of `input` that starts at bit `offset`
 /// (bit 0 is the rightmost); the result is right aligned.
 /// Compile-time offset/size variant.
 template <class T, int offset, int size>
 constexpr T bitset_get(T input)
 {
 	return (input >> offset) & bitmask<T>(size);
 }
 
 /// Run-time offset/size variant of the field extraction above.
 template <class T>
 CONSTEXPR14 T bitset_get(T input, int offset, int size)
 {
 	auto M = bitmask<T>(size);
 	return (input >> offset) & M;
 }
 #if defined(__is_x86_any__) && !defined(FPGAHLS) && defined(__BMI__)
 /* CSIM
 inline uint64_t bitset_gethw(uint64_t input, int offset, int size)
 {
 	return _bextr_u64(input, offset, size);
 }
 */

 // Field extraction through the _bextr_u32 intrinsic (this section is only
 // compiled on x86 with __BMI__ and without FPGAHLS).
 // Only 16- and 32-bit overloads are provided here; the 64-bit one above is
 // commented out.
 inline uint16_t bitset_gethw(uint16_t input, int offset, int size)
 {
 	return _bextr_u32(input, offset, size);
 }
 
 inline uint32_t bitset_gethw(uint32_t input, int offset, int size)
 {
 	return _bextr_u32(input, offset, size);
 }
 //#ifdef HAS_bextr_u64
 // bitset_gethwT<offset, size>: intrinsic-based field extraction with the field
 // position given as template arguments. The 64-bit overload uses _bextr_u64
 // unconditionally (the HAS_bextr_u64 guard is commented out).
 template <int offset, int size>
 uint64_t bitset_gethwT(uint64_t input)
 {
 	return _bextr_u64(input, offset, size);
 }
 //#endif
 
 template <int offset, int size>
 uint32_t bitset_gethwT(uint32_t input)
 {
 	return _bextr_u32(input, offset, size);
 }
 
 // The 16- and 8-bit overloads pass the value to the 32-bit intrinsic and
 // narrow the result back on return.
 template <int offset, int size>
 uint16_t bitset_gethwT(uint16_t input)
 {
 	return _bextr_u32(input, offset, size);
 }
 
 template <int offset, int size>
 uint8_t bitset_gethwT(uint8_t input)
 {
 	return _bextr_u32(input, offset, size);
 }

 // with template
 // Chooses between the intrinsic version (bitset_gethwT) and the portable
 // bitset_get template depending on isprvalconstexpr(X).
 // NOTE(review): the intrinsic (non-constexpr) version is picked when
 // isprvalconstexpr(X) is true; the disabled findbitleftmost macro earlier in
 // this file picks the constexpr version in that case — confirm which
 // direction is intended.
 #define bitset_getT(X, A, B) (isprvalconstexpr(X) ? bitset_gethwT<A, B>(X) : bitset_get<decltype(X), A, B>(X))
 
 // with flexible arguments
 // Function-like macro shadowing the run-time bitset_get overload; the inner
 // bitset_get is not re-expanded, so it names the function.
 // NOTE(review): bitset_gethw has no 8-bit or 64-bit overload in this section.
 #define bitset_get(X, A, B) (isprvalconstexpr(X) && isprvalconstexpr(A) && isprvalconstexpr(B) ? bitset_gethw(X, A, B) : bitset_get(X, A, B))
 #else

 #define bitset_getT(X, A, B) (bitset_get<decltype(X), A, B>(X))

 #endif
 /// Function object returning the N leftmost bits of X, right aligned.
 /// X is first cast to the unsigned counterpart of T, then the field starting
 /// at bit (sizeof(X) * 8 - N) with length N is extracted through bitset_getT.
 template <class T, int N>
 struct bitset_leftmost_get_const
 {
 	constexpr T operator()(T X) const
 	{
 		return (T)bitset_getT((typename std::make_unsigned<T>::type)(X), sizeof(X) * 8 - N, N);
 	}
 };
 
 /// N == 0: no bits are requested, the result is always 0.
 template <class T>
 struct bitset_leftmost_get_const<T, 0>
 {
 	constexpr T operator()(T X) const
 	{
 		return 0;
 	}
 };

 // N leftmost bits of X, right aligned. N is parenthesized so that an
 // expression argument (e.g. a + b) keeps its meaning inside the subtraction.
 #define bitset_leftmost_getT(X, N) bitset_getT(X, sizeof(X) * 8 - (N), (N))

 /// Wraps x in a std::bitset with one position per bit of T (sizeof(T) * 8).
 template <class T>
 constexpr std::bitset<sizeof(T) * 8> bitset_all(const T x)
 {
 	return std::bitset<sizeof(T) * 8>(x);
 }

 /// std::bitset holding the N most significant bits of x: x is shifted right
 /// by (sizeof(T) * 8 - N) before the bitset is built.
 /// NOTE(review): N == 0 makes the shift amount equal to the width of T.
 template <class T, int N>
 constexpr std::bitset<N> bitset_msb(const T x)
 {
 	return std::bitset<N>(x >> (sizeof(T) * 8 - N));
 }

 /// std::bitset<N> constructed directly from x; the bitset keeps the N least
 /// significant bits.
 template <class T, int N>
 constexpr std::bitset<N> bitset_lsb(const T x)
 {
 	return std::bitset<N>(x);
 }

 /// Branching absolute value: x is returned unchanged unless it compares
 /// below zero, in which case its negation is returned.
 template <class T>
 constexpr T pcabs(T x)
 {
 	return !(x < 0) ? x : -x;
 }

 ///
 /// absolute value of signed integer without conditions
 ///
 /// mask is x shifted right by (bit width - 1); the result is (x + mask) ^ mask.
 /// NOTE(review): this relies on the right shift of a negative value filling
 /// with the sign bit (mask == -1 for negative x) — confirm for the targeted
 /// compilers. The most negative value of T has no positive counterpart.
 template <class T> // ,typename std::enable_if<std::is_integral<T>::value ,int>::type* = nullptr>
 CONSTEXPR14 T pabs(T x)
 {
 	T mask = (x >> (sizeof(T) * 8 - 1));
 	return (x + mask) ^ mask;
 }

 // support: primary template, deliberately empty. The trailing bool picks one
 // of the two partial specializations below.
 template <class A, int abits, class B, int bbits, bool abits_gt_bbits>
 struct cast_msb_
 {
 };
 
 // support: narrowing case, #A > #B
 template <class A, int abits, class B, int bbits>
 struct cast_msb_<A, abits, B, bbits, true>
 {
 	/// Keeps the bbits leftmost bits of the abits-wide value by discarding the
 	/// (abits - bbits) rightmost ones.
 	constexpr B operator()(A value) const
 	{
 		return static_cast<B>(value >> (abits - bbits));
 	}
 };
 
 // support: widening (or equal width) case, #B >= #A
 template <class A, int abits, class B, int bbits>
 struct cast_msb_<A, abits, B, bbits, false>
 {
 	/// Converts to B first, then moves the value left by (bbits - abits) so
 	/// that it stays aligned on the most significant bit.
 	constexpr B operator()(A value) const
 	{
 		return static_cast<B>(value) << (bbits - abits);
 	}
 };

 /// MSB aligned data cast from type A to type B
 /// abits / bbits are the number of bits used in A and B. The implementation is
 /// inherited from cast_msb_, selected by (abits > bbits). Both types must be
 /// unsigned.
 template <class A, int abits, class B, int bbits>
 struct cast_msb : public cast_msb_<A, abits, B, bbits, (abits > bbits)>
 {
 	static_assert(std::is_unsigned<A>::value, "required unsigned A");
 	static_assert(std::is_unsigned<B>::value, "required unsigned B");
 };

 // support: primary template, empty; the bool selects a specialization
 template <class A, int abits, class B, int bbits, bool abits_gt_bbits>
 struct cast_right_to_left_
 {
 };
 
 // support
 // Takes the abits rightmost bits of value (bitset_getT with offset 0) and
 // keeps their bbits leftmost ones.
 template <class A, int abits, class B, int bbits>
 struct cast_right_to_left_<A, abits, B, bbits, true>
 {
 
 	constexpr B operator()(A value) const
 	{
 		// #A > #B
 		return bitset_getT(value, 0, abits) >> (abits - bbits);
 	}
 };
 
 // support
 // #B >= #A
 // Takes the abits rightmost bits of value, converts them to B and shifts them
 // left by (bbits - abits).
 template <class A, int abits, class B, int bbits>
 struct cast_right_to_left_<A, abits, B, bbits, false>
 {
 	constexpr B operator()(A value) const
 	{
 		return ((B)bitset_getT(value, 0, abits)) << (bbits - abits);
 	}
 };

 /// Cast of a right-aligned abits-wide field stored in A to a left-aligned
 /// bbits-wide value of type B. The implementation is inherited from
 /// cast_right_to_left_, selected by (abits > bbits). Both types must be unsigned.
 template <class A, int abits, class B, int bbits>
 struct cast_right_to_left : public cast_right_to_left_<A, abits, B, bbits, (abits > bbits)>
 {
 	static_assert(std::is_unsigned<A>::value, "required unsigned A");
 	static_assert(std::is_unsigned<B>::value, "required unsigned B");
 };


 /// Describes a signed fixed-point format.
 ///
 /// T is the signed integral holding type, N the total number of bits of the
 /// format and F the number of fraction bits (0 <= F <= N).
 /// The holding type must provide at least N bits: the check was inverted and
 /// rejected every holder wider than N while accepting holders that are too small.
 template <class T, int N, int F>
 struct fixedtrait
 {
    static_assert(N > 0,"fixedtrait total bits should be positive");
    static_assert(sizeof(T)*8 >= static_cast<unsigned int>(N > 0 ? N : 0),"fixedtrait holding type is too small");
    static_assert(F <= N && F >= 0,"fraction bits should be less than N and not negative");
    static_assert(std::is_integral<T>::value && std::is_signed<T>::value,"only for signed integrals");
 	using value_t = T;
 	static constexpr int totalbits = N;
 	static constexpr int fraction_bits = F;
 };

 #if 0
 #if __cplusplus >= 201402L
 template <class T>
 struct fixedtrait<T,sizeof(T)*8,0>
 {
    static_assert(std::is_integral<T>::value && std::is_signed<T>::value,"only for signed integrals");
 	using value_t = T;
 	static constexpr int totalbits = sizeof(T)*8;
 	static constexpr int fraction_bits = 0;
 };
 #endif
 #endif
 #include <limits>
 #include <cstdint>

 #if defined(__SDSVHLS__) && !defined(FPGAHLS)
 #define FPGAHLS
 #endif

 // Thin wrapper giving a distinct C++ type to a custom float whose bits are
 // held in the integer member `what`.
 template <class T>
 struct valuewrap
 {
 	T what; // stored value
 
 	// default: zero
 	constexpr valuewrap()
 		: what(0)
 	{
 	}
 
 	// from a value of the holding type (implicit conversion allowed)
 	constexpr valuewrap(T raw)
 		: what(raw)
 	{
 	}
 };

 // 16-bit holder used as value type of half_trait; inherits the constructors
 // of valuewrap<uint16_t>.
 struct halffloat : public valuewrap<uint16_t>
 {
 	using valuewrap<uint16_t>::valuewrap;
 };

 // 16-bit holder used as value type of the alternative 16-bit traits
 // (half_traitalt and bfloat16_trait); a distinct type from halffloat.
 struct halffloatalt : public valuewrap<uint16_t>
 {
 	using valuewrap<uint16_t>::valuewrap;
 };

 // 8-bit holder used as value type of microfloat_trait.
 struct microfloat : public valuewrap<uint8_t>
 {
 	using valuewrap<uint8_t>::valuewrap;
 };


 /// Generic description of a binary floating point layout made of one sign bit,
 /// exp_bits exponent bits and frac_bits fraction bits.
 /// holder_T is an unsigned integer capable of storing 1+exp_bits+frac_bits exactly
 /// value_T  is the struct or native type used for this 
 /// with_denorm_ is exposed as with_denorm.
 template <int exp_bits, int frac_bits, class value_T, class holder_T, bool with_denorm_ = true>
 struct any_floattrait
 {
    using value_t = value_T;
    using holder_t = holder_T;

    // the layout must fill the holder completely
    static_assert(exp_bits+1+frac_bits == sizeof(holder_t)*8,"holding size");
    //static_assert<!std::is_signed<holder_t> >;

    static constexpr int data_bits = exp_bits+frac_bits+1;
    static constexpr int exponent_bits =  exp_bits;
 	static constexpr int fraction_bits = frac_bits;
 	static constexpr int exponent_bias = (1<<(exp_bits-1))-1; 
    // largest exponent field value that is not all ones
    static constexpr int exponent_max =  (1<<(exp_bits))-2;
 	static constexpr int with_denorm = with_denorm_;
    static constexpr uint32_t exponent_mask = (1<<exponent_bits)-1;

    // Bit patterns of notable values; the trailing comments give the layout as
    // sign, exponent field [e bits], fraction field [f bits].
    enum : holder_t {
    	signbit = ((holder_t)(1))<<(data_bits-1),

     pinfinity_h = bitmask<holder_t>(exp_bits) << frac_bits, // 0 1[e] 0[f]
     ninfinity_h = pinfinity_h | signbit, // 1 1[e] 0[f]

    // many NaN encodings are possible, we pick the one with only the topmost fraction bit set
     nan_h = bitmask<holder_t>(exp_bits+1) << (frac_bits-1), // 0 1[e] 1 0[f-1]
     one_h = bitmask<holder_t>(exp_bits-1) << (frac_bits), // 0 0 1[e-1] 0[f]
     afterone_h = one_h+1, // 0 0 1[e-1] 0[f-1] 1
     max_h = (bitmask<holder_t>(exp_bits-1) << (frac_bits+1)) | bitmask<holder_t>(frac_bits), // 0 1[e-1] 0 1[f]
     lowest_h = max_h | signbit,
     min_h = ((holder_t)(1)) << frac_bits, // 0 0[e-1] 1 0[f]
     two_h = ((holder_t)(1)) << (exp_bits-1+frac_bits) // 0 1 0[e-1+f]
 };
 };


 // pulp8 alternative
 // 8-bit float: 1 sign, 5 exponent and 2 fraction bits
 using microfloat_trait =  any_floattrait<5,2,microfloat,uint8_t>;
 
 
 // PULP 8E,7M vs classic 5E,10P
 // 16-bit float: 1 sign, 8 exponent and 7 fraction bits
 using half_traitalt =  any_floattrait<8,7,halffloatalt,uint16_t>;
 
 // Intel bfloat16 as 8,7 without denormals
 // Shares the halffloatalt value type with half_traitalt; only with_denorm differs.
 using bfloat16_trait = any_floattrait<8,7,halffloatalt,uint16_t, false>;

 // https://en.wikipedia.org/wiki/16-bit
 // Hand-written trait for the 16-bit half precision format
 // (1 sign, 5 exponent and 10 fraction bits); same member names as any_floattrait.
 struct half_trait // : public any_floattrait<5,10,halffloat,uint16_t>
 {
    using value_t = halffloat;
    using holder_t = uint16_t;
    static constexpr holder_t ninfinity_h = 0xFC00;
    static constexpr holder_t pinfinity_h = 0x7C00;
    static constexpr holder_t nan_h = 0x7E00;
    static constexpr holder_t one_h = 0x3C00; // one next is just + 1
    static constexpr holder_t two_h = 0x4000; 
    static constexpr holder_t max_h = 0x7bff; 
    static constexpr holder_t min_h = 0x0400;
    static constexpr holder_t lowest_h = 0xfbff; // -max_h
 	static constexpr int with_denorm = true;
    // max subnormal 0 00000 1111111111 = 2^-14 * (1023/1024) ~ 6.09756e-5
    // min subnormal 0 00000 0000000001 = 2^-24 ~ 5.96046e-8

    static constexpr int data_bits = 16; // can be derived from value_t
    static constexpr int exponent_bits =  5;
 	static constexpr int fraction_bits = 10; // can be derived from data_bits and exponent_bits
    static constexpr int exponent_bias = 15;
    static constexpr int exponent_max =  30; // can be derived from bias and bits
 	static constexpr holder_t signbit = ((holder_t)(1))<<(data_bits-1);
    static constexpr uint32_t exponent_mask = (1<<exponent_bits)-1; // TODO make it from exponent_bits
 };

 // https://en.wikipedia.org/wiki/Single-precision_floating-point_format
 // Trait for 32-bit single precision (1 sign, 8 exponent and 23 fraction bits).
 // The members that need the native float type are left out for FPGAHLS builds.
 struct single_trait
 {
 #ifndef FPGAHLS
 	using value_t = float;
 #endif
 	using holder_t = uint32_t;
 #ifndef FPGAHLS
 	static constexpr value_t zero = 0.0f;
 	static constexpr value_t ninfinity = -std::numeric_limits<value_t>::infinity();
 	static constexpr value_t pinfinity = std::numeric_limits<value_t>::infinity();
 #endif
 
    // bit patterns of notable values
    static constexpr holder_t ninfinity_h = 0xff800000;
    static constexpr holder_t pinfinity_h = 0x7f800000;
    static constexpr holder_t nan_h = 0x7fc00000;
    static constexpr holder_t one_h = 0x3f800000;
    static constexpr holder_t two_h = 0x40000000;
    static constexpr holder_t max_h = 0x7f7fffff;
    static constexpr holder_t min_h = 0x00800000;
    static constexpr holder_t denorm_min_h = 1; // just 1 after 0
    static constexpr holder_t epsilon_h = 0x34000000; // trickier
    static constexpr holder_t lowest_h = 0xff7fffff; // -max_h
 	static constexpr int with_denorm = true;
 
 	static constexpr int data_bits = 32; // can be derived from value_t
 	static constexpr int exponent_bits =  8;
 	static constexpr int fraction_bits = 23; // can be derived from data_bits and exponent_bits
 	static constexpr int exponent_bias = 127;
 	static constexpr int exponent_max =  254; // can be derived from bias and bits
 	static constexpr holder_t signbit = ((holder_t)(1))<<(data_bits-1);
 	static constexpr uint32_t exponent_mask = (1<<exponent_bits)-1; // TODO make it from exponent_bits
 };

 // https://en.wikipedia.org/wiki/Double-precision_floating-point_format
 // Trait for 64-bit double precision (1 sign, 11 exponent and 52 fraction bits).
 // The members that need the native double type are left out for FPGAHLS builds.
 struct double_trait
 {
 #ifndef FPGAHLS
 	using value_t = double;
 #endif
 	using holder_t = uint64_t;
 #ifndef FPGAHLS
 	static constexpr value_t zero = 0.0;
 	static constexpr value_t ninfinity = -std::numeric_limits<value_t>::infinity();
 	static constexpr value_t pinfinity = std::numeric_limits<value_t>::infinity();
 #endif
    // bit patterns of notable values
    static constexpr holder_t ninfinity_h = 0xfff0000000000000ULL;
    static constexpr holder_t pinfinity_h = 0x7ff0000000000000ULL;
    static constexpr holder_t nan_h = 0x7ff8000000000000ULL;
    static constexpr holder_t one_h = 0x3ff0000000000000ULL;
    static constexpr holder_t two_h = 0x4000000000000000ULL;
    // largest finite: exponent field 0x7fe, fraction all ones
    static constexpr holder_t max_h = 0x7fefffffffffffff; // TODO
    // max_h with the sign bit set
    static constexpr holder_t lowest_h = 0xffefffffffffffff; // TODO
    // exponent field 1, fraction 0 (1 << 52)
    static constexpr holder_t min_h = 0x10000000000000; // TODO
 	static constexpr int with_denorm = true;
 
 	static constexpr int data_bits = 64; // can be derived from value_t
 	static constexpr int exponent_bits =  11;
 	static constexpr int fraction_bits = 52; // can be derived from data_bits and exponent_bits
 	static constexpr int exponent_bias = 1023;
 	static constexpr int exponent_max =  2046;
 	static constexpr holder_t signbit = ((holder_t)(1))<<(data_bits-1);
 	static constexpr uint32_t exponent_mask = (((holder_t)(1))<<exponent_bits)-1; // 7FF
 };

 #ifdef FLT128_MAX
 // https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format
 // https://gcc.gnu.org/onlinedocs/gcc/Floating-Types.html
 // Trait for the 128-bit quadruple precision format
 // (1 sign, 15 exponent and 112 fraction bits).
 //
 // All bit patterns are derived from the layout constants with 128-bit
 // arithmetic: hexadecimal literals wider than 64 bits are not portable, and the
 // sign bit cannot be produced by shifting an int by 127 positions.
 struct float128_trait
 {
 #ifndef FPGAHLS
 	using value_t = __float128;
 #endif
 	using holder_t = unsigned __int128;
 #ifndef FPGAHLS
 	static constexpr value_t zero = 0.0;
 	// NOTE(review): meaningful only if the standard library specializes
 	// std::numeric_limits for __float128 — confirm for the targeted toolchain.
 	static constexpr value_t ninfinity = -std::numeric_limits<value_t>::infinity();
 	static constexpr value_t pinfinity = std::numeric_limits<value_t>::infinity();
 #endif
 
 	// layout (declared first: the patterns below are computed from it)
 	static constexpr int data_bits = 128; // can be derived from value_t
 	static constexpr int exponent_bits =  15;
 	static constexpr int fraction_bits = 112; // can be derived from data_bits and exponent_bits
 	static constexpr int exponent_bias = 16383;
 	static constexpr int exponent_max =  16383*2;
 	static constexpr int with_denorm = true;
 	static constexpr holder_t signbit = ((holder_t)(1))<<(data_bits-1);
 	static constexpr uint32_t exponent_mask = (1<<exponent_bits)-1; // 7FFF
 
 	// bit patterns of notable values
 	static constexpr holder_t pinfinity_h = ((holder_t)(exponent_mask)) << fraction_bits; // 0 1[15] 0[112]
 	static constexpr holder_t ninfinity_h = pinfinity_h | signbit;                        // 1 1[15] 0[112]
 	static constexpr holder_t nan_h = pinfinity_h | (((holder_t)(1)) << (fraction_bits-1)); // 0 1[15] 1 0[111]
 	static constexpr holder_t one_h = ((holder_t)(exponent_bias)) << fraction_bits;       // exponent field = bias
 	static constexpr holder_t two_h = ((holder_t)(exponent_bias+1)) << fraction_bits;     // exponent field = bias+1
 	static constexpr holder_t max_h = (((holder_t)(exponent_max)) << fraction_bits) | ((((holder_t)(1)) << fraction_bits) - 1); // largest finite
 	static constexpr holder_t lowest_h = max_h | signbit;                                 // -max_h
 	static constexpr holder_t min_h = ((holder_t)(1)) << fraction_bits;                   // exponent field 1, fraction 0
 };

 #endif

 /**
 Casting between differently arbitrary floats requires:

 - exponent:
 		oldexponent - oldbias +newbias

 - fraction assumed to be left aligned
 		nothing just keep left aligned, only newfraction bits be used

 See:
 	template <class FT,class ET>
 	template <class Trait>
 	CONSTEXPR14 typename Trait::holder_t Unpacked<FT,ET>::pack_xfloati() const

 	template <class Trait, typename = typename  std::enable_if<!std::is_integral<typename Trait::value_t>::value>::type> 
    explicit CONSTEXPR14 Unpacked(typename Trait::holder_t i) { unpack_xfloat<Trait>(i); }

 */



 /// Maps a floating point value type to its trait: `type` repeats the value
 /// type and `trait` names the matching *_trait struct.
 /// The primary template is empty, so unsupported types have no `trait` member.
 /// No specialization for microfloat is visible in this part of the file.
 template <class T>
 struct float2trait
 {};
 
 template <>
 struct float2trait<float>
 {
 	using type = float;
 	using trait = single_trait;
 };
 
 template <>
 struct float2trait<double>
 {
 	using type = double;
 	using trait = double_trait;
 };
 
 template <>
 struct float2trait<halffloat>
 {
 	using type = halffloat;
 	using trait = half_trait;
 };
 
 template <>
 struct float2trait<halffloatalt>
 {
 	using type = halffloatalt;
 	using trait = half_traitalt;
 };
 
 // only available together with float128_trait
 #ifdef FLT128_MAX
 template <>
 struct float2trait<__float128>
 {
 	using type = __float128;
 	using trait = float128_trait;
 };
 #endif

 #if 0
 template <class Trait>
 struct limithelper
 {
 	using T=Trait;
 	  static constexpr bool is_specialized = true;
 	  static constexpr T min() noexcept { return T::min(); }
 	  static constexpr T max() noexcept { return T::max(); }
 	  static constexpr T lowest() noexcept { return T::lowest	(); }
 	  //static constexpr int  digits = 0; number of digits (in radix base) in the mantissa 
 	  //static constexpr int  digits10 = 0;
 	  static constexpr bool is_signed = true;
 	  static constexpr bool is_integer = false;
 	  static constexpr bool is_exact = false;
 	  static constexpr int radix = 2;
 	  static constexpr T epsilon() noexcept { return T::one_next()-T::one(); }
 	  //static constexpr T round_error() noexcept { return T(); } 

 	  // this is also the maximum integer
 	  static constexpr int  min_exponent = PT::minexponent();
 	  // static constexpr int  min_exponent10 = 0;
 	  static constexpr int  max_exponent = PT::maxexponent();
 	  //static constexpr int  max_exponent10 = 0;

 	  static constexpr bool has_infinity = true;
 	  static constexpr bool has_quiet_NaN = withnan;
 	  static constexpr bool has_signaling_NaN = false;
 	  //static constexpr float_denorm_style has_denorm = denorm_absent;
 	  static constexpr bool has_denorm_loss = false;
 	  static constexpr T infinity() noexcept { return T::infinity(); }
 	  static constexpr T quiet_NaN() noexcept { return T::nan(); }
 	  //static constexpr T signaling_NaN() noexcept { return T(); }
 	  static constexpr T denorm_min() noexcept { return T::min(); }

 	  static constexpr bool is_iec559 = false;
 	  static constexpr bool is_bounded = false;
 	  static constexpr bool is_modulo = false;

 	  static constexpr bool traps = false;
 	  static constexpr bool tinyness_before = false;
 };
 #endif

 #if 0

 namespace std
 {
 	template <>
 	struct limits<halffloat>: public limithelper<half_trait>
 	{

 	};

 	template <>
 	struct limits<halffloatalt>: public limithelper<half_traitalt>
 	{

 	};

 	template <>
 	struct limits<microfloat>: public limithelper<microfloat_trait>
 	{

 	};

 }
 #endif
 /**
 * Various Integer Type Helpers
 *
 * Emanuele Ruffaldi 2017
 */
 #include <cstdint>
 #include <inttypes.h>

 #if (defined(__llvm__) && __clang_major__ > 3) || (defined(__GNUC__) && __GNUC__ >= 4)
 #define HAS128T
 #endif

 #ifdef HAS128T
 #define int128_t __int128_t
 #endif
 /// returns the largest type between two (by sizeof); A is chosen when both
 /// have the same size
 template <class A,class B>
 using largest_type = typename std::conditional<sizeof(A) >= sizeof(B), A, B>::type;

 /// Selection of the smallest signed integer type able to hold a given number
 /// of bits.
 /// Equivalent to: http://www.boost.org/doc/libs/1_48_0/libs/integer/doc/html/boost_integer/integer.html#boost_integer.integer.sized
 namespace detail_least
 {
 	// Category counts how many of the limits 8/16/32/64/128 the requested
 	// size fits in: 5 -> 8 bit ... 1 -> 128 bit. Other categories have no
 	// `least` member.
 	template <int Category>
 	struct int_least_helper
 	{
 	};
 
 	template <>
 	struct int_least_helper<5>
 	{
 		using least = int8_t;
 	};
 	template <>
 	struct int_least_helper<4>
 	{
 		using least = int16_t;
 	};
 	template <>
 	struct int_least_helper<3>
 	{
 		using least = int32_t;
 	};
 	template <>
 	struct int_least_helper<2>
 	{
 		using least = int64_t;
 	};
 #ifdef HAS128T
 	template <>
 	struct int_least_helper<1>
 	{
 		using least = int128_t;
 	};
 #endif
 }
 
 /// int_least_bits<N>::type is the signed integer type selected for N bits
 template <unsigned int N>
 struct int_least_bits
 {
 	using type = typename detail_least::int_least_helper<
 		((N) <= 128) + ((N) <= 64) + ((N) <= 32) + ((N) <= 16) + ((N) <= 8)>::least;
 };

 /// iostream prints int8_t and uint8_t as characters. printableinttype<T>::type
 /// names a type to convert to before printing: the 16-bit counterpart for the
 /// two 8-bit types, T itself otherwise.
 template <class T>
 struct printableinttype
 {
 	using type = T;
 };
 
 template <>
 struct printableinttype<int8_t>
 {
 	using type = int16_t;
 };
 
 template <>
 struct printableinttype<uint8_t>
 {
 	using type = uint16_t;
 };
 
 /// const-qualified types map like their unqualified type, with const re-applied
 template <class T>
 struct printableinttype<const T>
 {
 	using typex = typename printableinttype<T>::type;
 	using type = const typex;
 };

 /// nextinttype<T>::type is the integer type twice as wide as T with the same
 /// signedness. The primary template is empty: types without a wider
 /// counterpart have no `type` member.
 template <class T>
 struct nextinttype
 {
 };
 
 // unsigned chain: 8 -> 16 -> 32 -> 64 (-> 128 when HAS128T is defined)
 template <>
 struct nextinttype<uint8_t>
 {
 	using type = uint16_t;
 };
 
 template <>
 struct nextinttype<uint16_t>
 {
 	using type = uint32_t;
 };
 
 template <>
 struct nextinttype<uint32_t>
 {
 	using type = uint64_t;
 };
 
 #ifdef HAS128T
 template <>
 struct nextinttype<uint64_t>
 {
 	using type = __uint128_t;
 };
 #endif
 
 // signed chain: 8 -> 16 -> 32 -> 64 (-> 128 when HAS128T is defined)
 template <>
 struct nextinttype<int8_t>
 {
 	using type = int16_t;
 };
 
 template <>
 struct nextinttype<int16_t>
 {
 	using type = int32_t;
 };
 
 template <>
 struct nextinttype<int32_t>
 {
 	using type = int64_t;
 };
 
 #ifdef HAS128T
 template <>
 struct nextinttype<int64_t>
 {
 	using type = __int128_t;
 };
 #endif
 /**
 * Emanuele Ruffaldi (C) 2017-2018
 *
 * cppPosit project
 * generalized soft float in unpacked form

 *
 */

 #include <cstdint>
 #ifndef FPGAHLS
 #include <iostream>
 #endif
 #include <inttypes.h>
 #include <math.h>
 #include <bitset>
 #include <limits>
 #include <ratio>

 #if !defined(FPGAHLS) && defined(HAS128T)
 // Stream insertion for __int128_t: the value itself is not printed, a fixed
 // placeholder text is written instead so that generic code still compiles.
 inline std::ostream &operator<<(std::ostream &ons, __int128_t x)
 {
  return ons << "cannot print int128";
 }
 #endif

 /// Limits v to the range [lo, hi]: returns lo when v < lo, hi when v > hi and
 /// v otherwise. The result is a reference to one of the three arguments.
 template <class T>
 constexpr const T &clamp(const T &v, const T &lo, const T &hi)
 {
  return !(v < lo) ? (v > hi ? hi : v) : lo;
 }

 /// Integer division rounded towards negative infinity.
 ///
 /// The built-in / truncates towards zero, so the quotient is lowered by one
 /// when the division is inexact and the remainder and the divisor have
 /// different signs. Checking only for a negative remainder is correct for
 /// positive divisors but gives wrong results for negative ones
 /// (7 / -2 must be -4, -7 / -2 must be 3).
 /// b must not be 0.
 template <class T>
 constexpr T FLOORDIV(T a, T b)
 {
  return (a / b) - (((a % b) != 0 && (((a % b) < 0) != (b < 0))) ? 1 : 0);
 }

 template <class FT = uint64_t, class ET = int32_t>
 struct Unpacked
 {
  static_assert(std::is_unsigned<FT>::value,
                "Unpacked requires unsigned fractiont type");
  static_assert(std::is_signed<ET>::value,
                "Unpacked requires signed exponent type");
  using POSIT_LUTYPE = FT;
  enum
  {
    FT_bits = sizeof(FT) * 8
  };
  enum : FT
  {
    FT_leftmost_bit = (((FT)1) << (FT_bits - 1))
  };
 #ifndef UnpackedDualSel
 #define UnpackedDualSel(a, b) ((a) + (b)*4)
 #endif

  enum Type
  {
    Regular,
    Infinity,
    NaN,
    Zero
  }; /// signed infinity and nan require the extra X bit
  Type type = Regular;
  bool negativeSign = false;
  ET exponent = 0; // with sign
  FT fraction = 0; // this can be 52bit for holding double.

  struct single_tag
  {
  };

  explicit CONSTEXPR14 Unpacked(single_tag, uint32_t p)
  {
    unpack_xfloati<single_trait>(p);
  }

  explicit constexpr Unpacked() {}

  // assume regular
  /// Renormalizes a value whose fraction still contains its leading one:
  /// with k zeros above the leftmost set bit, the exponent is lowered by k and
  /// the fraction is shifted left by k + 1 so that the leading one is dropped.
  /// A fraction of 0 yields (positive) Zero.
  CONSTEXPR14 Unpacked normalized() const
  {
    if (fraction == 0)
    {
      return Unpacked(Zero, false);
    }

    const int k = findbitleftmostC(fraction);
    // When only the lowest bit is set, k + 1 equals the width of FT: shifting
    // by that amount is undefined, while the intended result is 0.
    const FT rest = (k + 1 < static_cast<int>(FT_bits))
                        ? static_cast<FT>(fraction << (k + 1))
                        : static_cast<FT>(0);
    return Unpacked(exponent - k, rest, negativeSign);
  }

  explicit CONSTEXPR14 Unpacked(halffloat p) { unpack_half(p); }
  explicit CONSTEXPR14 Unpacked(int i) { unpack_int(i); }
  explicit CONSTEXPR14 Unpacked(Type t, bool anegativeSign = false)
      : type(t), negativeSign(anegativeSign){};

  // Constructor from a fixed point / integer value described by Trait
  // (enabled when Trait::value_t is integral); forwards to unpack_xfixed.
  // NOTE(review): Trait only appears in typename Trait::value_t, from which it
  // cannot be deduced — check whether this constructor can actually be
  // selected; make_fixed<Trait>() offers the same conversion.
  template <class Trait, typename = typename std::enable_if<std::is_integral<
                             typename Trait::value_t>::value>::type>
  explicit CONSTEXPR14 Unpacked(typename Trait::value_t i)
  {
    unpack_xfixed<Trait>(i);
  }

  // Constructor for floating point traits (enabled when Trait::value_t is not
  // integral), taking the holder type.
  // NOTE(review): the argument is a holder_t but it is passed to
  // unpack_xfloat, which takes a value_t; unpack_xfloati is the function that
  // accepts the holder — confirm the intended call. The deduction remark
  // above applies here as well.
  template <class Trait, typename = typename std::enable_if<!std::is_integral<
                             typename Trait::value_t>::value>::type>
  explicit CONSTEXPR14 Unpacked(typename Trait::holder_t i)
  {
    unpack_xfloat<Trait>(i);
  }

  // expect 1.xxxxxx otherwise make it 0.xxxxxxxxx
  explicit CONSTEXPR14 Unpacked(ET aexponent, FT afraction, bool anegativeSign)
      : type(Regular),
        negativeSign(anegativeSign),
        exponent(aexponent),
        fraction(afraction) {}

 #ifndef FPGAHLS
  explicit CONSTEXPR14 Unpacked(float p)
  {
    unpack_float(p);
  }
  explicit CONSTEXPR14 Unpacked(double p) { unpack_double(p); }
  CONSTEXPR14 Unpacked &unpack_float(float f)
  {
    return unpack_xfloat<single_trait>(f);
  }
  CONSTEXPR14 Unpacked &unpack_double(double d)
  {
    return unpack_xfloat<double_trait>(d);
  }
  constexpr operator float() const { return pack_xfloat<single_trait>(); }
  constexpr operator double() const { return pack_xfloat<double_trait>(); }
 #endif

  CONSTEXPR14 Unpacked &unpack_half(halffloat d)
  {
    return unpack_xfloat<half_trait>(d);
  }
  CONSTEXPR14 Unpacked &unpack_int(int i)
  {
    return unpack_xfixed<fixedtrait<int, sizeof(int) * 8, 0>>(i);
  }

  constexpr operator halffloat() const { return pack_xfloat<half_trait>(); }
  constexpr operator int() const
  {
    return pack_xfixed<fixedtrait<int, sizeof(int) * 8, 0>>();
  }

  template <class Trait>
  CONSTEXPR14 typename Trait::holder_t pack_xfloati() const;

  template <class Trait>
  CONSTEXPR14 typename Trait::value_t pack_xfixed() const;

  /// Packs this value into the format described by Trait and returns it as
  /// Trait::value_t: the bit pattern produced by pack_xfloati is copied
  /// byte-wise into the value type.
  ///
  /// memcpy replaces the former union: reading the union member that was not
  /// written last is undefined in C++, and a union cannot be default
  /// constructed when value_t is a class with its own constructors (halffloat).
  template <class Trait>
  typename Trait::value_t pack_xfloat() const
  {
    using holder_type = typename Trait::holder_t;
    using value_type = typename Trait::value_t;
    static_assert(sizeof(holder_type) == sizeof(value_type),
                  "holder and value type must have the same size");
    static_assert(std::is_trivially_copyable<value_type>::value,
                  "value type must be trivially copyable");

    const holder_type bits = pack_xfloati<Trait>();
    value_type out;
    std::memcpy(&out, &bits, sizeof(out));
    return out;
  }

  template <class T>
  constexpr T pack_float() const
  {
    return pack_xfloat<typename float2trait<T>::trait>();
  }

  constexpr bool isInfinity() const { return type == Infinity; }
  constexpr bool isRegular() const { return type == Regular; }
  constexpr bool isNaN() const { return type == NaN; }
  constexpr bool isZero() const { return type == Zero; }
  constexpr bool isPositive() const { return !negativeSign; }

  static constexpr Unpacked infinity() { return Unpacked(Infinity); }
  static constexpr Unpacked pinfinity() { return Unpacked(Infinity, false); }
  static constexpr Unpacked ninfinity() { return Unpacked(Infinity, true); }
  static constexpr Unpacked nan() { return Unpacked(NaN); }
  static constexpr Unpacked one() { return Unpacked(0, 0, false); }
  static constexpr Unpacked zero() { return Unpacked(Zero); }
  template <class Trait>
  static constexpr Unpacked make_fixed(typename Trait::value_t x)
  {
    return Unpacked().unpack_xfixed<Trait>(x);
  }

  template <class Trait>
  static constexpr Unpacked make_floati(typename Trait::holder_t x)
  {
    return Unpacked().unpack_xfloati<Trait>(x);
  }

  template <class Trait>
  static constexpr Unpacked make_float(typename Trait::value_t x)
  {
    return Unpacked().unpack_xfloat<Trait>(x);
  }

  /// Equality. A NaN on either side never compares equal. Otherwise sign and
  /// type must match, and Regular values must also agree on exponent and
  /// fraction.
  constexpr bool operator==(const Unpacked &u) const
  {
    return !(type == NaN || u.type == NaN) &&
           negativeSign == u.negativeSign &&
           type == u.type &&
           (type != Regular ||
            (exponent == u.exponent && fraction == u.fraction));
  }

  /// Inequality, defined as the negation of operator==.
  /// operator== is false whenever a NaN is involved, so nan != nan holds.
  /// (The result of operator== used to be returned without negation, which
  /// reported equal values as different and different values as equal.)
  constexpr bool operator!=(const Unpacked &u) const
  {
    return !(*this == u);
  }

  /// Negation: flips the sign and keeps the kind of value.
  /// Regular values keep exponent and fraction. Zero, Infinity and NaN keep
  /// their type; they used to be rebuilt through the Regular constructor,
  /// which turned e.g. -Zero into the regular value -1.0 and broke a - Zero.
  constexpr Unpacked operator-() const
  {
    return type == Regular ? Unpacked(exponent, fraction, !negativeSign)
                           : Unpacked(type, !negativeSign);
  }

  /// Reciprocal of this value.
  /// - Regular with fraction 0: only the exponent is negated.
  /// - Regular otherwise: the fraction is obtained from an integer division
  ///   carried out in the next wider integer type (nextinttype<FT>).
  /// - Infinity gives zero(), Zero gives infinity() (both built without sign).
  /// - NaN (and any other type) is returned unchanged.
  CONSTEXPR14 Unpacked inv() const
  {
    switch (type)
    {
    case Regular:
      if (fraction == 0)
      {
        // std::cout << "[exponent inversion " <<  std::dec  << " exponent" <<
        // exponent <<  "] becomes " << -exponent << std::endl;
        return Unpacked(-exponent, 0, negativeSign);
      }
      else
      {
        // one == 0,0,false
        // TODO FIX SIGN/INFINITY/NAN
        // put hidden 1. in mantiss
        // afrac is the numerator 1.0, bfrac the denominator 1.fraction, both
        // with the leading one made explicit in the top bit
        POSIT_LUTYPE afrac = FT_leftmost_bit;
        POSIT_LUTYPE bfrac = FT_leftmost_bit | (fraction >> 1);
        //   std::cout << "inversion " << std::hex  << bfrac << " exponent" <<
        //   exponent << std::endl;
        auto exp = -exponent;

        // denominator larger than numerator: lower the exponent by one and
        // halve the denominator before dividing
        if (afrac < bfrac)
        {
          exp--;
          bfrac >>= 1;
        }

        // the numerator is widened and shifted left by FT_bits; the quotient
        // is narrowed back to FT by the constructor argument
        return Unpacked(
            exp, (((typename nextinttype<FT>::type)afrac) << FT_bits) / bfrac,
            negativeSign);

        // return one()/(*this);
      }
      break;
    case Infinity:
      return zero();
    case Zero:
      return infinity();
    case NaN:
    default:
      return *this;
    }
  }

  /// unpacks a value stored as fixed or integer. Value and holder match
  template <class Trait>
  CONSTEXPR14 Unpacked &unpack_xfixed(typename Trait::value_t value);

  /// unpacks a floating point value as expressed by its holding type (uint32
  /// for single)
  template <class Trait>
  CONSTEXPR14 Unpacked &unpack_xfloati(typename Trait::holder_t value);

  /// unpacks a floating point value by its value type (single)
  ///
  /// The bytes of `value` are copied into the holder integer of Trait and
  /// handed to unpack_xfloati. memcpy replaces the former union: reading the
  /// union member that was not written last is undefined in C++, and a union
  /// cannot be default constructed when value_t is a class with its own
  /// constructors (halffloat).
  /// Returns *this as produced by unpack_xfloati.
  template <class Trait>
  Unpacked &unpack_xfloat(typename Trait::value_t value) // CANNOT be
                                                         // constexpr, except
                                                         // using the expensive
                                                         // float2bits
  {
    using holder_type = typename Trait::holder_t;
    using value_type = typename Trait::value_t;
    static_assert(sizeof(holder_type) == sizeof(value_type),
                  "holder and value type must have the same size");
    static_assert(std::is_trivially_copyable<value_type>::value,
                  "value type must be trivially copyable");

    holder_type bits = 0;
    std::memcpy(&bits, &value, sizeof(bits));
    return unpack_xfloati<Trait>(bits);
  }

  CONSTEXPR14 friend Unpacked operator-(Unpacked a, Unpacked b)
  {
    return a + (-b);
  }

  CONSTEXPR14 Unpacked &operator+=(const Unpacked &a)
  {
    Unpacked r = *this + a;
    *this = r;
    return *this;
  }
  CONSTEXPR14 Unpacked &operator-=(const Unpacked &a)
  {
    *this += (-a);
    return *this;
  }

  // TODO overflow?
  /// Sum of two unpacked values.
  ///
  /// - NaN in either operand gives that NaN (a NaN in b is no longer dropped).
  /// - Regular + Regular: both significands are made explicit (0.1xxx form),
  ///   the one with the smaller exponent is shifted right by the exponent
  ///   distance, they are added or subtracted according to the signs, and the
  ///   result is renormalized. If the distance reaches the width of FT the
  ///   smaller operand contributes nothing; the shift is skipped in that case
  ///   because shifting by the full width or more is undefined.
  /// - Zero is neutral, Infinity absorbs regular values and zero.
  /// - Infinity + Infinity keeps the infinity for equal signs, else NaN.
  CONSTEXPR14 friend Unpacked operator+(Unpacked a, Unpacked b)
  {
    // UnpackedDualSel(*,NaN)
    // UnpackedDualSel(NaN,*)
    if (a.isNaN())
      return a;
    if (b.isNaN())
      return b;
    switch (UnpackedDualSel(a.type, b.type))
    {
    case UnpackedDualSel(Regular, Regular):
    {
      auto dir = a.exponent - b.exponent;
      const ET exp = (dir < 0 ? b.exponent : a.exponent) + 1;

      // distance between the exponents and whether the operand with the
      // smaller exponent is shifted out completely
      const auto shift = dir < 0 ? -dir : dir;
      const bool vanishes = shift >= static_cast<int>(FT_bits);

      // move right means increment exponent
      // 1.xxxx => 0.1xxxxxx
      // 1.yyyy => 0.1yyyyyy
      POSIT_LUTYPE afrac1 =
          (FT_leftmost_bit >> 1) |
          (a.fraction >> 2); // denormalized and shifted right
      POSIT_LUTYPE bfrac1 = (FT_leftmost_bit >> 1) | (b.fraction >> 2);
      POSIT_LUTYPE afrac =
          dir < 0 ? (vanishes ? static_cast<POSIT_LUTYPE>(0)
                              : static_cast<POSIT_LUTYPE>(afrac1 >> shift))
                  : afrac1; // denormalized and shifted right
      POSIT_LUTYPE bfrac =
          dir < 0 ? bfrac1
                  : (vanishes ? static_cast<POSIT_LUTYPE>(0)
                              : static_cast<POSIT_LUTYPE>(bfrac1 >> shift));

      // 1.xxxx => 0.1xxxxx => 0.0k 1 xxxx
      //
      // if dir==0 then:
      //   0.1xxxxx
      //   0.1yyyyy
      //   1.zzzzzz
      //
      // but also
      //   0.1xxxx
      //   0.0001yyyy
      //   0.1zzzz
      //
      // if 1. we easily normalize by shift
      // if 0. we pre
      // mode: 0 = same sign (add), 1 = subtract b from a, -1 = subtract a from b
      int mode =
          a.negativeSign == b.negativeSign ? 0 : afrac > bfrac ? 1 : -1;
      bool osign = mode >= 0 ? a.negativeSign : b.negativeSign;
      POSIT_LUTYPE frac = mode == 0
                              ? afrac + bfrac
                              : mode > 0 ? afrac - bfrac : bfrac - afrac;
      return Unpacked(exp, frac, osign).normalized(); // pass denormalized
    }
    case UnpackedDualSel(Regular, Zero):
    case UnpackedDualSel(Zero, Zero):
    case UnpackedDualSel(Infinity, Zero):
    case UnpackedDualSel(Infinity, Regular):
      return a;
    case UnpackedDualSel(Zero, Regular):
    case UnpackedDualSel(Zero, Infinity):
    case UnpackedDualSel(Regular, Infinity):
      return b;
    default: // case UnpackedDualSel(Infinity,Infinity):
      return (a.negativeSign == b.negativeSign) ? a : nan();
    }
  }

  /// Product of two unpacked values.
  ///
  /// Special values:
  ///  - NaN in either operand gives NaN
  ///  - Zero times Regular or Zero is zero
  ///  - Zero times Infinity is NaN
  ///  - any other combination involving Infinity is an infinity whose sign is
  ///    the xor of the operand signs
  ///
  /// Regular operands: both significands are rebuilt with their leading one,
  /// multiplied in the next larger integer type, and the upper word is kept.
  // https://www.edwardrosten.com/code/fp_template.html
  // https://github.com/Melown/half
  // TODO overflow?
  CONSTEXPR14 friend Unpacked operator*(const Unpacked &a, const Unpacked &b)
  {
    // Return the operand that actually is NaN: returning a unconditionally
    // would hand back a non-NaN value when only b is NaN.
    if (a.isNaN())
      return a;
    if (b.isNaN())
      return b;
    switch (UnpackedDualSel(a.type, b.type))
    {
    case UnpackedDualSel(Regular, Regular):
    {
      // significands with the leading one made explicit
      POSIT_LUTYPE afrac = FT_leftmost_bit | (a.fraction >> 1);
      POSIT_LUTYPE bfrac = FT_leftmost_bit | (b.fraction >> 1);
      auto frac =
          ((((typename nextinttype<FT>::type)afrac) * bfrac) >> FT_bits);
 #ifdef FPGAHLS
 #pragma HLS RESOURCE variable = frac core = Mul_LUT
 #endif
      // q: the top bit of the product word is clear, so one extra left
      // shift is needed and the exponent gets no carry
      bool q = (frac & FT_leftmost_bit) == 0;
      auto rfrac = q ? (frac << 1) : frac;
      auto exp = a.exponent + b.exponent + (q ? 0 : 1);
      // the final shift drops the leading one again
      return Unpacked(exp, rfrac << 1, a.negativeSign ^ b.negativeSign);
    }
    case UnpackedDualSel(Regular, Zero):
    case UnpackedDualSel(Zero, Regular):
    case UnpackedDualSel(Zero, Zero):
      return zero();
    case UnpackedDualSel(Infinity, Zero):
    case UnpackedDualSel(Zero, Infinity):
      return nan();
    default: // case UnpackedDualSel(Infinity,Infinity):
      // inf inf or inf reg or reg inf
      return (a.negativeSign ^ b.negativeSign) ? ninfinity() : pinfinity();
    }
  }

  /**
   * Division Truth Table (a / b)
   *
   *   a \ b    | Regular     | Zero        | Infinity
   *   ---------+-------------+-------------+----------
   *   Regular  | quotient    | inf, sign a | zero
   *   Zero     | a (zero)    | NaN         | zero
   *   Infinity | inf, s^s    | a (inf)     | NaN
   *
   * NaN in either operand gives NaN.
   */
  // TODO overflow?
  CONSTEXPR14 friend Unpacked operator/(const Unpacked &a, const Unpacked &b)
  {
    // Return the operand that actually is NaN: returning a unconditionally
    // would hand back a non-NaN value when only b is NaN.
    if (a.isNaN())
      return a;
    if (b.isNaN())
      return b;

    // 9 more cases
    switch (UnpackedDualSel(a.type, b.type))
    {
    case UnpackedDualSel(Regular, Regular):
    {
      // significands with the leading one made explicit
      POSIT_LUTYPE afrac = FT_leftmost_bit | (a.fraction >> 1);
      POSIT_LUTYPE bfrac1 = FT_leftmost_bit | (b.fraction >> 1);
      // when the dividend significand is the smaller one the quotient would
      // fall below 1: halve the divisor and take one from the exponent
      auto exp = a.exponent - b.exponent + (afrac < bfrac1 ? -1 : 0);
      POSIT_LUTYPE bfrac = afrac < bfrac1 ? (bfrac1 >> 1) : bfrac1;

      return Unpacked(
          exp, (((typename nextinttype<FT>::type)afrac) << FT_bits) / bfrac,
          a.negativeSign ^ b.negativeSign);
    }
    case UnpackedDualSel(Zero, Zero):
    case UnpackedDualSel(Infinity, Infinity):
      return nan();
    case UnpackedDualSel(Zero, Infinity):
    case UnpackedDualSel(Regular, Infinity): // finite / infinity vanishes
      return zero();
    case UnpackedDualSel(Zero, Regular):
    case UnpackedDualSel(Infinity, Zero):
      return a;
    case UnpackedDualSel(Regular, Zero):
      return Unpacked(Unpacked::Infinity, a.negativeSign);
    default: // case UnpackedDualSel(Infinity,Regular):
      return (a.negativeSign ^ b.negativeSign) ? ninfinity() : pinfinity();
    }
  }

 #ifndef FPGAHLS
  friend std::ostream &operator<<(std::ostream &ons, Unpacked const &o)
  {
    switch (o.type)
    {
    case Unpacked::Regular:
      ons << "up(" << (o.negativeSign ? "-" : "+")
          << " exp (dec) = " << std::dec
          << typename printableinttype<const ET>::type(o.exponent)
          << " fraction (hex) = " << std::hex
          << typename printableinttype<const FT>::type(o.fraction)
          << " (bin) = " << std::dec
          << (std::bitset<sizeof(o.fraction) * 8>(o.fraction)) << ")";
      break;
    case Unpacked::Infinity:
      ons << (o.negativeSign ? "up(-infinity)" : "up(+infinity)");
      break;
    case Unpacked::NaN:
      ons << "up(nan)";
      break;
    case Unpacked::Zero:
      ons << "up(0)";
      break;
    }
    return ons;
  }
 #endif
 };

 /// Fills this object from a fixed point / integer value.
 /// A zero input gives type Zero; any other input gives a Regular value
 /// whose exponent is the position of the leading one relative to the binary
 /// point and whose fraction holds the bits below that leading one, left
 /// aligned.
 template <class FT, class ET>
 template <class Trait>
 CONSTEXPR14 Unpacked<FT, ET> &Unpacked<FT, ET>::unpack_xfixed(
    typename Trait::value_t nx)
 {
  // TODO: handle infinity or nan in Trait
  if (nx == 0)
  {
    type = Zero;
    negativeSign = false;
    exponent = 0;
    fraction = 0;
    return *this;
  }

  using UT = typename std::make_unsigned<typename Trait::value_t>::type;
  type = Regular;
  negativeSign = nx < 0;

  // magnitude layout: 0[N-p-1] 1 ?[p]
  // corner cases:
  //  1 ?[N-1]
  //  0[N-1] 1
  const UT magnitude = pcabs(nx);
  const int p =
      Trait::totalbits - findbitleftmostC(magnitude) - 1; // 31->0,0->31
  exponent = (p - Trait::fraction_bits);

  // drop the leading one and left align the p bits below it:
  // ?[p] 0[N-p]; with p == 0 there is nothing below the leading one
  UT below = 0;
  if (p != 0)
  {
    below = magnitude << (Trait::totalbits - p);
  }

  // FT f:  ?[min(p,size(FT)] 0[size(FT)-min(p,size(FT)]
  fraction = cast_right_to_left<UT, Trait::totalbits, FT, FT_bits>()(below);
  return *this;
 }

 /// Fills this object from the bit pattern of a floating point number.
 ///
 /// Sign, biased exponent and fraction are extracted first; the stored
 /// exponent then selects the class:
 ///  - all ones: Infinity when the fraction is zero, NaN otherwise
 ///  - zero: Zero, or a denormal that is renormalized when the trait has
 ///    denormals and the fraction is not zero
 ///  - anything else: Regular
 // https://www.h-schmidt.net/FloatConverter/IEEE754.html
 template <class FT, class ET>
 template <class Trait>
 CONSTEXPR14 Unpacked<FT, ET> &Unpacked<FT, ET>::unpack_xfloati(
    typename Trait::holder_t value)
 {
  using HolderT = typename Trait::holder_t;

  const ET rawexp =
      bitset_getT(value, Trait::fraction_bits, Trait::exponent_bits);
  type = Regular;
  negativeSign = value & (static_cast<HolderT>(1) << (Trait::data_bits - 1));
  exponent = rawexp - Trait::exponent_bias;

  // fractional part is the LSB part of the holder, moved to the left of FT
  fraction =
      cast_right_to_left<HolderT, Trait::fraction_bits, FT, FT_bits>()(value);

  // stored exponent: 0, x, exponent_mask === 0, any, infinity
  // biased: -max, -max+1, ..., max, max+1 === 0, min, ..., max, infinity
  const bool exponent_all_ones =
      rawexp == ((1 << Trait::exponent_bits) - 1); // AKA 128 for single
  if (exponent_all_ones)
  {
    type = fraction == 0 ? Infinity : NaN; // don't care which NaN
  }
  else if (rawexp == 0)
  {
    if (Trait::with_denorm && fraction != 0)
    {
      // denormalized: move the first set bit out of the fraction and
      // account for the shift in the exponent
      const int k = findbitleftmostC(fraction);
      exponent -= k;
      fraction <<= (k + 1);
    }
    else
    {
      type = Zero;
      negativeSign = false; // don't care signed zero
    }
  }
  return *this;
 }

 /// Moves a left aligned fraction of abits bits (type AT) into a field of
 /// bbits bits (type BT). msb is the leftmost bit of AT and stands for the
 /// implicit leading one. The primary template is empty: only the two
 /// specializations selected by abits_gt_bbits provide pack / packdenorm.
 template <int abits, class AT, int bbits, class BT, bool abits_gt_bbits, AT msb>
 struct fraction_bit_extract
 {
 };

 /// Source wider than destination (flag true): low bits are dropped.
 template <int abits, class AT, int bbits, class BT, AT msb>
 struct fraction_bit_extract<abits, AT, bbits, BT, true, msb>
 {
  /// keeps the top bbits bits of the fraction
  static constexpr BT pack(AT fraction)
  {
    return bitset_getT(fraction, abits - bbits, bbits);
  }

  /// puts the leading one in front of the fraction, then narrows
  static constexpr BT packdenorm(AT fraction)
  {
    return static_cast<BT>((msb | (fraction >> 1)) >> (abits - bbits));
  }
 };

 /// Source not wider than destination (flag false): the fraction is moved
 /// up by the difference of the widths.
 template <int abits, class AT, int bbits, class BT, AT msb>
 struct fraction_bit_extract<abits, AT, bbits, BT, false, msb>
 {
  /// widens the fraction and left aligns it in bbits bits
  static constexpr BT pack(AT fraction)
  {
    return static_cast<BT>(fraction) << (bbits - abits);
  }

  /// puts the leading one in front of the fraction, then widens
  static constexpr BT packdenorm(AT fraction)
  {
    return (static_cast<BT>(msb | (fraction >> 1)) << (bbits - abits));
  }
 };

 /**
 * Convert (s,2**E,F) to the fixed point / integer value described by Trait.
 *
 * - Infinity, NaN and Zero all give 0
 * - exponents too large for the integer part saturate to the largest or
 *   lowest value of Trait::value_t, depending on the sign
 * - exponents below the smallest fractional bit give 0
 * - otherwise the leading one is placed at bit (exponent + fraction_bits)
 *   and the fraction bits are placed below it (truncating)
 */
 template <class FT, class ET>
 template <class Trait>
 CONSTEXPR14 typename Trait::value_t Unpacked<FT, ET>::pack_xfixed() const
 {
  switch (type)
  {
  case Infinity:
    return 0;
  case Zero:
    return 0;
  case NaN:
    return 0;
  default:
    break;
  }
  constexpr int intbits = Trait::totalbits - Trait::fraction_bits;
  if (exponent >= intbits)
  {
    return negativeSign ? std::numeric_limits<typename Trait::value_t>::lowest()
                        : std::numeric_limits<typename Trait::value_t>::max();
  }
  else if (exponent < -Trait::fraction_bits)
  {
    return 0;
  }
  else
  {
    using ST = typename Trait::value_t;
    using UT = typename std::make_unsigned<ST>::type;
    // fraction 1.xxxxx from left aligned over FT bytes to UT bytes still left
    // aligned over Trait::totalbits
    const UT f = fraction_bit_extract<FT_bits, FT, Trait::totalbits, UT,
                                      (FT_bits > Trait::totalbits),
                                      FT_leftmost_bit>::pack(fraction);

    // Number of positions the left aligned fraction moves right so that it
    // ends just below the leading one. It ranges from 1 up to totalbits.
    // At exponent == -Trait::fraction_bits it equals totalbits: no fraction
    // bit survives, and shifting by the full width of UT would be undefined,
    // so that case yields zero explicitly.
    const auto drop = intbits - exponent;
    const UT below =
        drop >= static_cast<int>(sizeof(UT) * 8) ? UT(0) : UT(f >> drop);

    // add the 1 bit for the current exponent
    //
    // extrema: e.g. for totalbits=32, whatever fraction
    // - exponent==-Trait::fraction_bits ==> 1 | 0
    // - exponent==intbits-1 ==> 0x8000000 | (F >> 1)
    const ST r = (ST(1) << (exponent + Trait::fraction_bits)) | (ST)below;
    return negativeSign ? -r : r;
  }
 }

 /// Packs this value into the bit pattern of the floating point format
 /// described by Trait.
 ///
 /// - Infinity gives Trait::ninfinity_h / Trait::pinfinity_h, NaN gives
 ///   Trait::nan_h, Zero gives 0
 /// - a biased exponent above Trait::exponent_max saturates to
 ///   Trait::lowest_h / Trait::max_h
 /// - a biased exponent below 1 gives a denormal when the trait has
 ///   denormals (Trait::with_denorm) and 0 otherwise
 /// - everything else is encoded as a normal number (fraction truncated)
 template <class FT, class ET>
 template <class Trait>
 CONSTEXPR14 typename Trait::holder_t Unpacked<FT, ET>::pack_xfloati() const
 {
  switch (type)
  {
  case Infinity:
    return negativeSign ? Trait::ninfinity_h : Trait::pinfinity_h;
  case Zero:
    return 0;
  case NaN:
    return Trait::nan_h;
  default:
    break;
  }

  largest_type<ET, typename int_least_bits<Trait::exponent_bits>::type> fexp =
      exponent;
  fexp += Trait::exponent_bias;

  // left aligned
  typename Trait::holder_t fexpbits = 0;
  typename Trait::holder_t ffracbits = 0;

  if (fexp > Trait::exponent_max) // this is RAW exponent max
  {
    // overflow, set as MAX
    return negativeSign ? Trait::lowest_h : Trait::max_h;
  }
  else if (fexp < 1)
  {
    // Same reading of with_denorm as unpack_xfloati: a trait without
    // denormals has nothing below the smallest normal, so flush to zero.
    if (!Trait::with_denorm)
      return 0;
    else
    {
      // denormalized numbers are and happen when raw exponent is below 1
      // 0.fractionbits
      //
      // 2^exponent * fraction ==> 0.rawfraction
      //
      // packdenorm makes the leading one explicit (0.1xxxx); the result is
      // then moved right once more for every step the raw exponent lies
      // below zero
      ffracbits = fraction_bit_extract<FT_bits, FT, Trait::fraction_bits,
                                       typename Trait::holder_t,
                                       (FT_bits > Trait::fraction_bits),
                                       FT_leftmost_bit>::packdenorm(fraction);
      // a shift by the width of the holder or more is undefined; all bits
      // are gone in that case
      if (-fexp >= static_cast<int>(sizeof(typename Trait::holder_t) * 8))
        ffracbits = 0;
      else
        ffracbits >>= -fexp;
    }
  }
  else // normal
  {
    fexpbits = ((typename Trait::holder_t)(fexp & Trait::exponent_mask))
               << (Trait::fraction_bits);
    ffracbits = fraction_bit_extract<
        FT_bits, FT, Trait::fraction_bits, typename Trait::holder_t,
        (FT_bits > Trait::fraction_bits), FT_leftmost_bit>::pack(fraction);
  }

  typename Trait::holder_t value =
      ffracbits | fexpbits | (negativeSign ? Trait::signbit : 0);

  // don't underflow to zero? A pattern whose only set bit is shifted out by
  // (value << 1) is bumped to the next pattern
  if (value != 0 && (value << 1) == 0)
  {
    value++;
  }
  return value;
 }

 template <class SrcTrait, class DstTrait, class FT>
 constexpr typename DstTrait::holder_t convertfloats(
    typename SrcTrait::holder_t src)
 {
  return Unpacked<typename largest_type<typename SrcTrait::holder_t,
                                        typename DstTrait::holder_t>::type,
                  int>::template make_floati<SrcTrait>(src)
      .template pack_xfloati<DstTrait>();
 }
 	/**
 * Emanuele Ruffaldi (C) 2017
 * Templated C++ Posit

 Test

 using X=Posit<int32_t,4,0,uint32_t>;
 X::PT::decode_posit_rs(1)
 */

 #ifndef FPGAHLS
 /// Returns the float whose storage holds the 32 bits of i.
 /// The bits are written to one member of a union and read back through
 /// the other.
 inline float uint32_to_float(uint32_t i)
 {
 	union Overlay {
 		uint32_t bits;
 		float real;
 	};
 	Overlay overlay;
 	overlay.bits = i;
 	return overlay.real;
 }
 #endif

 /// Selects which special values a posit format has.
 enum class PositSpec
 {
 	WithNan,   ///< NaN only: isinfinity() is always false
 	WithInf,   ///< infinity only: isNaN() is always false
 	WithNanInf ///< NaN plus separate +infinity and -infinity
 };

 /**
 * Compile time description of a posit format: sizes, masks, the bit
 * patterns of the notable values and the regime helpers.
 *
 * \tparam T signed integer type that holds the posit
 * \tparam totalbits number of bits of the posit, at most the bits of T
 * \tparam esbits number of exponent bits, at most totalbits-3
 * \tparam positspec_ which special values exist (see PositSpec)
 */
 template <class T, int totalbits, int esbits, PositSpec positspec_ >
 struct PositTrait
 {
 	static_assert(std::is_signed<T>::value,"required signed T");
 	static_assert(sizeof(T)*8 >= totalbits,"required enough storage T for provided bits  SREF");
 	static_assert(esbits <= totalbits-3,"esbits should be at most N-3 for the cases [x01,E] and [x10,E]");

 	using POSIT_STYPE = typename std::make_signed<T>::type;
 	using POSIT_UTYPE = typename std::make_unsigned<T>::type;
 	static constexpr PositSpec positspec = positspec_;
 	static constexpr bool withnan = positspec_ != PositSpec::WithInf;
 	/// exponent type: the next larger integer when totalbits+esbits does not fit in T
 	using exponenttype = typename std::conditional<(totalbits+esbits >= sizeof(T)*8),typename  nextinttype<T>::type,T>::type;

 	static constexpr POSIT_UTYPE POSIT_MAXREGIME_BITS = totalbits-1;
 	static constexpr POSIT_UTYPE POIST_ONEHELPER = 1;
 	static constexpr POSIT_UTYPE POSIT_HOLDER_SIZE = sizeof(T)*8;
 	static constexpr POSIT_UTYPE POSIT_SIZE = totalbits;
 	static constexpr POSIT_UTYPE POSIT_ESP_SIZE = esbits;
 	static constexpr POSIT_UTYPE POSIT_MSB = POIST_ONEHELPER<<(totalbits-1);
 	static constexpr POSIT_UTYPE POSIT_HOLDER_MSB = POIST_ONEHELPER<<(POSIT_HOLDER_SIZE-1);
 	/// the low totalbits bits set
 	static constexpr POSIT_UTYPE POSIT_MASK = ((POSIT_MSB-1)|(POSIT_MSB));
 	/// the low esbits bits set
 	static constexpr POSIT_UTYPE POSIT_ESP_MASK = (POIST_ONEHELPER<< esbits)-1;
 	static constexpr POSIT_UTYPE POSIT_EXTRA_BITS = POSIT_HOLDER_SIZE-totalbits;
 	static constexpr POSIT_UTYPE POSIT_SIGNBIT = (POIST_ONEHELPER<<(totalbits-1)); // bit
 	static constexpr POSIT_UTYPE POSIT_INVERTBIT = (POIST_ONEHELPER<<(totalbits-2));

 	static constexpr POSIT_STYPE POSIT_REG_SCALE = 1<<esbits;

 	// these are portable ways for representing 10000000 and the two adjacents numbers in
 	// the posit circle
 	static constexpr POSIT_STYPE _POSIT_TOP = (POSIT_STYPE)((POSIT_UTYPE(~0) << (totalbits-1)));
 	static constexpr POSIT_STYPE _POSIT_TOPRIGHT = (POSIT_STYPE)((POIST_ONEHELPER<< (totalbits-1))-1);
 	static constexpr POSIT_STYPE _POSIT_TOPLEFT = (POSIT_STYPE)((POSIT_UTYPE(~0) << (totalbits-1)))+1;

 	// Without Nan (classic Posit): there only one Infinity
 	// With NaN: the top element is NaN and then its adjacents correspond to +- Infinity
 	static constexpr POSIT_STYPE POSIT_PINF =  positspec_ == PositSpec::WithNanInf ? _POSIT_TOPRIGHT: _POSIT_TOP ; // 1[sign] 000000 or N-1 111 bits
 	static constexpr POSIT_STYPE POSIT_NINF =  positspec_ == PositSpec::WithNanInf ? _POSIT_TOPLEFT: _POSIT_TOP;
 	static constexpr POSIT_STYPE POSIT_NAN  = _POSIT_TOP;  // infinity in withnan=false otherwise it is truly nan
 	static constexpr POSIT_STYPE POSIT_ONE =  POSIT_INVERTBIT; // fine due to position of invert bit
 	static constexpr POSIT_STYPE POSIT_MONE = -POSIT_ONE ; // minus one

 	// Two
 	static constexpr POSIT_STYPE POSIT_TWO = (POSIT_INVERTBIT | (POSIT_INVERTBIT>>(1+esbits)));

 	// 1/2
 	// 00 1[esbits+1] 0[N-3-esbits]
 	// The run of esbits+1 ones is cut out of POSIT_MASK (totalbits ones) and
 	// not out of an all-ones holder: with a holder wider than totalbits the
 	// latter would leave additional ones above the intended run.
 	static constexpr POSIT_STYPE POSIT_HALF = POSIT_STYPE(POSIT_MASK >> (totalbits-esbits-1)) << (totalbits-3-esbits);

 	// max value below Infinity
 	static constexpr POSIT_STYPE POSIT_MAXPOS = _POSIT_TOPRIGHT - (positspec_ == PositSpec::WithNanInf ? 1:0);

 	// min value above -Infinity
 	static constexpr POSIT_STYPE POSIT_MINNEG = _POSIT_TOPLEFT + (positspec_ == PositSpec::WithNanInf? 1:0);

 	// minimal number above zero
 	static constexpr POSIT_STYPE POSIT_AFTER0 = 1; // right to 0
 	static constexpr POSIT_STYPE POSIT_BEFORE0 = -POSIT_AFTER0; // left to 0

 	/// largest exponent: one regime step less when both infinities are present
 	static constexpr exponenttype maxexponent() { return positspec_ == PositSpec::WithNanInf ? POSIT_REG_SCALE * (totalbits - 3) : POSIT_REG_SCALE * (totalbits - 2); }
 	/// smallest exponent
 	static constexpr exponenttype minexponent() { return (-((exponenttype)POSIT_REG_SCALE) * (totalbits - 2)) ; }

 	/**
 	 * Decodes the regime from a left aligned bit pattern.
 	 *
 	 * \param pars bits starting at the regime, aligned to the MSB of T
 	 * \return pair (regime value, regime size in bits). The size is capped at
 	 *         POSIT_MAXREGIME_BITS.
 	 *
 	 * The first bit tells the direction of the run. The run length is the
 	 * position of the first differing bit, or POSIT_MAXREGIME_BITS when all
 	 * bits are equal.
 	 */
 	static CONSTEXPR14 std::pair<int,int> decode_posit_rs(T pars)
 	{
 		const bool x = (pars & POSIT_HOLDER_MSB) != 0; // marker bit for > 1
 		int aindex = x ? (~pars == 0 ? POSIT_MAXREGIME_BITS : findbitleftmostC((POSIT_UTYPE)~pars)) : (pars == 0 ? POSIT_MAXREGIME_BITS : findbitleftmostC((POSIT_UTYPE)pars)); // index is LAST with !x
 		int index = aindex;
 		int reg = x ? index-1 : -index;
 		int rs =  POSIT_MAXREGIME_BITS < index+1 ? POSIT_MAXREGIME_BITS : index+1; // min(POSIT_MAXREGIME_BITS, index+1)
 		return {reg,rs};
 	}

 	/// Splits a full exponent into (regime, exponent bits): the regime is the
 	/// exponent shifted right by esbits, the rest is the low esbits bits.
 	static constexpr std::pair<POSIT_STYPE,POSIT_UTYPE> split_reg_exp(exponenttype eexponent)
 	{
 		// FLOORDIV(a,b)  ((a) / (b) - ((a) % (b) < 0))
 		// int reg = FLOORDIV(up.exp, POW2(es));
 		// POSIT_UTYPE exp = up.exp - POW2(es) * reg;
 		return {eexponent >> POSIT_ESP_SIZE, eexponent & POSIT_ESP_MASK };
 	}

 	/// Inverse of split_reg_exp.
 	/// compiler note: it generate simply: shl and or
 	static constexpr exponenttype join_reg_exp(POSIT_STYPE reg, POSIT_UTYPE exp)
 	{
 		return (((exponenttype)reg) * (1<<POSIT_ESP_SIZE))|exp;
 	}
 };

 //template <class T, int totalbits, int esbits, PositSpec positspec_ >
 //constexpr typename PositTrait<T,totalbits,esbits,withnan_>::exponenttype PositTrait<T,totalbits,esbits,withnan_>::minexponent;

 /// Forward declaration of the posit class so that the conversion functions
 /// below can be declared before the class body.
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 class Posit;

 /// Declaration: converts a posit into its fully unpacked form (UnpackedT).
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 CONSTEXPR14 auto unpack_posit(const Posit<T,totalbits,esbits,FT,positspec> & p) -> typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT ;

 /// Declaration: converts a fully unpacked value (UnpackedT) into a posit.
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> pack_posit(const typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT & x);


 /**
 * Minimal unpacked representation of a posit: the decoded fields kept
 * separate (sign, regime, exponent bits, fraction).
 *
 * \tparam UT the fully unpacked type (UnpackedT); supplies Type and Regular
 * \tparam PT the posit trait; supplies POSIT_STYPE and POSIT_UTYPE
 */
 template <class UT, class PT>
 struct UnpackedLow_t
 {
 	using Type = typename UT::Type;

 	/// value of the given type, positive, all numeric fields zero
 	constexpr UnpackedLow_t(Type t)
 		: type(t), negativeSign(false), regime(0), exp(0), fraction(0)
 	{
 	}

 	/// value of the given type and sign, all numeric fields zero
 	constexpr UnpackedLow_t(Type t, bool anegativeSign)
 		: type(t), negativeSign(anegativeSign), regime(0), exp(0), fraction(0)
 	{
 	}

 	/// Regular value from sign n, regime r, exponent e and fraction f
 	constexpr UnpackedLow_t(bool n, typename PT::POSIT_STYPE r, typename PT::POSIT_UTYPE e, typename PT::POSIT_UTYPE f)
 		: type(UT::Regular), negativeSign(n), regime(r), exp(e), fraction(f)
 	{
 	}

 	Type type;
 	bool negativeSign;                 // for Regular and Infinity if applicable
 	typename PT::POSIT_STYPE regime;   // decoded with sign
 	typename PT::POSIT_UTYPE exp;      // decoded
 	typename PT::POSIT_UTYPE fraction; // fraction left aligned without 1.
 };
 /**
 * Posit number stored in a signed integer.
 *
 * The value is kept as the signed integer v; negation is the integer
 * negation of v and ordering is the integer ordering of v.
 *
 * Notes:
 * \tparam T is the holding type that has to be signed due to complement 2 sign method
 * \tparam totalbits is the significant bits of posit stored in T right aligned. Due to the 2 complement scheme the MSB bits are extension of the sign
 * \tparam esbits is the size of the exponent
 * \tparam FT is the unsigned type holding the fraction with the 1 explicitly specified
 * \tparam positspec selects which special values exist (see PositSpec)
 */
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 class Posit
 {
 public:
 	using PT=PositTrait<T,totalbits,esbits,positspec>;
 	static_assert(std::is_unsigned<FT>::value,"required unsigned FT");

 	enum { vtotalbits = totalbits, vesbits = esbits};
 	/// tag selecting the constructor that takes the raw holder value
 	struct DeepInit{};
 	static constexpr bool withnan = positspec != PositSpec::WithInf;
 	using value_t=T;
 	using fraction_t=FT;
 	using UnpackedT=Unpacked<FT,typename PT::exponenttype>;
 	using UnpackedLow = UnpackedLow_t<UnpackedT,PT>;
 	using exponenttype = typename PT::exponenttype;
 	T v; // index in the N2 space

 	/// Pending product of two posits. It is only evaluated when converted,
 	/// so that a following addition can be computed on the unpacked values
 	/// without packing the intermediate product.
 	struct PositMul
 	{
 		constexpr PositMul(Posit av, Posit bv) : a(av),b(bv) {}
 		Posit a,b;

 		/// evaluates the product and packs it
 		constexpr Posit asPosit() const { return pack_posit<T,totalbits,esbits,FT,positspec>(a.unpack()*b.unpack()); }

 		constexpr operator Posit() const { return asPosit(); }

 		constexpr operator UnpackedT() const { return asPosit(); }
 	#ifndef FPGAHLS
 		constexpr operator float() const { return asPosit(); }
 		constexpr operator double() const { return asPosit(); }
 	#endif
 		constexpr operator int() const { return asPosit(); }

 		/// pa.a*pa.b+pb.a*pb.b, packed once at the end
 		friend constexpr Posit operator+(const PositMul & pa, const PositMul & pb)
 		{
 			return pack_posit<T,totalbits,esbits,FT,positspec>(pa.a.unpack()*pa.b.unpack()+pb.a.unpack()*pb.b.unpack());
 		}

 		// missing operators
 		// &
 		// -
 	};

 	CONSTEXPR14 Posit half() const;
 	CONSTEXPR14 Posit twice() const;
 	CONSTEXPR14 UnpackedLow unpack_low() const;
 	static CONSTEXPR14 Posit pack_low(UnpackedLow);
 	static CONSTEXPR14 UnpackedT unpacked_low2full(UnpackedLow x);
 	static CONSTEXPR14 UnpackedLow unpacked_full2low(UnpackedT tx);

 	/// diagnostics with full details
 	struct info
 	{
 		bool infinity = false;
 		bool nan = false;
 		int k = 0;
 		bool sign = false; // true if negative
 		double regime = 0;
 		int rs = 0;
 		int es = 0;
 		int fs = 0;
 		int exp = 0;
 		int exponent = 0;
 		FT ifraction = 0;
 		double fraction = 0;
 		double value = 0;
 	};

 	info analyze();

 	// Comparisons work on the holder value. When the format has NaN, every
 	// comparison with a NaN operand is false (so != is true).
 	friend constexpr bool operator == (const Posit & a, const Posit & u)  { return withnan && (a.isNaN()||u.isNaN())?false :a.v == u.v; }
 	friend constexpr bool operator != (const Posit & a, const Posit & u)  { return !(a == u); }
 	friend constexpr bool operator < (const Posit & a, const Posit & u)  { return withnan && (a.isNaN()||u.isNaN())?false :a.v < u.v; }
 	friend constexpr bool operator <= (const Posit & a, const Posit & u)  { return withnan && (a.isNaN()||u.isNaN())?false :a.v <= u.v; }

 	friend constexpr bool operator > (const Posit & a, const Posit & u)  { return withnan && (a.isNaN()||u.isNaN())?false :a.v > u.v; }
 	friend constexpr bool operator >= (const Posit & a, const Posit & u)  { return withnan && (a.isNaN()||u.isNaN())?false :a.v >= u.v; }

 	static constexpr Posit ldexp(const Posit & u, int exp); // exponent product

 	using single_tag = typename UnpackedT::single_tag;

 	/// zero
 	constexpr Posit() : v(0) {}

 	CONSTEXPR14 explicit Posit(single_tag t, uint32_t p) { v = pack_posit<T,totalbits,esbits,FT,positspec>(UnpackedT(t,p)).v; }

 	/// construct passing the holding type x
 	CONSTEXPR14 explicit Posit(DeepInit, T x) : v(x) {}

 	/// construct from decomposed (s, R,E,F)
 	CONSTEXPR14 explicit Posit(UnpackedLow u) : v(pack_low(u).v) {}

 	/// construct from fully unpacked floating (s,e,F)
 	CONSTEXPR14 explicit Posit(UnpackedT u) : v(pack_posit<T,totalbits,esbits,FT,positspec>(u).v) {}

 #ifndef FPGAHLS
 	CONSTEXPR14 explicit Posit(float f): Posit(UnpackedT(f)) {}
 	CONSTEXPR14 explicit Posit(double d): Posit(UnpackedT(d)) {}
 #endif
 	/// implicit conversion from int
 	CONSTEXPR14 Posit(int i): Posit(UnpackedT(i)) {}

 	constexpr UnpackedT unpack() const { return unpack_posit<T,totalbits,esbits,FT,positspec>(*this); }

 	/// absolute value
 	/// TODO: use (v ^ mask) - mask   OR (x+mask)^mask
 	/// where int const mask = v >> sizeof(int) * CHAR_BIT - 1;
 	constexpr Posit abs()  const { return Posit(DeepInit(),pcabs(v));  }  // could be >= infinity because infinity is sign symmetric

 	/// negation
 	constexpr Posit neg()  const { return Posit(DeepInit(),-v); };

 	/// 1/x
 	CONSTEXPR14 Posit inv()  const;

 	// SFINAE optionally: template<typename U = T, class = typename std::enable_if<withnan, U>::type>
 	constexpr bool hasNaN() const { return positspec != PositSpec::WithInf; }
 	constexpr bool isNaN() const { return positspec != PositSpec::WithInf && v == PT::POSIT_NAN; }
 	constexpr bool isnegative() const { return v < 0; } //(v &POSIT_SIGNBIT) != 0; }
 	constexpr bool isinfinity() const { return positspec != PositSpec::WithNan && (v == PT::POSIT_PINF || v == PT::POSIT_NINF); }
 	constexpr bool iszero() const { return v == 0; }
 	constexpr bool isone() const { return v == PT::POSIT_ONE; }

 	/// Previous representable value. Values outside [lowest(), max()] are
 	/// returned unchanged and lowest() stays at lowest().
 	constexpr Posit prev() const { return Posit(DeepInit(),v > PT::POSIT_MAXPOS || v <= PT::POSIT_MINNEG ? v : v-1); }

 	/// Next representable value. Values outside [lowest(), max()] are
 	/// returned unchanged and max() stays at max(). The guard mirrors
 	/// prev(): using prev()'s bounds here would pin lowest() in place and
 	/// let max() step onto the special value above it.
 	constexpr Posit next() const { return Posit(DeepInit(),v >= PT::POSIT_MAXPOS || v < PT::POSIT_MINNEG ? v : v+1); }
 	//TBD constexpr bool isexact() const { return (v&1) == 0; }

 	//TBD constexpr bool isfractional() const { return v > 0 && (abs().v < (N2>>2)); } // (0 < x < 1) or  (-1 < x < 0) == (-1,1) removing 0
 	//TBD constexpr bool isstrictlynegative() const { return v > (N2>>1); } // -inf < x < 0

 	// Level 1: unpacked
 	// Level 0: something using posit specialties

 	/// the product is returned unevaluated, see PositMul
 	friend constexpr PositMul operator*(const Posit & a, const Posit & b)
 	{
 		return PositMul(a,b);
 	}

 	friend constexpr Posit operator+(const Posit & a, const PositMul & b)
 	{
 		return fma(b.a,b.b,a);
 	}

 	friend constexpr Posit operator+(const PositMul & a, const Posit & b)
 	{
 		return fma(a.a,a.b,b);
 	}

 	/// a*b+c computed on the unpacked values and packed once
 	friend constexpr Posit fma(const Posit & a, const Posit & b, const Posit & c)
 	{
 		return pack_posit<T,totalbits,esbits,FT,positspec>(a.unpack()*b.unpack()+c.unpack());
 	}

 	CONSTEXPR14 Posit & operator*= (const Posit & b)
 	{
 		*this = pack_posit<T,totalbits,esbits,FT,positspec>(unpack()*b.unpack());
 		return *this;
 	}

 	/// sum; a zero operand returns the other operand without unpacking
 	friend constexpr Posit operator+(const Posit & a, const Posit & b)
 	{
 		return a.iszero() ? b : b.iszero() ? a: pack_posit<T,totalbits,esbits,FT,positspec>(a.unpack()+b.unpack());
 	}

 	Posit& operator+=(const Posit &a) { Posit r = *this+a; v = r.v; return *this; }

 	static constexpr Posit zero() { return Posit(DeepInit(),0); }
 	static constexpr Posit inf() { return Posit(DeepInit(),PT::POSIT_PINF); }
 	static constexpr Posit pinf() { return Posit(DeepInit(),PT::POSIT_PINF); }
 	static constexpr Posit ninf() { return Posit(DeepInit(),PT::POSIT_NINF); }
 	static constexpr Posit max() { return Posit(DeepInit(),PT::POSIT_MAXPOS); }
 	static constexpr Posit min() { return Posit(DeepInit(),PT::POSIT_AFTER0); }
 	static constexpr Posit lowest() { return Posit(DeepInit(),PT::POSIT_MINNEG); }

 	// SFINAE optionally: template<typename U = T, class = typename std::enable_if<withnan, U>::type>
 	static constexpr Posit nan() { return Posit(DeepInit(),PT::POSIT_NAN); }
 	static constexpr Posit infinity() { return Posit(DeepInit(),PT::POSIT_PINF); }
 	static constexpr Posit one() { return Posit(DeepInit(),PT::POSIT_ONE); }
 	static constexpr Posit two() { return Posit(DeepInit(),PT::POSIT_TWO); }
 	static constexpr Posit mone() { return Posit(DeepInit(),PT::POSIT_MONE); }
 	static constexpr Posit onehalf() { return Posit(DeepInit(),PT::POSIT_HALF); }

 	// custom operators
 	constexpr Posit operator-() const { return neg(); }
 	/// note: ~x is the reciprocal, not a bitwise complement
 	constexpr Posit operator~() const { return inv(); }
 	friend constexpr Posit operator-(const Posit & a, const Posit & b)  { return a + (-b); }
 	friend constexpr Posit operator/(const Posit & a, const Posit & b)  { return pack_posit< T,totalbits,esbits,FT,positspec> (a.unpack()/b.unpack()); }

 	// not implemented yet: slowproduct, slowsum, exactvalue, iostream,
 	// sqrt, exp, conversion, max

 	/// bit pattern of the value converted to single precision
 	constexpr uint32_t as_float_bin() const { return unpack().template pack_xfloati<single_trait>(); }
 	constexpr operator UnpackedT() const { return unpack(); }
 #ifndef FPGAHLS
 	constexpr operator float() const { return unpack(); }
 	constexpr operator double() const { return unpack(); }
 #endif
 	constexpr operator int() const { return unpack(); }

 	/// 1/(exp(-x)+1)
 	/// TODO: infinity check + __round of result
 	constexpr Posit pseudosigmoid() const { return Posit(DeepInit(),(v ^ PT::POSIT_SIGNBIT) >> 2); };

 	/// ln(1+exp(x))
 	constexpr Posit pseudosoftplus() const { return Posit(DeepInit(),(v ^ PT::POSIT_SIGNBIT) >> 1); };

 	/// returns true if in [0,1]
 	constexpr bool isUnitRange() const { return v >= 0 && v <= PT::POSIT_ONE; };

 	/// unitary range 1-x
 	constexpr Posit urOneMinus() const { return Posit(DeepInit(),PT::POSIT_INVERTBIT-v); }

 	/// unitary range x(1-x)
 	constexpr Posit urDeltaPs() const { return (*this)*urOneMinus(); }

 	/// holder of the raw value for the detailed stream output
 	struct FullWriter
 	{
 		FullWriter(T x): ax(x) {}
 		T ax;
 	};

 	FullWriter describe() const { return FullWriter(v); }

 #ifndef FPGAHLS
 	/// placeholder: currently writes nothing
 	friend std::ostream & operator << (std::ostream &ons, const FullWriter & w)
 	{
 		return ons;
 	}
 #endif

 };

 #if 0
 // Disabled draft of the Posit(int) constructor: the surrounding `#if 0`
 // excludes it from compilation. As written it cannot compile: it returns
 // values from a constructor and uses `rawexp` and `PP`, neither of which is
 // declared in this body. The exponent/fraction handling looks adapted from a
 // float-packing routine - TODO confirm before reviving.
 //template <class T,int totalbits, int esbits, class FT, PositSpec positspec, class Trait>
 template <class T, int totalbits, int esbits, class FT, PositSpec positspec>
 CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec>::Posit(int xvalue)
 {
 	using Trait=PT;
     using POSIT_UTYPE = typename PT::POSIT_UTYPE;
     using POSIT_STYPE = typename PT::POSIT_STYPE;
     using UT=Unpacked<FT,typename PT::exponenttype>;

     // Zero maps directly to the all-zero raw value.
     if(xvalue == 0)
     {
     	v = 0;
     }
     else
     {
 	    // Split the input into sign and magnitude.
 	    bool negativeSign = xvalue < 0;
 	    int value = xvalue < 0 ? -xvalue: xvalue;

 		auto exponentF = rawexp - Trait::exponent_bias; // ((un.u >> Trait::fraction_bits) & Trait::exponent_mask)
 	    auto fractionF = cast_right_to_left<typename Trait::holder_t,Trait::fraction_bits,FT,UT::FT_bits>()(value);

 		// All-ones raw exponent: infinity when the fraction is zero, NaN otherwise.
 		if(rawexp == ((1 << Trait::exponent_bits)-1))
 		{
 			if(fractionF == 0)
 			{
 				return PP(typename PP::DeepInit(),negativeSign ? PT::POSIT_NINF : PT::POSIT_PINF);
 			}
 			else
 			{
 				return PP(typename PP::DeepInit(),PT::POSIT_NAN);
 			}
 		}
 		// Zero raw exponent: zero, or a value that must be normalized first.
 		else if (rawexp == 0)
 	    {
 	        if(fractionF == 0)
 	        {
 	            negativeSign = false;
 	            return PP::zero();
 	        }
 	        else
 	        {
 	            int k = findbitleftmostC(fractionF);

 	            exponentF -= k;
 	            fractionF <<= k+1; // plus normalization
 	        }	
 	   	}

 	    // Phase 3: compute low as regime (Unpacked_Low)

 		// NOTE(review): eexponent is computed but split_reg_exp below is fed the unclamped exponentF.
 		auto eexponent = clamp<decltype(exponentF)>(exponentF,PT::minexponent,PT::maxexponent); // no overflow
 		auto rr = PT::split_reg_exp(exponentF);
 		auto fraction = cast_msb<FT,sizeof(FT)*8,typename PT::POSIT_UTYPE,sizeof(typename PT::POSIT_UTYPE)*8>()(fractionF); 
 		auto reg = rr.first;
 		auto exp = rr.second;

 		// Phase 4: UnpackedLow to Posit

 	    auto rs = std::max(-reg + 1, reg + 2); 
 	    auto es = std::min((int)(totalbits-rs-1),(int)esbits);

 	    POSIT_UTYPE regbits = reg < 0 ? (PT::POSIT_HOLDER_MSB >> -reg) : (PT::POSIT_MASK << (PT::POSIT_HOLDER_SIZE-(reg+1))); // reg+1 bits on the left
 		POSIT_UTYPE eexp = msb_exp<POSIT_UTYPE,PT::POSIT_HOLDER_SIZE,esbits,(esbits == 00)>()(exp);
 		POSIT_STYPE p = ((fraction >> (rs+es+1)) | (eexp >> (rs+1)) | (regbits>>1)) >> (sizeof(PP)*8-totalbits);

 	    return PP(typename PP::DeepInit(),negativeSign ? -p : p);
 	}
 }
 #endif
 #ifndef FPGAHLS
 /// Stream insertion for Posit: writes the unpacked form of `o` to `ons`
 /// and returns the stream.
 template <class T, int totalbits, int esbits, class FT, PositSpec positspec>
 std::ostream & operator << (std::ostream & ons, Posit<T,totalbits,esbits,FT,positspec> const & o)
 {
 	ons << o.unpack();

 	return ons;
 }
 #endif


 /// Reciprocal of this posit.
 ///
 /// When the low-level fraction is zero the value is a pure power of two,
 /// 2^(regime*scale + exp), and the reciprocal is obtained by adjusting regime
 /// and exp only. Otherwise the value is widened to the full unpacked form,
 /// inverted there, and packed back.
 ///
 /// NOTE(review): the type of the unpacked value is not inspected here;
 /// confirm how zero / NaN / infinity behave on the fraction == 0 path.
 template <class T, int totalbits, int esbits, class FT, PositSpec positspec>
 CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::inv() const -> Posit
 {
 	auto u = unpack_low();

 	// General case: defer to the unpacked inversion.
 	if(u.fraction != 0)
 	{
 		return pack_posit< T,totalbits,esbits,FT,positspec> (unpacked_low2full(u).inv());
 	}

 	// Power of two: -(R*scale + E) == -(R+1)*scale + (scale - E), with E unsigned.
 	if(u.exp != 0)
 	{
 		u.regime = -(u.regime+1);
 		u.exp = PT::POSIT_REG_SCALE-u.exp;
 	}
 	else
 	{
 		// E == 0: only the regime changes sign.
 		u.regime = -u.regime;
 	}
 	return pack_low(u);
 }

 /// Free-function form of unary minus for Posit.
 template <class T, int totalbits, int esbits, class FT, PositSpec positspec>
 constexpr Posit<T,totalbits,esbits,FT,positspec> neg(Posit<T,totalbits,esbits,FT,positspec> x)
 {
 	return -x;
 }

 /// Free-function wrapper that applies Posit's operator~ to `x`
 /// (presumably the reciprocal, given the name - confirm against operator~).
 template <class T, int totalbits, int esbits, class FT, PositSpec positspec>
 constexpr Posit<T,totalbits,esbits,FT,positspec> inv(Posit<T,totalbits,esbits,FT,positspec> x)
 {
 	return ~x;
 }

 /// Functor that left-aligns an exponent field of `ebits` bits inside a
 /// holder of `hbits` bits. The boolean selects the implementation; the
 /// primary template is intentionally empty, only the specializations below
 /// are callable.
 template <class T, int hbits,int ebits, bool zeroes>
 struct msb_exp
 {
 };

 /// zeroes == true: the argument is ignored and 0 is returned.
 template <class T, int hbits,int ebits>
 struct msb_exp<T,hbits,ebits,true>
 {
 	constexpr T operator()(T) const { return 0; }
 };

 /// zeroes == false: the argument is shifted left by (hbits - ebits).
 template <class T, int hbits,int ebits>
 struct msb_exp<T,hbits,ebits,false>
 {
 	constexpr T operator()(T value) const { return value << (hbits-ebits); }
 };

 /// Decodes the raw value `v` into its low-level fields (sign, regime,
 /// exponent and left-aligned fraction) without widening to the full
 /// unpacked format. Infinity, NaN and zero are reported by type only.
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::unpack_low() const -> UnpackedLow
 {
 	using PT=PositTrait<T,totalbits,esbits,positspec>;
 	using POSIT_UTYPE = typename PT::POSIT_UTYPE;

 	// Special values first, in the same order as the checks are defined.
 	if(isinfinity())
 		return UnpackedLow(UnpackedT::Infinity, v < 0);
 	if(isNaN())
 		return UnpackedLow(UnpackedT::NaN);
 	if(v == 0)
 		return UnpackedLow(UnpackedT::Zero);

 	// Regular value: work on the magnitude, remember the sign.
 	bool negativeSign = (v & PT::POSIT_SIGNBIT) != 0;
 	T magnitude = negativeSign ? -v : v;

 	// Drop the unused high bits and the sign bit: MSB is now the regime.
 	POSIT_UTYPE fromRegime = magnitude << (PT::POSIT_EXTRA_BITS+1);
 	auto decoded = PT::decode_posit_rs(fromRegime);
 	int reg = decoded.first;
 	int rs = decoded.second;

 	// Drop the regime: MSB is now the exponent field.
 	POSIT_UTYPE fromExponent = fromRegime << rs;
 	POSIT_UTYPE exp = bitset_leftmost_get_const<T,esbits>()(fromExponent);

 	// Drop the exponent: the fraction is left-aligned in the holder.
 	POSIT_UTYPE frac = fromExponent << esbits;

 	return UnpackedLow(negativeSign,reg,exp,frac);
 }

 /// Encodes low-level fields (sign, regime, exponent, left-aligned fraction)
 /// into a Posit.
 ///
 /// Non-regular types are mapped directly: Zero becomes raw 0, while Infinity
 /// and NaN fall back to each other when `positspec` lacks one of them.
 /// For regular values the regime, exponent and fraction bit patterns are
 /// built left-aligned in the holder, OR-ed together below the sign bit, and
 /// shifted right so that the result occupies `totalbits` bits. The result is
 /// negated when x.negativeSign is set.
 ///
 /// NOTE(review): `es` becomes negative when rs > totalbits-1, which makes
 /// the shift amounts below questionable; unpacked_full2low() clamps the
 /// exponent first, but half()/twice() change the regime without clamping -
 /// confirm the regime is always in range here.
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::pack_low(UnpackedLow x) -> Posit
 {
 	using PP=Posit<T,totalbits,esbits,FT,positspec>;
    using PT=typename Posit<T,totalbits,esbits,FT,positspec>::PT;
    using POSIT_UTYPE = typename PT::POSIT_UTYPE;
    using POSIT_STYPE = typename PT::POSIT_STYPE;

    // Special values do not go through the bit assembly below.
    switch(x.type)
 	{
 		case UnpackedT::Infinity:
 			// if infinity is missing return nan
 			return positspec != PositSpec::WithNan ? (x.negativeSign ? PP::ninf(): PP::pinf()): PP::nan();
 		case UnpackedT::Zero:
 			return PP(typename PP::DeepInit(),0);
 		case UnpackedT::NaN:
 			// if nan is missing return infinity
 			return positspec != PositSpec::WithInf ? PP::nan() : PP::pinf();
 		default:
 			break;
 	}

 	auto exp = x.exp;
 	auto reg = x.regime;

    // Regime field size rs:
    // for reg>=0: (reg+1) ones followed by a terminating 0 => size is reg+2
    // for reg <0: (-reg) zeros followed by a terminating 1 => size is -reg+1
    auto rs = -reg+1 > reg+2 ? -reg+1:reg+2; //std::max(-reg + 1, reg + 2);  MSVC issue
    // Exponent bits that still fit after the sign bit and the regime.
    auto es = (totalbits-rs-1) < esbits ? (totalbits-rs-1): esbits; //std::min((int)(totalbits-rs-1),(int)esbits);  MSVC issue
    

    POSIT_UTYPE regbits = reg < 0 ? (PT::POSIT_HOLDER_MSB >> -reg) : (PT::POSIT_MASK << (PT::POSIT_HOLDER_SIZE-(reg+1))); // reg+1 bits on the left
 	// Exponent left-aligned in the holder (0 when esbits == 0).
 	POSIT_UTYPE eexp = msb_exp<POSIT_UTYPE,PT::POSIT_HOLDER_SIZE,esbits,(esbits == 00)>()(exp);
 	POSIT_UTYPE fraction =  x.fraction;
 	// Layout from the MSB: sign slot (1 bit), regime (rs), exponent (es), fraction;
 	// the final shift right-aligns the pattern into totalbits bits.
 	POSIT_STYPE p = ((fraction >> (rs+es+1)) | (eexp >> (rs+1)) | (regbits>>1)) >> (sizeof(T)*8-totalbits);
 	//std::cout << "incoming " << x << std::endl;
 	//std::cout << "fraction before " << std::bitset<sizeof(FT)*8>(x.fraction) << " and " << " after " << std::bitset<sizeof(POSIT_UTYPE)*8>(fraction) << " residual exponent " << exp << " from " << eexponent <<  " and regime " << reg << std::endl;
 	//std::cout << "output sign " << std::bitset<sizeof(T)*8>(p) << " then " << std::bitset<sizeof(T)*8>(-p) << std::endl; 
    return PP(typename PP::DeepInit(),x.negativeSign ? -p : p);
 }

 /// Returns this value divided by two; non-regular values (zero, NaN,
 /// infinity) are returned unchanged.
 ///
 /// A regular value is +-2^(R*scale + E) * 1.xyz, so halving only lowers the
 /// exponent by one: E-- when E is not zero, otherwise R-- and E wraps to the
 /// largest exponent value (POSIT_REG_SCALE-1).
 /// NOTE(review): a regime underflow is not checked here.
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::half() const -> Posit<T,totalbits,esbits,FT,positspec>
 {
 	UnpackedLow low = unpack_low();
 	if(low.type != UnpackedT::Regular)
 	{
 		return *this;
 	}

 	if(low.exp != 0)
 	{
 		low.exp--;
 	}
 	else
 	{
 		// Borrow from the regime.
 		low.regime--;
 		low.exp = PT::POSIT_REG_SCALE-1;
 	}
 	return pack_low(low);
 }

 /// Returns this value multiplied by two; non-regular values (zero, NaN,
 /// infinity) are returned unchanged.
 ///
 /// A regular value is +-2^(R*scale + E) * 1.xyz, so doubling only raises the
 /// exponent by one: E++ unless E is already POSIT_REG_SCALE-1, in which case
 /// R++ and E wraps to 0.
 /// NOTE(review): a regime overflow is not checked here.
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::twice() const -> Posit<T,totalbits,esbits,FT,positspec>
 {
 	UnpackedLow low = unpack_low();
 	if(low.type != UnpackedT::Regular)
 	{
 		return *this;
 	}

 	if(low.exp != PT::POSIT_REG_SCALE-1)
 	{
 		low.exp++;
 	}
 	else
 	{
 		// Carry into the regime.
 		low.regime++;
 		low.exp = 0;
 	}
 	return pack_low(low);
 }
 /// Widens low-level fields to the full unpacked representation. Type and
 /// sign are always copied; fraction and exponent are only filled in for
 /// regular values (regime and exp are joined into a single exponent).
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::unpacked_low2full(UnpackedLow q) -> UnpackedT
 {
 	using POSIT_UTYPE = typename PT::POSIT_UTYPE;

 	UnpackedT full;
 	full.type = q.type;
 	full.negativeSign = q.negativeSign;
 	if(q.type != UnpackedT::Regular)
 	{
 		return full;
 	}

 	// Move the fraction from the posit holder width to the FT width.
 	full.fraction = cast_msb<POSIT_UTYPE,PT::POSIT_HOLDER_SIZE,FT,UnpackedT::FT_bits>()(q.fraction);
 	full.exponent = PT::join_reg_exp(q.regime,q.exp);
 	return full;
 }

 /// Narrows a full unpacked value to low-level fields. For regular values
 /// the exponent is clamped to [PT::minexponent(), PT::maxexponent()] and
 /// split into regime and exp; other types keep only type and sign.
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::unpacked_full2low(UnpackedT x) -> UnpackedLow
 {
 	if(x.type != UnpackedT::Regular)
 	{
 		return UnpackedLow(x.type,x.negativeSign);
 	}

 	// Clamp before splitting so the regime cannot go out of range.
 	auto bounded = clamp<decltype(x.exponent)>(x.exponent,PT::minexponent(),PT::maxexponent());
 	auto regexp = PT::split_reg_exp(bounded);
 	// Move the fraction from the FT width to the posit holder width.
 	auto lowfrac = cast_msb<FT,sizeof(FT)*8,typename PT::POSIT_UTYPE,sizeof(typename PT::POSIT_UTYPE)*8>()(x.fraction);
 	return UnpackedLow(x.negativeSign,regexp.first,regexp.second,lowfrac);
 }


 /// Packs a full unpacked value into a Posit by first narrowing it to the
 /// low-level fields and then encoding those.
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> pack_posit(const typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT & x)
 {
 	using Target=Posit<T,totalbits,esbits,FT,positspec>;
 	return Target::pack_low(Target::unpacked_full2low(x));
 }


 /// Decodes the raw value into an `info` record for diagnostics.
 ///
 /// Infinity sets `infinity` (and `sign` only for PositSpec::WithNanInf),
 /// NaN sets `nan`, zero returns a default-constructed record. For regular
 /// values the record holds the sign, regime (`k`), regime size (`rs`),
 /// exponent field (`exp`), joined exponent (`exponent`), the effective
 /// exponent/fraction sizes (`es`, `fs`) and the fraction left-aligned in FT
 /// (`ifraction`).
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 auto Posit<T,totalbits,esbits,FT,positspec>::analyze() -> info
 {
 	using UT=UnpackedT;
 	using POSIT_UTYPE = typename PT::POSIT_UTYPE;

 	auto pa = v;
 	info i;
 	if(isinfinity())
 	{
 		// The sign of infinity is only reported when the spec has both NaN and infinity.
 		if(positspec == PositSpec::WithNanInf)
 			i.sign = (pa & PT::POSIT_SIGNBIT) != 0;
 		i.infinity = true;
 		return i;
 	}
 	if(isNaN())
 	{
 		i.nan = true;
 		return i;
 	}
 	if(v == 0)
 	{
 		return i;
 	}

 	i.sign = (pa & PT::POSIT_SIGNBIT) != 0;
 	pa = pcabs(pa);
 	POSIT_UTYPE pars = pa << (PT::POSIT_EXTRA_BITS+1); // MSB: RS ES FS
 	auto q = PT::decode_posit_rs(pars);
 	int reg = q.first;
 	int rs = q.second;
 	pars <<= rs; // MSB: ES FS
 	POSIT_UTYPE exp = bitset_leftmost_getT(pars,esbits);
 	pars <<= esbits; // MSB: FS left aligned in the holder

 	// Re-align the fraction from the holder width to the FT width with the
 	// same helper unpacked_low2full() uses. The previous hand-written
 	// `pars << n` / `pars >> n` ternary shifted in the narrow holder type
 	// before widening (losing bits or invoking undefined behaviour when FT is
 	// wider than the holder) and always contained one negative shift count.
 	i.ifraction = cast_msb<POSIT_UTYPE,PT::POSIT_HOLDER_SIZE,FT,UT::FT_bits>()(pars);
 	i.exponent = PT::join_reg_exp(reg,exp);
 	i.exp = exp;
 	i.rs = rs;
 	i.k = reg;
 	// Exponent bits actually present after sign and regime (std::min avoided: MSVC issue).
 	i.es = totalbits-rs-1 < esbits ? totalbits-rs-1 : esbits;
 	i.fs = totalbits-rs-i.es-1;
 	return i;
 }



 /// Unpacks a Posit into the full unpacked representation: the low-level
 /// fields are decoded first and then widened.
 template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 CONSTEXPR14 auto unpack_posit(const Posit<T,totalbits,esbits,FT,positspec> & p) -> typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT 
 {
 	using Source=Posit<T,totalbits,esbits,FT,positspec>;
 	return Source::unpacked_low2full(p.unpack_low());
 }

 #ifndef FPGAHLS
 /// Writes a human-readable breakdown of the posit with raw value `v` to `ons`.
 ///
 /// @tparam X   posit type; must provide value_t, DeepInit, UnpackedT, info,
 ///             PT::positspec, unpack() and analyze().
 /// @param ons  destination stream; its formatting flags are left as they
 ///             were on entry (previously the stream was left in hex mode).
 /// @param v    raw posit bits.
 ///
 /// The "binary:" field shows the raw bits obtained after an unpack/pack
 /// round trip of `v`, not `v` itself. The "exp:" field prints 1<<exp.
 template <class X>
 void printinfo(std::ostream & ons, typename X::value_t v)
 {
 	using Q= typename printableinttype<typename X::value_t>::type;
 	X x(typename X::DeepInit(),v); // load the posit
 	typename X::UnpackedT u(x.unpack()); // unpack it
 	X xux(u); // pack it again
 	typename X::info ii = x.analyze();

 	if(ii.infinity)
 	{
 		ons << (X::PT::positspec == PositSpec::WithNanInf ? (ii.sign ? "posit(-infinity)" : "posit(+infinity)") : "posit(infinity)");
 		return;
 	}
 	if(ii.nan)
 	{
 		ons << "posit(nan)";
 		return;
 	}

 	// std::dec / std::hex below are sticky: remember the caller's flags so
 	// they can be put back once the record has been written.
 	const std::ios_base::fmtflags savedFlags = ons.flags();

 	ons << " posit(" << (ii.sign ? "-" : "+") ;
 	ons << " rs/es/fs:" << std::dec <<  ii.rs << "/" << ii.es << "/" << ii.fs << " ";
 	ons << " k:" << std::dec << (Q)ii.k ;
 	ons << " exp:" << std::dec << (1<<ii.exp);
 	ons << " ifraction:" << std::hex << (Q)ii.ifraction;
 	ons << " binary:" << std::bitset<sizeof(typename X::value_t)*8>(xux.v) << ")";

 	ons.flags(savedFlags);
 }
 #endif

 #ifndef FPGAHLS

 /// Stream adapter: holds a posit by value and, when inserted into a
 /// std::ostream, prints it through printinfo<T>().
 template <class T>
 struct posit_formatter
 {
 	T posit; ///< the posit to describe

 	posit_formatter(T p)
 		: posit(p)
 	{
 	}

 	/// Writes the printinfo<T>() breakdown of x.posit to `ons`.
 	friend std::ostream & operator << (std::ostream & ons, const posit_formatter & x)
 	{
 		printinfo<T>(ons,x.posit.v);
 		return ons;
 	}
 };
 #endif

 namespace std
 {
 	/// Absolute value of a Posit: builds a new posit directly from
 	/// pcabs() applied to the raw bits of `z`.
 	template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 	inline CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> abs(Posit<T,totalbits,esbits,FT,positspec> z) 
 	{
 		using PP=Posit<T,totalbits,esbits,FT,positspec>;
 		// DeepInit is a type nested in a dependent class, so `typename` is
 		// required here; without it conforming compilers reject the
 		// instantiation (the rest of the file already spells it this way).
 		return PP(typename PP::DeepInit(),pcabs(z.v));
 	}

 	/// Smaller of two posits; `a` is returned when a <= b, `b` otherwise.
 	template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 	inline CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> min(Posit<T,totalbits,esbits,FT,positspec> a, Posit<T,totalbits,esbits,FT,positspec> b)
 	{
 		if(a <= b)
 		{
 			return a;
 		}
 		return b;
 	}

 	/// Larger of two posits; `a` is returned when a >= b, `b` otherwise.
 	template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 	inline CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> max(Posit<T,totalbits,esbits,FT,positspec> a, Posit<T,totalbits,esbits,FT,positspec> b)
 	{
 		if(a >= b)
 		{
 			return a;
 		}
 		return b;
 	}

 	/// std::numeric_limits specialization for Posit.
 	///
 	/// Values are forwarded to the static members of the posit type and its
 	/// trait. Members that are commented out are intentionally not provided.
 	template <class B,int totalbits, int esbits, class FT, PositSpec positspec> class numeric_limits<Posit<B,totalbits,esbits,FT,positspec> > {
 	public:
 	  using T=Posit<B,totalbits,esbits,FT,positspec>;
 	  using PT=typename T::PT;
 	  static constexpr bool is_specialized = true;
 	  static constexpr T min() noexcept { return T::min(); }
 	  static constexpr T max() noexcept { return T::max(); }
 	  static constexpr T lowest() noexcept { return T::lowest(); }
 	  //static constexpr int  digits = 0; number of digits (in radix base) in the mantissa 
 	  //static constexpr int  digits10 = 0;
 	  static constexpr bool is_signed = true;
 	  static constexpr bool is_integer = false;
 	  static constexpr bool is_exact = false;
 	  static constexpr int radix = 2;
 	  /// Distance between one and the next representable posit.
 	  static constexpr T epsilon() noexcept { return T::one().next()-T::one(); }
 	  //static constexpr T round_error() noexcept { return T(); } 

 	  // Exponent range as reported by the posit trait.
 	  // NOTE(review): the standard defines min_exponent/max_exponent as one
 	  // more than the extreme exponents; confirm which convention PT uses.
 	  static constexpr int  min_exponent = PT::minexponent();
 	  // static constexpr int  min_exponent10 = 0;
 	  static constexpr int  max_exponent = PT::maxexponent();
 	  //static constexpr int  max_exponent10 = 0;

 	  // A WithNan posit has no infinity and a WithInf posit has no NaN,
 	  // matching the fallbacks applied when packing special values.
 	  static constexpr bool has_infinity = positspec != PositSpec::WithNan;
 	  static constexpr bool has_quiet_NaN = positspec != PositSpec::WithInf;
 	  static constexpr bool has_signaling_NaN = false;
 	  //static constexpr float_denorm_style has_denorm = denorm_absent;
 	  static constexpr bool has_denorm_loss = false;
 	  static constexpr T infinity() noexcept { return T::infinity(); }
 	  static constexpr T quiet_NaN() noexcept { return T::nan(); }
 	  //static constexpr T signaling_NaN() noexcept { return T(); }
 	  /// No subnormals: same as min().
 	  static constexpr T denorm_min() noexcept { return T::min(); }

 	  static constexpr bool is_iec559 = false;
 	  // A fixed-width posit represents a finite set of values.
 	  static constexpr bool is_bounded = true;
 	  static constexpr bool is_modulo = false;

 	  static constexpr bool traps = false;
 	  static constexpr bool tinyness_before = false;
 	  //static constexpr float_round_style round_style = round_toward_zero;
 	  /*
 	  round_toward_zero, if it rounds toward zero.
 round_to_nearest, if it rounds to the nearest representable value.
 round_toward_infinity, if it rounds toward infinity.
 round_toward_neg_infinity, if it rounds toward negative infinity.
 round_indeterminate, if the rounding style is indeterminable at compile time.
 */
 	};

 }