This commit is contained in:
iceman1001 2019-04-08 08:23:11 +02:00
parent 9328a8efbb
commit 075ccebfd5

View file

@ -49,43 +49,43 @@ static inline double decode_half(unsigned short half) {
/* software implementation of float-to-fp16 conversions */
/*
 * Convert a double to the bit pattern of a half-precision (fp16) value:
 * 1 sign bit, 5 exponent bits (bias 15), 10 fraction bits.
 *
 * The bits of `val` are read assuming the IEEE 754 binary64 layout
 * (sign in bit 63, 11-bit exponent biased by 1023, 52-bit fraction).
 *
 * The fraction is truncated, never rounded. Special cases:
 *   - infinity / NaN        -> exponent field 31
 *   - magnitude too large   -> largest finite half (0x7bff, plus sign)
 *   - magnitude below 2^-24 -> 0; note the sign is dropped here, so
 *                              -0.0 also encodes as 0x0000
 */
static inline unsigned short encode_half(double val) {
    uint64_t v;
    /* named exp1 rather than exp so it does not shadow exp() from <math.h> */
    int sign, exp1, mant;

    memcpy(&v, &val, sizeof(v));    /* type-pun without violating aliasing rules */
    sign = v >> 63 << 15;           /* sign bit moved to its fp16 position (0x8000) */
    exp1 = (v >> 52) & 0x7ff;
    /* drop sign + exponent, then keep the 10 most significant fraction bits */
    mant = v << 12 >> 12 >> (53 - 11);
    exp1 -= 1023;                   /* remove the binary64 bias */

    if (exp1 == 1024) {
        /* infinity or NaN: exp1 + 15 == 31 in the final expression.
         * The shift keeps only the top 9 fraction bits, so a NaN whose
         * payload lives entirely below them comes out as infinity. */
        exp1 = 16;
        mant >>= 1;
    } else if (exp1 >= 16) {
        /* overflow, encode as the largest finite number */
        exp1 = 15;
        mant = 1023;
    } else if (exp1 >= -14) {
        /* regular normal: exponent and fraction are used as they are */
    } else if (exp1 >= -24) {
        /* subnormal: make the implicit leading 1 explicit, then shift it
         * right by the distance below the smallest normal exponent */
        mant |= 1024;
        mant >>= -(exp1 + 14);
        exp1 = -15;                 /* exp1 + 15 == 0 -> subnormal exponent field */
    } else {
        /* underflow, make zero */
        return 0;
    }

    /* safe cast here as bit operations above guarantee not to overflow */
    return (unsigned short)(sign | ((exp1 + 15) << 10) | mant);
}
/* this function was copied & adapted from RFC 7049 Appendix D */
/*
 * Expand a half-precision (fp16) bit pattern into a double.
 *
 * Layout of `half`: bit 15 sign, bits 14..10 exponent (bias 15),
 * bits 9..0 fraction.
 *   - exponent field 0  -> zero or subnormal: fraction * 2^-24
 *   - exponent field 31 -> infinity (fraction == 0) or NaN
 *   - otherwise         -> (1024 + fraction) * 2^(exponent - 25)
 */
static inline double decode_half(unsigned short half) {
    /* named exp1 rather than exp so it does not shadow exp() from <math.h> */
    int exp1 = (half >> 10) & 0x1f;
    int mant = half & 0x3ff;
    double val;

    if (exp1 == 0) {
        val = ldexp(mant, -24);
    } else if (exp1 != 31) {
        /* adding 1024 restores the implicit leading 1 of a normal number */
        val = ldexp(mant + 1024, exp1 - 25);
    } else {
        val = mant == 0 ? INFINITY : NAN;
    }

    return (half & 0x8000) ? -val : val;
}