fpu: Update softfloat to QEMU 2.0.0
Change-Id: Ia59fc5ec65865b1bb86df33e7f2eeb659084eda4
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index dbda61b..e00a6fb 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -42,6 +42,9 @@
#include "fpu/softfloat.h"
+/* We only need stdlib for abort() */
+#include <stdlib.h>
+
/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations. (Can be specialized to target if
@@ -59,21 +62,6 @@
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.h"
-void set_float_rounding_mode(int val STATUS_PARAM)
-{
- STATUS(float_rounding_mode) = val;
-}
-
-void set_float_exception_flags(int val STATUS_PARAM)
-{
- STATUS(float_exception_flags) = val;
-}
-
-void set_floatx80_rounding_precision(int val STATUS_PARAM)
-{
- STATUS(floatx80_rounding_precision) = val;
-}
-
/*----------------------------------------------------------------------------
| Returns the fraction bits of the half-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
@@ -121,20 +109,22 @@
roundingMode = STATUS(float_rounding_mode);
roundNearestEven = ( roundingMode == float_round_nearest_even );
- roundIncrement = 0x40;
- if ( ! roundNearestEven ) {
- if ( roundingMode == float_round_to_zero ) {
- roundIncrement = 0;
- }
- else {
- roundIncrement = 0x7F;
- if ( zSign ) {
- if ( roundingMode == float_round_up ) roundIncrement = 0;
- }
- else {
- if ( roundingMode == float_round_down ) roundIncrement = 0;
- }
- }
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ case float_round_ties_away:
+ roundIncrement = 0x40;
+ break;
+ case float_round_to_zero:
+ roundIncrement = 0;
+ break;
+ case float_round_up:
+ roundIncrement = zSign ? 0 : 0x7f;
+ break;
+ case float_round_down:
+ roundIncrement = zSign ? 0x7f : 0;
+ break;
+ default:
+ abort();
}
roundBits = absZ & 0x7F;
absZ = ( absZ + roundIncrement )>>7;
@@ -170,19 +160,22 @@
roundingMode = STATUS(float_rounding_mode);
roundNearestEven = ( roundingMode == float_round_nearest_even );
- increment = ( (int64_t) absZ1 < 0 );
- if ( ! roundNearestEven ) {
- if ( roundingMode == float_round_to_zero ) {
- increment = 0;
- }
- else {
- if ( zSign ) {
- increment = ( roundingMode == float_round_down ) && absZ1;
- }
- else {
- increment = ( roundingMode == float_round_up ) && absZ1;
- }
- }
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ case float_round_ties_away:
+ increment = ((int64_t) absZ1 < 0);
+ break;
+ case float_round_to_zero:
+ increment = 0;
+ break;
+ case float_round_up:
+ increment = !zSign && absZ1;
+ break;
+ case float_round_down:
+ increment = zSign && absZ1;
+ break;
+ default:
+ abort();
}
if ( increment ) {
++absZ0;
@@ -204,6 +197,61 @@
}
/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit unsigned integer corresponding to the
+| input. Ordinarily, the fixed-point input is simply rounded to an integer,
+| with the inexact exception raised if the input cannot be represented exactly
+| as an integer. However, if the fixed-point input is too large, the invalid
+| exception is raised and the largest unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+static int64 roundAndPackUint64(flag zSign, uint64_t absZ0,
+ uint64_t absZ1 STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven, increment;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = (roundingMode == float_round_nearest_even);
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ case float_round_ties_away:
+ increment = ((int64_t)absZ1 < 0);
+ break;
+ case float_round_to_zero:
+ increment = 0;
+ break;
+ case float_round_up:
+ increment = !zSign && absZ1;
+ break;
+ case float_round_down:
+ increment = zSign && absZ1;
+ break;
+ default:
+ abort();
+ }
+ if (increment) {
+ ++absZ0;
+ if (absZ0 == 0) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return LIT64(0xFFFFFFFFFFFFFFFF);
+ }
+ absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
+ }
+
+ if (zSign && absZ0) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+
+ if (absZ1) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return absZ0;
+}
+
+/*----------------------------------------------------------------------------
| Returns the fraction bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
@@ -240,7 +288,7 @@
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
-static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
+float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
{
if (STATUS(flush_inputs_to_zero)) {
if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
@@ -319,20 +367,23 @@
roundingMode = STATUS(float_rounding_mode);
roundNearestEven = ( roundingMode == float_round_nearest_even );
- roundIncrement = 0x40;
- if ( ! roundNearestEven ) {
- if ( roundingMode == float_round_to_zero ) {
- roundIncrement = 0;
- }
- else {
- roundIncrement = 0x7F;
- if ( zSign ) {
- if ( roundingMode == float_round_up ) roundIncrement = 0;
- }
- else {
- if ( roundingMode == float_round_down ) roundIncrement = 0;
- }
- }
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ case float_round_ties_away:
+ roundIncrement = 0x40;
+ break;
+ case float_round_to_zero:
+ roundIncrement = 0;
+ break;
+ case float_round_up:
+ roundIncrement = zSign ? 0 : 0x7f;
+ break;
+ case float_round_down:
+ roundIncrement = zSign ? 0x7f : 0;
+ break;
+ default:
+ abort();
+ break;
}
roundBits = zSig & 0x7F;
if ( 0xFD <= (uint16_t) zExp ) {
@@ -422,7 +473,7 @@
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
-static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
+float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
{
if (STATUS(flush_inputs_to_zero)) {
if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
@@ -501,20 +552,22 @@
roundingMode = STATUS(float_rounding_mode);
roundNearestEven = ( roundingMode == float_round_nearest_even );
- roundIncrement = 0x200;
- if ( ! roundNearestEven ) {
- if ( roundingMode == float_round_to_zero ) {
- roundIncrement = 0;
- }
- else {
- roundIncrement = 0x3FF;
- if ( zSign ) {
- if ( roundingMode == float_round_up ) roundIncrement = 0;
- }
- else {
- if ( roundingMode == float_round_down ) roundIncrement = 0;
- }
- }
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ case float_round_ties_away:
+ roundIncrement = 0x200;
+ break;
+ case float_round_to_zero:
+ roundIncrement = 0;
+ break;
+ case float_round_up:
+ roundIncrement = zSign ? 0 : 0x3ff;
+ break;
+ case float_round_down:
+ roundIncrement = zSign ? 0x3ff : 0;
+ break;
+ default:
+ abort();
}
roundBits = zSig & 0x3FF;
if ( 0x7FD <= (uint16_t) zExp ) {
@@ -684,19 +737,21 @@
goto precision80;
}
zSig0 |= ( zSig1 != 0 );
- if ( ! roundNearestEven ) {
- if ( roundingMode == float_round_to_zero ) {
- roundIncrement = 0;
- }
- else {
- roundIncrement = roundMask;
- if ( zSign ) {
- if ( roundingMode == float_round_up ) roundIncrement = 0;
- }
- else {
- if ( roundingMode == float_round_down ) roundIncrement = 0;
- }
- }
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ case float_round_ties_away:
+ break;
+ case float_round_to_zero:
+ roundIncrement = 0;
+ break;
+ case float_round_up:
+ roundIncrement = zSign ? 0 : roundMask;
+ break;
+ case float_round_down:
+ roundIncrement = zSign ? roundMask : 0;
+ break;
+ default:
+ abort();
}
roundBits = zSig0 & roundMask;
if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
@@ -743,19 +798,22 @@
if ( zSig0 == 0 ) zExp = 0;
return packFloatx80( zSign, zExp, zSig0 );
precision80:
- increment = ( (int64_t) zSig1 < 0 );
- if ( ! roundNearestEven ) {
- if ( roundingMode == float_round_to_zero ) {
- increment = 0;
- }
- else {
- if ( zSign ) {
- increment = ( roundingMode == float_round_down ) && zSig1;
- }
- else {
- increment = ( roundingMode == float_round_up ) && zSig1;
- }
- }
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ case float_round_ties_away:
+ increment = ((int64_t)zSig1 < 0);
+ break;
+ case float_round_to_zero:
+ increment = 0;
+ break;
+ case float_round_up:
+ increment = !zSign && zSig1;
+ break;
+ case float_round_down:
+ increment = zSign && zSig1;
+ break;
+ default:
+ abort();
}
if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
if ( ( 0x7FFE < zExp )
@@ -785,16 +843,22 @@
zExp = 0;
if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
- if ( roundNearestEven ) {
- increment = ( (int64_t) zSig1 < 0 );
- }
- else {
- if ( zSign ) {
- increment = ( roundingMode == float_round_down ) && zSig1;
- }
- else {
- increment = ( roundingMode == float_round_up ) && zSig1;
- }
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ case float_round_ties_away:
+ increment = ((int64_t)zSig1 < 0);
+ break;
+ case float_round_to_zero:
+ increment = 0;
+ break;
+ case float_round_up:
+ increment = !zSign && zSig1;
+ break;
+ case float_round_down:
+ increment = zSign && zSig1;
+ break;
+ default:
+ abort();
}
if ( increment ) {
++zSig0;
@@ -994,19 +1058,22 @@
roundingMode = STATUS(float_rounding_mode);
roundNearestEven = ( roundingMode == float_round_nearest_even );
- increment = ( (int64_t) zSig2 < 0 );
- if ( ! roundNearestEven ) {
- if ( roundingMode == float_round_to_zero ) {
- increment = 0;
- }
- else {
- if ( zSign ) {
- increment = ( roundingMode == float_round_down ) && zSig2;
- }
- else {
- increment = ( roundingMode == float_round_up ) && zSig2;
- }
- }
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ case float_round_ties_away:
+ increment = ((int64_t)zSig2 < 0);
+ break;
+ case float_round_to_zero:
+ increment = 0;
+ break;
+ case float_round_up:
+ increment = !zSign && zSig2;
+ break;
+ case float_round_down:
+ increment = zSign && zSig2;
+ break;
+ default:
+ abort();
}
if ( 0x7FFD <= (uint32_t) zExp ) {
if ( ( 0x7FFD < zExp )
@@ -1054,16 +1121,22 @@
zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
zExp = 0;
if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
- if ( roundNearestEven ) {
- increment = ( (int64_t) zSig2 < 0 );
- }
- else {
- if ( zSign ) {
- increment = ( roundingMode == float_round_down ) && zSig2;
- }
- else {
- increment = ( roundingMode == float_round_up ) && zSig2;
- }
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ case float_round_ties_away:
+ increment = ((int64_t)zSig2 < 0);
+ break;
+ case float_round_to_zero:
+ increment = 0;
+ break;
+ case float_round_up:
+ increment = !zSign && zSig2;
+ break;
+ case float_round_down:
+ increment = zSign && zSig2;
+ break;
+ default:
+ abort();
}
}
}
@@ -1121,7 +1194,7 @@
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-float32 int32_to_float32( int32 a STATUS_PARAM )
+float32 int32_to_float32(int32_t a STATUS_PARAM)
{
flag zSign;
@@ -1138,7 +1211,7 @@
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-float64 int32_to_float64( int32 a STATUS_PARAM )
+float64 int32_to_float64(int32_t a STATUS_PARAM)
{
flag zSign;
uint32 absA;
@@ -1161,7 +1234,7 @@
| Arithmetic.
*----------------------------------------------------------------------------*/
-floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
+floatx80 int32_to_floatx80(int32_t a STATUS_PARAM)
{
flag zSign;
uint32 absA;
@@ -1183,7 +1256,7 @@
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-float128 int32_to_float128( int32 a STATUS_PARAM )
+float128 int32_to_float128(int32_t a STATUS_PARAM)
{
flag zSign;
uint32 absA;
@@ -1205,7 +1278,7 @@
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-float32 int64_to_float32( int64 a STATUS_PARAM )
+float32 int64_to_float32(int64_t a STATUS_PARAM)
{
flag zSign;
uint64 absA;
@@ -1231,7 +1304,7 @@
}
-float32 uint64_to_float32( uint64 a STATUS_PARAM )
+float32 uint64_to_float32(uint64_t a STATUS_PARAM)
{
int8 shiftCount;
@@ -1258,7 +1331,7 @@
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-float64 int64_to_float64( int64 a STATUS_PARAM )
+float64 int64_to_float64(int64_t a STATUS_PARAM)
{
flag zSign;
@@ -1271,7 +1344,7 @@
}
-float64 uint64_to_float64(uint64 a STATUS_PARAM)
+float64 uint64_to_float64(uint64_t a STATUS_PARAM)
{
int exp = 0x43C;
@@ -1292,7 +1365,7 @@
| Arithmetic.
*----------------------------------------------------------------------------*/
-floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
+floatx80 int64_to_floatx80(int64_t a STATUS_PARAM)
{
flag zSign;
uint64 absA;
@@ -1312,7 +1385,7 @@
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-float128 int64_to_float128( int64 a STATUS_PARAM )
+float128 int64_to_float128(int64_t a STATUS_PARAM)
{
flag zSign;
uint64 absA;
@@ -1339,7 +1412,7 @@
}
-float128 uint64_to_float128(uint64 a STATUS_PARAM)
+float128 uint64_to_float128(uint64_t a STATUS_PARAM)
{
if (a == 0) {
return float128_zero;
@@ -1509,6 +1582,72 @@
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit unsigned integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| unsigned integer is returned. Otherwise, if the conversion overflows, the
+| largest unsigned integer is returned. If the 'a' is negative, the result
+| is rounded and zero is returned; values that do not round to zero will
+| raise the inexact exception flag.
+*----------------------------------------------------------------------------*/
+
+uint64 float32_to_uint64(float32 a STATUS_PARAM)
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint32_t aSig;
+ uint64_t aSig64, aSigExtra;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat32Frac(a);
+ aExp = extractFloat32Exp(a);
+ aSign = extractFloat32Sign(a);
+ if ((aSign) && (aExp > 126)) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ if (float32_is_any_nan(a)) {
+ return LIT64(0xFFFFFFFFFFFFFFFF);
+ } else {
+ return 0;
+ }
+ }
+ shiftCount = 0xBE - aExp;
+ if (aExp) {
+ aSig |= 0x00800000;
+ }
+ if (shiftCount < 0) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return LIT64(0xFFFFFFFFFFFFFFFF);
+ }
+
+ aSig64 = aSig;
+ aSig64 <<= 40;
+ shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
+ return roundAndPackUint64(aSign, aSig64, aSigExtra STATUS_VAR);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit unsigned integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero. If
+| `a' is a NaN, the largest unsigned integer is returned. Otherwise, if the
+| conversion overflows, the largest unsigned integer is returned. If the
+| 'a' is negative, the result is rounded and zero is returned; values that do
+| not round to zero will raise the inexact flag.
+*----------------------------------------------------------------------------*/
+
+uint64 float32_to_uint64_round_to_zero(float32 a STATUS_PARAM)
+{
+ signed char current_rounding_mode = STATUS(float_rounding_mode);
+ set_float_rounding_mode(float_round_to_zero STATUS_VAR);
+ int64_t v = float32_to_uint64(a STATUS_VAR);
+ set_float_rounding_mode(current_rounding_mode STATUS_VAR);
+ return v;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
| `a' to the 64-bit two's complement integer format. The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic, except that the conversion is always rounded toward zero. If
@@ -1656,7 +1795,6 @@
flag aSign;
int_fast16_t aExp;
uint32_t lastBitMask, roundBitsMask;
- int8 roundingMode;
uint32_t z;
a = float32_squash_input_denormal(a STATUS_VAR);
@@ -1677,6 +1815,11 @@
return packFloat32( aSign, 0x7F, 0 );
}
break;
+ case float_round_ties_away:
+ if (aExp == 0x7E) {
+ return packFloat32(aSign, 0x7F, 0);
+ }
+ break;
case float_round_down:
return make_float32(aSign ? 0xBF800000 : 0);
case float_round_up:
@@ -1688,15 +1831,30 @@
lastBitMask <<= 0x96 - aExp;
roundBitsMask = lastBitMask - 1;
z = float32_val(a);
- roundingMode = STATUS(float_rounding_mode);
- if ( roundingMode == float_round_nearest_even ) {
+ switch (STATUS(float_rounding_mode)) {
+ case float_round_nearest_even:
z += lastBitMask>>1;
- if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
- }
- else if ( roundingMode != float_round_to_zero ) {
- if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
+ if ((z & roundBitsMask) == 0) {
+ z &= ~lastBitMask;
+ }
+ break;
+ case float_round_ties_away:
+ z += lastBitMask >> 1;
+ break;
+ case float_round_to_zero:
+ break;
+ case float_round_up:
+ if (!extractFloat32Sign(make_float32(z))) {
z += roundBitsMask;
}
+ break;
+ case float_round_down:
+ if (extractFloat32Sign(make_float32(z))) {
+ z += roundBitsMask;
+ }
+ break;
+ default:
+ abort();
}
z &= ~ roundBitsMask;
if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
@@ -2234,6 +2392,17 @@
}
}
/* Zero plus something non-zero : just return the something */
+ if (flags & float_muladd_halve_result) {
+ if (cExp == 0) {
+ normalizeFloat32Subnormal(cSig, &cExp, &cSig);
+ }
+ /* Subtract one to halve, and one again because roundAndPackFloat32
+ * wants one less than the true exponent.
+ */
+ cExp -= 2;
+ cSig = (cSig | 0x00800000) << 7;
+ return roundAndPackFloat32(cSign ^ signflip, cExp, cSig STATUS_VAR);
+ }
return packFloat32(cSign ^ signflip, cExp, cSig);
}
@@ -2270,6 +2439,9 @@
/* Throw out the special case of c being an exact zero now */
shift64RightJamming(pSig64, 32, &pSig64);
pSig = pSig64;
+ if (flags & float_muladd_halve_result) {
+ pExp--;
+ }
return roundAndPackFloat32(zSign, pExp - 1,
pSig STATUS_VAR);
}
@@ -2334,6 +2506,10 @@
zSig64 <<= shiftcount;
zExp -= shiftcount;
}
+ if (flags & float_muladd_halve_result) {
+ zExp--;
+ }
+
shift64RightJamming(zSig64, 32, &zSig64);
return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
}
@@ -3005,6 +3181,128 @@
(((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
}
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper half-precision floating-
+| point value corresponding to the abstract input. Ordinarily, the abstract
+| value is simply rounded and packed into the half-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal half-
+| precision floating-point number.
+| The `ieee' flag indicates whether to use IEEE standard half precision, or
+| ARM-style "alternative representation", which omits the NaN and Inf
+| encodings in order to raise the maximum representable exponent by one.
+| The input significand `zSig' has its binary point between bits 22
+| and 23, which is 13 bits to the left of the usual location. This shifted
+| significand must be normalized or smaller. If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding. In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| Note the slightly odd position of the binary point in zSig compared with the
+| other roundAndPackFloat functions. This should probably be fixed if we
+| need to implement more float16 routines than just conversion.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 roundAndPackFloat16(flag zSign, int_fast16_t zExp,
+ uint32_t zSig, flag ieee STATUS_PARAM)
+{
+ int maxexp = ieee ? 29 : 30;
+ uint32_t mask;
+ uint32_t increment;
+ bool rounding_bumps_exp;
+ bool is_tiny = false;
+
+ /* Calculate the mask of bits of the mantissa which are not
+ * representable in half-precision and will be lost.
+ */
+ if (zExp < 1) {
+ /* Will be denormal in halfprec */
+ mask = 0x00ffffff;
+ if (zExp >= -11) {
+ mask >>= 11 + zExp;
+ }
+ } else {
+ /* Normal number in halfprec */
+ mask = 0x00001fff;
+ }
+
+ switch (STATUS(float_rounding_mode)) {
+ case float_round_nearest_even:
+ increment = (mask + 1) >> 1;
+ if ((zSig & mask) == increment) {
+ increment = zSig & (increment << 1);
+ }
+ break;
+ case float_round_ties_away:
+ increment = (mask + 1) >> 1;
+ break;
+ case float_round_up:
+ increment = zSign ? 0 : mask;
+ break;
+ case float_round_down:
+ increment = zSign ? mask : 0;
+ break;
+ default: /* round_to_zero */
+ increment = 0;
+ break;
+ }
+
+ rounding_bumps_exp = (zSig + increment >= 0x01000000);
+
+ if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) {
+ if (ieee) {
+ float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR);
+ return packFloat16(zSign, 0x1f, 0);
+ } else {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return packFloat16(zSign, 0x1f, 0x3ff);
+ }
+ }
+
+ if (zExp < 0) {
+ /* Note that flush-to-zero does not affect half-precision results */
+ is_tiny =
+ (STATUS(float_detect_tininess) == float_tininess_before_rounding)
+ || (zExp < -1)
+ || (!rounding_bumps_exp);
+ }
+ if (zSig & mask) {
+ float_raise(float_flag_inexact STATUS_VAR);
+ if (is_tiny) {
+ float_raise(float_flag_underflow STATUS_VAR);
+ }
+ }
+
+ zSig += increment;
+ if (rounding_bumps_exp) {
+ zSig >>= 1;
+ zExp++;
+ }
+
+ if (zExp < -10) {
+ return packFloat16(zSign, 0, 0);
+ }
+ if (zExp < 0) {
+ zSig >>= -zExp;
+ zExp = 0;
+ }
+ return packFloat16(zSign, zExp, zSig >> 13);
+}
+
+static void normalizeFloat16Subnormal(uint32_t aSig, int_fast16_t *zExpPtr,
+ uint32_t *zSigPtr)
+{
+ int8_t shiftCount = countLeadingZeros32(aSig) - 21;
+ *zSigPtr = aSig << shiftCount;
+ *zExpPtr = 1 - shiftCount;
+}
+
/* Half precision floats come in two formats: standard IEEE and "ARM" format.
The latter gains extra exponent range by omitting the NaN/Inf encodings. */
@@ -3025,15 +3323,12 @@
return packFloat32(aSign, 0xff, 0);
}
if (aExp == 0) {
- int8 shiftCount;
-
if (aSig == 0) {
return packFloat32(aSign, 0, 0);
}
- shiftCount = countLeadingZeros32( aSig ) - 21;
- aSig = aSig << shiftCount;
- aExp = -shiftCount;
+ normalizeFloat16Subnormal(aSig, &aExp, &aSig);
+ aExp--;
}
return packFloat32( aSign, aExp + 0x70, aSig << 13);
}
@@ -3043,9 +3338,7 @@
flag aSign;
int_fast16_t aExp;
uint32_t aSig;
- uint32_t mask;
- uint32_t increment;
- int8 roundingMode;
+
a = float32_squash_input_denormal(a STATUS_VAR);
aSig = extractFloat32Frac( a );
@@ -3054,11 +3347,12 @@
if ( aExp == 0xFF ) {
if (aSig) {
/* Input is a NaN */
- float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
if (!ieee) {
+ float_raise(float_flag_invalid STATUS_VAR);
return packFloat16(aSign, 0, 0);
}
- return r;
+ return commonNaNToFloat16(
+ float32ToCommonNaN(a STATUS_VAR) STATUS_VAR);
}
/* Infinity */
if (!ieee) {
@@ -3070,66 +3364,92 @@
if (aExp == 0 && aSig == 0) {
return packFloat16(aSign, 0, 0);
}
- /* Decimal point between bits 22 and 23. */
+ /* Decimal point between bits 22 and 23. Note that we add the 1 bit
+ * even if the input is denormal; however this is harmless because
+ * the largest possible single-precision denormal is still smaller
+ * than the smallest representable half-precision denormal, and so we
+ * will end up ignoring aSig and returning via the "always return zero"
+ * codepath.
+ */
aSig |= 0x00800000;
- aExp -= 0x7f;
- if (aExp < -14) {
- mask = 0x00ffffff;
- if (aExp >= -24) {
- mask >>= 25 + aExp;
- }
- } else {
- mask = 0x00001fff;
- }
- if (aSig & mask) {
- float_raise( float_flag_underflow STATUS_VAR );
- roundingMode = STATUS(float_rounding_mode);
- switch (roundingMode) {
- case float_round_nearest_even:
- increment = (mask + 1) >> 1;
- if ((aSig & mask) == increment) {
- increment = aSig & (increment << 1);
- }
- break;
- case float_round_up:
- increment = aSign ? 0 : mask;
- break;
- case float_round_down:
- increment = aSign ? mask : 0;
- break;
- default: /* round_to_zero */
- increment = 0;
- break;
- }
- aSig += increment;
- if (aSig >= 0x01000000) {
- aSig >>= 1;
- aExp++;
- }
- } else if (aExp < -14
- && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
- float_raise( float_flag_underflow STATUS_VAR);
- }
+ aExp -= 0x71;
- if (ieee) {
- if (aExp > 15) {
- float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
- return packFloat16(aSign, 0x1f, 0);
+ return roundAndPackFloat16(aSign, aExp, aSig, ieee STATUS_VAR);
+}
+
+float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM)
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint32_t aSig;
+
+ aSign = extractFloat16Sign(a);
+ aExp = extractFloat16Exp(a);
+ aSig = extractFloat16Frac(a);
+
+ if (aExp == 0x1f && ieee) {
+ if (aSig) {
+ return commonNaNToFloat64(
+ float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
}
- } else {
- if (aExp > 16) {
- float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
+ return packFloat64(aSign, 0x7ff, 0);
+ }
+ if (aExp == 0) {
+ if (aSig == 0) {
+ return packFloat64(aSign, 0, 0);
+ }
+
+ normalizeFloat16Subnormal(aSig, &aExp, &aSig);
+ aExp--;
+ }
+ return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
+}
+
+float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM)
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint64_t aSig;
+ uint32_t zSig;
+
+ a = float64_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat64Frac(a);
+ aExp = extractFloat64Exp(a);
+ aSign = extractFloat64Sign(a);
+ if (aExp == 0x7FF) {
+ if (aSig) {
+ /* Input is a NaN */
+ if (!ieee) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return packFloat16(aSign, 0, 0);
+ }
+ return commonNaNToFloat16(
+ float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
+ }
+ /* Infinity */
+ if (!ieee) {
+ float_raise(float_flag_invalid STATUS_VAR);
return packFloat16(aSign, 0x1f, 0x3ff);
}
+ return packFloat16(aSign, 0x1f, 0);
}
- if (aExp < -24) {
+ shift64RightJamming(aSig, 29, &aSig);
+ zSig = aSig;
+ if (aExp == 0 && zSig == 0) {
return packFloat16(aSign, 0, 0);
}
- if (aExp < -14) {
- aSig >>= -14 - aExp;
- aExp = -14;
- }
- return packFloat16(aSign, aExp + 14, aSig >> 13);
+ /* Decimal point between bits 22 and 23. Note that we add the 1 bit
+ * even if the input is denormal; however this is harmless because
+ * the largest possible single-precision denormal is still smaller
+ * than the smallest representable half-precision denormal, and so we
+ * will end up ignoring aSig and returning via the "always return zero"
+ * codepath.
+ */
+ zSig |= 0x00800000;
+ aExp -= 0x3F1;
+
+ return roundAndPackFloat16(aSign, aExp, zSig, ieee STATUS_VAR);
}
/*----------------------------------------------------------------------------
@@ -3206,7 +3526,6 @@
flag aSign;
int_fast16_t aExp;
uint64_t lastBitMask, roundBitsMask;
- int8 roundingMode;
uint64_t z;
a = float64_squash_input_denormal(a STATUS_VAR);
@@ -3227,6 +3546,11 @@
return packFloat64( aSign, 0x3FF, 0 );
}
break;
+ case float_round_ties_away:
+ if (aExp == 0x3FE) {
+ return packFloat64(aSign, 0x3ff, 0);
+ }
+ break;
case float_round_down:
return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
case float_round_up:
@@ -3239,15 +3563,30 @@
lastBitMask <<= 0x433 - aExp;
roundBitsMask = lastBitMask - 1;
z = float64_val(a);
- roundingMode = STATUS(float_rounding_mode);
- if ( roundingMode == float_round_nearest_even ) {
- z += lastBitMask>>1;
- if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
- }
- else if ( roundingMode != float_round_to_zero ) {
- if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
+ switch (STATUS(float_rounding_mode)) {
+ case float_round_nearest_even:
+ z += lastBitMask >> 1;
+ if ((z & roundBitsMask) == 0) {
+ z &= ~lastBitMask;
+ }
+ break;
+ case float_round_ties_away:
+ z += lastBitMask >> 1;
+ break;
+ case float_round_to_zero:
+ break;
+ case float_round_up:
+ if (!extractFloat64Sign(make_float64(z))) {
z += roundBitsMask;
}
+ break;
+ case float_round_down:
+ if (extractFloat64Sign(make_float64(z))) {
+ z += roundBitsMask;
+ }
+ break;
+ default:
+ abort();
}
z &= ~ roundBitsMask;
if ( z != float64_val(a) )
@@ -3787,6 +4126,17 @@
}
}
/* Zero plus something non-zero : just return the something */
+ if (flags & float_muladd_halve_result) {
+ if (cExp == 0) {
+ normalizeFloat64Subnormal(cSig, &cExp, &cSig);
+ }
+ /* Subtract one to halve, and one again because roundAndPackFloat64
+ * wants one less than the true exponent.
+ */
+ cExp -= 2;
+ cSig = (cSig | 0x0010000000000000ULL) << 10;
+ return roundAndPackFloat64(cSign ^ signflip, cExp, cSig STATUS_VAR);
+ }
return packFloat64(cSign ^ signflip, cExp, cSig);
}
@@ -3822,6 +4172,9 @@
if (!cSig) {
/* Throw out the special case of c being an exact zero now */
shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
+ if (flags & float_muladd_halve_result) {
+ pExp--;
+ }
return roundAndPackFloat64(zSign, pExp - 1,
pSig1 STATUS_VAR);
}
@@ -3858,6 +4211,9 @@
zExp--;
}
shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
+ if (flags & float_muladd_halve_result) {
+ zExp--;
+ }
return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR);
} else {
/* Subtraction */
@@ -3908,6 +4264,9 @@
zExp -= (shiftcount + 64);
}
}
+ if (flags & float_muladd_halve_result) {
+ zExp--;
+ }
return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
}
}
@@ -4475,7 +4834,6 @@
flag aSign;
int32 aExp;
uint64_t lastBitMask, roundBitsMask;
- int8 roundingMode;
floatx80 z;
aExp = extractFloatx80Exp( a );
@@ -4500,6 +4858,11 @@
packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
}
break;
+ case float_round_ties_away:
+ if (aExp == 0x3FFE) {
+ return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
+ }
+ break;
case float_round_down:
return
aSign ?
@@ -4516,15 +4879,30 @@
lastBitMask <<= 0x403E - aExp;
roundBitsMask = lastBitMask - 1;
z = a;
- roundingMode = STATUS(float_rounding_mode);
- if ( roundingMode == float_round_nearest_even ) {
+ switch (STATUS(float_rounding_mode)) {
+ case float_round_nearest_even:
z.low += lastBitMask>>1;
- if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
- }
- else if ( roundingMode != float_round_to_zero ) {
- if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+ if ((z.low & roundBitsMask) == 0) {
+ z.low &= ~lastBitMask;
+ }
+ break;
+ case float_round_ties_away:
+ z.low += lastBitMask >> 1;
+ break;
+ case float_round_to_zero:
+ break;
+ case float_round_up:
+ if (!extractFloatx80Sign(z)) {
z.low += roundBitsMask;
}
+ break;
+ case float_round_down:
+ if (extractFloatx80Sign(z)) {
+ z.low += roundBitsMask;
+ }
+ break;
+ default:
+ abort();
}
z.low &= ~ roundBitsMask;
if ( z.low == 0 ) {
@@ -5550,7 +5928,6 @@
flag aSign;
int32 aExp;
uint64_t lastBitMask, roundBitsMask;
- int8 roundingMode;
float128 z;
aExp = extractFloat128Exp( a );
@@ -5567,8 +5944,8 @@
lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
roundBitsMask = lastBitMask - 1;
z = a;
- roundingMode = STATUS(float_rounding_mode);
- if ( roundingMode == float_round_nearest_even ) {
+ switch (STATUS(float_rounding_mode)) {
+ case float_round_nearest_even:
if ( lastBitMask ) {
add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
@@ -5579,12 +5956,30 @@
if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
}
}
- }
- else if ( roundingMode != float_round_to_zero ) {
- if ( extractFloat128Sign( z )
- ^ ( roundingMode == float_round_up ) ) {
- add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
+ break;
+ case float_round_ties_away:
+ if (lastBitMask) {
+ add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
+ } else {
+ if ((int64_t) z.low < 0) {
+ ++z.high;
+ }
}
+ break;
+ case float_round_to_zero:
+ break;
+ case float_round_up:
+ if (!extractFloat128Sign(z)) {
+ add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
+ }
+ break;
+ case float_round_down:
+ if (extractFloat128Sign(z)) {
+ add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
+ }
+ break;
+ default:
+ abort();
}
z.low &= ~ roundBitsMask;
}
@@ -5602,6 +5997,11 @@
return packFloat128( aSign, 0x3FFF, 0, 0 );
}
break;
+ case float_round_ties_away:
+ if (aExp == 0x3FFE) {
+ return packFloat128(aSign, 0x3FFF, 0, 0);
+ }
+ break;
case float_round_down:
return
aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
@@ -5618,19 +6018,32 @@
roundBitsMask = lastBitMask - 1;
z.low = 0;
z.high = a.high;
- roundingMode = STATUS(float_rounding_mode);
- if ( roundingMode == float_round_nearest_even ) {
+ switch (STATUS(float_rounding_mode)) {
+ case float_round_nearest_even:
z.high += lastBitMask>>1;
if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
z.high &= ~ lastBitMask;
}
- }
- else if ( roundingMode != float_round_to_zero ) {
- if ( extractFloat128Sign( z )
- ^ ( roundingMode == float_round_up ) ) {
+ break;
+ case float_round_ties_away:
+ z.high += lastBitMask>>1;
+ break;
+ case float_round_to_zero:
+ break;
+ case float_round_up:
+ if (!extractFloat128Sign(z)) {
z.high |= ( a.low != 0 );
z.high += roundBitsMask;
}
+ break;
+ case float_round_down:
+ if (extractFloat128Sign(z)) {
+ z.high |= (a.low != 0);
+ z.high += roundBitsMask;
+ }
+ break;
+ default:
+ abort();
}
z.high &= ~ roundBitsMask;
}
@@ -6418,12 +6831,12 @@
}
/* misc functions */
-float32 uint32_to_float32( uint32 a STATUS_PARAM )
+float32 uint32_to_float32(uint32_t a STATUS_PARAM)
{
return int64_to_float32(a STATUS_VAR);
}
-float64 uint32_to_float64( uint32 a STATUS_PARAM )
+float64 uint32_to_float64(uint32_t a STATUS_PARAM)
{
return int64_to_float64(a STATUS_VAR);
}
@@ -6432,17 +6845,18 @@
{
int64_t v;
uint32 res;
+ int old_exc_flags = get_float_exception_flags(status);
v = float32_to_int64(a STATUS_VAR);
if (v < 0) {
res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
} else if (v > 0xffffffff) {
res = 0xffffffff;
- float_raise( float_flag_invalid STATUS_VAR);
} else {
- res = v;
+ return v;
}
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
return res;
}
@@ -6450,17 +6864,58 @@
{
int64_t v;
uint32 res;
+ int old_exc_flags = get_float_exception_flags(status);
v = float32_to_int64_round_to_zero(a STATUS_VAR);
if (v < 0) {
res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
} else if (v > 0xffffffff) {
res = 0xffffffff;
- float_raise( float_flag_invalid STATUS_VAR);
} else {
- res = v;
+ return v;
}
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
+ return res;
+}
+
+int_fast16_t float32_to_int16(float32 a STATUS_PARAM)
+{
+ int32_t v;
+ int_fast16_t res;
+ int old_exc_flags = get_float_exception_flags(status);
+
+ v = float32_to_int32(a STATUS_VAR);
+ if (v < -0x8000) {
+ res = -0x8000;
+ } else if (v > 0x7fff) {
+ res = 0x7fff;
+ } else {
+ return v;
+ }
+
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
+ return res;
+}
+
+uint_fast16_t float32_to_uint16(float32 a STATUS_PARAM)
+{
+ int32_t v;
+ uint_fast16_t res;
+ int old_exc_flags = get_float_exception_flags(status);
+
+ v = float32_to_int32(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ } else if (v > 0xffff) {
+ res = 0xffff;
+ } else {
+ return v;
+ }
+
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
return res;
}
@@ -6468,53 +6923,92 @@
{
int64_t v;
uint_fast16_t res;
+ int old_exc_flags = get_float_exception_flags(status);
v = float32_to_int64_round_to_zero(a STATUS_VAR);
if (v < 0) {
res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
} else if (v > 0xffff) {
res = 0xffff;
- float_raise( float_flag_invalid STATUS_VAR);
} else {
- res = v;
+ return v;
}
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
return res;
}
uint32 float64_to_uint32( float64 a STATUS_PARAM )
{
- int64_t v;
+ uint64_t v;
uint32 res;
+ int old_exc_flags = get_float_exception_flags(status);
- v = float64_to_int64(a STATUS_VAR);
- if (v < 0) {
- res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
- } else if (v > 0xffffffff) {
+ v = float64_to_uint64(a STATUS_VAR);
+ if (v > 0xffffffff) {
res = 0xffffffff;
- float_raise( float_flag_invalid STATUS_VAR);
} else {
- res = v;
+ return v;
}
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
return res;
}
uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
{
- int64_t v;
+ uint64_t v;
uint32 res;
+ int old_exc_flags = get_float_exception_flags(status);
- v = float64_to_int64_round_to_zero(a STATUS_VAR);
+ v = float64_to_uint64_round_to_zero(a STATUS_VAR);
+ if (v > 0xffffffff) {
+ res = 0xffffffff;
+ } else {
+ return v;
+ }
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
+ return res;
+}
+
+int_fast16_t float64_to_int16(float64 a STATUS_PARAM)
+{
+ int64_t v;
+ int_fast16_t res;
+ int old_exc_flags = get_float_exception_flags(status);
+
+ v = float64_to_int32(a STATUS_VAR);
+ if (v < -0x8000) {
+ res = -0x8000;
+ } else if (v > 0x7fff) {
+ res = 0x7fff;
+ } else {
+ return v;
+ }
+
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
+ return res;
+}
+
+uint_fast16_t float64_to_uint16(float64 a STATUS_PARAM)
+{
+ int64_t v;
+ uint_fast16_t res;
+ int old_exc_flags = get_float_exception_flags(status);
+
+ v = float64_to_int32(a STATUS_VAR);
if (v < 0) {
res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
- } else if (v > 0xffffffff) {
- res = 0xffffffff;
- float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffff) {
+ res = 0xffff;
} else {
- res = v;
+ return v;
}
+
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
return res;
}
@@ -6522,41 +7016,75 @@
{
int64_t v;
uint_fast16_t res;
+ int old_exc_flags = get_float_exception_flags(status);
v = float64_to_int64_round_to_zero(a STATUS_VAR);
if (v < 0) {
res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
} else if (v > 0xffff) {
res = 0xffff;
- float_raise( float_flag_invalid STATUS_VAR);
} else {
- res = v;
+ return v;
}
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
return res;
}
-/* FIXME: This looks broken. */
-uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit unsigned integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. If the conversion overflows, the
+| largest unsigned integer is returned. If 'a' is negative, the value is
+| rounded and zero is returned; negative values that do not round to zero
+| will raise the inexact exception.
+*----------------------------------------------------------------------------*/
+
+uint64_t float64_to_uint64(float64 a STATUS_PARAM)
{
- int64_t v;
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint64_t aSig, aSigExtra;
+ a = float64_squash_input_denormal(a STATUS_VAR);
- v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
- v += float64_val(a);
- v = float64_to_int64(make_float64(v) STATUS_VAR);
-
- return v - INT64_MIN;
+ aSig = extractFloat64Frac(a);
+ aExp = extractFloat64Exp(a);
+ aSign = extractFloat64Sign(a);
+ if (aSign && (aExp > 1022)) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ if (float64_is_any_nan(a)) {
+ return LIT64(0xFFFFFFFFFFFFFFFF);
+ } else {
+ return 0;
+ }
+ }
+ if (aExp) {
+ aSig |= LIT64(0x0010000000000000);
+ }
+ shiftCount = 0x433 - aExp;
+ if (shiftCount <= 0) {
+ if (0x43E < aExp) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return LIT64(0xFFFFFFFFFFFFFFFF);
+ }
+ aSigExtra = 0;
+ aSig <<= -shiftCount;
+ } else {
+ shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
+ }
+ return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR);
}
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
{
- int64_t v;
-
- v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
- v += float64_val(a);
- v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
-
- return v - INT64_MIN;
+ signed char current_rounding_mode = STATUS(float_rounding_mode);
+ set_float_rounding_mode(float_round_to_zero STATUS_VAR);
+ int64_t v = float64_to_uint64(a STATUS_VAR);
+ set_float_rounding_mode(current_rounding_mode STATUS_VAR);
+ return v;
}
#define COMPARE(s, nan_exp) \
@@ -6795,10 +7323,13 @@
}
return a;
}
- if ( aExp != 0 )
+ if (aExp != 0) {
aSig |= 0x00800000;
- else if ( aSig == 0 )
+ } else if (aSig == 0) {
return a;
+ } else {
+ aExp++;
+ }
if (n > 0x200) {
n = 0x200;
@@ -6828,10 +7359,13 @@
}
return a;
}
- if ( aExp != 0 )
+ if (aExp != 0) {
aSig |= LIT64( 0x0010000000000000 );
- else if ( aSig == 0 )
+ } else if (aSig == 0) {
return a;
+ } else {
+ aExp++;
+ }
if (n > 0x1000) {
n = 0x1000;
@@ -6861,8 +7395,12 @@
return a;
}
- if (aExp == 0 && aSig == 0)
- return a;
+ if (aExp == 0) {
+ if (aSig == 0) {
+ return a;
+ }
+ aExp++;
+ }
if (n > 0x10000) {
n = 0x10000;
@@ -6891,10 +7429,13 @@
}
return a;
}
- if ( aExp != 0 )
+ if (aExp != 0) {
aSig0 |= LIT64( 0x0001000000000000 );
- else if ( aSig0 == 0 && aSig1 == 0 )
+ } else if (aSig0 == 0 && aSig1 == 0) {
return a;
+ } else {
+ aExp++;
+ }
if (n > 0x10000) {
n = 0x10000;
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 2365274..4b3090c 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -152,7 +152,8 @@
float_round_nearest_even = 0,
float_round_down = 1,
float_round_up = 2,
- float_round_to_zero = 3
+ float_round_to_zero = 3,
+ float_round_ties_away = 4,
};
/*----------------------------------------------------------------------------
@@ -180,12 +181,22 @@
flag default_nan_mode;
} float_status;
-void set_float_rounding_mode(int val STATUS_PARAM);
-void set_float_exception_flags(int val STATUS_PARAM);
INLINE void set_float_detect_tininess(int val STATUS_PARAM)
{
STATUS(float_detect_tininess) = val;
}
+INLINE void set_float_rounding_mode(int val STATUS_PARAM)
+{
+ STATUS(float_rounding_mode) = val;
+}
+INLINE void set_float_exception_flags(int val STATUS_PARAM)
+{
+ STATUS(float_exception_flags) = val;
+}
+INLINE void set_floatx80_rounding_precision(int val STATUS_PARAM)
+{
+ STATUS(floatx80_rounding_precision) = val;
+}
INLINE void set_flush_to_zero(flag val STATUS_PARAM)
{
STATUS(flush_to_zero) = val;
@@ -198,11 +209,34 @@
{
STATUS(default_nan_mode) = val;
}
+INLINE int get_float_detect_tininess(float_status *status)
+{
+ return STATUS(float_detect_tininess);
+}
+INLINE int get_float_rounding_mode(float_status *status)
+{
+ return STATUS(float_rounding_mode);
+}
INLINE int get_float_exception_flags(float_status *status)
{
return STATUS(float_exception_flags);
}
-void set_floatx80_rounding_precision(int val STATUS_PARAM);
+INLINE int get_floatx80_rounding_precision(float_status *status)
+{
+ return STATUS(floatx80_rounding_precision);
+}
+INLINE flag get_flush_to_zero(float_status *status)
+{
+ return STATUS(flush_to_zero);
+}
+INLINE flag get_flush_inputs_to_zero(float_status *status)
+{
+ return STATUS(flush_inputs_to_zero);
+}
+INLINE flag get_default_nan_mode(float_status *status)
+{
+ return STATUS(default_nan_mode);
+}
/*----------------------------------------------------------------------------
| Routine to raise any or all of the software IEC/IEEE floating-point
@@ -211,39 +245,72 @@
void float_raise( int8 flags STATUS_PARAM);
/*----------------------------------------------------------------------------
+| If `a' is denormal and we are in flush-to-zero mode then set the
+| input-denormal exception and return zero. Otherwise just return the value.
+*----------------------------------------------------------------------------*/
+float32 float32_squash_input_denormal(float32 a STATUS_PARAM);
+float64 float64_squash_input_denormal(float64 a STATUS_PARAM);
+
+/*----------------------------------------------------------------------------
| Options to indicate which negations to perform in float*_muladd()
| Using these differs from negating an input or output before calling
| the muladd function in that this means that a NaN doesn't have its
| sign bit inverted before it is propagated.
+| We also support halving the result before rounding, as a special
+| case to support the ARM fused-sqrt-step instruction FRSQRTS.
*----------------------------------------------------------------------------*/
enum {
float_muladd_negate_c = 1,
float_muladd_negate_product = 2,
float_muladd_negate_result = 4,
+ float_muladd_halve_result = 8,
};
/*----------------------------------------------------------------------------
| Software IEC/IEEE integer-to-floating-point conversion routines.
*----------------------------------------------------------------------------*/
-float32 int32_to_float32( int32 STATUS_PARAM );
-float64 int32_to_float64( int32 STATUS_PARAM );
-float32 uint32_to_float32( uint32 STATUS_PARAM );
-float64 uint32_to_float64( uint32 STATUS_PARAM );
-floatx80 int32_to_floatx80( int32 STATUS_PARAM );
-float128 int32_to_float128( int32 STATUS_PARAM );
-float32 int64_to_float32( int64 STATUS_PARAM );
-float32 uint64_to_float32( uint64 STATUS_PARAM );
-float64 int64_to_float64( int64 STATUS_PARAM );
-float64 uint64_to_float64( uint64 STATUS_PARAM );
-floatx80 int64_to_floatx80( int64 STATUS_PARAM );
-float128 int64_to_float128( int64 STATUS_PARAM );
-float128 uint64_to_float128( uint64 STATUS_PARAM );
+float32 int32_to_float32(int32_t STATUS_PARAM);
+float64 int32_to_float64(int32_t STATUS_PARAM);
+float32 uint32_to_float32(uint32_t STATUS_PARAM);
+float64 uint32_to_float64(uint32_t STATUS_PARAM);
+floatx80 int32_to_floatx80(int32_t STATUS_PARAM);
+float128 int32_to_float128(int32_t STATUS_PARAM);
+float32 int64_to_float32(int64_t STATUS_PARAM);
+float32 uint64_to_float32(uint64_t STATUS_PARAM);
+float64 int64_to_float64(int64_t STATUS_PARAM);
+float64 uint64_to_float64(uint64_t STATUS_PARAM);
+floatx80 int64_to_floatx80(int64_t STATUS_PARAM);
+float128 int64_to_float128(int64_t STATUS_PARAM);
+float128 uint64_to_float128(uint64_t STATUS_PARAM);
+
+/* We provide the int16 versions for symmetry of API with float-to-int */
+INLINE float32 int16_to_float32(int16_t v STATUS_PARAM)
+{
+ return int32_to_float32(v STATUS_VAR);
+}
+
+INLINE float32 uint16_to_float32(uint16_t v STATUS_PARAM)
+{
+ return uint32_to_float32(v STATUS_VAR);
+}
+
+INLINE float64 int16_to_float64(int16_t v STATUS_PARAM)
+{
+ return int32_to_float64(v STATUS_VAR);
+}
+
+INLINE float64 uint16_to_float64(uint16_t v STATUS_PARAM)
+{
+ return uint32_to_float64(v STATUS_VAR);
+}
/*----------------------------------------------------------------------------
| Software half-precision conversion routines.
*----------------------------------------------------------------------------*/
float16 float32_to_float16( float32, flag STATUS_PARAM );
float32 float16_to_float32( float16, flag STATUS_PARAM );
+float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM);
+float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM);
/*----------------------------------------------------------------------------
| Software half-precision operations.
@@ -265,6 +332,8 @@
/*----------------------------------------------------------------------------
| Software IEC/IEEE single-precision conversion routines.
*----------------------------------------------------------------------------*/
+int_fast16_t float32_to_int16(float32 STATUS_PARAM);
+uint_fast16_t float32_to_uint16(float32 STATUS_PARAM);
int_fast16_t float32_to_int16_round_to_zero(float32 STATUS_PARAM);
uint_fast16_t float32_to_uint16_round_to_zero(float32 STATUS_PARAM);
int32 float32_to_int32( float32 STATUS_PARAM );
@@ -272,6 +341,8 @@
uint32 float32_to_uint32( float32 STATUS_PARAM );
uint32 float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
int64 float32_to_int64( float32 STATUS_PARAM );
+uint64 float32_to_uint64(float32 STATUS_PARAM);
+uint64 float32_to_uint64_round_to_zero(float32 STATUS_PARAM);
int64 float32_to_int64_round_to_zero( float32 STATUS_PARAM );
float64 float32_to_float64( float32 STATUS_PARAM );
floatx80 float32_to_floatx80( float32 STATUS_PARAM );
@@ -371,6 +442,8 @@
/*----------------------------------------------------------------------------
| Software IEC/IEEE double-precision conversion routines.
*----------------------------------------------------------------------------*/
+int_fast16_t float64_to_int16(float64 STATUS_PARAM);
+uint_fast16_t float64_to_uint16(float64 STATUS_PARAM);
int_fast16_t float64_to_int16_round_to_zero(float64 STATUS_PARAM);
uint_fast16_t float64_to_uint16_round_to_zero(float64 STATUS_PARAM);
int32 float64_to_int32( float64 STATUS_PARAM );