/*  ArmPkg/Library/ArmSoftFloatLib/Arm/softfloat.h  */

/*  $NetBSD: softfloat.h,v 1.10 2013/04/24 18:04:46 matt Exp $  */

/* This is a derivative work. */

/*
===============================================================================

This C header file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/*
-------------------------------------------------------------------------------
The macro `FLOATX80' must be defined to enable the extended double-precision
floating-point format `floatx80'.  If this macro is not defined, the
`floatx80' type will not be defined, and none of the functions that either
input or output the `floatx80' type will be defined.  The same applies to
the `FLOAT128' macro and the quadruple-precision format `float128'.
-------------------------------------------------------------------------------
*/
/* #define FLOATX80 */
/* #define FLOAT128 */

  47 #define FE_INVALID      0x01    /* invalid operation exception */
  48 #define FE_DIVBYZERO    0x02    /* divide-by-zero exception */
  49 #define FE_OVERFLOW     0x04    /* overflow exception */
  50 #define FE_UNDERFLOW    0x08    /* underflow exception */
  51 #define FE_INEXACT      0x10    /* imprecise (loss of precision; "inexact") */
  52
  53 #define FE_ALL_EXCEPT   0x1f
  54
  55 #define FE_TONEAREST    0   /* round to nearest representable number */
  56 #define FE_UPWARD       1   /* round toward positive infinity */
  57 #define FE_DOWNWARD     2   /* round toward negative infinity */
  58 #define FE_TOWARDZERO   3   /* round to zero (truncate) */
  59
  60 typedef int fp_except;
  61
  62 /* Bit defines for fp_except */
  63
  64 #define FP_X_INV    FE_INVALID      /* invalid operation exception */
  65 #define FP_X_DZ     FE_DIVBYZERO    /* divide-by-zero exception */
  66 #define FP_X_OFL    FE_OVERFLOW     /* overflow exception */
  67 #define FP_X_UFL    FE_UNDERFLOW    /* underflow exception */
  68 #define FP_X_IMP    FE_INEXACT      /* imprecise (prec. loss; "inexact") */
  69
  70 /* Rounding modes */
  71
  72 typedef enum {
  73     FP_RN=FE_TONEAREST,     /* round to nearest representable number */
  74     FP_RP=FE_UPWARD,        /* round toward positive infinity */
  75     FP_RM=FE_DOWNWARD,      /* round toward negative infinity */
  76     FP_RZ=FE_TOWARDZERO     /* round to zero (truncate) */
  77 } fp_rnd;
  78
  79 /*
  80 -------------------------------------------------------------------------------
  81 Software IEC/IEEE floating-point types.
  82 -------------------------------------------------------------------------------
  83 */
  84 typedef unsigned int float32;
  85 typedef unsigned long long float64;
  86 #ifdef FLOATX80
  87 typedef struct {
  88     unsigned short high;
  89     unsigned long long low;
  90 } floatx80;
  91 #endif
  92 #ifdef FLOAT128
  93 typedef struct {
  94     unsigned long long high, low;
  95 } float128;
  96 #endif
  97
  98 /*
  99 -------------------------------------------------------------------------------
 100 Software IEC/IEEE floating-point underflow tininess-detection mode.
 101 -------------------------------------------------------------------------------
 102 */
 103 #ifndef SOFTFLOAT_FOR_GCC
 104 extern int float_detect_tininess;
 105 #endif
 106 enum {
 107     float_tininess_after_rounding  = 0,
 108     float_tininess_before_rounding = 1
 109 };
 110
 111 /*
 112 -------------------------------------------------------------------------------
 113 Software IEC/IEEE floating-point rounding mode.
 114 -------------------------------------------------------------------------------
 115 */
 116 extern fp_rnd float_rounding_mode;
 117 #define float_round_nearest_even FP_RN
 118 #define float_round_to_zero      FP_RZ
 119 #define float_round_down         FP_RM
 120 #define float_round_up           FP_RP
 121
 122 /*
 123 -------------------------------------------------------------------------------
 124 Software IEC/IEEE floating-point exception flags.
 125 -------------------------------------------------------------------------------
 126 */
 127 extern fp_except float_exception_flags;
 128 extern fp_except float_exception_mask;
 129 enum {
 130     float_flag_inexact   = FP_X_IMP,
 131     float_flag_underflow = FP_X_UFL,
 132     float_flag_overflow  = FP_X_OFL,
 133     float_flag_divbyzero = FP_X_DZ,
 134     float_flag_invalid   = FP_X_INV
 135 };
 136
 137 /*
 138 -------------------------------------------------------------------------------
 139 Routine to raise any or all of the software IEC/IEEE floating-point
 140 exception flags.
 141 -------------------------------------------------------------------------------
 142 */
 143 void float_raise( fp_except );
 144
 145 /*
 146 -------------------------------------------------------------------------------
 147 Software IEC/IEEE integer-to-floating-point conversion routines.
 148 -------------------------------------------------------------------------------
 149 */
 150 float32 int32_to_float32( int32 );
 151 float32 uint32_to_float32( uint32 );
 152 float64 int32_to_float64( int32 );
 153 float64 uint32_to_float64( uint32 );
 154 #ifdef FLOATX80
 155 floatx80 int32_to_floatx80( int32 );
 156 floatx80 uint32_to_floatx80( uint32 );
 157 #endif
 158 #ifdef FLOAT128
 159 float128 int32_to_float128( int32 );
 160 float128 uint32_to_float128( uint32 );
 161 #endif
 162 #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
 163 float32 int64_to_float32( long long );
 164 float64 int64_to_float64( long long );
 165 #ifdef FLOATX80
 166 floatx80 int64_to_floatx80( long long );
 167 #endif
 168 #ifdef FLOAT128
 169 float128 int64_to_float128( long long );
 170 #endif
 171 #endif
 172
 173 /*
 174 -------------------------------------------------------------------------------
 175 Software IEC/IEEE single-precision conversion routines.
 176 -------------------------------------------------------------------------------
 177 */
 178 int float32_to_int32( float32 );
 179 int float32_to_int32_round_to_zero( float32 );
 180 #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
 181 unsigned int float32_to_uint32_round_to_zero( float32 );
 182 #endif
 183 #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
 184 long long float32_to_int64( float32 );
 185 long long float32_to_int64_round_to_zero( float32 );
 186 #endif
 187 float64 float32_to_float64( float32 );
 188 #ifdef FLOATX80
 189 floatx80 float32_to_floatx80( float32 );
 190 #endif
 191 #ifdef FLOAT128
 192 float128 float32_to_float128( float32 );
 193 #endif
 194
 195 /*
 196 -------------------------------------------------------------------------------
 197 Software IEC/IEEE single-precision operations.
 198 -------------------------------------------------------------------------------
 199 */
 200 float32 float32_round_to_int( float32 );
 201 float32 float32_add( float32, float32 );
 202 float32 float32_sub( float32, float32 );
 203 float32 float32_mul( float32, float32 );
 204 float32 float32_div( float32, float32 );
 205 float32 float32_rem( float32, float32 );
 206 float32 float32_sqrt( float32 );
 207 int float32_eq( float32, float32 );
 208 int float32_le( float32, float32 );
 209 int float32_lt( float32, float32 );
 210 int float32_eq_signaling( float32, float32 );
 211 int float32_le_quiet( float32, float32 );
 212 int float32_lt_quiet( float32, float32 );
 213 #ifndef SOFTFLOAT_FOR_GCC
 214 int float32_is_signaling_nan( float32 );
 215 #endif
 216
 217 /*
 218 -------------------------------------------------------------------------------
 219 Software IEC/IEEE double-precision conversion routines.
 220 -------------------------------------------------------------------------------
 221 */
 222 int float64_to_int32( float64 );
 223 int float64_to_int32_round_to_zero( float64 );
 224 #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
 225 unsigned int float64_to_uint32_round_to_zero( float64 );
 226 #endif
 227 #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
 228 long long float64_to_int64( float64 );
 229 long long float64_to_int64_round_to_zero( float64 );
 230 #endif
 231 float32 float64_to_float32( float64 );
 232 #ifdef FLOATX80
 233 floatx80 float64_to_floatx80( float64 );
 234 #endif
 235 #ifdef FLOAT128
 236 float128 float64_to_float128( float64 );
 237 #endif
 238
 239 /*
 240 -------------------------------------------------------------------------------
 241 Software IEC/IEEE double-precision operations.
 242 -------------------------------------------------------------------------------
 243 */
 244 float64 float64_round_to_int( float64 );
 245 float64 float64_add( float64, float64 );
 246 float64 float64_sub( float64, float64 );
 247 float64 float64_mul( float64, float64 );
 248 float64 float64_div( float64, float64 );
 249 float64 float64_rem( float64, float64 );
 250 float64 float64_sqrt( float64 );
 251 int float64_eq( float64, float64 );
 252 int float64_le( float64, float64 );
 253 int float64_lt( float64, float64 );
 254 int float64_eq_signaling( float64, float64 );
 255 int float64_le_quiet( float64, float64 );
 256 int float64_lt_quiet( float64, float64 );
 257 #ifndef SOFTFLOAT_FOR_GCC
 258 int float64_is_signaling_nan( float64 );
 259 #endif
 260
 261 #ifdef FLOATX80
 262
 263 /*
 264 -------------------------------------------------------------------------------
 265 Software IEC/IEEE extended double-precision conversion routines.
 266 -------------------------------------------------------------------------------
 267 */
 268 int floatx80_to_int32( floatx80 );
 269 int floatx80_to_int32_round_to_zero( floatx80 );
 270 long long floatx80_to_int64( floatx80 );
 271 long long floatx80_to_int64_round_to_zero( floatx80 );
 272 float32 floatx80_to_float32( floatx80 );
 273 float64 floatx80_to_float64( floatx80 );
 274 #ifdef FLOAT128
 275 float128 floatx80_to_float128( floatx80 );
 276 #endif
 277
 278 /*
 279 -------------------------------------------------------------------------------
 280 Software IEC/IEEE extended double-precision rounding precision.  Valid
 281 values are 32, 64, and 80.
 282 -------------------------------------------------------------------------------
 283 */
 284 extern int floatx80_rounding_precision;
 285
 286 /*
 287 -------------------------------------------------------------------------------
 288 Software IEC/IEEE extended double-precision operations.
 289 -------------------------------------------------------------------------------
 290 */
 291 floatx80 floatx80_round_to_int( floatx80 );
 292 floatx80 floatx80_add( floatx80, floatx80 );
 293 floatx80 floatx80_sub( floatx80, floatx80 );
 294 floatx80 floatx80_mul( floatx80, floatx80 );
 295 floatx80 floatx80_div( floatx80, floatx80 );
 296 floatx80 floatx80_rem( floatx80, floatx80 );
 297 floatx80 floatx80_sqrt( floatx80 );
 298 int floatx80_eq( floatx80, floatx80 );
 299 int floatx80_le( floatx80, floatx80 );
 300 int floatx80_lt( floatx80, floatx80 );
 301 int floatx80_eq_signaling( floatx80, floatx80 );
 302 int floatx80_le_quiet( floatx80, floatx80 );
 303 int floatx80_lt_quiet( floatx80, floatx80 );
 304 int floatx80_is_signaling_nan( floatx80 );
 305
 306 #endif
 307
 308 #ifdef FLOAT128
 309
 310 /*
 311 -------------------------------------------------------------------------------
 312 Software IEC/IEEE quadruple-precision conversion routines.
 313 -------------------------------------------------------------------------------
 314 */
 315 int float128_to_int32( float128 );
 316 int float128_to_int32_round_to_zero( float128 );
 317 long long float128_to_int64( float128 );
 318 long long float128_to_int64_round_to_zero( float128 );
 319 float32 float128_to_float32( float128 );
 320 float64 float128_to_float64( float128 );
 321 #ifdef FLOATX80
 322 floatx80 float128_to_floatx80( float128 );
 323 #endif
 324
 325 /*
 326 -------------------------------------------------------------------------------
 327 Software IEC/IEEE quadruple-precision operations.
 328 -------------------------------------------------------------------------------
 329 */
 330 float128 float128_round_to_int( float128 );
 331 float128 float128_add( float128, float128 );
 332 float128 float128_sub( float128, float128 );
 333 float128 float128_mul( float128, float128 );
 334 float128 float128_div( float128, float128 );
 335 float128 float128_rem( float128, float128 );
 336 float128 float128_sqrt( float128 );
 337 int float128_eq( float128, float128 );
 338 int float128_le( float128, float128 );
 339 int float128_lt( float128, float128 );
 340 int float128_eq_signaling( float128, float128 );
 341 int float128_le_quiet( float128, float128 );
 342 int float128_lt_quiet( float128, float128 );
 343 int float128_is_signaling_nan( float128 );
 344
 345 #endif