1 /** 2 * Low-level Mathematical Functions which take advantage of the IEEE754 ABI. 3 * 4 * Copyright: 5 * Portions Copyright (C) 2001-2005 Digital Mars. 6 * Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH. 7 * All rights reserved. 8 * 9 * License: 10 * Tango Dual License: 3-Clause BSD License / Academic Free License v3.0. 11 * See LICENSE_TANGO.txt for details. 12 * 13 * Authors: Don Clugston, Walter Bright, Sean Kelly 14 * 15 */ 16 /** 17 * Macros: 18 * 19 * TABLE_SV = <table border=1 cellpadding=4 cellspacing=0> 20 * <caption>Special Values</caption> 21 * $0</table> 22 * SVH = $(TR $(TH $1) $(TH $2)) 23 * SV = $(TR $(TD $1) $(TD $2)) 24 * SVH3 = $(TR $(TH $1) $(TH $2) $(TH $3)) 25 * SV3 = $(TR $(TD $1) $(TD $2) $(TD $3)) 26 * NAN = $(RED NAN) 27 * PLUSMN = ± 28 * INFIN = ∞ 29 * PLUSMNINF = ±∞ 30 * PI = π 31 * LT = < 32 * GT = > 33 * SQRT = &radix; 34 * HALF = ½ 35 */ 36 module ocean.math.IEEE; 37 38 import ocean.meta.types.Qualifiers; 39 import ocean.core.Verify; 40 41 version (unittest) import ocean.core.Test; 42 43 version(TangoNoAsm) { 44 45 } else version(D_InlineAsm_X86) { 46 version = Naked_D_InlineAsm_X86; 47 } 48 49 version (X86){ 50 version = X86_Any; 51 } 52 53 version (X86_64){ 54 version = X86_Any; 55 } 56 57 version (Naked_D_InlineAsm_X86) { 58 // Don't include this extra dependency unless we need to. 59 version (unittest) { 60 static import core.stdc.math; 61 } 62 } else { 63 // Needed for cos(), sin(), tan() on GNU. 64 static import core.stdc.math; 65 } 66 static import tsm = core.stdc.math; 67 68 // Standard Tango NaN payloads. 69 // NOTE: These values may change in future Tango releases 70 // The lowest three bits indicate the cause of the NaN: 71 // 0 = error other than those listed below: 72 // 1 = domain error 73 // 2 = singularity 74 // 3 = range 75 // 4-7 = reserved. 
enum TANGO_NAN {
    // General errors
    DOMAIN_ERROR = 0x0101,
    SINGULARITY  = 0x0102,
    RANGE_ERROR  = 0x0103,
    // NaNs created by functions in the basic library
    TAN_DOMAIN   = 0x1001,
    POW_DOMAIN   = 0x1021,
    GAMMA_DOMAIN = 0x1101,
    GAMMA_POLE   = 0x1102,
    SGNGAMMA     = 0x1112,
    BETA_DOMAIN  = 0x1131,
    // NaNs from statistical functions
    NORMALDISTRIBUTION_INV_DOMAIN = 0x2001,
    STUDENTSDDISTRIBUTION_DOMAIN  = 0x2011
}

private:
/* Most of the functions depend on the format of the largest IEEE floating-point type.
 * The code will differ depending on whether 'real' is 64, 80, or 128 bits,
 * and whether it is a big-endian or little-endian architecture.
 * Only five 'real' ABIs are currently supported:
 * 64 bit Big-endian 'double' (eg PowerPC)
 * 128 bit Big-endian 'quadruple' (eg SPARC)
 * 64 bit Little-endian 'double' (eg x86-SSE2)
 * 80 bit Little-endian, with implied bit 'real80' (eg x87, Itanium).
 * 128 bit Little-endian 'quadruple' (not implemented on any known processor!)
 *
 * There is also an unsupported ABI which does not follow IEEE; several of its functions
 * will generate run-time errors if used.
 * 128 bit Big-endian 'doubledouble' (used by GDC <= 0.23 for PowerPC)
 */

version(LittleEndian) {
    static assert(real.mant_dig == 53 || real.mant_dig==64 || real.mant_dig == 113,
        "Only 64-bit, 80-bit, and 128-bit reals are supported for LittleEndian CPUs");
} else {
    static assert(real.mant_dig == 53 || real.mant_dig==106 || real.mant_dig == 113,
        "Only 64-bit and 128-bit reals are supported for BigEndian CPUs. double-double reals have partial support");
}

// Constants used for extracting the components of the representation.
// They supplement the built-in floating point properties.
//
// Every branch is selected on the properties of the template argument T
// (never on 'real'), so that floatTraits!(float) and floatTraits!(double)
// describe float and double regardless of what 'real' happens to be.
template floatTraits(T) {
    // EXPMASK is a ushort mask to select the exponent portion (without sign)
    // SIGNMASK is a ushort mask to select the sign bit.
    // EXPPOS_SHORT is the index of the exponent when represented as a ushort array.
    // SIGNPOS_BYTE is the index of the sign when represented as a ubyte array.
    // RECIP_EPSILON is the value such that
    //     (smallest_denormal) * RECIP_EPSILON == T.min_normal
    static immutable T RECIP_EPSILON = (1/T.epsilon);

    static if (T.mant_dig == 24) { // float
        enum : ushort {
            EXPMASK = 0x7F80,
            SIGNMASK = 0x8000,
            EXPBIAS = 0x3F00
        }
        static immutable uint EXPMASK_INT = 0x7F80_0000;
        static immutable uint MANTISSAMASK_INT = 0x007F_FFFF;
        version(LittleEndian) {
            static immutable EXPPOS_SHORT = 1;
            static immutable SIGNPOS_BYTE = 3; // sign lives in the last of 4 bytes
        } else {
            static immutable EXPPOS_SHORT = 0;
            static immutable SIGNPOS_BYTE = 0;
        }
    } else static if (T.mant_dig==53) { // double, or real==double
        enum : ushort {
            EXPMASK = 0x7FF0,
            SIGNMASK = 0x8000,
            EXPBIAS = 0x3FE0
        }
        static immutable uint EXPMASK_INT = 0x7FF0_0000;
        static immutable uint MANTISSAMASK_INT = 0x000F_FFFF; // for the MSB only
        version(LittleEndian) {
            static immutable EXPPOS_SHORT = 3;
            static immutable SIGNPOS_BYTE = 7;
        } else {
            static immutable EXPPOS_SHORT = 0;
            static immutable SIGNPOS_BYTE = 0;
        }
    } else static if (T.mant_dig==64) { // real80
        enum : ushort {
            EXPMASK = 0x7FFF,
            SIGNMASK = 0x8000,
            EXPBIAS = 0x3FFE
        }
        // const ulong QUIETNANMASK = 0xC000_0000_0000_0000; // Converts a signaling NaN to a quiet NaN.
        version(LittleEndian) {
            static immutable EXPPOS_SHORT = 4;
            static immutable SIGNPOS_BYTE = 9;
        } else {
            static immutable EXPPOS_SHORT = 0;
            static immutable SIGNPOS_BYTE = 0;
        }
    } else static if (T.mant_dig==113) { // quadruple
        enum : ushort {
            EXPMASK = 0x7FFF,
            SIGNMASK = 0x8000,
            EXPBIAS = 0x3FFE
        }
        version(LittleEndian) {
            static immutable EXPPOS_SHORT = 7;
            static immutable SIGNPOS_BYTE = 15;
        } else {
            static immutable EXPPOS_SHORT = 0;
            static immutable SIGNPOS_BYTE = 0;
        }
    } else static if (T.mant_dig==106) { // doubledouble
        enum : ushort {
            EXPMASK = 0x7FF0,
            SIGNMASK = 0x8000
            // EXPBIAS = 0x3FE0
        }
        // the exponent byte is not unique
        version(LittleEndian) {
            static immutable EXPPOS_SHORT = 7; // 3 is also an exp short
            static immutable SIGNPOS_BYTE = 15;
        } else {
            static immutable EXPPOS_SHORT = 0; // 4 is also an exp short
            static immutable SIGNPOS_BYTE = 0;
        }
    }
}

// These apply to all floating-point types.
// Index of the least / most significant half when a value is viewed as a
// two-element array of half-width integers.
version(LittleEndian) {
    static immutable MANTISSA_LSB = 0;
    static immutable MANTISSA_MSB = 1;
} else {
    static immutable MANTISSA_LSB = 1;
    static immutable MANTISSA_MSB = 0;
}

public:

/** IEEE exception status flags

 These flags indicate that an exceptional floating-point condition has occurred.
 They indicate that a NaN or an infinity has been generated, that a result
 is inexact, or that a signalling NaN has been encountered.
 The return values of the properties should be treated as booleans, although
 each is returned as an int, for speed.

 Example:
 ----
 real a=3.5;
 // Set all the flags to zero
 resetIeeeFlags();
 assert(!ieeeFlags.divByZero);
 // Perform a division by zero.
 a/=0.0L;
 assert(a==real.infinity);
 assert(ieeeFlags.divByZero);
 // Create a NaN
 a*=0.0L;
 assert(ieeeFlags.invalid);
 assert(isNaN(a));

 // Check that calling func() has no effect on the
 // status flags.
 IeeeFlags f = ieeeFlags;
 func();
 assert(ieeeFlags == f);

 ----
 */
struct IeeeFlags
{
private:
    // Snapshot of the status bits, already masked down to the flags below.
    // The x87 FPU status register is 16 bits.
    // The Pentium SSE2 status register is 32 bits.
    int m_flags;
    version (X86_Any) {
        // Applies to both x87 status word (16 bits) and SSE2 status word(32 bits).
        enum : int {
            INEXACT_MASK = 0x20,
            UNDERFLOW_MASK = 0x10,
            OVERFLOW_MASK = 0x08,
            DIVBYZERO_MASK = 0x04,
            INVALID_MASK = 0x01
        }
        // Don't bother about denormals, they are not supported on most CPUs.
        // DENORMAL_MASK = 0x02;
    } else version (PPC) {
        // PowerPC FPSCR is a 32-bit register.
        enum : int {
            INEXACT_MASK = 0x600,
            UNDERFLOW_MASK = 0x010,
            OVERFLOW_MASK = 0x008,
            DIVBYZERO_MASK = 0x020,
            INVALID_MASK = 0xF80
        }
    } else { // SPARC FSR is a 32bit register
        //(64 bits for Sparc 7 & 8, but high 32 bits are uninteresting).
        enum : int {
            INEXACT_MASK = 0x020,
            UNDERFLOW_MASK = 0x080,
            OVERFLOW_MASK = 0x100,
            DIVBYZERO_MASK = 0x040,
            INVALID_MASK = 0x200
        }
    }
private:
    // Reads the FPU status word and returns it as an IeeeFlags value.
    // Both asm variants are 'naked' (no compiler-generated prologue/epilogue)
    // and end with an explicit 'ret'.
    // NOTE(review): this presumably relies on the ABI returning a small
    // struct in EAX/RAX -- confirm for each supported compiler.
    static IeeeFlags getIeeeFlags()
    {
        version(D_InlineAsm_X86)
        {
            asm
            {
                naked;
                fstsw AX;
                // NOTE: If compiler supports SSE2, need to OR the result with
                // the SSE2 status register.
                // Clear all irrelevant bits
                // (0x03D keeps the X86_Any mask bits listed above plus the
                // commented-out DENORMAL bit's neighbours; everything else,
                // including the undefined upper half of EAX, is zeroed).
                and EAX, 0x03D;
                ret;
            }
        }
        else version(D_InlineAsm_X86_64)
        {
            asm
            {
                naked;
                fstsw AX;
                // NOTE: If compiler supports SSE2, need to OR the result with
                // the SSE2 status register.
                // Clear all irrelevant bits
                and RAX, 0x03D;
                ret;
            }
        } else {
            /* SPARC:
            int retval;
            asm { st %fsr, retval; }
            return retval;
            */
            static assert(0, "Not yet supported");
        }
    }
    // Clears the x87 exception flags (fnclex). On targets without x86
    // inline asm this throws at run time rather than failing to compile.
    static void resetIeeeFlags()
    {
        version (D_InlineAsm_X86)
            asm {fnclex;}
        else version (D_InlineAsm_X86_64)
            asm {fnclex;}
        else {
            /* SPARC:
            int tmpval;
            asm { st %fsr, tmpval; }
            tmpval &=0xFFFF_FC00;
            asm { ld tmpval, %fsr; }
            */
            throw new SanityException("Not yet supported");
        }
    }
public:
    // Each property returns the raw masked bit(s): zero means "not set",
    // any non-zero value means "set".

    /// The result cannot be represented exactly, so rounding occurred.
    /// (example: x = sin(0.1); )
    int inexact() { return m_flags & INEXACT_MASK; }
    /// A zero was generated by underflow (example: x = real.min_normal*real.epsilon/2;)
    int underflow() { return m_flags & UNDERFLOW_MASK; }
    /// An infinity was generated by overflow (example: x = real.max*2;)
    int overflow() { return m_flags & OVERFLOW_MASK; }
    /// An infinity was generated by division by zero (example: x = 3/0.0; )
    int divByZero() { return m_flags & DIVBYZERO_MASK; }
    /// A machine NaN was generated. (example: x = real.infinity * 0.0; )
    int invalid() { return m_flags & INVALID_MASK; }
}

/// Return a snapshot of the current state of the floating-point status flags.
IeeeFlags ieeeFlags() { return IeeeFlags.getIeeeFlags(); }

/// Set all of the floating-point status flags to false.
void resetIeeeFlags() { IeeeFlags.resetIeeeFlags; }

unittest {
    static real a = 3.5;
    resetIeeeFlags();
    test(!ieeeFlags.divByZero);
    a /= 0.0L;
    test(ieeeFlags.divByZero);
    test(a == real.infinity);
    a *= 0.0L;
    test(ieeeFlags.invalid);
    test(isNaN(a));
    a = real.max;
    a *= 2;
    test(ieeeFlags.overflow);
    a = real.min_normal * real.epsilon;
    a /= 99;
    test(ieeeFlags.underflow);
    test(ieeeFlags.inexact);
}

/*********************************************************************
 * Separate floating point value into significand and exponent.
 *
 * Params:
 *      value = the number to decompose
 *      exp   = receives the binary exponent (see the table below for
 *              the values stored for zero, infinity and NaN)
 *
 * Returns:
 *      Calculate and return $(I x) and $(I exp) such that
 *      value =$(I x)*2$(SUP exp) and
 *      .5 $(LT)= |$(I x)| $(LT) 1.0
 *
 *      $(I x) has same sign as value.
 *
 *      $(TABLE_SV
 *      $(TR $(TH value)           $(TH returns)         $(TH exp))
 *      $(TR $(TD $(PLUSMN)0.0)    $(TD $(PLUSMN)0.0)    $(TD 0))
 *      $(TR $(TD +$(INFIN))       $(TD +$(INFIN))       $(TD int.max))
 *      $(TR $(TD -$(INFIN))       $(TD -$(INFIN))       $(TD int.min))
 *      $(TR $(TD $(PLUSMN)$(NAN)) $(TD $(PLUSMN)$(NAN)) $(TD int.min))
 *      )
 */
real frexp(real value, out int exp)
{
    ushort* vu = cast(ushort*)&value;
    long* vl = cast(long*)&value;
    // Signed on purpose: (ex - EXPBIAS) is negative for |value| < 0.5.
    int ex;
    alias floatTraits!(real) F;

    ex = vu[F.EXPPOS_SHORT] & F.EXPMASK;
    static if (real.mant_dig == 64) { // real80
        if (ex) { // If exponent is non-zero
            if (ex == F.EXPMASK) { // infinity or NaN
                if (*vl & 0x7FFF_FFFF_FFFF_FFFF) { // NaN
                    *vl |= 0xC000_0000_0000_0000; // convert signalling NaN to quiet NaN
                    exp = int.min;
                } else if (vu[F.EXPPOS_SHORT] & 0x8000) { // negative infinity
                    exp = int.min;
                } else { // positive infinity
                    exp = int.max;
                }
            } else {
                exp = ex - F.EXPBIAS;
                // keep the sign, force the exponent of a value in [0.5, 1)
                vu[F.EXPPOS_SHORT] = cast(ushort)((0x8000 & vu[F.EXPPOS_SHORT]) | 0x3FFE);
            }
        } else if (!*vl) {
            // value is +-0.0
            exp = 0;
        } else {
            // denormal: scale by 2^63 (== 1/epsilon) to normalise, then
            // take those 63 binary places back off the exponent.
            value *= F.RECIP_EPSILON;
            ex = vu[F.EXPPOS_SHORT] & F.EXPMASK;
            exp = ex - F.EXPBIAS - 63;
            vu[F.EXPPOS_SHORT] = cast(ushort)((0x8000 & vu[F.EXPPOS_SHORT]) | 0x3FFE);
        }
        return value;
    } else static if (real.mant_dig == 113) { // quadruple
        if (ex) { // If exponent is non-zero
            if (ex == F.EXPMASK) { // infinity or NaN
                if (vl[MANTISSA_LSB] |( vl[MANTISSA_MSB]&0x0000_FFFF_FFFF_FFFF)) { // NaN
                    vl[MANTISSA_MSB] |= 0x0000_8000_0000_0000; // convert signalling NaN to quiet NaN
                    exp = int.min;
                } else if (vu[F.EXPPOS_SHORT] & 0x8000) { // negative infinity
                    exp = int.min;
                } else { // positive infinity
                    exp = int.max;
                }
            } else {
                exp = ex - F.EXPBIAS;
                vu[F.EXPPOS_SHORT] = cast(ushort)((0x8000 & vu[F.EXPPOS_SHORT]) | 0x3FFE);
            }
        } else if ((vl[MANTISSA_LSB] |(vl[MANTISSA_MSB]&0x0000_FFFF_FFFF_FFFF))==0) {
            // value is +-0.0
            exp = 0;
        } else {
            // denormal: RECIP_EPSILON is 2^(mant_dig - 1) == 2^112, so that
            // is the amount that has to be removed from the exponent again.
            value *= F.RECIP_EPSILON;
            ex = vu[F.EXPPOS_SHORT] & F.EXPMASK;
            exp = ex - F.EXPBIAS - real.mant_dig + 1;
            vu[F.EXPPOS_SHORT] = cast(ushort)((0x8000 & vu[F.EXPPOS_SHORT]) | 0x3FFE);
        }
        return value;
    } else static if (real.mant_dig==53) { // real is double
        if (ex) { // If exponent is non-zero
            if (ex == F.EXPMASK) { // infinity or NaN
                if (cast(ulong)*vl == 0x7FF0_0000_0000_0000) { // positive infinity
                    exp = int.max;
                } else if (cast(ulong)*vl == 0xFFF0_0000_0000_0000) { // negative infinity
                    exp = int.min;
                } else { // NaN
                    *vl |= 0x0008_0000_0000_0000; // convert signalling NaN to quiet NaN
                    exp = int.min;
                }
            } else {
                // The exponent field sits 4 bits up inside the top ushort;
                // an arithmetic shift keeps negative exponents negative.
                exp = (ex - F.EXPBIAS) >> 4;
                // 0x800F keeps the sign AND the top four mantissa bits that
                // share this ushort with the exponent.
                vu[F.EXPPOS_SHORT] = cast(ushort)((0x800F & vu[F.EXPPOS_SHORT]) | 0x3FE0);
            }
        } else if (!(*vl & 0x7FFF_FFFF_FFFF_FFFF)) {
            // value is +-0.0
            exp = 0;
        } else {
            // denormal: normalise by scaling with 2^52 (== 1/epsilon),
            // then proceed exactly as for a normal number.
            value *= F.RECIP_EPSILON;
            ex = vu[F.EXPPOS_SHORT] & F.EXPMASK;
            exp = ((ex - F.EXPBIAS) >> 4) - real.mant_dig + 1;
            vu[F.EXPPOS_SHORT] = cast(ushort)((0x800F & vu[F.EXPPOS_SHORT]) | 0x3FE0);
        }
        return value;
    } else { //static if(real.mant_dig==106) // doubledouble
        static assert(0, "Unsupported");
    }
}

unittest
{
    static real[3][] vals = // x,frexp,exp
    [
        [0.0, 0.0, 0],
        [-0.0, -0.0, 0],
        [1.0, .5, 1],
        [-1.0, -.5, 1],
        [2.0, .5, 2],
        [0.25, .5, -1],     // negative exponent
        [1.5, .75, 1],      // significand bits must survive
        [-0.375, -.75, -1],
        [double.min_normal/2.0, .5, -1022],
        [real.infinity,real.infinity,int.max],
        [-real.infinity,-real.infinity,int.min],
    ];

    int i;
    int eptr;
    real v = frexp(NaN(0xABC), eptr);
    test(isIdentical(NaN(0xABC), v));
    test(eptr ==int.min);
    v = frexp(-NaN(0xABC), eptr);
    test(isIdentical(-NaN(0xABC), v));
    test(eptr ==int.min);

    for (i = 0; i < vals.length; i++) {
        real x = vals[i][0];
        real e = vals[i][1];
        int exp = cast(int)vals[i][2];
        v = frexp(x, eptr);
        // printf("frexp(%La) = %La, should be %La, eptr = %d, should be %d\n", x, v, e, eptr, exp);
        test(isIdentical(e, v));
        test(exp == eptr);

    }
    static if (real.mant_dig == 64) {
        static real[3][] extendedvals = [ // x,frexp,exp
            [0x1.a5f1c2eb3fe4efp+73L, 0x1.A5F1C2EB3FE4EFp-1L, 74], // normal
            [0x1.fa01712e8f0471ap-1064L, 0x1.fa01712e8f0471ap-1L, -1063],
            [real.min_normal, .5, -16381],
            [real.min_normal/2.0L, .5, -16382] // denormal
        ];

        for (i = 0; i < extendedvals.length; i++) {
            real x = extendedvals[i][0];
            real e = extendedvals[i][1];
            int exp = cast(int)extendedvals[i][2];
            v = frexp(x, eptr);
            test(isIdentical(e, v));
            test(exp == eptr);

        }
    }
}

/**
 * Compute n * 2$(SUP exp)
 * References: frexp
 */
real ldexp(real n, int exp) /* intrinsic */
{
    version(Naked_D_InlineAsm_X86)
    {
        asm {
            fild exp;
            fld n;
            fscale;
            fstp ST(1);
        }
    }
    else
    {
        return core.stdc.math.ldexpl(n, exp);
    }
}

/******************************************
 * Extracts the exponent of x as a signed integral value.
561 * 562 * If x is not a special value, the result is the same as 563 * $(D cast(int)logb(x)). 564 * 565 * Remarks: This function is consistent with IEEE754R, but it 566 * differs from the C function of the same name 567 * in the return value of infinity. (in C, ilogb(real.infinity)== int.max). 568 * Note that the special return values may all be equal. 569 * 570 * $(TABLE_SV 571 * $(TR $(TH x) $(TH ilogb(x)) $(TH Invalid?)) 572 * $(TR $(TD 0) $(TD FP_ILOGB0) $(TD yes)) 573 * $(TR $(TD $(PLUSMN)$(INFIN)) $(TD FP_ILOGBINFINITY) $(TD yes)) 574 * $(TR $(TD $(NAN)) $(TD FP_ILOGBNAN) $(TD yes)) 575 * ) 576 */ 577 int ilogb(real x) 578 { 579 version(Naked_D_InlineAsm_X86) 580 { 581 int y; 582 asm { 583 fld x; 584 fxtract; 585 fstp ST(0); // drop significand 586 fistp y; // and return the exponent 587 } 588 return y; 589 } else static if (real.mant_dig==64) { // 80-bit reals 590 alias floatTraits!(real) F; 591 short e = cast(short)((cast(short *)&x)[F.EXPPOS_SHORT] & F.EXPMASK); 592 if (e == F.EXPMASK) { 593 // BUG: should also set the invalid exception 594 ulong s = *cast(ulong *)&x; 595 if (s == 0x8000_0000_0000_0000) { 596 return FP_ILOGBINFINITY; 597 } 598 else return FP_ILOGBNAN; 599 } 600 if (e==0) { 601 ulong s = *cast(ulong *)&x; 602 if (s == 0x0000_0000_0000_0000) { 603 // BUG: should also set the invalid exception 604 return FP_ILOGB0; 605 } 606 // Denormals 607 x *= F.RECIP_EPSILON; 608 short f = (cast(short *)&x)[F.EXPPOS_SHORT]; 609 return -0x3FFF - (63-f); 610 } 611 return e - 0x3FFF; 612 } else { 613 return core.stdc.math.ilogbl(x); 614 } 615 } 616 617 version (X86) 618 { 619 static immutable int FP_ILOGB0 = -int.max-1; 620 static immutable int FP_ILOGBNAN = -int.max-1; 621 static immutable int FP_ILOGBINFINITY = -int.max-1; 622 } else { 623 alias core.stdc.math.FP_ILOGB0 FP_ILOGB0; 624 alias core.stdc.math.FP_ILOGBNAN FP_ILOGBNAN; 625 static immutable int FP_ILOGBINFINITY = int.max; 626 } 627 628 unittest { 629 test(ilogb(1.0) == 0); 630 test(ilogb(65536) 
== 16); 631 test(ilogb(-65536) == 16); 632 test(ilogb(1.0 / 65536) == -16); 633 test(ilogb(real.nan) == FP_ILOGBNAN); 634 test(ilogb(0.0) == FP_ILOGB0); 635 test(ilogb(-0.0) == FP_ILOGB0); 636 // denormal 637 test(ilogb(0.125 * real.min_normal) == real.min_exp - 4); 638 test(ilogb(real.infinity) == FP_ILOGBINFINITY); 639 } 640 641 /***************************************** 642 * Extracts the exponent of x as a signed integral value. 643 * 644 * If x is subnormal, it is treated as if it were normalized. 645 * For a positive, finite x: 646 * 647 * 1 $(LT)= $(I x) * FLT_RADIX$(SUP -logb(x)) $(LT) FLT_RADIX 648 * 649 * $(TABLE_SV 650 * $(TR $(TH x) $(TH logb(x)) $(TH divide by 0?) ) 651 * $(TR $(TD $(PLUSMN)$(INFIN)) $(TD +$(INFIN)) $(TD no)) 652 * $(TR $(TD $(PLUSMN)0.0) $(TD -$(INFIN)) $(TD yes) ) 653 * ) 654 */ 655 real logb(real x) 656 { 657 version(Naked_D_InlineAsm_X86) 658 { 659 asm { 660 fld x; 661 fxtract; 662 fstp ST(0); // drop significand 663 } 664 } else { 665 return core.stdc.math.logbl(x); 666 } 667 } 668 669 unittest { 670 test(logb(real.infinity)== real.infinity); 671 test(isIdentical(logb(NaN(0xFCD)), NaN(0xFCD))); 672 test(logb(1.0)== 0.0); 673 test(logb(-65536) == 16); 674 test(logb(0.0)== -real.infinity); 675 test(ilogb(0.125*real.min_normal) == real.min_exp-4); 676 } 677 678 /************************************* 679 * Efficiently calculates x * 2$(SUP n). 680 * 681 * scalbn handles underflow and overflow in 682 * the same fashion as the basic arithmetic operators. 
683 * 684 * $(TABLE_SV 685 * $(TR $(TH x) $(TH scalb(x))) 686 * $(TR $(TD $(PLUSMNINF)) $(TD $(PLUSMNINF)) ) 687 * $(TR $(TD $(PLUSMN)0.0) $(TD $(PLUSMN)0.0) ) 688 * ) 689 */ 690 real scalbn(real x, int n) 691 { 692 version(Naked_D_InlineAsm_X86) 693 { 694 asm { 695 fild n; 696 fld x; 697 fscale; 698 fstp ST(1); 699 } 700 } else { 701 // NOTE: Not implemented in DMD 702 return core.stdc.math.scalbnl(x, n); 703 } 704 } 705 706 unittest { 707 test(scalbn(-real.infinity, 5) == -real.infinity); 708 test(isIdentical(scalbn(NaN(0xABC),7), NaN(0xABC))); 709 } 710 711 /** 712 * Returns the positive difference between x and y. 713 * 714 * If either of x or y is $(NAN), it will be returned. 715 * Returns: 716 * $(TABLE_SV 717 * $(SVH Arguments, fdim(x, y)) 718 * $(SV x $(GT) y, x - y) 719 * $(SV x $(LT)= y, +0.0) 720 * ) 721 */ 722 real fdim(real x, real y) 723 { 724 return (tsm.isnan(x) || tsm.isnan(y) || x <= y) ? x - y : +0.0; 725 } 726 727 unittest { 728 test(isIdentical(fdim(NaN(0xABC), 58.2), NaN(0xABC))); 729 } 730 731 /******************************* 732 * Returns |x| 733 * 734 * $(TABLE_SV 735 * $(TR $(TH x) $(TH fabs(x))) 736 * $(TR $(TD $(PLUSMN)0.0) $(TD +0.0) ) 737 * $(TR $(TD $(PLUSMN)$(INFIN)) $(TD +$(INFIN)) ) 738 * ) 739 */ 740 real fabs(real x) /* intrinsic */ 741 { 742 version(D_InlineAsm_X86) 743 { 744 asm { 745 fld x; 746 fabs; 747 } 748 } 749 else 750 { 751 return core.stdc.math.fabsl(x); 752 } 753 } 754 755 unittest { 756 test(isIdentical(fabs(NaN(0xABC)), NaN(0xABC))); 757 } 758 759 /** 760 * Returns (x * y) + z, rounding only once according to the 761 * current rounding mode. 762 * 763 * BUGS: Not currently implemented - rounds twice. 764 */ 765 real fma(float x, float y, float z) 766 { 767 return (x * y) + z; 768 } 769 770 /** 771 * Calculate cos(y) + i sin(y). 772 * 773 * On x86 CPUs, this is a very efficient operation; 774 * almost twice as fast as calculating sin(y) and cos(y) 775 * seperately, and is the preferred method when both are required. 
776 */ 777 deprecated("Use `std.complex.Complex` instead") 778 creal expi(real y) 779 { 780 version(Naked_D_InlineAsm_X86) 781 { 782 asm { 783 fld y; 784 fsincos; 785 fxch ST(1), ST(0); 786 } 787 } 788 else 789 { 790 return core.stdc.math.cosl(y) + core.stdc.math.sinl(y)*1i; 791 } 792 } 793 794 deprecated unittest 795 { 796 test(expi(1.3e5L) == core.stdc.math.cosl(1.3e5L) + core.stdc.math.sinl(1.3e5L) * 1i); 797 test(expi(0.0L) == 1L + 0.0Li); 798 } 799 800 /********************************* 801 * Returns !=0 if e is a NaN. 802 */ 803 804 int isNaN(real x) 805 { 806 alias floatTraits!(real) F; 807 static if (real.mant_dig==53) { // double 808 ulong* p = cast(ulong *)&x; 809 return ((*p & 0x7FF0_0000_0000_0000) == 0x7FF0_0000_0000_0000) && *p & 0x000F_FFFF_FFFF_FFFF; 810 } else static if (real.mant_dig==64) { // real80 811 ushort e = F.EXPMASK & (cast(ushort *)&x)[F.EXPPOS_SHORT]; 812 ulong* ps = cast(ulong *)&x; 813 return e == F.EXPMASK && 814 *ps & 0x7FFF_FFFF_FFFF_FFFF; // not infinity 815 } else static if (real.mant_dig==113) { // quadruple 816 ushort e = F.EXPMASK & (cast(ushort *)&x)[F.EXPPOS_SHORT]; 817 ulong* ps = cast(ulong *)&x; 818 return e == F.EXPMASK && 819 (ps[MANTISSA_LSB] | (ps[MANTISSA_MSB]& 0x0000_FFFF_FFFF_FFFF))!=0; 820 } else { 821 return x!=x; 822 } 823 } 824 825 826 unittest 827 { 828 test(isNaN(float.nan)); 829 test(isNaN(-double.nan)); 830 test(isNaN(real.nan)); 831 832 test(!isNaN(53.6)); 833 test(!isNaN(float.infinity)); 834 } 835 836 /** 837 * Returns !=0 if x is normalized. 838 * 839 * (Need one for each format because subnormal 840 * floats might be converted to normal reals) 841 */ 842 int isNormal(X)(X x) 843 { 844 alias floatTraits!(X) F; 845 846 static if(real.mant_dig==106) { // doubledouble 847 // doubledouble is normal if the least significant part is normal. 
848 return isNormal((cast(double*)&x)[MANTISSA_LSB]); 849 } else { 850 ushort e = F.EXPMASK & (cast(ushort *)&x)[F.EXPPOS_SHORT]; 851 return (e != F.EXPMASK && e!=0); 852 } 853 } 854 855 unittest 856 { 857 float f = 3; 858 double d = 500; 859 real e = 10e+48; 860 861 test(isNormal(f)); 862 test(isNormal(d)); 863 test(isNormal(e)); 864 f=d=e=0; 865 test(!isNormal(f)); 866 test(!isNormal(d)); 867 test(!isNormal(e)); 868 test(!isNormal(real.infinity)); 869 test(isNormal(-real.max)); 870 test(!isNormal(real.min_normal/4)); 871 872 } 873 874 /********************************* 875 * Is the binary representation of x identical to y? 876 * 877 * Same as ==, except that positive and negative zero are not identical, 878 * and two $(NAN)s are identical if they have the same 'payload'. 879 */ 880 881 bool isIdentical(real x, real y) 882 { 883 // We're doing a bitwise comparison so the endianness is irrelevant. 884 long* pxs = cast(long *)&x; 885 long* pys = cast(long *)&y; 886 static if (real.mant_dig == 53){ //double 887 return pxs[0] == pys[0]; 888 } else static if (real.mant_dig == 113 || real.mant_dig==106) { 889 // quadruple or doubledouble 890 return pxs[0] == pys[0] && pxs[1] == pys[1]; 891 } else { // real80 892 ushort* pxe = cast(ushort *)&x; 893 ushort* pye = cast(ushort *)&y; 894 return pxe[4] == pye[4] && pxs[0] == pys[0]; 895 } 896 } 897 898 /** ditto */ 899 deprecated("Use `real` instead") 900 bool isIdentical(ireal x, ireal y) { 901 return isIdentical(x.im, y.im); 902 } 903 904 /** ditto */ 905 deprecated("Use `std.complex.Complex` instead") 906 bool isIdentical(creal x, creal y) { 907 return isIdentical(x.re, y.re) && isIdentical(x.im, y.im); 908 } 909 910 unittest { 911 test(isIdentical(0.0, 0.0)); 912 test(!isIdentical(0.0, -0.0)); 913 test(isIdentical(NaN(0xABC), NaN(0xABC))); 914 test(!isIdentical(NaN(0xABC), NaN(218))); 915 test(isIdentical(1.234e56, 1.234e56)); 916 test(isNaN(NaN(0x12345))); 917 } 918 919 deprecated unittest { 920 test(isIdentical(3.1 + 
NaN(0xDEF) * 1i, 3.1 + NaN(0xDEF)*1i)); 921 test(!isIdentical(3.1+0.0i, 3.1-0i)); 922 test(!isIdentical(0.0i, 2.5e58i)); 923 } 924 925 /********************************* 926 * Is number subnormal? (Also called "denormal".) 927 * Subnormals have a 0 exponent and a 0 most significant significand bit, 928 * but are non-zero. 929 */ 930 931 /* Need one for each format because subnormal floats might 932 * be converted to normal reals. 933 */ 934 935 int isSubnormal(float f) 936 { 937 uint *p = cast(uint *)&f; 938 return (*p & 0x7F80_0000) == 0 && *p & 0x007F_FFFF; 939 } 940 941 unittest 942 { 943 float f = -float.min_normal; 944 test(!isSubnormal(f)); 945 f/=4; 946 test(isSubnormal(f)); 947 } 948 949 /// ditto 950 951 int isSubnormal(double d) 952 { 953 uint *p = cast(uint *)&d; 954 return (p[MANTISSA_MSB] & 0x7FF0_0000) == 0 && (p[MANTISSA_LSB] || p[MANTISSA_MSB] & 0x000F_FFFF); 955 } 956 957 unittest 958 { 959 double f; 960 961 for (f = 1; !isSubnormal(f); f /= 2) 962 test(f != 0); 963 } 964 965 /// ditto 966 967 int isSubnormal(real x) 968 { 969 alias floatTraits!(real) F; 970 static if (real.mant_dig == 53) { // double 971 return isSubnormal(cast(double)x); 972 } else static if (real.mant_dig == 113) { // quadruple 973 ushort e = F.EXPMASK & (cast(ushort *)&x)[F.EXPPOS_SHORT]; 974 long* ps = cast(long *)&x; 975 return (e == 0 && (((ps[MANTISSA_LSB]|(ps[MANTISSA_MSB]& 0x0000_FFFF_FFFF_FFFF))) !=0)); 976 } else static if (real.mant_dig==64) { // real80 977 ushort* pe = cast(ushort *)&x; 978 long* ps = cast(long *)&x; 979 980 return (pe[F.EXPPOS_SHORT] & F.EXPMASK) == 0 && *ps > 0; 981 } else { // double double 982 return isSubnormal((cast(double*)&x)[MANTISSA_MSB]); 983 } 984 } 985 986 unittest 987 { 988 real f; 989 990 for (f = 1; !isSubnormal(f); f /= 2) 991 test(f != 0); 992 } 993 994 /********************************* 995 * Return !=0 if x is $(PLUSMN)0. 
996 * 997 * Does not affect any floating-point flags 998 */ 999 int isZero(real x) 1000 { 1001 alias floatTraits!(real) F; 1002 static if (real.mant_dig == 53) { // double 1003 return ((*cast(ulong *)&x) & 0x7FFF_FFFF_FFFF_FFFF) == 0; 1004 } else static if (real.mant_dig == 113) { // quadruple 1005 long* ps = cast(long *)&x; 1006 return (ps[MANTISSA_LSB] | (ps[MANTISSA_MSB]& 0x7FFF_FFFF_FFFF_FFFF)) == 0; 1007 } else { // real80 1008 ushort* pe = cast(ushort *)&x; 1009 ulong* ps = cast(ulong *)&x; 1010 return (pe[F.EXPPOS_SHORT] & F.EXPMASK) == 0 && *ps == 0; 1011 } 1012 } 1013 1014 unittest 1015 { 1016 test(isZero(0.0)); 1017 test(isZero(-0.0)); 1018 test(!isZero(2.5)); 1019 test(!isZero(real.min_normal / 1000)); 1020 } 1021 1022 /********************************* 1023 * Return !=0 if e is $(PLUSMNINF);. 1024 */ 1025 1026 int isInfinity(real x) 1027 { 1028 alias floatTraits!(real) F; 1029 static if (real.mant_dig == 53) { // double 1030 return ((*cast(ulong *)&x) & 0x7FFF_FFFF_FFFF_FFFF) == 0x7FF8_0000_0000_0000; 1031 } else static if(real.mant_dig == 106) { //doubledouble 1032 return (((cast(ulong *)&x)[MANTISSA_MSB]) & 0x7FFF_FFFF_FFFF_FFFF) == 0x7FF8_0000_0000_0000; 1033 } else static if (real.mant_dig == 113) { // quadruple 1034 long* ps = cast(long *)&x; 1035 return (ps[MANTISSA_LSB] == 0) 1036 && (ps[MANTISSA_MSB] & 0x7FFF_FFFF_FFFF_FFFF) == 0x7FFF_0000_0000_0000; 1037 } else { // real80 1038 ushort e = cast(ushort)(F.EXPMASK & (cast(ushort *)&x)[F.EXPPOS_SHORT]); 1039 ulong* ps = cast(ulong *)&x; 1040 1041 return e == F.EXPMASK && *ps == 0x8000_0000_0000_0000; 1042 } 1043 } 1044 1045 unittest 1046 { 1047 test(isInfinity(float.infinity)); 1048 test(!isInfinity(float.nan)); 1049 test(isInfinity(double.infinity)); 1050 test(isInfinity(-real.infinity)); 1051 1052 test(isInfinity(-1.0 / 0.0)); 1053 } 1054 1055 /** 1056 * Calculate the next largest floating point value after x. 
*
 * Return the least number greater than x that is representable as a real;
 * thus, it gives the next point on the IEEE number line.
 *
 * $(TABLE_SV
 *    $(SVH x,             nextUp(x) )
 *    $(SV  -$(INFIN),     -real.max )
 *    $(SV  $(PLUSMN)0.0,  real.min_normal*real.epsilon )
 *    $(SV  real.max,      $(INFIN) )
 *    $(SV  $(INFIN),      $(INFIN) )
 *    $(SV  $(NAN),        $(NAN) )
 * )
 *
 * Remarks:
 * This function is included in the IEEE 754-2008 standard.
 *
 * nextDoubleUp and nextFloatUp are the corresponding functions for
 * the IEEE double and IEEE float number lines.
 */
real nextUp(real x)
{
    alias floatTraits!(real) F;
    static if (real.mant_dig == 53) { // double
        return nextDoubleUp(x);
    } else static if(real.mant_dig==113) { // quadruple
        ushort e = F.EXPMASK & (cast(ushort *)&x)[F.EXPPOS_SHORT];
        if (e == F.EXPMASK) { // NaN or Infinity
            if (x == -real.infinity) return -real.max;
            return x; // +Inf and NaN are unchanged.
        }
        // View x itself (not the local exponent copy) as two 64-bit words.
        ulong* ps = cast(ulong *)&x;
        // The sign bit is the top bit of the most significant word.
        if (ps[MANTISSA_MSB] & 0x8000_0000_0000_0000) { // Negative number
            if (ps[MANTISSA_LSB]==0 && ps[MANTISSA_MSB] == 0x8000_0000_0000_0000) { // it was negative zero
                ps[MANTISSA_LSB] = 0x0000_0000_0000_0001; // change to smallest subnormal
                ps[MANTISSA_MSB] = 0;
                return x;
            }
            // 128-bit decrement of the magnitude: borrow from the high word
            // when the low word is about to wrap around.
            if (ps[MANTISSA_LSB]==0) --ps[MANTISSA_MSB];
            --ps[MANTISSA_LSB];
        } else { // Positive number
            // 128-bit increment: carry into the high word when the low word wraps.
            ++ps[MANTISSA_LSB];
            if (ps[MANTISSA_LSB]==0) ++ps[MANTISSA_MSB];
        }
        return x;

    } else static if(real.mant_dig==64){ // real80
        // For 80-bit reals, the "implied bit" is a nuisance...
        ushort *pe = cast(ushort *)&x;
        ulong *ps = cast(ulong *)&x;

        if ((pe[F.EXPPOS_SHORT] & F.EXPMASK) == F.EXPMASK) {
            // First, deal with NANs and infinity
            if (x == -real.infinity) return -real.max;
            return x; // +Inf and NaN are unchanged.
        }
        if (pe[F.EXPPOS_SHORT] & 0x8000) { // Negative number -- need to decrease the significand
            --*ps;
            // Need to mask with 0x7FFF... so subnormals are treated correctly.
            if ((*ps & 0x7FFF_FFFF_FFFF_FFFF) == 0x7FFF_FFFF_FFFF_FFFF) {
                if (pe[F.EXPPOS_SHORT] == 0x8000) { // it was negative zero
                    *ps = 1;
                    pe[F.EXPPOS_SHORT] = 0; // smallest subnormal.
                    return x;
                }
                --pe[F.EXPPOS_SHORT];
                if (pe[F.EXPPOS_SHORT] == 0x8000) {
                    return x; // it's become a subnormal, implied bit stays low.
                }
                *ps = 0xFFFF_FFFF_FFFF_FFFF; // set the implied bit
                return x;
            }
            return x;
        } else {
            // Positive number -- need to increase the significand.
            // Works automatically for positive zero.
            ++*ps;
            if ((*ps & 0x7FFF_FFFF_FFFF_FFFF) == 0) {
                // change in exponent
                ++pe[F.EXPPOS_SHORT];
                *ps = 0x8000_0000_0000_0000; // set the high bit
            }
        }
        return x;
    } else { // doubledouble
        static assert(0, "Not implemented");
    }
}

/** ditto */
double nextDoubleUp(double x)
{
    ulong *ps = cast(ulong *)&x;

    if ((*ps & 0x7FF0_0000_0000_0000) == 0x7FF0_0000_0000_0000) {
        // First, deal with NANs and infinity
        if (x == -x.infinity) return -x.max;
        return x; // +INF and NAN are unchanged.
    }
    if (*ps & 0x8000_0000_0000_0000) { // Negative number
        if (*ps == 0x8000_0000_0000_0000) { // it was negative zero
            *ps = 0x0000_0000_0000_0001; // change to smallest subnormal
            return x;
        }
        --*ps;
    } else { // Positive number
        ++*ps;
    }
    return x;
}

/** ditto */
float nextFloatUp(float x)
{
    uint *ps = cast(uint *)&x;

    if ((*ps & 0x7F80_0000) == 0x7F80_0000) {
        // First, deal with NANs and infinity
        if (x == -x.infinity) return -x.max;
        return x; // +INF and NAN are unchanged.
    }
    if (*ps & 0x8000_0000) { // Negative number
        if (*ps == 0x8000_0000) { // it was negative zero
            *ps = 0x0000_0001; // change to smallest subnormal
            return x;
        }
        --*ps;
    } else { // Positive number
        ++*ps;
    }
    return x;
}

unittest {
    static if (real.mant_dig == 64) {

        // Tests for 80-bit reals

        test(isIdentical(nextUp(NaN(0xABC)), NaN(0xABC)));
        // negative numbers
        test( nextUp(-real.infinity) == -real.max );
        test( nextUp(-1-real.epsilon) == -1.0 );
        test( nextUp(-2) == -2.0 + real.epsilon);
        // denormals and zero
        test( nextUp(-real.min_normal) == -real.min_normal*(1-real.epsilon) );
        // The comparison must be applied to the result of nextUp, not to its argument.
        test( nextUp(-real.min_normal*(1-real.epsilon)) == -real.min_normal*(1-2*real.epsilon) );
        test( isIdentical(-0.0L, nextUp(-real.min_normal*real.epsilon)) );
        test( nextUp(-0.0) == real.min_normal*real.epsilon );
        test( nextUp(0.0) == real.min_normal*real.epsilon );
        test( nextUp(real.min_normal*(1-real.epsilon)) == real.min_normal );
        test( nextUp(real.min_normal) == real.min_normal*(1+real.epsilon) );
        // positive numbers
        test( nextUp(1) == 1.0 + real.epsilon );
        test( nextUp(2.0-real.epsilon) == 2.0 );
        test( nextUp(real.max) == real.infinity );
        test( nextUp(real.infinity)==real.infinity );
    }

    test(isIdentical(nextDoubleUp(NaN(0xABC)), NaN(0xABC)));
    // negative numbers
    test( nextDoubleUp(-double.infinity) == -double.max );
    test( nextDoubleUp(-1-double.epsilon) == -1.0 );
    test( nextDoubleUp(-2) == -2.0 + double.epsilon);
    // denormals and zero

    test( nextDoubleUp(-double.min_normal) == -double.min_normal*(1-double.epsilon) );
    // The comparison must be applied to the result of nextDoubleUp, not to its argument.
    test( nextDoubleUp(-double.min_normal*(1-double.epsilon)) == -double.min_normal*(1-2*double.epsilon) );
    test( isIdentical(-0.0, nextDoubleUp(-double.min_normal*double.epsilon)) );
    test( nextDoubleUp(0.0) == double.min_normal*double.epsilon );
    test( nextDoubleUp(-0.0) == double.min_normal*double.epsilon );
    test( nextDoubleUp(double.min_normal*(1-double.epsilon)) == double.min_normal );
    test( nextDoubleUp(double.min_normal) == double.min_normal*(1+double.epsilon) );
    // positive numbers
    test( nextDoubleUp(1) == 1.0 + double.epsilon );
    test( nextDoubleUp(2.0-double.epsilon) == 2.0 );
    test( nextDoubleUp(double.max) == double.infinity );

    test(isIdentical(nextFloatUp(NaN(0xABC)), NaN(0xABC)));
    test( nextFloatUp(-float.min_normal) == -float.min_normal*(1-float.epsilon) );
    test( nextFloatUp(1.0) == 1.0+float.epsilon );
    test( nextFloatUp(-0.0) == float.min_normal*float.epsilon);
    test( nextFloatUp(float.infinity)==float.infinity );

    test(nextDown(1.0+real.epsilon)==1.0);
    test(nextDoubleDown(1.0+double.epsilon)==1.0);
    test(nextFloatDown(1.0+float.epsilon)==1.0);
    test(nextafter(1.0+real.epsilon, -real.infinity)==1.0);
}

package {
    /** Reduces the magnitude of x, so the bits in the lower half of its significand
     * are all zero. Returns the amount which needs to be added to x to restore its
     * initial value; this amount will also have zeros in all bits in the lower half
     * of its significand.
     */
    X splitSignificand(X)(ref X x)
    {
        if (isNaN(x) || isInfinity(x)) return 0; // don't change NaN or infinity
        X y = x; // copy the original value
        static if (X.mant_dig == float.mant_dig) {
            uint *ps = cast(uint *)&x;
            // NOTE(review): this mask clears only the low 10 of the 23 stored
            // significand bits, which is less than half of them -- confirm
            // whether 0xFFFF_F000 was intended before relying on the float case.
            (*ps) &= 0xFFFF_FC00;
        } else static if (X.mant_dig == 53) {
            ulong *ps = cast(ulong *)&x;
            (*ps) &= 0xFFFF_FFFF_FC00_0000L;
        } else static if (X.mant_dig == 64){ // 80-bit real
            // An x87 real80 has 63 bits, because the 'implied' bit is stored explicitly.
            // This is annoying, because it means the significand cannot be
            // precisely halved. Instead, we split it into 31+32 bits.
            ulong *ps = cast(ulong *)&x;
            (*ps) &= 0xFFFF_FFFF_0000_0000L;
        } else static if (X.mant_dig==113) { // quadruple
            ulong *ps = cast(ulong *)&x;
            ps[MANTISSA_LSB] &= 0xFF00_0000_0000_0000L;
        }
        //else static assert(0, "Unsupported size");

        return y - x;
    }

    unittest {
        double x = -0x1.234_567A_AAAA_AAp+250;
        double y = splitSignificand(x);
        test(x == -0x1.234_5678p+250);
        test(y == -0x0.000_000A_AAAA_A8p+248);
        test(x + y == -0x1.234_567A_AAAA_AAp+250);
    }
}

/**
 * Calculate the next smallest floating point value before x.
 *
 * Return the greatest number less than x that is representable as a real;
 * thus, it gives the previous point on the IEEE number line.
 *
 * $(TABLE_SV
 *    $(SVH x,             nextDown(x) )
 *    $(SV  $(INFIN),      real.max )
 *    $(SV  $(PLUSMN)0.0,  -real.min_normal*real.epsilon )
 *    $(SV  -real.max,     -$(INFIN) )
 *    $(SV  -$(INFIN),     -$(INFIN) )
 *    $(SV  $(NAN),        $(NAN) )
 * )
 *
 * Remarks:
 * This function is included in the IEEE 754-2008 standard.
 *
 * nextDoubleDown and nextFloatDown are the corresponding functions for
 * the IEEE double and IEEE float number lines.
 */
real nextDown(real x)
{
    return -nextUp(-x);
}

/** ditto */
double nextDoubleDown(double x)
{
    return -nextDoubleUp(-x);
}

/** ditto */
float nextFloatDown(float x)
{
    return -nextFloatUp(-x);
}

unittest {
    test( nextDown(1.0 + real.epsilon) == 1.0);
}

/**
 * Calculates the next representable value after x in the direction of y.
 *
 * If y > x, the result will be the next largest floating-point value;
 * if y < x, the result will be the next smallest value.
 * If x == y, the result is y.
*
 * Remarks:
 * This function is not generally very useful; it's almost always better to use
 * the faster functions nextUp() or nextDown() instead.
 *
 * IEEE 754 requirements not implemented:
 * The FE_INEXACT and FE_OVERFLOW exceptions will be raised if x is finite and
 * the function result is infinite. The FE_INEXACT and FE_UNDERFLOW
 * exceptions will be raised if the function value is subnormal, and x is
 * not equal to y.
 */
real nextafter(real x, real y)
{
    if (x==y) return y;
    return (y>x) ? nextUp(x) : nextDown(x);
}

/**************************************
 * To what precision is x equal to y?
 *
 * Returns: the number of significand bits which are equal in x and y.
 * eg, 0x1.F8p+60 and 0x1.F1p+60 are equal to 5 bits of precision.
 *
 * $(TABLE_SV
 *    $(SVH3 x,      y,          feqrel(x, y) )
 *    $(SV3  x,      x,          typeof(x).mant_dig )
 *    $(SV3  x,      $(GT)= 2*x, 0 )
 *    $(SV3  x,      $(LT)= x/2, 0 )
 *    $(SV3  $(NAN), any,        0 )
 *    $(SV3  any,    $(NAN),     0 )
 * )
 *
 * Remarks:
 * This is a very fast operation, suitable for use in speed-critical code.
 */
int feqrel(X)(X x, X y)
{
    /* Public Domain. Author: Don Clugston, 18 Aug 2005.
     */
    static assert(is(X==real) || is(X==double) || is(X==float), "Only float, double, and real are supported by feqrel");

    static if (X.mant_dig == 106) { // doubledouble.
        // View each operand as a pair of doubles. The pointer cast has to be
        // applied before indexing: `cast(double*)(&x)[i]` would index first
        // and cast the indexed value afterwards.
        double* px = cast(double*)&x;
        double* py = cast(double*)&y;
        int a = feqrel(px[MANTISSA_MSB], py[MANTISSA_MSB]);
        if (a != double.mant_dig) return a;
        return double.mant_dig + feqrel(px[MANTISSA_LSB], py[MANTISSA_LSB]);
    } else static if (X.mant_dig==64 || X.mant_dig==113
                   || X.mant_dig==53 || X.mant_dig == 24) {
        if (x == y) return X.mant_dig; // ensure diff!=0, cope with INF.

        X diff = fabs(x - y);

        ushort *pa = cast(ushort *)(&x);
        ushort *pb = cast(ushort *)(&y);
        ushort *pd = cast(ushort *)(&diff);

        alias floatTraits!(X) F;

        // The difference in abs(exponent) between x or y and abs(x-y)
        // is equal to the number of significand bits of x which are
        // equal to y. If negative, x and y have different exponents.
        // If positive, x and y are equal to 'bitsdiff' bits.
        // AND with 0x7FFF to form the absolute value.
        // To avoid out-by-1 errors, we subtract 1 so it rounds down
        // if the exponents were different. This means 'bitsdiff' is
        // always 1 lower than we want, except that if bitsdiff==0,
        // they could have 0 or 1 bits in common.
        //
        // expShift is the position of the exponent field inside the 16-bit
        // word at EXPPOS_SHORT: the bits below it in that word belong to the
        // significand for double and float.

        static if (X.mant_dig==64 || X.mant_dig==113) { // real80 or quadruple
            enum int expShift = 0;
            int bitsdiff = ( ((pa[F.EXPPOS_SHORT] & F.EXPMASK)
                            + (pb[F.EXPPOS_SHORT]& F.EXPMASK)
                            - (0x8000-F.EXPMASK))>>1)
                         - pd[F.EXPPOS_SHORT];
        } else static if (X.mant_dig==53) { // double
            enum int expShift = 4;
            int bitsdiff = (( ((pa[F.EXPPOS_SHORT] & F.EXPMASK)
                             + (pb[F.EXPPOS_SHORT] & F.EXPMASK)
                             - (0x8000-F.EXPMASK))>>1)
                          - (pd[F.EXPPOS_SHORT] & F.EXPMASK))>>4;
        } else static if (X.mant_dig == 24) { // float
            enum int expShift = 7;
            int bitsdiff = (( ((pa[F.EXPPOS_SHORT] & F.EXPMASK)
                             + (pb[F.EXPPOS_SHORT] & F.EXPMASK)
                             - (0x8000-F.EXPMASK))>>1)
                          - (pd[F.EXPPOS_SHORT] & F.EXPMASK))>>7;
        }
        // Only the exponent field decides whether diff is denormal; for
        // double and float the same 16-bit word also carries the top
        // significand bits, so it must be masked before the comparison.
        if ((pd[F.EXPPOS_SHORT] & F.EXPMASK) == 0)
        {   // Difference is denormal
            // For denormals, we need to add the number of zeros that
            // lie at the start of diff's significand.
            // We do this by multiplying by 2^real.mant_dig
            diff *= F.RECIP_EPSILON;
            // Subtract the biased exponent of the scaled difference, not the
            // raw 16-bit word it is stored in.
            return bitsdiff + X.mant_dig - ((pd[F.EXPPOS_SHORT] & F.EXPMASK) >> expShift);
        }

        if (bitsdiff > 0)
            return bitsdiff + 1; // add the 1 we subtracted before

        // Avoid out-by-1 errors when factor is almost 2.
        static if (X.mant_dig==64 || X.mant_dig==113) { // real80 or quadruple
            return (bitsdiff == 0) ? (pa[F.EXPPOS_SHORT] == pb[F.EXPPOS_SHORT]) : 0;
        } else static if (X.mant_dig == 53 || X.mant_dig == 24) { // double or float
            return (bitsdiff == 0 && !((pa[F.EXPPOS_SHORT] ^ pb[F.EXPPOS_SHORT])& F.EXPMASK)) ? 1 : 0;
        }
    } else {
        static assert(0, "Unsupported");
    }
}

unittest
{
    // Exact equality
    test(feqrel(real.max,real.max)==real.mant_dig);
    test(feqrel(0.0L,0.0L)==real.mant_dig);
    test(feqrel(7.1824L,7.1824L)==real.mant_dig);
    test(feqrel(real.infinity,real.infinity)==real.mant_dig);

    // a few bits away from exact equality
    real w=1;
    for (int i=1; i<real.mant_dig-1; ++i) {
        test(feqrel(1+w*real.epsilon,1.0L)==real.mant_dig-i);
        test(feqrel(1-w*real.epsilon,1.0L)==real.mant_dig-i);
        test(feqrel(1.0L,1+(w-1)*real.epsilon)==real.mant_dig-i+1);
        w*=2;
    }
    test(feqrel(1.5+real.epsilon,1.5L)==real.mant_dig-1);
    test(feqrel(1.5-real.epsilon,1.5L)==real.mant_dig-1);
    test(feqrel(1.5-real.epsilon,1.5+real.epsilon)==real.mant_dig-2);

    test(feqrel(real.min_normal/8,real.min_normal/17)==3);
    // The same denormal difference in the narrower formats.
    test(feqrel(double.min_normal/8,double.min_normal/17)==3);
    test(feqrel(float.min_normal/8,float.min_normal/17)==3);

    // Numbers that are close
    test(feqrel(0x1.Bp+84, 0x1.B8p+84)==5);
    test(feqrel(0x1.8p+10, 0x1.Cp+10)==2);
    test(feqrel(1.5*(1-real.epsilon), 1.0L)==2);
    test(feqrel(1.5, 1.0)==1);
    test(feqrel(2*(1-real.epsilon), 1.0L)==1);

    // Factors of 2
    test(feqrel(real.max,real.infinity)==0);
    test(feqrel(2*(1-real.epsilon), 1.0L)==1);
    test(feqrel(1.0, 2.0)==0);
    test(feqrel(4.0, 1.0)==0);

    // Extreme inequality
    test(feqrel(real.nan,real.nan)==0);
    test(feqrel(0.0L,-real.nan)==0);
    test(feqrel(real.nan,real.infinity)==0);
    test(feqrel(real.infinity,-real.infinity)==0);
    test(feqrel(-real.max,real.infinity)==0);
    test(feqrel(real.max,-real.max)==0);

    // floats
    test(feqrel(2.1f, 2.1f)==float.mant_dig);
    test(feqrel(1.5f, 1.0f)==1);
}

/*********************************
 * Return 1 if sign bit of x is set, 0 if not.
 */

int signbit(real x)
{
    return ((cast(ubyte *)&x)[floatTraits!(real).SIGNPOS_BYTE] & 0x80) != 0;
}

unittest
{
    test(!signbit(float.nan));
    test(signbit(-float.nan));
    test(!signbit(168.1234));
    test(signbit(-168.1234));
    test(!signbit(0.0));
    test(signbit(-0.0));
}


/*********************************
 * Return a value composed of to with from's sign bit.
 */

real copysign(real to, real from)
{
    ubyte* pto = cast(ubyte *)&to;
    ubyte* pfrom = cast(ubyte *)&from;

    alias floatTraits!(real) F;
    pto[F.SIGNPOS_BYTE] &= 0x7F;
    pto[F.SIGNPOS_BYTE] |= pfrom[F.SIGNPOS_BYTE] & 0x80;
    return to;
}

unittest
{
    real e;

    e = copysign(21, 23.8);
    test(e == 21);

    e = copysign(-21, 23.8);
    test(e == 21);

    e = copysign(21, -23.8);
    test(e == -21);

    e = copysign(-21, -23.8);
    test(e == -21);

    e = copysign(real.nan, -23.8);
    test(isNaN(e) && signbit(e));
}

/** Return the value that lies halfway between x and y on the IEEE number line.
 *
 * Formally, the result is the arithmetic mean of the binary significands of x
 * and y, multiplied by the geometric mean of the binary exponents of x and y.
 * x and y must have the same sign, and must not be NaN.
 * Note: this function is useful for ensuring O(log n) behaviour in algorithms
 * involving a 'binary chop'.
 *
 * Special cases:
 * If x and y are within a factor of 2, (ie, feqrel(x, y) > 0), the return value
 * is the arithmetic mean (x + y) / 2.
 * If x and y are even powers of 2, the return value is the geometric mean,
 * ieeeMean(x, y) = sqrt(x * y).
*
 */
T ieeeMean(T)(T x, T y)
{
    // both x and y must have the same sign, and must not be NaN.
    verify(signbit(x) == signbit(y));
    verify(!tsm.isnan(x) && !tsm.isnan(y));

    // Runtime behaviour for contract violation:
    // If signs are opposite, or one is a NaN, return 0.
    if (!((x>=0 && y>=0) || (x<=0 && y<=0))) return 0.0;

    // The implementation is simple: cast x and y to integers,
    // average them (avoiding overflow), and cast the result back to a floating-point number.

    alias floatTraits!(real) F;
    T u;
    static if (T.mant_dig==64) { // real80
        // There's slight additional complexity because they are actually
        // 79-bit reals...
        ushort *ue = cast(ushort *)&u;
        ulong *ul = cast(ulong *)&u;
        ushort *xe = cast(ushort *)&x;
        ulong *xl = cast(ulong *)&x;
        ushort *ye = cast(ushort *)&y;
        ulong *yl = cast(ulong *)&y;
        // Ignore the useless implicit bit. (Bonus: this prevents overflows)
        ulong m = ((*xl) & 0x7FFF_FFFF_FFFF_FFFFL) + ((*yl) & 0x7FFF_FFFF_FFFF_FFFFL);

        ushort e = cast(ushort)((xe[F.EXPPOS_SHORT] & 0x7FFF) + (ye[F.EXPPOS_SHORT] & 0x7FFF));
        if (m & 0x8000_0000_0000_0000L) {
            ++e;
            m &= 0x7FFF_FFFF_FFFF_FFFFL;
        }
        // Now do a multi-byte right shift
        uint c = e & 1; // carry
        e >>= 1;
        m >>>= 1;
        if (c) m |= 0x4000_0000_0000_0000L; // shift carry into significand
        if (e) *ul = m | 0x8000_0000_0000_0000L; // set implicit bit...
        else *ul = m; // ... unless exponent is 0 (denormal or zero).
        // Write the exponent word at the same index it was read from.
        ue[F.EXPPOS_SHORT]= e | (xe[F.EXPPOS_SHORT]& F.SIGNMASK); // restore sign bit
    } else static if(T.mant_dig == 113) { //quadruple
        // This would be trivial if 'ucent' were implemented...
        ulong *ul = cast(ulong *)&u;
        ulong *xl = cast(ulong *)&x;
        ulong *yl = cast(ulong *)&y;
        // 128-bit add of the two magnitudes (sign bits masked off), followed
        // by a 128-bit right shift by one.
        ulong ml = xl[MANTISSA_LSB] + yl[MANTISSA_LSB]; // may wrap around
        ulong carry = (ml < xl[MANTISSA_LSB]) ? 1 : 0;  // carry out of the low word
        // Cannot overflow: both operands are below 2^63.
        ulong mh = (xl[MANTISSA_MSB] & 0x7FFF_FFFF_FFFF_FFFFL)
                 + (yl[MANTISSA_MSB] & 0x7FFF_FFFF_FFFF_FFFFL)
                 + carry;
        ul[MANTISSA_MSB] = (mh >>> 1) | (xl[MANTISSA_MSB] & 0x8000_0000_0000_0000);
        // The bit shifted out of the high word becomes the top bit of the low word.
        ul[MANTISSA_LSB] = (ml >>> 1) | ((mh & 1) << 63);
    } else static if (T.mant_dig == double.mant_dig) {
        ulong *ul = cast(ulong *)&u;
        ulong *xl = cast(ulong *)&x;
        ulong *yl = cast(ulong *)&y;
        ulong m = (((*xl) & 0x7FFF_FFFF_FFFF_FFFFL) + ((*yl) & 0x7FFF_FFFF_FFFF_FFFFL)) >>> 1;
        m |= ((*xl) & 0x8000_0000_0000_0000L);
        *ul = m;
    } else static if (T.mant_dig == float.mant_dig) {
        uint *ul = cast(uint *)&u;
        uint *xl = cast(uint *)&x;
        uint *yl = cast(uint *)&y;
        uint m = (((*xl) & 0x7FFF_FFFF) + ((*yl) & 0x7FFF_FFFF)) >>> 1;
        m |= ((*xl) & 0x8000_0000);
        *ul = m;
    } else {
        static assert(0, "Not implemented");
    }
    return u;
}

unittest {
    test(ieeeMean(-0.0,-1e-20)<0);
    test(ieeeMean(0.0,1e-20)>0);

    test(ieeeMean(1.0L,4.0L)==2L);
    test(ieeeMean(2.0*1.013,8.0*1.013)==4*1.013);
    test(ieeeMean(-1.0L,-4.0L)==-2L);
    test(ieeeMean(-1.0,-4.0)==-2);
    test(ieeeMean(-1.0f,-4.0f)==-2f);
    test(ieeeMean(-1.0,-2.0)==-1.5);
    test(ieeeMean(-1*(1+8*real.epsilon),-2*(1+8*real.epsilon))==-1.5*(1+5*real.epsilon));
    test(ieeeMean(0x1p60,0x1p-10)==0x1p25);
    static if (real.mant_dig==64) { // x87, 80-bit reals
        test(ieeeMean(1.0L,real.infinity)==0x1p8192L);
        test(ieeeMean(0.0L,real.infinity)==1.5);
    }
    test(ieeeMean(0.5*real.min_normal*(1-4*real.epsilon),0.5*real.min_normal)==0.5*real.min_normal*(1-2*real.epsilon));
}

// Functions for NaN payloads
/*
 * A 'payload' can be stored in the
significand of a $(NAN). One bit is required
 * to distinguish between a quiet and a signalling $(NAN). This leaves 22 bits
 * of payload for a float; 51 bits for a double; 62 bits for an 80-bit real;
 * and 111 bits for a 128-bit quad.
 */
/**
 * Create a $(NAN), storing an integer inside the payload.
 *
 * For 80-bit or 128-bit reals, the largest possible payload is 0x3FFF_FFFF_FFFF_FFFF.
 * For doubles, it is 0x3_FFFF_FFFF_FFFF.
 * For floats, it is 0x3F_FFFF.
 */
real NaN(ulong payload)
{
    // v is built up from the top: marker bits first, then the payload fields
    // are shifted in below them.
    static if (real.mant_dig == 64) { //real80
        ulong v = 3; // implied bit = 1, quiet bit = 1
    } else {
        // Only the quiet bit. After the shifts below it ends up in the top
        // significand bit (bit 51 of a double); a value of 2 would land one
        // position higher, leaving the quiet bit clear.
        ulong v = 1; // no implied bit. quiet bit = 1
    }

    ulong a = payload;

    // 22 Float bits
    ulong w = a & 0x3F_FFFF;
    a -= w;

    v <<=22;
    v |= w;
    a >>=22;

    // 29 Double bits
    // NOTE(review): the mask below keeps 28 bits while the shift consumes 29,
    // so payload bit 50 is not stored -- confirm whether that is intended.
    v <<=29;
    w = a & 0xFFF_FFFF;
    v |= w;
    a -= w;
    a >>=29;

    static if (real.mant_dig == 53) { // double
        v |=0x7FF0_0000_0000_0000;
        real x;
        * cast(ulong *)(&x) = v;
        return x;
    } else {
        v <<=11;
        a &= 0x7FF;
        v |= a;
        real x = real.nan;
        // Extended real bits
        static if (real.mant_dig==113) { //quadruple
            v<<=1; // there's no implicit bit
            version(LittleEndian) {
                *cast(ulong*)(6+cast(ubyte*)(&x)) = v;
            } else {
                *cast(ulong*)(2+cast(ubyte*)(&x)) = v;
            }
        } else { // real80
            * cast(ulong *)(&x) = v;
        }
        return x;
    }
}

/**
 * Extract an integral payload from a $(NAN).
 *
 * Returns:
 * the integer payload as a ulong.
 *
 * For 80-bit or 128-bit reals, the largest possible payload is 0x3FFF_FFFF_FFFF_FFFF.
 * For doubles, it is 0x3_FFFF_FFFF_FFFF.
 * For floats, it is 0x3F_FFFF.
*/
ulong getNaNPayload(real x)
{
    verify(!!isNaN(x));
    // x_ptr is needed to create a separate alias to x
    // which the optimizer cannot see through
    // this will prevent an optimization which
    // will cause an ice in newer dmd versions
    auto x_ptr = &x;

    static if (real.mant_dig == 53) {
        ulong m = *cast(ulong *)(x_ptr);
        // Make it look like an 80-bit significand.
        // Skip exponent, and quiet bit
        m &= 0x0007_FFFF_FFFF_FFFF;
        // Shift by 11 so that the 22-bit field (bits 29..50 of the double)
        // lines up with bits 40..61, where the masks below expect it.
        m <<= 11;
    } else static if (real.mant_dig==113) { // quadruple
        version(LittleEndian) {
            ulong m = *cast(ulong*)(6+cast(ubyte*)(x_ptr));
        } else {
            ulong m = *cast(ulong*)(2+cast(ubyte*)(x_ptr));
        }
        m>>=1; // there's no implicit bit
    } else {
        ulong m = *cast(ulong *)(x_ptr);
    }
    // ignore implicit bit and quiet bit
    // Bits 40..61 hold the low 22 payload bits.
    ulong f = m & 0x3FFF_FF00_0000_0000L;
    ulong w = f >>> 40;
    // Bits 11..39 hold the next payload field; move it to bit 22 upwards.
    w |= (m & 0x00FF_FFFF_F800L) << (22 - 11);
    // Bits 0..10 hold the topmost payload bits.
    w |= (m & 0x7FF) << 51;
    return w;
}

unittest {
    real nan4 = NaN(0x789_ABCD_EF12_3456);
    static if (real.mant_dig == 64 || real.mant_dig==113) {
        test (getNaNPayload(nan4) == 0x789_ABCD_EF12_3456);
    } else {
        test (getNaNPayload(nan4) == 0x1_ABCD_EF12_3456);
    }
    double nan5 = nan4;
    // FIXME: https://issues.dlang.org/show_bug.cgi?id=13743
    //assert (getNaNPayload(nan5) == 0x1_ABCD_EF12_3456);
    float nan6 = nan4;
    // FIXME: https://issues.dlang.org/show_bug.cgi?id=13743
    //assert (getNaNPayload(nan6) == 0x12_3456);
    nan4 = NaN(0xFABCD);
    // FIXME: https://issues.dlang.org/show_bug.cgi?id=13743
    //assert (getNaNPayload(nan4) == 0xFABCD);
    nan6 = nan4;
    // FIXME: https://issues.dlang.org/show_bug.cgi?id=13743
    //assert (getNaNPayload(nan6) == 0xFABCD);
    nan5 = NaN(0x100_0000_0000_3456);
    // FIXME: https://issues.dlang.org/show_bug.cgi?id=13743
    //assert(getNaNPayload(nan5) == 0x0000_0000_3456);
}