ocean.math.IEEE source code

1 /**
2  * Low-level Mathematical Functions which take advantage of the IEEE754 ABI.
3  *
4  * Copyright:
5  *     Portions Copyright (C) 2001-2005 Digital Mars.
6  *     Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
7  *     All rights reserved.
8  *
9  * License:
10  *     Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
11  *     See LICENSE_TANGO.txt for details.
12  *
13  * Authors: Don Clugston, Walter Bright, Sean Kelly
14  *
15  */
16 /**
17  * Macros:
18  *
19  *  TABLE_SV = <table border=1 cellpadding=4 cellspacing=0>
20  *      <caption>Special Values</caption>
21  *      $0</table>
22  *  SVH = $(TR $(TH $1) $(TH $2))
23  *  SV  = $(TR $(TD $1) $(TD $2))
24  *  SVH3 = $(TR $(TH $1) $(TH $2) $(TH $3))
25  *  SV3  = $(TR $(TD $1) $(TD $2) $(TD $3))
26  *  NAN = $(RED NAN)
27  *  PLUSMN = &plusmn;
28  *  INFIN = &infin;
29  *  PLUSMNINF = &plusmn;&infin;
30  *  PI = &pi;
31  *  LT = &lt;
32  *  GT = &gt;
33  *  SQRT = &radic;
34  *  HALF = &frac12;
35  */
36 module ocean.math.IEEE;
37 
38 import ocean.meta.types.Qualifiers;
39 import ocean.core.Verify;
40 
41 version (unittest) import ocean.core.Test;
42 
// Hand-written x87 assembly is enabled only when the compiler offers 32-bit
// inline asm and the build has not opted out with -version=TangoNoAsm.
version (TangoNoAsm)
{
    // Assembly disabled on request: Naked_D_InlineAsm_X86 stays unset.
}
else version (D_InlineAsm_X86)
{
    version = Naked_D_InlineAsm_X86;
}

// X86_Any is set on both 32-bit and 64-bit x86 targets.
version (X86)
{
    version = X86_Any;
}

version (X86_64)
{
    version = X86_Any;
}
56 
57 version (Naked_D_InlineAsm_X86) {
58     // Don't include this extra dependency unless we need to.
59     version (unittest) {
60         static import core.stdc.math;
61     }
62 } else {
63     // Needed for cos(), sin(), tan() on GNU.
64     static import core.stdc.math;
65 }
66 static import tsm = core.stdc.math;
67 
// Standard Tango NaN payloads.
//
// NOTE: These values may change in future Tango releases.
//
// The lowest three bits of a payload indicate the cause of the NaN:
//   0   = error other than those listed below
//   1   = domain error
//   2   = singularity
//   3   = range
//   4-7 = reserved
enum TANGO_NAN
{
    // --- General errors ---
    DOMAIN_ERROR = 0x0101,
    SINGULARITY  = 0x0102,
    RANGE_ERROR  = 0x0103,

    // --- NaNs created by functions in the basic library ---
    TAN_DOMAIN   = 0x1001,
    POW_DOMAIN   = 0x1021,
    GAMMA_DOMAIN = 0x1101,
    GAMMA_POLE   = 0x1102,
    SGNGAMMA     = 0x1112,
    BETA_DOMAIN  = 0x1131,

    // --- NaNs from statistical functions ---
    NORMALDISTRIBUTION_INV_DOMAIN = 0x2001,
    STUDENTSDDISTRIBUTION_DOMAIN  = 0x2011
}
92 
93 private:
/* Most of the functions depend on the format of the largest IEEE floating-point type.
 * The code differs depending on whether 'real' is 64, 80, or 128 bits,
 * and on whether the architecture is big-endian or little-endian.
 * Only five 'real' ABIs are currently supported:
 *   64 bit Big-endian     'double'    (eg PowerPC)
 *   128 bit Big-endian    'quadruple' (eg SPARC)
 *   64 bit Little-endian  'double'    (eg x86-SSE2)
 *   80 bit Little-endian, with implied bit 'real80' (eg x87, Itanium).
 *   128 bit Little-endian 'quadruple' (not implemented on any known processor!)
 *
 * There is also an unsupported ABI which does not follow IEEE; several of its
 * functions will generate run-time errors if used:
 *   128 bit Big-endian 'doubledouble' (used by GDC <= 0.23 for PowerPC)
 */

// Reject, at compile time, any 'real' format the code below has no branch for.
version (LittleEndian)
{
    static assert(real.mant_dig == 53 || real.mant_dig == 64 || real.mant_dig == 113,
        "Only 64-bit, 80-bit, and 128-bit reals are supported for LittleEndian CPUs");
}
else
{
    static assert(real.mant_dig == 53 || real.mant_dig == 106 || real.mant_dig == 113,
        "Only 64-bit and 128-bit reals are supported for BigEndian CPUs. double-double reals have partial support");
}
116 
// Constants used for extracting the components of the representation of a
// floating-point type T. They supplement the built-in floating point properties.
//
//   EXPMASK        ushort mask selecting the exponent bits (without the sign)
//   SIGNMASK       ushort mask selecting the sign bit
//   EXPBIAS        exponent word of 0.5 in this format; frexp subtracts it to
//                  obtain its exponent (not defined for doubledouble)
//   EXPPOS_SHORT   index of the exponent word when T is viewed as a ushort array
//   SIGNPOS_BYTE   index of the sign byte when T is viewed as a ubyte array
//                  (not defined for float)
//   EXPMASK_INT, MANTISSAMASK_INT
//                  32-bit masks, defined for float and double only
//   RECIP_EPSILON  1/T.epsilon, i.e. the value such that
//                  (smallest_denormal) * RECIP_EPSILON == T.min
template floatTraits(T)
{
    static immutable T RECIP_EPSILON = (1/T.epsilon);

    static if (T.mant_dig == 24)
    {
        // float
        enum : ushort
        {
            EXPMASK  = 0x7F80,
            SIGNMASK = 0x8000,
            EXPBIAS  = 0x3F00
        }
        static immutable uint EXPMASK_INT      = 0x7F80_0000;
        static immutable uint MANTISSAMASK_INT = 0x007F_FFFF;
        version (LittleEndian)
        {
            static immutable EXPPOS_SHORT = 1;
        }
        else
        {
            static immutable EXPPOS_SHORT = 0;
        }
    }
    else static if (T.mant_dig == 53)
    {
        // double, or real==double
        enum : ushort
        {
            EXPMASK  = 0x7FF0,
            SIGNMASK = 0x8000,
            EXPBIAS  = 0x3FE0
        }
        static immutable uint EXPMASK_INT      = 0x7FF0_0000;
        static immutable uint MANTISSAMASK_INT = 0x000F_FFFF; // for the MSB only
        version (LittleEndian)
        {
            static immutable EXPPOS_SHORT = 3;
            static immutable SIGNPOS_BYTE = 7;
        }
        else
        {
            static immutable EXPPOS_SHORT = 0;
            static immutable SIGNPOS_BYTE = 0;
        }
    }
    else static if (T.mant_dig == 64)
    {
        // real80
        enum : ushort
        {
            EXPMASK  = 0x7FFF,
            SIGNMASK = 0x8000,
            EXPBIAS  = 0x3FFE
        }
//      const ulong QUIETNANMASK = 0xC000_0000_0000_0000; // Converts a signaling NaN to a quiet NaN.
        version (LittleEndian)
        {
            static immutable EXPPOS_SHORT = 4;
            static immutable SIGNPOS_BYTE = 9;
        }
        else
        {
            static immutable EXPPOS_SHORT = 0;
            static immutable SIGNPOS_BYTE = 0;
        }
    }
    else static if (real.mant_dig == 113)
    {
        // quadruple (note: this branch tests real, not T)
        enum : ushort
        {
            EXPMASK  = 0x7FFF,
            SIGNMASK = 0x8000,
            EXPBIAS  = 0x3FFE
        }
        version (LittleEndian)
        {
            static immutable EXPPOS_SHORT = 7;
            static immutable SIGNPOS_BYTE = 15;
        }
        else
        {
            static immutable EXPPOS_SHORT = 0;
            static immutable SIGNPOS_BYTE = 0;
        }
    }
    else static if (real.mant_dig == 106)
    {
        // doubledouble (note: this branch tests real, not T)
        enum : ushort
        {
            EXPMASK  = 0x7FF0,
            SIGNMASK = 0x8000
//          EXPBIAS = 0x3FE0
        }
        // the exponent byte is not unique
        version (LittleEndian)
        {
            static immutable EXPPOS_SHORT = 7; // 3 is also an exp short
            static immutable SIGNPOS_BYTE = 15;
        }
        else
        {
            static immutable EXPPOS_SHORT = 0; // 4 is also an exp short
            static immutable SIGNPOS_BYTE = 0;
        }
    }
}
198 
// These apply to all floating-point types: indices of the least- and
// most-significant halves when a value is viewed as a two-element array.
version (LittleEndian)
{
    static immutable MANTISSA_LSB = 0;
    static immutable MANTISSA_MSB = 1;
}
else
{
    static immutable MANTISSA_LSB = 1;
    static immutable MANTISSA_MSB = 0;
}
207 
208 public:
209 
/** IEEE exception status flags

 These flags indicate that an exceptional floating-point condition has occurred.
 They indicate that a NaN or an infinity has been generated, that a result
 is inexact, or that a signalling NaN has been encountered.
 The return values of the properties should be treated as booleans, although
 each is returned as an int, for speed.

 Example:
 ----
    real a=3.5;
    // Set all the flags to zero
    resetIeeeFlags();
    assert(!ieeeFlags.divByZero);
    // Perform a division by zero.
    a/=0.0L;
    assert(a==real.infinity);
    assert(ieeeFlags.divByZero);
    // Create a NaN
    a*=0.0L;
    assert(ieeeFlags.invalid);
    assert(isNaN(a));

    // Check that calling func() has no effect on the
    // status flags.
    IeeeFlags f = ieeeFlags;
    func();
    assert(ieeeFlags == f);

 ----
 */
struct IeeeFlags
{
private:
    // Raw status bits captured by getIeeeFlags().
    // The x87 FPU status register is 16 bits.
    // The Pentium SSE2 status register is 32 bits.
    int m_flags;

    version (X86_Any)
    {
        // Applies to both x87 status word (16 bits) and SSE2 status word(32 bits).
        enum : int
        {
            INEXACT_MASK   = 0x20,
            UNDERFLOW_MASK = 0x10,
            OVERFLOW_MASK  = 0x08,
            DIVBYZERO_MASK = 0x04,
            INVALID_MASK   = 0x01
        }
        // Don't bother about denormals, they are not supported on most CPUs.
        //  DENORMAL_MASK = 0x02;
    }
    else version (PPC)
    {
        // PowerPC FPSCR is a 32-bit register.
        enum : int
        {
            INEXACT_MASK   = 0x600,
            UNDERFLOW_MASK = 0x010,
            OVERFLOW_MASK  = 0x008,
            DIVBYZERO_MASK = 0x020,
            INVALID_MASK   = 0xF80
        }
    }
    else
    {
        // SPARC FSR is a 32bit register
        // (64 bits for Sparc 7 & 8, but high 32 bits are uninteresting).
        enum : int
        {
            INEXACT_MASK   = 0x020,
            UNDERFLOW_MASK = 0x080,
            OVERFLOW_MASK  = 0x100,
            DIVBYZERO_MASK = 0x040,
            INVALID_MASK   = 0x200
        }
    }

private:
    // Reads the x87 status word and keeps only the five exception bits
    // covered by the masks above (0x03D is their union on x86).
    static IeeeFlags getIeeeFlags()
    {
        version (D_InlineAsm_X86)
        {
            asm
            {
                 naked;
                 fstsw AX;
                 // NOTE: If compiler supports SSE2, need to OR the result with
                 // the SSE2 status register.
                 // Clear all irrelevant bits
                 and EAX, 0x03D;
                 ret;
            }
        }
        else version (D_InlineAsm_X86_64)
        {
            asm
            {
                 naked;
                 fstsw AX;
                 // NOTE: If compiler supports SSE2, need to OR the result with
                 // the SSE2 status register.
                 // Clear all irrelevant bits
                 and RAX, 0x03D;
                 ret;
            }
        }
        else
        {
            /*   SPARC:
                int retval;
                asm { st %fsr, retval; }
                return retval;
             */
            static assert(0, "Not yet supported");
        }
    }

    // Clears the pending x87 exception flags.
    static void resetIeeeFlags()
    {
        version (D_InlineAsm_X86)
        {
            asm { fnclex; }
        }
        else version (D_InlineAsm_X86_64)
        {
            asm { fnclex; }
        }
        else
        {
            /* SPARC:
              int tmpval;
              asm { st %fsr, tmpval; }
              tmpval &=0xFFFF_FC00;
              asm { ld tmpval, %fsr; }
            */
            throw new SanityException("Not yet supported");
        }
    }

public:
    /// The result cannot be represented exactly, so rounding occurred.
    /// (example: x = sin(0.1); )
    int inexact()
    {
        return m_flags & INEXACT_MASK;
    }

    /// A zero was generated by underflow (example: x = real.min_normal*real.epsilon/2;)
    int underflow()
    {
        return m_flags & UNDERFLOW_MASK;
    }

    /// An infinity was generated by overflow (example: x = real.max*2;)
    int overflow()
    {
        return m_flags & OVERFLOW_MASK;
    }

    /// An infinity was generated by division by zero (example: x = 3/0.0; )
    int divByZero()
    {
        return m_flags & DIVBYZERO_MASK;
    }

    /// A machine NaN was generated. (example: x = real.infinity * 0.0; )
    int invalid()
    {
        return m_flags & INVALID_MASK;
    }
}
343 
/// Return a snapshot of the current state of the floating-point status flags.
IeeeFlags ieeeFlags()
{
    return IeeeFlags.getIeeeFlags();
}
346 
/// Set all of the floating-point status flags to false.
void resetIeeeFlags()
{
    IeeeFlags.resetIeeeFlags();
}
349 
unittest
{
    // 'static' keeps the operand in memory so the operations below are
    // performed at run time.
    static real a = 3.5;

    // Start from a clean slate.
    resetIeeeFlags();
    test(!ieeeFlags.divByZero);

    // x / 0 -> infinity, raises divByZero
    a /= 0.0L;
    test(ieeeFlags.divByZero);
    test(a == real.infinity);

    // infinity * 0 -> NaN, raises invalid
    a *= 0.0L;
    test(ieeeFlags.invalid);
    test(isNaN(a));

    // real.max * 2 -> overflow
    a = real.max;
    a *= 2;
    test(ieeeFlags.overflow);

    // dividing the smallest subnormal -> underflow and inexact
    a = real.min_normal * real.epsilon;
    a /= 99;
    test(ieeeFlags.underflow);
    test(ieeeFlags.inexact);
}
368 
/*********************************************************************
 * Separate floating point value into significand and exponent.
 *
 * Params:
 *      value = the number to decompose
 *      exp   = receives the binary exponent; see the table below for the
 *              values stored for zeros, infinities and $(NAN)s
 *
 * Returns:
 *      Calculate and return $(I x) and $(I exp) such that
 *      value =$(I x)*2$(SUP exp) and
 *      .5 $(LT)= |$(I x)| $(LT) 1.0
 *
 *      $(I x) has same sign as value.
 *
 *      $(TABLE_SV
 *      $(TR $(TH value)           $(TH returns)         $(TH exp))
 *      $(TR $(TD $(PLUSMN)0.0)    $(TD $(PLUSMN)0.0)    $(TD 0))
 *      $(TR $(TD +$(INFIN))       $(TD +$(INFIN))       $(TD int.max))
 *      $(TR $(TD -$(INFIN))       $(TD -$(INFIN))       $(TD int.min))
 *      $(TR $(TD $(PLUSMN)$(NAN)) $(TD $(PLUSMN)$(NAN)) $(TD int.min))
 *      )
 */
real frexp(real value, out int exp)
{
    // 'value' is a by-value copy; it is rewritten in place through these two
    // views (16-bit words and 64-bit words) and then returned.
    ushort* vu = cast(ushort*)&value;
    long* vl = cast(long*)&value;
    uint ex;
    alias floatTraits!(real) F;

    ex = vu[F.EXPPOS_SHORT] & F.EXPMASK;
  static if (real.mant_dig == 64) { // real80
    if (ex) { // If exponent is non-zero
        if (ex == F.EXPMASK) {   // infinity or NaN
            if (*vl &  0x7FFF_FFFF_FFFF_FFFF) {  // NaN
                *vl |= 0xC000_0000_0000_0000;  // convert $(NAN)S to $(NAN)Q
                exp = int.min;
            } else if (vu[F.EXPPOS_SHORT] & 0x8000) {   // negative infinity
                exp = int.min;
            } else {   // positive infinity
                exp = int.max;
            }
        } else {
            exp = ex - F.EXPBIAS;
            vu[F.EXPPOS_SHORT] = cast(ushort)((0x8000 & vu[F.EXPPOS_SHORT]) | 0x3FFE);
        }
    } else if (!*vl) {
        // value is +-0.0
        exp = 0;
    } else {
        // denormal: scale into the normal range first, then undo the scaling
        // (RECIP_EPSILON == 2^63) in the reported exponent.
        value *= F.RECIP_EPSILON;
        ex = vu[F.EXPPOS_SHORT] & F.EXPMASK;
        exp = ex - F.EXPBIAS - 63;
        vu[F.EXPPOS_SHORT] = cast(ushort)((0x8000 & vu[F.EXPPOS_SHORT]) | 0x3FFE);
    }
    return value;
  } else static if (real.mant_dig == 113) { // quadruple
    if (ex) { // If exponent is non-zero
        if (ex == F.EXPMASK) {   // infinity or NaN
            if (vl[MANTISSA_LSB] |( vl[MANTISSA_MSB]&0x0000_FFFF_FFFF_FFFF)) {  // NaN
                vl[MANTISSA_MSB] |= 0x0000_8000_0000_0000;  // convert $(NAN)S to $(NAN)Q
                exp = int.min;
            } else if (vu[F.EXPPOS_SHORT] & 0x8000) {   // negative infinity
                exp = int.min;
            } else {   // positive infinity
                exp = int.max;
            }
        } else {
            exp = ex - F.EXPBIAS;
            vu[F.EXPPOS_SHORT] = cast(ushort)((0x8000 & vu[F.EXPPOS_SHORT]) | 0x3FFE);
        }
    } else if ((vl[MANTISSA_LSB] |(vl[MANTISSA_MSB]&0x0000_FFFF_FFFF_FFFF))==0) {
        // value is +-0.0
        exp = 0;
    } else {
        // denormal: RECIP_EPSILON is 1/real.epsilon == 2^(mant_dig - 1), so
        // exactly (mant_dig - 1) == 112 must be taken off again. (The previous
        // constant, 113, made the result one too small.)
        value *= F.RECIP_EPSILON;
        ex = vu[F.EXPPOS_SHORT] & F.EXPMASK;
        exp = ex - F.EXPBIAS - (real.mant_dig - 1);
        vu[F.EXPPOS_SHORT] = cast(ushort)((0x8000 & vu[F.EXPPOS_SHORT]) | 0x3FFE);
    }
    return value;
  } else static if (real.mant_dig==53) { // real is double
    if (ex) { // If exponent is non-zero
        if (ex == F.EXPMASK) {   // infinity or NaN
            if (*vl==0x7FF0_0000_0000_0000) {  // positive infinity
                exp = int.max;
            } else if (*vl==0xFFF0_0000_0000_0000) { // negative infinity
                exp = int.min;
            } else { // NaN
                *vl |= 0x0008_0000_0000_0000;  // convert $(NAN)S to $(NAN)Q
                exp = int.min;
            }
        } else {
            // The difference is negative for |value| < 0.5, so it has to be
            // computed and shifted as a signed quantity.
            exp = (cast(int)ex - F.EXPBIAS) >> 4;
            // The low 4 bits of the exponent word are the top 4 bits of the
            // significand: keep them (mask 0x800F) and replace only the
            // exponent field.
            vu[F.EXPPOS_SHORT] = cast(ushort)((0x800F & vu[F.EXPPOS_SHORT]) | 0x3FE0);
        }
    } else if (!(*vl & 0x7FFF_FFFF_FFFF_FFFF)) {
        // value is +-0.0
        exp = 0;
    } else {
        // denormal: same technique as the other formats. Multiplying by
        // RECIP_EPSILON (2^52) is exact and yields a normal number whose
        // significand bits are already in place.
        value *= F.RECIP_EPSILON;
        ex = vu[F.EXPPOS_SHORT] & F.EXPMASK;
        exp = ((cast(int)ex - F.EXPBIAS) >> 4) - (real.mant_dig - 1);
        vu[F.EXPPOS_SHORT] = cast(ushort)((0x800F & vu[F.EXPPOS_SHORT]) | 0x3FE0);
    }
    return value;
  }else { //static if(real.mant_dig==106) // doubledouble
        static assert(0, "Unsupported");
  }
}

unittest
{
    static real[3][] vals = // x,frexp,exp
    [
        [0.0,   0.0,    0],
        [-0.0,  -0.0,   0],
        [1.0,   .5, 1],
        [-1.0,  -.5,    1],
        [2.0,   .5, 2],
        // significands other than .5 and exponents below zero
        [3.0,   .75,    2],
        [0.25,  .5, -1],
        [-0.375, -.75,  -1],
        [double.min_normal/2.0, .5, -1022],
        // subnormal as a double, with more than one significand bit set
        [0.375*double.min_normal, .75, -1023],
        [real.infinity,real.infinity,int.max],
        [-real.infinity,-real.infinity,int.min],
    ];

    int i;
    int eptr;
    real v = frexp(NaN(0xABC), eptr);
    test(isIdentical(NaN(0xABC), v));
    test(eptr ==int.min);
    v = frexp(-NaN(0xABC), eptr);
    test(isIdentical(-NaN(0xABC), v));
    test(eptr ==int.min);

    for (i = 0; i < vals.length; i++) {
        real x = vals[i][0];
        real e = vals[i][1];
        int exp = cast(int)vals[i][2];
        v = frexp(x, eptr);
//        printf("frexp(%La) = %La, should be %La, eptr = %d, should be %d\n", x, v, e, eptr, exp);
        test(isIdentical(e, v));
        test(exp == eptr);

    }
   static if (real.mant_dig == 64) {
     static real[3][] extendedvals = [ // x,frexp,exp
        [0x1.a5f1c2eb3fe4efp+73L, 0x1.A5F1C2EB3FE4EFp-1L,   74],    // normal
        [0x1.fa01712e8f0471ap-1064L,  0x1.fa01712e8f0471ap-1L,     -1063],
        [real.min_normal,  .5,     -16381],
        [real.min_normal/2.0L, .5,     -16382]    // denormal
     ];

    for (i = 0; i < extendedvals.length; i++) {
        real x = extendedvals[i][0];
        real e = extendedvals[i][1];
        int exp = cast(int)extendedvals[i][2];
        v = frexp(x, eptr);
        test(isIdentical(e, v));
        test(exp == eptr);

    }
  }
}
537 
/**
 * Compute n * 2$(SUP exp)
 *
 * Params:
 *      n   = the value to scale
 *      exp = the power of two to scale it by
 *
 * References: frexp
 */
real ldexp(real n, int exp) /* intrinsic */
{
    version (Naked_D_InlineAsm_X86)
    {
        // Push exp, then n; fscale scales ST(0) by 2^ST(1); the final fstp
        // discards the exponent and leaves the result on top of the stack.
        asm
        {
            fild exp;
            fld n;
            fscale;
            fstp ST(1);
        }
    }
    else
    {
        return core.stdc.math.ldexpl(n, exp);
    }
}
558 
/******************************************
 * Extracts the exponent of x as a signed integral value.
 *
 * If x is not a special value, the result is the same as
 * $(D cast(int)logb(x)).
 *
 * Remarks: This function is consistent with IEEE754R, but it
 * differs from the C function of the same name
 * in the return value of infinity. (in C, ilogb(real.infinity)== int.max).
 * Note that the special return values may all be equal.
 *
 *      $(TABLE_SV
 *      $(TR $(TH x)                $(TH ilogb(x))     $(TH Invalid?))
 *      $(TR $(TD 0)                 $(TD FP_ILOGB0)   $(TD yes))
 *      $(TR $(TD $(PLUSMN)$(INFIN)) $(TD FP_ILOGBINFINITY) $(TD yes))
 *      $(TR $(TD $(NAN))            $(TD FP_ILOGBNAN) $(TD yes))
 *      )
 */
int ilogb(real x)
{
    version(Naked_D_InlineAsm_X86)
    {
        int y;
        asm {
            fld x;
            fxtract;
            fstp ST(0); // drop significand
            fistp y; // and return the exponent
        }
        return y;
    } else static if (real.mant_dig==64) { // 80-bit reals
        alias floatTraits!(real) F;
        // Exponent field with the sign bit masked off.
        short e = cast(short)((cast(short *)&x)[F.EXPPOS_SHORT] & F.EXPMASK);
        if (e == F.EXPMASK) {
            // BUG: should also set the invalid exception
            ulong s = *cast(ulong *)&x;
            if (s == 0x8000_0000_0000_0000) {
                return FP_ILOGBINFINITY;
            }
            else return FP_ILOGBNAN;
        }
        if (e==0) {
            ulong s = *cast(ulong *)&x;
            if (s == 0x0000_0000_0000_0000) {
                // BUG: should also set the invalid exception
                return FP_ILOGB0;
            }
            // Denormals: scale into the normal range (RECIP_EPSILON == 2^63)
            // and compensate for the scaling in the result.
            x *= F.RECIP_EPSILON;
            // The sign bit shares this word with the exponent and must be
            // masked off, otherwise negative denormals give a wrong result.
            short f = cast(short)((cast(short *)&x)[F.EXPPOS_SHORT] & F.EXPMASK);
            return -0x3FFF - (63-f);
        }
        return e - 0x3FFF;
    } else {
        return core.stdc.math.ilogbl(x);
    }
}

// Special return values of ilogb().
version (X86)
{
    static immutable int FP_ILOGB0        = -int.max-1;
    static immutable int FP_ILOGBNAN      = -int.max-1;
    static immutable int FP_ILOGBINFINITY = -int.max-1;
} else {
    alias core.stdc.math.FP_ILOGB0   FP_ILOGB0;
    alias core.stdc.math.FP_ILOGBNAN FP_ILOGBNAN;
    static immutable int FP_ILOGBINFINITY = int.max;
}

unittest {
    test(ilogb(1.0) == 0);
    test(ilogb(65536) == 16);
    test(ilogb(-65536) == 16);
    test(ilogb(1.0 / 65536) == -16);
    test(ilogb(real.nan) == FP_ILOGBNAN);
    test(ilogb(0.0) == FP_ILOGB0);
    test(ilogb(-0.0) == FP_ILOGB0);
    // denormal
    test(ilogb(0.125 * real.min_normal) == real.min_exp - 4);
    // the sign of a denormal must not influence the result
    test(ilogb(-0.125 * real.min_normal) == real.min_exp - 4);
    test(ilogb(real.infinity) == FP_ILOGBINFINITY);
}
640 
/*****************************************
 * Extracts the exponent of x as a signed integral value.
 *
 * If x is subnormal, it is treated as if it were normalized.
 * For a positive, finite x:
 *
 * 1 $(LT)= $(I x) * FLT_RADIX$(SUP -logb(x)) $(LT) FLT_RADIX
 *
 *      $(TABLE_SV
 *      $(TR $(TH x)                 $(TH logb(x))   $(TH divide by 0?) )
 *      $(TR $(TD $(PLUSMN)$(INFIN)) $(TD +$(INFIN)) $(TD no))
 *      $(TR $(TD $(PLUSMN)0.0)      $(TD -$(INFIN)) $(TD yes) )
 *      )
 */
real logb(real x)
{
    version (Naked_D_InlineAsm_X86)
    {
        // fxtract splits ST(0) into exponent and significand; popping the
        // significand leaves the exponent as the result.
        asm
        {
            fld x;
            fxtract;
            fstp ST(0); // drop significand
        }
    }
    else
    {
        return core.stdc.math.logbl(x);
    }
}

unittest
{
    // special values
    test(logb(real.infinity)== real.infinity);
    test(isIdentical(logb(NaN(0xFCD)), NaN(0xFCD)));
    // ordinary values
    test(logb(1.0)== 0.0);
    test(logb(-65536) == 16);
    test(logb(0.0)== -real.infinity);
    // denormal (exercised through ilogb)
    test(ilogb(0.125*real.min_normal) == real.min_exp-4);
}
677 
/*************************************
 * Efficiently calculates x * 2$(SUP n).
 *
 * scalbn handles underflow and overflow in
 * the same fashion as the basic arithmetic operators.
 *
 *  $(TABLE_SV
 *      $(TR $(TH x)                 $(TH scalbn(x, n)))
 *      $(TR $(TD $(PLUSMNINF))      $(TD $(PLUSMNINF)) )
 *      $(TR $(TD $(PLUSMN)0.0)      $(TD $(PLUSMN)0.0) )
 *  )
 */
real scalbn(real x, int n)
{
    version (Naked_D_InlineAsm_X86)
    {
        // Push n, then x; fscale scales ST(0) by 2^ST(1); the final fstp
        // discards n and leaves the result on top of the stack.
        asm
        {
            fild n;
            fld x;
            fscale;
            fstp ST(1);
        }
    }
    else
    {
        // NOTE: Not implemented in DMD
        return core.stdc.math.scalbnl(x, n);
    }
}

unittest
{
    // infinities and NaN payloads pass through unchanged
    test(scalbn(-real.infinity, 5) == -real.infinity);
    test(isIdentical(scalbn(NaN(0xABC),7), NaN(0xABC)));
}
710 
/**
 * Returns the positive difference between x and y.
 *
 * If either of x or y is $(NAN), it will be returned.
 *
 * Params:
 *      x = the minuend
 *      y = the subtrahend
 *
 * Returns:
 * $(TABLE_SV
 *  $(SVH Arguments, fdim(x, y))
 *  $(SV x $(GT) y, x - y)
 *  $(SV x $(LT)= y, +0.0)
 * )
 */
real fdim(real x, real y)
{
    // x - y is used for the NaN cases as well, so that the NaN operand
    // propagates into the result. The difference is returned only when x is
    // strictly greater than y; otherwise the result is +0.0.
    return (tsm.isnan(x) || tsm.isnan(y) || x > y) ? x - y : +0.0;
}

unittest {
    test(isIdentical(fdim(NaN(0xABC), 58.2), NaN(0xABC)));
    // x > y: the difference
    test(fdim(5.0, 3.0) == 2.0);
    test(fdim(-1.0, -4.0) == 3.0);
    // x <= y: +0.0 (and not a negative difference)
    test(isIdentical(fdim(3.0, 5.0), 0.0L));
    test(isIdentical(fdim(2.0, 2.0), 0.0L));
}
730 
/*******************************
 * Returns |x|
 *
 *      $(TABLE_SV
 *      $(TR $(TH x)                 $(TH fabs(x)))
 *      $(TR $(TD $(PLUSMN)0.0)      $(TD +0.0) )
 *      $(TR $(TD $(PLUSMN)$(INFIN)) $(TD +$(INFIN)) )
 *      )
 */
real fabs(real x) /* intrinsic */
{
    // Naked_D_InlineAsm_X86 (rather than D_InlineAsm_X86) is tested so that
    // building with -version=TangoNoAsm disables this assembly path too, as it
    // does for the other helpers in this module.
    version(Naked_D_InlineAsm_X86)
    {
        asm {
            fld x;
            fabs;
        }
    }
    else
    {
        return core.stdc.math.fabsl(x);
    }
}

unittest {
    test(isIdentical(fabs(NaN(0xABC)), NaN(0xABC)));
}
758 
/**
 * Returns (x * y) + z, rounding only once according to the
 * current rounding mode.
 *
 * The operands are taken as $(D real) so that arguments of any
 * floating-point type are used at full precision; narrower arguments
 * convert implicitly.
 *
 * Params:
 *      x = first factor
 *      y = second factor
 *      z = value added to the product
 *
 * BUGS: Not currently implemented - rounds twice.
 */
real fma(real x, real y, real z)
{
    return (x * y) + z;
}
769 
/**
 * Calculate cos(y) + i sin(y).
 *
 * On x86 CPUs, this is a very efficient operation;
 * almost twice as fast as calculating sin(y) and cos(y)
 * separately, and is the preferred method when both are required.
 */
deprecated("Use `std.complex.Complex` instead")
creal expi(real y)
{
    version (Naked_D_InlineAsm_X86)
    {
        // fsincos produces sine and cosine with one instruction; fxch swaps
        // the two results on the FPU stack.
        asm
        {
            fld y;
            fsincos;
            fxch ST(1), ST(0);
        }
    }
    else
    {
        return core.stdc.math.cosl(y) + core.stdc.math.sinl(y) * 1i;
    }
}

deprecated unittest
{
    // agrees with the C runtime for an arbitrary argument
    test(expi(1.3e5L) == core.stdc.math.cosl(1.3e5L) + core.stdc.math.sinl(1.3e5L) * 1i);
    // e^(0i) == 1
    test(expi(0.0L) == 1L + 0.0Li);
}
799 
/*********************************
 * Returns !=0 if x is a NaN.
 *
 * The test inspects the bit pattern of x: a NaN has every exponent bit set
 * and a non-zero significand.
 */

int isNaN(real x)
{
    alias F = floatTraits!(real);

    static if (real.mant_dig == 53)
    {
        // double
        immutable ulong bits = *cast(ulong*)&x;
        immutable bool expAllOnes =
            (bits & 0x7FF0_0000_0000_0000) == 0x7FF0_0000_0000_0000;
        return expAllOnes && (bits & 0x000F_FFFF_FFFF_FFFF) != 0;
    }
    else static if (real.mant_dig == 64)
    {
        // real80: bit 63 is the explicit integer bit and is ignored here
        immutable ushort expBits = F.EXPMASK & (cast(ushort*)&x)[F.EXPPOS_SHORT];
        immutable ulong significand = *cast(ulong*)&x;
        return expBits == F.EXPMASK
            && (significand & 0x7FFF_FFFF_FFFF_FFFF) != 0; // not infinity
    }
    else static if (real.mant_dig == 113)
    {
        // quadruple
        immutable ushort expBits = F.EXPMASK & (cast(ushort*)&x)[F.EXPPOS_SHORT];
        ulong* halves = cast(ulong*)&x;
        return expBits == F.EXPMASK
            && (halves[MANTISSA_LSB] | (halves[MANTISSA_MSB] & 0x0000_FFFF_FFFF_FFFF)) != 0;
    }
    else
    {
        return x != x;
    }
}


unittest
{
    // NaNs of every width, and of either sign
    test(isNaN(float.nan));
    test(isNaN(-double.nan));
    test(isNaN(real.nan));

    // ordinary numbers and infinities are not NaN
    test(!isNaN(53.6));
    test(!isNaN(float.infinity));
}
835 
/**
 * Returns !=0 if x is normalized.
 *
 * (Need one for each format because subnormal
 *  floats might be converted to normal reals)
 *
 * Params:
 *      x = the value to classify; X may be float, double or real
 */
int isNormal(X)(X x)
{
    alias floatTraits!(X) F;

    // The format of X itself (not of real) selects the branch: otherwise, on
    // a doubledouble target, the recursive call below would instantiate
    // isNormal!(double) and take this very branch again.
    static if(X.mant_dig==106) { // doubledouble
    // doubledouble is normal if the least significant part is normal.
        return isNormal((cast(double*)&x)[MANTISSA_LSB]);
    } else {
        // Normal means the exponent field is neither all ones (infinity or
        // NaN) nor zero (zero or subnormal).
        ushort e = F.EXPMASK & (cast(ushort *)&x)[F.EXPPOS_SHORT];
        return (e != F.EXPMASK && e!=0);
    }
}

unittest
{
    float f = 3;
    double d = 500;
    real e = 10e+48;

    test(isNormal(f));
    test(isNormal(d));
    test(isNormal(e));
    f=d=e=0;
    test(!isNormal(f));
    test(!isNormal(d));
    test(!isNormal(e));
    test(!isNormal(real.infinity));
    test(isNormal(-real.max));
    test(!isNormal(real.min_normal/4));

}
873 
/*********************************
 * Is the binary representation of x identical to y?
 *
 * Same as ==, except that positive and negative zero are not identical,
 * and two $(NAN)s are identical if they have the same 'payload'.
 */

bool isIdentical(real x, real y)
{
    // This is a bitwise comparison, so the endianness is irrelevant.
    static if (real.mant_dig == 53)
    {
        // double: one 64-bit word
        return *cast(long*)&x == *cast(long*)&y;
    }
    else static if (real.mant_dig == 113 || real.mant_dig == 106)
    {
        // quadruple or doubledouble: two 64-bit words
        long* lhs = cast(long*)&x;
        long* rhs = cast(long*)&y;
        return lhs[0] == rhs[0] && lhs[1] == rhs[1];
    }
    else
    {
        // real80: the fifth 16-bit word (sign and exponent), then the
        // leading 64-bit word (significand)
        if ((cast(ushort*)&x)[4] != (cast(ushort*)&y)[4])
            return false;
        return *cast(long*)&x == *cast(long*)&y;
    }
}
897 
/** ditto */
deprecated("Use `real` instead")
bool isIdentical(ireal x, ireal y)
{
    // Compare the imaginary magnitudes as plain reals.
    immutable real lhs = x.im;
    immutable real rhs = y.im;
    return isIdentical(lhs, rhs);
}
903 
/** ditto */
deprecated("Use `std.complex.Complex` instead")
bool isIdentical(creal x, creal y)
{
    // Both components have to match bit for bit.
    if (!isIdentical(x.re, y.re))
        return false;
    return isIdentical(x.im, y.im);
}
909 
unittest
{
    // zeros: the sign matters
    test(isIdentical(0.0, 0.0));
    test(!isIdentical(0.0, -0.0));

    // NaNs: the payload matters
    test(isIdentical(NaN(0xABC), NaN(0xABC)));
    test(!isIdentical(NaN(0xABC), NaN(218)));

    // ordinary values
    test(isIdentical(1.234e56, 1.234e56));

    test(isNaN(NaN(0x12345)));
}
918 
deprecated unittest
{
    // complex: both parts are compared, NaN payloads included
    test(isIdentical(3.1 + NaN(0xDEF) * 1i, 3.1 + NaN(0xDEF)*1i));
    test(!isIdentical(3.1+0.0i, 3.1-0i));

    // imaginary
    test(!isIdentical(0.0i, 2.5e58i));
}
924 
/*********************************
 * Is number subnormal? (Also called "denormal".)
 * Subnormals have a 0 exponent and a 0 most significant significand bit,
 * but are non-zero.
 */

/* Need one for each format because subnormal floats might
 * be converted to normal reals.
 */

int isSubnormal(float f)
{
    immutable uint bits = *cast(uint*)&f;
    // exponent field is zero ...
    immutable bool expIsZero = (bits & 0x7F80_0000) == 0;
    // ... while the significand is not
    return expIsZero && (bits & 0x007F_FFFF) != 0;
}

unittest
{
    // the smallest normal float is still normal, a quarter of it is not
    float f = -float.min_normal;
    test(!isSubnormal(f));
    f /= 4;
    test(isSubnormal(f));
}
948 
/// ditto

int isSubnormal(double d)
{
    uint* halves = cast(uint*)&d;
    immutable uint upper = halves[MANTISSA_MSB];
    immutable uint lower = halves[MANTISSA_LSB];
    // exponent field (in the upper half) is zero ...
    immutable bool expIsZero = (upper & 0x7FF0_0000) == 0;
    // ... while some significand bit in either half is set
    return expIsZero && (lower != 0 || (upper & 0x000F_FFFF) != 0);
}

unittest
{
    // halving 1.0 repeatedly must reach a subnormal before it reaches zero
    double f = 1;
    while (!isSubnormal(f))
    {
        test(f != 0);
        f /= 2;
    }
}
964 
965 /// ditto
966 
int isSubnormal(real x)
{
    alias floatTraits!(real) F;
    static if (real.mant_dig == 53) { // double
        // real has the same layout as double: reuse the double overload.
        return isSubnormal(cast(double)x);
    } else static if (real.mant_dig == 113) { // quadruple
        long* halves = cast(long *)&x;
        ushort biasedExp = F.EXPMASK & (cast(ushort *)&x)[F.EXPPOS_SHORT];
        // Fraction = low 64 bits plus the low 48 bits of the high word.
        long fraction = halves[MANTISSA_LSB]
                      | (halves[MANTISSA_MSB] & 0x0000_FFFF_FFFF_FFFF);
        return biasedExp == 0 && fraction != 0;
    } else static if (real.mant_dig==64) { // real80
        ushort exponentWord = (cast(ushort *)&x)[F.EXPPOS_SHORT];
        // Read as signed: the test below requires a non-zero significand
        // whose top (explicit integer) bit is clear.
        long significand = *cast(long *)&x;
        return (exponentWord & F.EXPMASK) == 0 && significand > 0;
    } else { // double double
        // Only the most significant double is inspected.
        return isSubnormal((cast(double*)&x)[MANTISSA_MSB]);
    }
}
985 
unittest
{
    // Halving 1.0 repeatedly must reach a subnormal before it reaches zero.
    real value = 1;
    while (!isSubnormal(value))
    {
        test(value != 0);
        value /= 2;
    }
}
993 
/*********************************
 * Test whether x is a zero of either sign.
 *
 * Returns: non-zero if x is $(PLUSMN)0, 0 otherwise.
 *
 * The check is done on the bit pattern only, so it does not affect any
 * floating-point flags.
 */
int isZero(real x)
{
    alias floatTraits!(real) F;
    static if (real.mant_dig == 53) { // double
        // Everything except the sign bit must be clear.
        ulong bits = *cast(ulong *)&x;
        return (bits & 0x7FFF_FFFF_FFFF_FFFF) == 0;
    } else static if (real.mant_dig == 113) { // quadruple
        long* halves = cast(long *)&x;
        // Low word, plus the high word without its sign bit.
        long magnitude = halves[MANTISSA_LSB]
                       | (halves[MANTISSA_MSB] & 0x7FFF_FFFF_FFFF_FFFF);
        return magnitude == 0;
    } else { // real80
        ushort exponentWord = (cast(ushort *)&x)[F.EXPPOS_SHORT];
        ulong significand = *cast(ulong *)&x;
        return (exponentWord & F.EXPMASK) == 0 && significand == 0;
    }
}
1013 
unittest
{
    // Both signed zeros count as zero.
    test(isZero(0.0));
    test(isZero(-0.0));

    // Non-zero values, including one well below real.min_normal, do not.
    test(!isZero(2.5));
    real tiny = real.min_normal / 1000;
    test(!isZero(tiny));
}
1021 
/*********************************
 * Return !=0 if x is $(PLUSMNINF).
 *
 * The test is done on the bit pattern: the sign is ignored, the exponent
 * field must be all ones and the fraction must be zero (for real80 the
 * explicit integer bit must be set and nothing else).
 */

int isInfinity(real x)
{
    alias floatTraits!(real) F;
    static if (real.mant_dig == 53) { // double
        // Infinity is exponent 0x7FF with a zero fraction.
        // (0x7FF8_... would be the default quiet NaN, not infinity.)
        return ((*cast(ulong *)&x) & 0x7FFF_FFFF_FFFF_FFFF) == 0x7FF0_0000_0000_0000;
    } else static if(real.mant_dig == 106) { //doubledouble
        // Only the most significant double decides.
        return (((cast(ulong *)&x)[MANTISSA_MSB]) & 0x7FFF_FFFF_FFFF_FFFF) == 0x7FF0_0000_0000_0000;
    } else static if (real.mant_dig == 113) { // quadruple
        long*   ps = cast(long *)&x;
        return (ps[MANTISSA_LSB] == 0)
         && (ps[MANTISSA_MSB] & 0x7FFF_FFFF_FFFF_FFFF) == 0x7FFF_0000_0000_0000;
    } else { // real80
        ushort e = cast(ushort)(F.EXPMASK & (cast(ushort *)&x)[F.EXPPOS_SHORT]);
        ulong*  ps = cast(ulong *)&x;

        // Maximum exponent, and a significand consisting of only the
        // explicit integer bit.
        return e == F.EXPMASK && *ps == 0x8000_0000_0000_0000;
   }
}
1044 
unittest
{
    // Infinities of every width and of either sign are recognised.
    test(isInfinity(float.infinity));
    test(isInfinity(double.infinity));
    test(isInfinity(-real.infinity));
    test(isInfinity(-1.0 / 0.0));

    // A NaN is not an infinity.
    test(!isInfinity(float.nan));
}
1054 
/**
 * Calculate the next largest floating point value after x.
 *
 * Return the least number greater than x that is representable as a real;
 * thus, it gives the next point on the IEEE number line.
 *
 *  $(TABLE_SV
 *    $(SVH x,            nextUp(x)   )
 *    $(SV  -$(INFIN),    -real.max   )
 *    $(SV  $(PLUSMN)0.0, real.min_normal*real.epsilon )
 *    $(SV  real.max,     $(INFIN) )
 *    $(SV  $(INFIN),     $(INFIN) )
 *    $(SV  $(NAN),       $(NAN)   )
 * )
 *
 * Remarks:
 * This function is included in the IEEE 754-2008 standard.
 *
 * nextDoubleUp and nextFloatUp are the corresponding functions for
 * the IEEE double and IEEE float number lines.
 */
real nextUp(real x)
{
    alias floatTraits!(real) F;
    static if (real.mant_dig == 53) { // double
        return nextDoubleUp(x);
    } else static if(real.mant_dig==113) {  // quadruple
        ushort e = F.EXPMASK & (cast(ushort *)&x)[F.EXPPOS_SHORT];
        if (e == F.EXPMASK) { // NaN or Infinity
             if (x == -real.infinity) return -real.max;
             return x; // +Inf and NaN are unchanged.
        }
        // Treat x itself as a sign-magnitude 128-bit integer made of two
        // 64-bit words. The sign bit lives in the most significant word.
        ulong*   ps = cast(ulong *)&x;
        if (ps[MANTISSA_MSB] & 0x8000_0000_0000_0000)  { // Negative number
            if (ps[MANTISSA_LSB]==0 && ps[MANTISSA_MSB] == 0x8000_0000_0000_0000) { // it was negative zero
                ps[MANTISSA_LSB] = 0x0000_0000_0000_0001; // change to smallest subnormal
                ps[MANTISSA_MSB] = 0;
                return x;
            }
            // Moving up means reducing the magnitude: 128-bit decrement,
            // borrowing from the high word when the low word is zero.
            if (ps[MANTISSA_LSB]==0) --ps[MANTISSA_MSB];
            --ps[MANTISSA_LSB];
        } else { // Positive number
            // 128-bit increment, carrying into the high word on wrap-around.
            ++ps[MANTISSA_LSB];
            if (ps[MANTISSA_LSB]==0) ++ps[MANTISSA_MSB];
        }
        return x;

    } else static if(real.mant_dig==64){ // real80
        // For 80-bit reals, the "implied bit" is a nuisance...
        ushort *pe = cast(ushort *)&x;
        ulong  *ps = cast(ulong  *)&x;

        if ((pe[F.EXPPOS_SHORT] & F.EXPMASK) == F.EXPMASK) {
            // First, deal with NANs and infinity
            if (x == -real.infinity) return -real.max;
            return x; // +Inf and NaN are unchanged.
        }
        if (pe[F.EXPPOS_SHORT] & 0x8000)  { // Negative number -- need to decrease the significand
            --*ps;
            // Need to mask with 0x7FFF... so subnormals are treated correctly.
            if ((*ps & 0x7FFF_FFFF_FFFF_FFFF) == 0x7FFF_FFFF_FFFF_FFFF) {
                if (pe[F.EXPPOS_SHORT] == 0x8000) { // it was negative zero
                    *ps = 1;
                    pe[F.EXPPOS_SHORT] = 0; // smallest subnormal.
                    return x;
                }
                --pe[F.EXPPOS_SHORT];
                if (pe[F.EXPPOS_SHORT] == 0x8000) {
                    return x; // it's become a subnormal, implied bit stays low.
                }
                *ps = 0xFFFF_FFFF_FFFF_FFFF; // set the implied bit
                return x;
            }
            return x;
        } else {
            // Positive number -- need to increase the significand.
            // Works automatically for positive zero.
            ++*ps;
            if ((*ps & 0x7FFF_FFFF_FFFF_FFFF) == 0) {
                // change in exponent
                ++pe[F.EXPPOS_SHORT];
                *ps = 0x8000_0000_0000_0000; // set the high bit
            }
        }
        return x;
    } else { // doubledouble
        static assert(0, "Not implemented");
    }
}
1144 
/** ditto */
double nextDoubleUp(double x)
{
    enum ulong SIGN_BIT = 0x8000_0000_0000_0000;
    enum ulong EXP_BITS = 0x7FF0_0000_0000_0000;

    // Work on a copy of the bit pattern and store it back at the end.
    ulong bits = *cast(ulong *)&x;

    if ((bits & EXP_BITS) == EXP_BITS)
    {
        // All-ones exponent: infinity or NaN. Only -infinity has a
        // successor; +infinity and NaN are returned unchanged.
        return (x == -double.infinity) ? -double.max : x;
    }

    if (bits == SIGN_BIT)
        bits = 1;       // negative zero -> smallest positive subnormal
    else if (bits & SIGN_BIT)
        --bits;         // negative: shrink the magnitude
    else
        ++bits;         // positive (or +0): grow the magnitude

    *cast(ulong *)&x = bits;
    return x;
}
1166 
/** ditto */
float nextFloatUp(float x)
{
    enum uint SIGN_BIT = 0x8000_0000;
    enum uint EXP_BITS = 0x7F80_0000;

    // Work on a copy of the bit pattern and store it back at the end.
    uint bits = *cast(uint *)&x;

    if ((bits & EXP_BITS) == EXP_BITS)
    {
        // All-ones exponent: infinity or NaN. Only -infinity has a
        // successor; +infinity and NaN are returned unchanged.
        return (x == -float.infinity) ? -float.max : x;
    }

    if (bits == SIGN_BIT)
        bits = 1;       // negative zero -> smallest positive subnormal
    else if (bits & SIGN_BIT)
        --bits;         // negative: shrink the magnitude
    else
        ++bits;         // positive (or +0): grow the magnitude

    *cast(uint *)&x = bits;
    return x;
}
1188 
unittest {
    static if (real.mant_dig == 64) {

        // Tests for 80-bit reals

        test(isIdentical(nextUp(NaN(0xABC)), NaN(0xABC)));
        // negative numbers
        test( nextUp(-real.infinity) == -real.max );
        test( nextUp(-1-real.epsilon) == -1.0 );
        test( nextUp(-2) == -2.0 + real.epsilon);
        // denormals and zero
        test( nextUp(-real.min_normal) == -real.min_normal*(1-real.epsilon) );
        // The comparison must be made on the result of nextUp(), not passed
        // to it as a bool argument.
        test( nextUp(-real.min_normal*(1-real.epsilon)) == -real.min_normal*(1-2*real.epsilon) );
        test( isIdentical(-0.0L, nextUp(-real.min_normal*real.epsilon)) );
        test( nextUp(-0.0) == real.min_normal*real.epsilon );
        test( nextUp(0.0) == real.min_normal*real.epsilon );
        test( nextUp(real.min_normal*(1-real.epsilon)) == real.min_normal );
        test( nextUp(real.min_normal) == real.min_normal*(1+real.epsilon) );
        // positive numbers
        test( nextUp(1) == 1.0 + real.epsilon );
        test( nextUp(2.0-real.epsilon) == 2.0 );
        test( nextUp(real.max) == real.infinity );
        test( nextUp(real.infinity)==real.infinity );
    }

    test(isIdentical(nextDoubleUp(NaN(0xABC)), NaN(0xABC)));
    // negative numbers
    test( nextDoubleUp(-double.infinity) == -double.max );
    test( nextDoubleUp(-1-double.epsilon) == -1.0 );
    test( nextDoubleUp(-2) == -2.0 + double.epsilon);
    // denormals and zero

    test( nextDoubleUp(-double.min_normal) == -double.min_normal*(1-double.epsilon) );
    // As above: compare the result of nextDoubleUp(), do not pass it a bool.
    test( nextDoubleUp(-double.min_normal*(1-double.epsilon)) == -double.min_normal*(1-2*double.epsilon) );
    test( isIdentical(-0.0, nextDoubleUp(-double.min_normal*double.epsilon)) );
    test( nextDoubleUp(0.0) == double.min_normal*double.epsilon );
    test( nextDoubleUp(-0.0) == double.min_normal*double.epsilon );
    test( nextDoubleUp(double.min_normal*(1-double.epsilon)) == double.min_normal );
    test( nextDoubleUp(double.min_normal) == double.min_normal*(1+double.epsilon) );
    // positive numbers
    test( nextDoubleUp(1) == 1.0 + double.epsilon );
    test( nextDoubleUp(2.0-double.epsilon) == 2.0 );
    test( nextDoubleUp(double.max) == double.infinity );

    test(isIdentical(nextFloatUp(NaN(0xABC)), NaN(0xABC)));
    test( nextFloatUp(-float.min_normal) == -float.min_normal*(1-float.epsilon) );
    test( nextFloatUp(1.0) == 1.0+float.epsilon );
    test( nextFloatUp(-0.0) == float.min_normal*float.epsilon);
    test( nextFloatUp(float.infinity)==float.infinity );

    // The "down" variants and nextafter() step back to 1.0.
    test(nextDown(1.0+real.epsilon)==1.0);
    test(nextDoubleDown(1.0+double.epsilon)==1.0);
    test(nextFloatDown(1.0+float.epsilon)==1.0);
    test(nextafter(1.0+real.epsilon, -real.infinity)==1.0);
}
1244 
1245 package {
/** Clears the low-order bits of x's significand in place, reducing its
 * magnitude, and returns the amount that was removed.
 *
 * After the call, x plus the returned value equals the initial value of x.
 * The returned amount is intended to have zeros in all bits in the lower half
 * of its significand as well.
 *
 * NaN and infinity are left unchanged, and 0 is returned for them.
 */
X splitSignificand(X)(ref X x)
{
    // Nothing to split for NaN or infinity.
    if (isNaN(x) || isInfinity(x))
        return 0;

    X original = x; // remember the value before masking

    static if (X.mant_dig == float.mant_dig) {
        // NOTE(review): this mask clears the low 10 bits only; confirm that
        // is the intended split point for a 24-bit significand.
        uint *word = cast(uint *)&x;
        *word &= 0xFFFF_FC00;
    } else static if (X.mant_dig == 53) {
        ulong *word = cast(ulong *)&x;
        *word &= 0xFFFF_FFFF_FC00_0000L;
    } else static if (X.mant_dig == 64){ // 80-bit real
        // An x87 real80 stores its leading bit explicitly, leaving 63
        // fraction bits, which cannot be halved exactly. They are split
        // into 31+32 bits instead.
        ulong *word = cast(ulong *)&x;
        *word &= 0xFFFF_FFFF_0000_0000L;
    } else static if (X.mant_dig==113) { // quadruple
        ulong *words = cast(ulong *)&x;
        words[MANTISSA_LSB] &= 0xFF00_0000_0000_0000L;
    }
    //else static assert(0, "Unsupported size");

    // What was masked away.
    return original - x;
}
1275 
unittest {
    // Split a double: 'high' keeps the leading bits, 'low' receives the rest.
    double high = -0x1.234_567A_AAAA_AAp+250;
    double low = splitSignificand(high);

    test(high == -0x1.234_5678p+250);
    test(low == -0x0.000_000A_AAAA_A8p+248);
    // The two parts add back up to the initial value.
    test(high + low == -0x1.234_567A_AAAA_AAp+250);
}
1283 }
1284 
/**
 * Calculate the next smallest floating point value before x.
 *
 * Return the greatest number less than x that is representable as a real;
 * thus, it gives the previous point on the IEEE number line.
 *
 *  $(TABLE_SV
 *    $(SVH x,            nextDown(x)   )
 *    $(SV  $(INFIN),     real.max  )
 *    $(SV  $(PLUSMN)0.0, -real.min_normal*real.epsilon )
 *    $(SV  -real.max,    -$(INFIN) )
 *    $(SV  -$(INFIN),    -$(INFIN) )
 *    $(SV  $(NAN),       $(NAN)    )
 * )
 *
 * Remarks:
 * This function is included in the IEEE 754-2008 standard.
 *
 * nextDoubleDown and nextFloatDown are the corresponding functions for
 * the IEEE double and IEEE float number lines.
 */
real nextDown(real x)
{
    // Stepping down from x is stepping up from -x, mirrored back.
    real mirrored = -x;
    return -nextUp(mirrored);
}
1310 
/** ditto */
double nextDoubleDown(double x)
{
    // Stepping down from x is stepping up from -x, mirrored back.
    double mirrored = -x;
    return -nextDoubleUp(mirrored);
}
1316 
/** ditto */
float nextFloatDown(float x)
{
    // Stepping down from x is stepping up from -x, mirrored back.
    float mirrored = -x;
    return -nextFloatUp(mirrored);
}
1322 
unittest {
    // Stepping down from the value just above 1.0 lands exactly on 1.0.
    real justAboveOne = 1.0 + real.epsilon;
    test(nextDown(justAboveOne) == 1.0);
}
1326 
/**
 * Calculates the next representable value after x in the direction of y.
 *
 * If y > x, the result will be the next largest floating-point value;
 * if y < x, the result will be the next smallest value.
 * If x == y, the result is y.
 * If x or y is a $(NAN), that $(NAN) is returned (x takes precedence).
 *
 * Remarks:
 * This function is not generally very useful; it's almost always better to use
 * the faster functions nextUp() or nextDown() instead.
 *
 * IEEE 754 requirements not implemented:
 * The FE_INEXACT and FE_OVERFLOW exceptions will be raised if x is finite and
 * the function result is infinite. The FE_INEXACT and FE_UNDERFLOW
 * exceptions will be raised if the function value is subnormal, and x is
 * not equal to y.
 */
real nextafter(real x, real y)
{
    // A NaN compares unequal to itself. Propagate NaN operands instead of
    // letting the ordered comparison below silently pick nextDown(x).
    if (x != x) return x;
    if (x == y || y != y) return y;
    return (y>x) ? nextUp(x) : nextDown(x);
}
1349 
/**************************************
 * To what precision is x equal to y?
 *
 * Returns: the number of significand bits which are equal in x and y.
 * eg, 0x1.F8p+60 and 0x1.F1p+60 are equal to 5 bits of precision.
 *
 *  $(TABLE_SV
 *    $(SVH3 x,      y,         feqrel(x, y)  )
 *    $(SV3  x,      x,         typeof(x).mant_dig )
 *    $(SV3  x,      $(GT)= 2*x, 0 )
 *    $(SV3  x,      $(LT)= x/2, 0 )
 *    $(SV3  $(NAN), any,       0 )
 *    $(SV3  any,    $(NAN),    0 )
 *  )
 *
 * Remarks:
 * This is a very fast operation, suitable for use in speed-critical code.
 */
int feqrel(X)(X x, X y)
{
    /* Public Domain. Author: Don Clugston, 18 Aug 2005.
     */
    static assert(is(X==real) || is(X==double) || is(X==float), "Only float, double, and real are supported by feqrel");

    static if (X.mant_dig == 106) { // doubledouble.
        int highBits = feqrel(cast(double*)(&x)[MANTISSA_MSB], cast(double*)(&y)[MANTISSA_MSB]);
        if (highBits != double.mant_dig) return highBits;
        return double.mant_dig + feqrel(cast(double*)(&x)[MANTISSA_LSB], cast(double*)(&y)[MANTISSA_LSB]);
    } else static if (X.mant_dig==64 || X.mant_dig==113
                   || X.mant_dig==53 || X.mant_dig == 24) {
        // Equal operands: this also guarantees a non-zero difference below
        // and copes with infinities.
        if (x == y) return X.mant_dig;

        X delta = fabs(x - y);

        alias floatTraits!(X) F;

        // 16-bit views used to read the exponent word of each value.
        ushort *px = cast(ushort *)(&x);
        ushort *py = cast(ushort *)(&y);
        ushort *pdelta = cast(ushort *)(&delta);

        // The difference in abs(exponent) between x or y and abs(x-y)
        // equals the number of significand bits of x which are equal to y.
        // If negative, x and y have different exponents; if positive, they
        // agree to 'bitsdiff' bits. The exponents are averaged after
        // subtracting 1 so the result rounds down when they differ, which
        // makes 'bitsdiff' always 1 lower than wanted, except that for
        // bitsdiff==0 the operands could have 0 or 1 bits in common.
        int meanExponent = ((px[F.EXPPOS_SHORT] & F.EXPMASK)
                          + (py[F.EXPPOS_SHORT] & F.EXPMASK)
                          - (0x8000-F.EXPMASK)) >> 1;

        static if (X.mant_dig==64 || X.mant_dig==113) { // real80 or quadruple
            int bitsdiff = meanExponent - pdelta[F.EXPPOS_SHORT];
        } else static if (X.mant_dig==53) { // double
            // The exponent starts 4 bits up in the word.
            int bitsdiff = (meanExponent - (pdelta[F.EXPPOS_SHORT] & F.EXPMASK)) >> 4;
        } else static if (X.mant_dig == 24) { // float
            // The exponent starts 7 bits up in the word.
            int bitsdiff = (meanExponent - (pdelta[F.EXPPOS_SHORT] & F.EXPMASK)) >> 7;
        }

        if (pdelta[F.EXPPOS_SHORT] == 0)
        {
            // Difference is denormal: add the number of zeros at the start
            // of its significand. They are found by scaling the difference
            // by F.RECIP_EPSILON and re-reading its exponent word.
            delta *= F.RECIP_EPSILON;
            return bitsdiff + X.mant_dig - pdelta[F.EXPPOS_SHORT];
        }

        if (bitsdiff > 0)
            return bitsdiff + 1; // add the 1 we subtracted before

        // Avoid out-by-1 errors when factor is almost 2.
        static if (X.mant_dig==64 || X.mant_dig==113) { // real80 or quadruple
            if (bitsdiff != 0)
                return 0;
            return px[F.EXPPOS_SHORT] == py[F.EXPPOS_SHORT];
        } else static if (X.mant_dig == 53 || X.mant_dig == 24) { // double or float
            bool sameExponent = ((px[F.EXPPOS_SHORT] ^ py[F.EXPPOS_SHORT]) & F.EXPMASK) == 0;
            return (bitsdiff == 0 && sameExponent) ? 1 : 0;
        }
    } else {
        static assert(0, "Unsupported");
    }
}
1438 
unittest
{
    // Exact equality
    test(feqrel(real.max,real.max)==real.mant_dig);
    test(feqrel(0.0L,0.0L)==real.mant_dig);
    test(feqrel(7.1824L,7.1824L)==real.mant_dig);
    test(feqrel(real.infinity,real.infinity)==real.mant_dig);

    // A few bits away from exact equality: each doubling of the offset from
    // 1.0 costs one bit of agreement.
    real scale = 1;
    foreach (i; 1 .. real.mant_dig-1)
    {
        test(feqrel(1+scale*real.epsilon,1.0L)==real.mant_dig-i);
        test(feqrel(1-scale*real.epsilon,1.0L)==real.mant_dig-i);
        test(feqrel(1.0L,1+(scale-1)*real.epsilon)==real.mant_dig-i+1);
        scale *= 2;
    }
    test(feqrel(1.5+real.epsilon,1.5L)==real.mant_dig-1);
    test(feqrel(1.5-real.epsilon,1.5L)==real.mant_dig-1);
    test(feqrel(1.5-real.epsilon,1.5+real.epsilon)==real.mant_dig-2);

    // Operands below real.min_normal
    test(feqrel(real.min_normal/8,real.min_normal/17)==3);

    // Numbers that are close
    test(feqrel(0x1.Bp+84, 0x1.B8p+84)==5);
    test(feqrel(0x1.8p+10, 0x1.Cp+10)==2);
    test(feqrel(1.5*(1-real.epsilon), 1.0L)==2);
    test(feqrel(1.5, 1.0)==1);
    test(feqrel(2*(1-real.epsilon), 1.0L)==1);

    // Factors of 2
    test(feqrel(real.max,real.infinity)==0);
    test(feqrel(2*(1-real.epsilon), 1.0L)==1);
    test(feqrel(1.0, 2.0)==0);
    test(feqrel(4.0, 1.0)==0);

    // Extreme inequality
    test(feqrel(real.nan,real.nan)==0);
    test(feqrel(0.0L,-real.nan)==0);
    test(feqrel(real.nan,real.infinity)==0);
    test(feqrel(real.infinity,-real.infinity)==0);
    test(feqrel(-real.max,real.infinity)==0);
    test(feqrel(real.max,-real.max)==0);

    // floats
    test(feqrel(2.1f, 2.1f)==float.mant_dig);
    test(feqrel(1.5f, 1.0f)==1);
}
1486 
/*********************************
 * Return 1 if the sign bit of x is set, 0 if not.
 */

int signbit(real x)
{
    alias floatTraits!(real) F;

    // The sign is the top bit of the byte at F.SIGNPOS_BYTE.
    ubyte signByte = (cast(ubyte *)&x)[F.SIGNPOS_BYTE];
    return (signByte & 0x80) != 0;
}
1495 
unittest
{
    // Sign bit clear
    test(!signbit(float.nan));
    test(!signbit(168.1234));
    test(!signbit(0.0));

    // Sign bit set, including on a negated NaN and on negative zero
    test(signbit(-float.nan));
    test(signbit(-168.1234));
    test(signbit(-0.0));
}
1505 
1506 
/*********************************
 * Return a value composed of the magnitude of `to` and the sign bit of `from`.
 */

real copysign(real to, real from)
{
    alias floatTraits!(real) F;

    ubyte* dest = cast(ubyte *)&to;

    // Isolate the sign bit of `from`...
    ubyte sourceSign = (cast(ubyte *)&from)[F.SIGNPOS_BYTE] & 0x80;

    // ...and splice it over the sign bit of `to`, keeping the other 7 bits.
    dest[F.SIGNPOS_BYTE] = cast(ubyte)((dest[F.SIGNPOS_BYTE] & 0x7F) | sourceSign);
    return to;
}
1521 
unittest
{
    real result;

    // A positive source yields a positive result, whatever the target sign.
    result = copysign(21, 23.8);
    test(result == 21);
    result = copysign(-21, 23.8);
    test(result == 21);

    // A negative source yields a negative result, whatever the target sign.
    result = copysign(21, -23.8);
    test(result == -21);
    result = copysign(-21, -23.8);
    test(result == -21);

    // The sign is transferred onto a NaN as well.
    result = copysign(real.nan, -23.8);
    test(isNaN(result) && signbit(result));
}
1541 
/** Return the value that lies halfway between x and y on the IEEE number line.
 *
 * Formally, the result is the arithmetic mean of the binary significands of x
 * and y, multiplied by the geometric mean of the binary exponents of x and y.
 * x and y must have the same sign, and must not be NaN.
 * Note: this function is useful for ensuring O(log n) behaviour in algorithms
 * involving a 'binary chop'.
 *
 * Special cases:
 * If x and y are within a factor of 2, (ie, feqrel(x, y) > 0), the return value
 * is the arithmetic mean (x + y) / 2.
 * If x and y are even powers of 2, the return value is the geometric mean,
 *   ieeeMean(x, y) = sqrt(x * y).
 *
 */
T ieeeMean(T)(T x, T y)
{
    // both x and y must have the same sign, and must not be NaN.
    verify(signbit(x) == signbit(y));
    verify(!tsm.isnan(x) && !tsm.isnan(y));

    // Runtime behaviour for contract violation:
    // If signs are opposite, or one is a NaN, return 0.
    if (!((x>=0 && y>=0) || (x<=0 && y<=0))) return 0.0;

    // The implementation is simple: cast x and y to integers,
    // average them (avoiding overflow), and cast the result back to a floating-point number.

    alias floatTraits!(real) F;
    T u;
    static if (T.mant_dig==64) { // real80
        // There's slight additional complexity because they are actually
        // 79-bit reals...
        ushort *ue = cast(ushort *)&u;
        ulong *ul = cast(ulong *)&u;
        ushort *xe = cast(ushort *)&x;
        ulong *xl = cast(ulong *)&x;
        ushort *ye = cast(ushort *)&y;
        ulong *yl = cast(ulong *)&y;
        // Ignore the useless implicit bit. (Bonus: this prevents overflows)
        ulong m = ((*xl) & 0x7FFF_FFFF_FFFF_FFFFL) + ((*yl) & 0x7FFF_FFFF_FFFF_FFFFL);

        ushort e = cast(ushort)((xe[F.EXPPOS_SHORT] & 0x7FFF) + (ye[F.EXPPOS_SHORT] & 0x7FFF));
        if (m & 0x8000_0000_0000_0000L) {
            ++e;
            m &= 0x7FFF_FFFF_FFFF_FFFFL;
        }
        // Now do a multi-byte right shift
        uint c = e & 1; // carry
        e >>= 1;
        m >>>= 1;
        if (c) m |= 0x4000_0000_0000_0000L; // shift carry into significand
        if (e) *ul = m | 0x8000_0000_0000_0000L; // set implicit bit...
        else *ul = m; // ... unless exponent is 0 (denormal or zero).
        // Write the exponent word through the same index it was read from,
        // rather than a hard-coded position, then restore the sign bit.
        ue[F.EXPPOS_SHORT] = e | (xe[F.EXPPOS_SHORT]& F.SIGNMASK);
    } else static if(T.mant_dig == 113) { //quadruple
        // This would be trivial if 'ucent' were implemented...
        ulong *ul = cast(ulong *)&u;
        ulong *xl = cast(ulong *)&x;
        ulong *yl = cast(ulong *)&y;
        // Multi-byte add, then multi-byte right shift.
        ulong mh = ((xl[MANTISSA_MSB] & 0x7FFF_FFFF_FFFF_FFFFL)
                  + (yl[MANTISSA_MSB] & 0x7FFF_FFFF_FFFF_FFFFL));
        // Discard the lowest bit (to avoid overflow)
        ulong ml = (xl[MANTISSA_LSB]>>>1) + (yl[MANTISSA_LSB]>>>1);
        // add the lowest bit back in, if necessary.
        if (xl[MANTISSA_LSB] & yl[MANTISSA_LSB] & 1) {
            ++ml;
            if (ml==0) ++mh;
        }
        mh >>>=1;
        ul[MANTISSA_MSB] = mh | (xl[MANTISSA_MSB] & 0x8000_0000_0000_0000);
        ul[MANTISSA_LSB] = ml;
    } else static if (T.mant_dig == double.mant_dig) {
        // Average the two bit patterns without their sign bits, then put
        // the sign of x back.
        ulong *ul = cast(ulong *)&u;
        ulong *xl = cast(ulong *)&x;
        ulong *yl = cast(ulong *)&y;
        ulong m = (((*xl) & 0x7FFF_FFFF_FFFF_FFFFL) + ((*yl) & 0x7FFF_FFFF_FFFF_FFFFL)) >>> 1;
        m |= ((*xl) & 0x8000_0000_0000_0000L);
        *ul = m;
    } else static if (T.mant_dig == float.mant_dig) {
        // Same scheme as for double, on a 32-bit pattern.
        uint *ul = cast(uint *)&u;
        uint *xl = cast(uint *)&x;
        uint *yl = cast(uint *)&y;
        uint m = (((*xl) & 0x7FFF_FFFF) + ((*yl) & 0x7FFF_FFFF)) >>> 1;
        m |= ((*xl) & 0x8000_0000);
        *ul = m;
    } else {
        static assert(0, "Not implemented");
    }
    return u;
}
1634 
unittest {
    // The sign of the operands is preserved, even next to a zero.
    test(ieeeMean(-0.0,-1e-20)<0);
    test(ieeeMean(0.0,1e-20)>0);

    // Operands a factor of 4 apart: the result is the geometric mean.
    test(ieeeMean(1.0L,4.0L)==2L);
    test(ieeeMean(2.0*1.013,8.0*1.013)==4*1.013);
    test(ieeeMean(-1.0L,-4.0L)==-2L);
    test(ieeeMean(-1.0,-4.0)==-2);
    test(ieeeMean(-1.0f,-4.0f)==-2f);

    // Operands a factor of 2 apart.
    test(ieeeMean(-1.0,-2.0)==-1.5);
    test(ieeeMean(-1*(1+8*real.epsilon),-2*(1+8*real.epsilon))==-1.5*(1+5*real.epsilon));

    // Widely separated powers of two.
    test(ieeeMean(0x1p60,0x1p-10)==0x1p25);

    static if (real.mant_dig==64) { // x87, 80-bit reals
        test(ieeeMean(1.0L,real.infinity)==0x1p8192L);
        test(ieeeMean(0.0L,real.infinity)==1.5);
    }

    // Operands below real.min_normal.
    test(ieeeMean(0.5*real.min_normal*(1-4*real.epsilon),0.5*real.min_normal)==0.5*real.min_normal*(1-2*real.epsilon));
}
1653 
1654 // Functions for NaN payloads
1655 /*
1656  * A 'payload' can be stored in the significand of a $(NAN). One bit is required
1657  * to distinguish between a quiet and a signalling $(NAN). This leaves 22 bits
1658  * of payload for a float; 51 bits for a double; 62 bits for an 80-bit real;
1659  * and 111 bits for a 128-bit quad.
1660 */
/**
 * Create a $(NAN), storing an integer inside the payload.
 *
 * The payload is laid out so that its lowest 22 bits end up in the part of
 * the significand that a float has room for and the next 29 bits in the part
 * that a double has room for; any remaining bits are stored in the extra
 * significand bits of an extended real.
 *
 * For 80-bit or 128-bit reals, the largest possible payload is 0x3FFF_FFFF_FFFF_FFFF.
 * For doubles, it is 0x7_FFFF_FFFF_FFFF.
 * For floats, it is 0x3F_FFFF.
 */
real NaN(ulong payload)
{
    static if (real.mant_dig == 64) { //real80
      ulong v = 3; // implied bit = 1, quiet bit = 1
    } else {
      // No implied bit is stored, so only the quiet bit is seeded. A seed of
      // 2 would push the set bit out of the fraction and leave the quiet bit
      // clear.
      ulong v = 1; // no implied bit. quiet bit = 1
    }

    ulong a = payload;

    // 22 Float bits
    ulong w = a & 0x3F_FFFF;
    a -= w;

    v <<=22;
    v |= w;
    a >>=22;

    // 29 Double bits. The mask must cover all 29 bits, otherwise the top bit
    // of this field (payload bit 50) is silently discarded.
    v <<=29;
    w = a & 0x1FFF_FFFF;
    v |= w;
    a -= w;
    a >>=29;

    static if (real.mant_dig == 53) { // double
        v |=0x7FF0_0000_0000_0000;
        real x;
        * cast(ulong *)(&x) = v;
        return x;
    } else {
        // 11 extended bits
        v <<=11;
        a &= 0x7FF;
        v |= a;
        // Start from real.nan; only the significand words are overwritten below.
        real x = real.nan;
        // Extended real bits
        static if (real.mant_dig==113) { //quadruple
          v<<=1; // there's no implicit bit
          // Overwrite the 8 bytes holding the top of the fraction.
          version(LittleEndian) {
            *cast(ulong*)(6+cast(ubyte*)(&x)) = v;
          } else {
            *cast(ulong*)(2+cast(ubyte*)(&x)) = v;
          }
        } else { // real80
            * cast(ulong *)(&x) = v;
        }
        return x;
    }
}
1717 
/**
 * Extract an integral payload from a $(NAN).
 *
 * x must be a $(NAN); this is checked with verify().
 *
 * Returns:
 * the integer payload as a ulong.
 *
 * The payload is decoded from the layout produced by NaN(); see there for the
 * largest payload each floating-point format can carry.
 */
ulong getNaNPayload(real x)
{
    verify(!!isNaN(x));
    // x_ptr is needed to create a separate alias to x
    // which the optimizer cannot see through
    // this will prevent an optimization which
    // will cause an ice in newer dmd versions
    auto x_ptr = &x;

    static if (real.mant_dig == 53) {
        ulong m = *cast(ulong *)(x_ptr);
        // Make it look like an 80-bit significand.
        // Skip exponent, and quiet bit
        m &= 0x0007_FFFF_FFFF_FFFF;
        // The 51 payload bits (0..50) must land on bits 11..61 so that the
        // masks below line up; that is a shift of 11.
        m <<= 11;
    } else static if (real.mant_dig==113) { // quadruple
        version(LittleEndian) {
            ulong m = *cast(ulong*)(6+cast(ubyte*)(x_ptr));
        } else {
            ulong m = *cast(ulong*)(2+cast(ubyte*)(x_ptr));
        }
        m>>=1; // there's no implicit bit
    } else {
        ulong m = *cast(ulong *)(x_ptr);
    }
    // ignore implicit bit and quiet bit
    // Bits 40..61 hold the 22 float bits: they become payload bits 0..21.
    ulong f = m & 0x3FFF_FF00_0000_0000L;
    ulong w = f >>> 40;
    // Bits 11..39 hold the 29 double bits: they become payload bits 22..50.
    w |= (m & 0x00FF_FFFF_F800L) << (22 - 11);
    // Bits 0..10 hold the extended bits: they become payload bits 51..61.
    w |= (m & 0x7FF) << 51;
    return w;
}
1760 
unittest {
    // A wide payload survives a round trip through a real; how much of it
    // survives depends on the width of real.
    real wide = NaN(0x789_ABCD_EF12_3456);
    static if (real.mant_dig == 64 || real.mant_dig==113) {
        test (getNaNPayload(wide) == 0x789_ABCD_EF12_3456);
    } else {
        test (getNaNPayload(wide) == 0x1_ABCD_EF12_3456);
    }

    // The checks on narrowed NaNs are disabled pending the compiler issue
    // referenced in each FIXME below.
    double asDouble = wide;
    // FIXME: https://issues.dlang.org/show_bug.cgi?id=13743
    //assert (getNaNPayload(asDouble) == 0x1_ABCD_EF12_3456);
    float asFloat = wide;
    // FIXME: https://issues.dlang.org/show_bug.cgi?id=13743
    //assert (getNaNPayload(asFloat) == 0x12_3456);
    wide = NaN(0xFABCD);
    // FIXME: https://issues.dlang.org/show_bug.cgi?id=13743
    //assert (getNaNPayload(wide) == 0xFABCD);
    asFloat = wide;
    // FIXME: https://issues.dlang.org/show_bug.cgi?id=13743
    //assert (getNaNPayload(asFloat) == 0xFABCD);
    asDouble = NaN(0x100_0000_0000_3456);
    // FIXME: https://issues.dlang.org/show_bug.cgi?id=13743
    //assert(getNaNPayload(asDouble) == 0x0000_0000_3456);
}