ocean.text.convert.Float source code

1 /*******************************************************************************
2 
3     A set of functions for converting between string and floating-
4     point values.
5 
6     Applying the D "import alias" mechanism to this module is highly
7     recommended, in order to limit namespace pollution:
8     ---
9     import Float = ocean.text.convert.Float;
10 
11     auto f = Float.parse ("3.14159");
12     ---
13 
14     Copyright:
15         Copyright (c) 2004 Kris Bell.
16         Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
17         All rights reserved.
18 
19     License:
20         Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
21         See LICENSE_TANGO.txt for details.
22 
23     Version:
24         Nov 2005: Initial release
25         Jan 2010: added internal ecvt()
26 
27     Authors: Kris
28 
29 ********************************************************************************/
30 
31 module ocean.text.convert.Float;
32 
33 import ocean.transition;
34 
35 import ocean.core.ExceptionDefinitions;
36 import ocean.math.IEEE;
37 import ocean.core.Verify;
38 
39 static import tsm = core.stdc.math;
40 static import Integer = ocean.text.convert.Integer_tango;
41 
// Floating-point type used for every value parsed or formatted in this
// module; aliased so the working precision is named in a single place.
private alias real NumType;
43 
44 /******************************************************************************
45 
46   optional math functions
47 
48  ******************************************************************************/
49 
// Long-double routines from the C math library, declared directly with C
// linkage. NOTE(review): this assumes D `real` has the same ABI as the C
// `long double` these functions take on the target platform -- confirm.
private extern (C)
{
    real log10l (real x);               // base-10 logarithm of x
    real ceill (real num);              // smallest integral value not less than num
    real modfl (real num, real *i);     // returns the fractional part, stores the integral part in *i
    real powl  (real base, real exp);   // base raised to the power exp
}
57 
58 /******************************************************************************
59 
60   Constants
61 
62  ******************************************************************************/
63 
// Default argument values shared by toString() and the format() overloads.
private enum
{
    Pad = 0,                // default for format()'s `pad`: 0 means trailing decimal zeros are not appended
    Dec = 2,                // default number of decimal places
    Exp = 10,               // default `e`: decimal exponent magnitude at which output switches to scientific notation
}
70 
71 /******************************************************************************
72 
73   Convert a formatted string of digits to a floating-point
74   number. Throws an exception where the input text is not
75   parsable in its entirety.
76 
77  ******************************************************************************/
78 
NumType toFloat(T) (T[] src)
{
    // number of leading elements of src that parse() consumed
    uint consumed;
    auto result = parse (src, &consumed);

    // the text is valid only when something was consumed and nothing
    // was left over afterwards
    auto complete = (consumed != 0) && (consumed >= src.length);
    if (! complete)
        throw new IllegalArgumentException ("Float.toFloat :: invalid number");

    return result;
}
88 
89 /******************************************************************************
90 
91   Template wrapper to make life simpler. Returns a text version
92   of the provided value.
93 
94   See format() for details
95 
96  ******************************************************************************/
97 
char[] toString (NumType d, uint decimals=Dec, int e=Exp)
{
    // Fixed-size stack scratch area; format() writes through a raw pointer
    // without checking the destination length, so the rendered text
    // (including its trailing terminator) has to fit in these 64 chars.
    char[64] scratch = void;

    auto text = format (scratch, d, decimals, e);

    // the slice refers to stack memory, so hand back a heap copy
    return text.dup;
}
104 
105 /******************************************************************************
106 
107   Truncate trailing '0' and '.' from a string, such that 200.000
108   becomes 200, and 20.10 becomes 20.1
109 
110   Returns a potentially shorter slice of what you give it.
111 
112  ******************************************************************************/
113 
T[] truncate(T) (T[] s)
{
    // Locate the first decimal point. Text without one is returned as-is.
    // Indices are size_t throughout, so no narrowing conversion to int is
    // needed (the previous `int i = s.length` fails to compile on 64-bit).
    size_t dot = 0;
    while (dot < s.length && s[dot] != '.')
        ++dot;
    if (dot == s.length)
        return s;

    // Anything carrying an exponent after the point is left untouched:
    // stripping zeros there would change the value ("1.5e10" -> "1.5e1").
    // Both 'e' and 'E' are recognised, wherever they sit after the point.
    foreach (c; s[dot + 1 .. $])
        if (c == 'e' || c == 'E')
            return s;

    // Drop trailing '0' characters, never moving left of the point ...
    size_t end = s.length;
    while (end > dot + 1 && s[end - 1] == '0')
        --end;

    // ... and drop a '.' that is left dangling at the end.
    if (s[end - 1] == '.')
        --end;

    return s[0 .. end];
}
139 
140 /******************************************************************************
141 
142   Extract a sign-bit
143 
144  ******************************************************************************/
145 
private bool negative (NumType x)
{
    static if (NumType.sizeof is 4)
    {
        // 4-byte representation: the sign is the top bit of the 32-bit word
        auto bits = *cast(uint *) &x;
        return (bits >> 31) != 0;
    }
    else static if (NumType.sizeof is 8)
    {
        // 8-byte representation: the sign is the top bit of the 64-bit word
        auto bits = *cast(ulong *) &x;
        return (bits >> 63) != 0;
    }
    else
    {
        // Any other size: test the top bit of byte index 9.
        // NOTE(review): presumably targets the 80-bit extended format stored
        // little-endian, where byte 9 holds the sign -- confirm for other
        // layouts of `real`.
        auto bytes = cast(ubyte *) &x;
        return bytes[9] >= 0x80;
    }
}
159 
160 
161 /*******************************************************************************
162 
163     Format a floating-point value according to a format string
164 
165     Defaults to 2 decimal places and 10 exponent, as the other format overload
166     does.
167 
168     Format specifiers (additive unless stated otherwise):
169         '.' = Do not pad
170         'e' or 'E' = Display exponential notation
171         Any number = Set the decimal precision
172 
173     Params:
174         T      = character type
175         V      = Floating point type
176         output = Where to write the string to - expected to be large enough
177         v      = Number to format
178         fmt    = Format string, see this function's description
179 
180     Returns:
181         A const reference to `output`
182 
183 *******************************************************************************/
184 
public Const!(T)[] format (T, V) (T[] output, V v, in T[] fmt)
{
    static assert(is(V : Const!(real)),
                  "Float.format only support floating point types or types that"
                  ~ " implicitly convert to them");

    int dec = Dec;      // decimal places, overridden by digits in fmt
    int exp = Exp;      // scientific-notation threshold, 0 forces it on
    bool pad = true;    // pad with trailing zeros unless fmt contains '.'

    // Walk the format string by index so that every access, including the
    // one-character look-ahead below, stays inside fmt.
    for (size_t i = 0; i < fmt.length; ++i)
    {
        Unqual!(T) c = fmt[i];
        switch (c)
        {
        case '.':
            pad = false;
            break;
        case 'e':
        case 'E':
            exp = 0;
            break;
        default:
            if (c >= '0' && c <= '9')
            {
                dec = c - '0';

                // A second digit extends the precision to two digits. Only
                // look at it when it exists: the former `p[1]` read one
                // element past the end when the digit was the last char.
                if (i + 1 < fmt.length)
                {
                    Unqual!(T) next = fmt[i + 1];
                    if (next >= '0' && next <= '9')
                    {
                        dec = dec * 10 + (next - '0');
                        ++i;
                    }
                }
            }
            break;
        }
    }

    return format!(T)(output, v, dec, exp, pad);
}
218 
219 /******************************************************************************
220 
221   Convert a floating-point number to a string.
222 
223   The e parameter controls the number of exponent places emitted,
224   and can thus control where the output switches to the scientific
225   notation. For example, setting e=2 for 0.01 or 10.0 would result
226   in normal output. Whereas setting e=1 would result in both those
227   values being rendered in scientific notation instead. Setting e
228   to 0 forces that notation on for everything. Parameter pad will
229   append trailing '0' decimals when set ~ otherwise trailing '0's
230   will be elided
231 
232  ******************************************************************************/
233 
T[] format(T) (T[] dst, NumType x, int decimals=Dec, int e=Exp, bool pad=Pad)
{
    // Renders x into dst and returns the slice of dst that was written.
    //
    // dst is written through a raw pointer with no length checks, and one
    // extra terminating zero is stored after the returned text, so dst has
    // to be large enough for the whole rendering plus that terminator.

    Const!(char)*  str;
    int       exp,
              sign,
              mode=5;           // 5 = plain decimal output, 2 = scientific
    char[32]  buf = void;       // digit scratch space filled by convertl()

    // test exponent to determine mode
    exp = (x == 0) ? 1 : cast(int) log10l (x < 0 ? -x : x);
    if (exp <= -e || exp >= e)
    {
        // scientific: one digit before the point plus `decimals` after it
        mode = 2;
        ++decimals;
    }

    str = convertl (buf.ptr, x, decimals, &exp, &sign, mode is 5);

    auto p = dst.ptr;
    if (sign)
        *p++ = '-';

    if (exp is 9999)
    {
        // convertl() reports nan/inf through this sentinel exponent and
        // returns the text to emit verbatim
        while (*str)
            *p++ = *str++;
    }
    else
    {
        if (mode is 2)
        {
            // convertl() yields digits for 0.ddd * 10^exp; moving the first
            // digit in front of the point lowers the exponent by one
            --exp;
            *p++ = *str++;
            if (*str || pad)
            {
                auto d = p;
                *p++ = '.';
                while (*str)
                    *p++ = *str++;
                if (pad)
                    while (p-d < decimals)
                        *p++ = '0';
            }
            *p++ = 'e';
            if (exp < 0)
            {
                *p++ = '-';
                exp = -exp;
            }
            else
                *p++ = '+';

            // At least two exponent digits are always written. Once a
            // higher-order digit has been emitted every lower digit must
            // follow, even when it is zero: previously 1e1050 lost its
            // hundreds digit and came out as "e+150".
            bool leading = false;
            if (exp >= 1000)
            {
                *p++ = cast(T) (exp / 1000 + '0');
                exp %= 1000;
                leading = true;
            }
            if (leading || exp >= 100)
            {
                *p++ = cast(T) (exp / 100 + '0');
                exp %= 100;
            }
            *p++ = cast(T) (exp / 10 + '0');
            *p++ = cast(T) (exp % 10 + '0');
        }
        else
        {
            // integer part: a lone '0', or `exp` digits (zero-filled when
            // the digit string runs out first)
            if (exp <= 0)
                *p++ = '0';
            else
                for (; exp > 0; --exp)
                    *p++ = (*str) ? *str++ : '0';
            if (*str || pad)
            {
                *p++ = '.';
                auto d = p;
                // leading fractional zeros for values below 0.1
                for (; exp < 0; ++exp)
                    *p++ = '0';
                while (*str)
                    *p++ = *str++;
                if (pad)
                    while (p-d < decimals)
                        *p++ = '0';
            }
        }
    }

    // stuff a C terminator in there too ...
    *p = 0;
    return dst[0..(p - dst.ptr)];
}
316 
317 
318 /******************************************************************************
319 
320   ecvt() and fcvt() for 80bit FP, which DMD does not include. Based
321   upon the following:
322 
323   Copyright (c) 2009 Ian Piumarta
324 
325   All rights reserved.
326 
327   Permission is hereby granted, free of charge, to any person
328   obtaining a copy of this software and associated documentation
329   files (the 'Software'), to deal in the Software without restriction,
330   including without limitation the rights to use, copy, modify, merge,
331   publish, distribute, and/or sell copies of the Software, and to permit
332   persons to whom the Software is furnished to do so, provided that the
333   above copyright notice(s) and this permission notice appear in all
334   copies of the Software.
335 
336  ******************************************************************************/
337 
// Produces the decimal digit string for `value`.
//
// Params:
//   buf    = scratch area the digits are written to (format() passes 32 chars)
//   value  = number to convert
//   ndigit = requested digit count; when fflag is set the decimal exponent
//            is added to it, otherwise it is used as given
//   decpt  = receives the decimal exponent of the digit string, or 9999
//            for nan / infinity
//   sign   = receives non-zero when value has its sign bit set
//   fflag  = non-zero selects the mode in which ndigit is extended by the
//            decimal exponent (NOTE(review): presumably the fcvt()-style
//            mode, the other being ecvt()-style -- confirm)
//
// Returns: pointer to a zero-terminated digit string, either inside buf or
// a string literal ("nan", "inf", or an empty string).
private Const!(char)* convertl (char* buf, real value, int ndigit,
    int *decpt, int *sign, int fflag)
{
    // record the sign and continue with the magnitude only
    if ((*sign = negative(value)) != 0)
        value = -value;

    // 9999 is the sentinel format() tests for to emit the text verbatim
    *decpt = 9999;
    if (tsm.isnan(value))
        return "nan\0".ptr;

    if (isInfinity(value))
        return "inf\0".ptr;

    // initial estimate of the decimal exponent, clamped at -4931
    int exp10 = (value == 0) ? !fflag : cast(int) ceill(log10l(value));
    if (exp10 < -4931)
        exp10 = -4931;
    // scale by the estimate, then correct it until value is in [0.1, 1.0)
    value *= powl (10.0, -exp10);
    if (value)
    {
        while (value <  0.1) { value *= 10;  --exp10; }
        while (value >= 1.0) { value /= 10;  ++exp10; }
    }
    verify(isZero(value) || (0.1 <= value && value < 1.0));
    //auto zero = pad ? int.max : 1;
    // `zero` is the highest buffer index whose digit is kept as '0' rather
    // than being cut off by the rounding / stripping code below
    auto zero = 1;
    if (fflag)
    {
        // if (! pad)
        zero = exp10;
        // every requested digit lies below the value's first significant
        // digit: return an empty digit string
        if (ndigit + exp10 < 0)
        {
            *decpt= -ndigit;
            return "\0".ptr;
        }
        ndigit += exp10;
    }
    *decpt = exp10;
    // digits are stored from buf[1]; buf[0] stays free for a carry digit
    int ptr = 1;

    // never generate more digits than `real` can represent
    if (ndigit > real.dig)
        ndigit = real.dig;
    //printf ("< flag %d, digits %d, exp10 %d, decpt %d\n", fflag, ndigit, exp10, *decpt);
    // peel off one decimal digit per iteration
    while (ptr <= ndigit)
    {
        real i = void;
        value = modfl (value * 10, &i);
        buf [ptr++]= cast(char) ('0' + cast(int) i);
    }

    // round on the remainder: either propagate a carry leftwards from the
    // last digit, or cut off trailing '0' digits above index `zero`
    if (value >= 0.5)
        while (--ptr && ++buf[ptr] > '9')
            buf[ptr] = (ptr > zero) ? '\0' : '0';
    else
        for (auto i=ptr; i && --i > zero && buf[i] is '0';)
            buf[i] = '\0';

    // ptr is still non-zero unless the carry ran off the front of the digits
    if (ptr)
    {
        buf [ndigit + 1] = '\0';
        return buf + 1;
    }
    // the carry overflowed every digit: the result gains a leading '1' in
    // buf[0] and the decimal exponent grows by one
    if (fflag)
    {
        ++ndigit;
    }
    buf[0]= '1';
    ++*decpt;
    buf[ndigit]= '\0';
    return buf;
}
408 
409 
410 /******************************************************************************
411 
412   Convert a formatted string of digits to a floating-point number.
413   Good for general use, but use David Gay's dtoa package if serious
414   rounding adjustments should be applied.
415 
416  ******************************************************************************/
417 
NumType parse(T) (in T[] src, uint* ate=null)
{
    // Parses as much of src as forms a number and returns its value. When
    // `ate` is given it receives the count of elements consumed, except on
    // the early "empty input" return, which leaves it untouched.

    T           c;
    Const!(T)*  p;
    int         exp;
    bool        sign;
    uint        radix;
    NumType     value = 0.0;

    // case-insensitive comparison of the text at aa with the lower-case
    // pattern bb; callers guarantee bb.length elements are readable
    static bool match (Const!(T)* aa, in T[] bb)
    {
        foreach (b; bb)
        {
            T a = *aa++;
            if (a >= 'A' && a <= 'Z')
                a += 'a' - 'A';
            if (a != b)
                return false;
        }
        return true;
    }

    // remove leading space, and sign
    p = src.ptr + Integer.trim (src, sign, radix);

    // bail out if the string is empty
    if (src.length == 0 || p > &src[$-1])
        return NumType.nan;
    c = *p;

    // handle non-decimal representations
    if (radix != 10)
    {
        long v = Integer.parse (src, radix, ate);
        return cast(NumType) v;
    }

    // set begin and end checks
    auto begin = p;
    auto end = src.ptr + src.length;

    // Each time the cursor advances, the next character is only fetched
    // when it lies inside src; at the end a zero stands in for it. The
    // previous `c = *++p` dereferenced one element past the slice whenever
    // the number ran up to the end of the input.

    // read leading digits; note that leading
    // zeros are simply multiplied away
    while (c >= '0' && c <= '9' && p < end)
    {
        value = value * 10 + (c - '0');
        c = (++p < end) ? *p : cast(T) 0;
    }

    // gobble up the point
    if (c is '.' && p < end)
        c = (++p < end) ? *p : cast(T) 0;

    // read fractional digits; note that we accumulate
    // all digits ... very long numbers impact accuracy
    // to a degree, but perhaps not as much as one might
    // expect. A prior version limited the digit count,
    // but did not show marked improvement. For maximum
    // accuracy when reading and writing, use David Gay's
    // dtoa package instead
    while (c >= '0' && c <= '9' && p < end)
    {
        value = value * 10 + (c - '0');
        c = (++p < end) ? *p : cast(T) 0;
        --exp;
    }

    // did we get something?
    if (p > begin)
    {
        // parse base10 exponent?
        if ((c is 'e' || c is 'E') && p < end )
        {
            uint eaten;
            // step over the 'e'; p is at most `end` here, so the slice
            // below is valid (possibly empty)
            ++p;
            exp += Integer.parse (src[(p - src.ptr) .. $], 0, &eaten);
            p += eaten;
        }

        // adjust mantissa; note that the exponent has
        // already been adjusted for fractional digits
        if (exp < 0)
            value /= pow10 (-exp);
        else
            value *= pow10 (exp);
    }
    else
    {
        // no digits at all: accept "inf", "infinity" or "nan" in any case
        if (end - p >= 3)
        {
            switch (*p)
            {
                case 'I': case 'i':
                    if (match (p+1, "nf"))
                    {
                        value = value.infinity;
                        p += 3;
                        if (end - p >= 5 && match (p, "inity"))
                            p += 5;
                    }
                    break;

                case 'N': case 'n':
                    if (match (p+1, "an"))
                    {
                        value = value.nan;
                        p += 3;
                    }
                    break;
                default:
                    break;
            }
        }
    }

    // set parse length, and return value
    if (ate)
    {
        ptrdiff_t diff = p - src.ptr;
        verify (diff >= 0 && diff <= uint.max);
        *ate = cast(uint) diff;
    }

    if (sign)
        value = -value;
    return value;
}
544 
545 /******************************************************************************
546 
547   Internal function to convert an exponent specifier to a floating
548   point value.
549 
550  ******************************************************************************/
551 
private NumType pow10 (uint exp)
{
    // Powers[i] holds ten raised to the power 2^i, so the result can be
    // assembled from the binary digits of exp
    static NumType[] Powers = [
        1.0e1L,
        1.0e2L,
        1.0e4L,
        1.0e8L,
        1.0e16L,
        1.0e32L,
        1.0e64L,
        1.0e128L,
        1.0e256L,
        1.0e512L,
        1.0e1024L,
        1.0e2048L,
        1.0e4096L,
        1.0e8192L,
    ];

    if (exp >= 16384)
        throw new IllegalArgumentException ("Float.pow10 :: exponent too large");

    // multiply in the table entry for every set bit, lowest bit first,
    // stopping as soon as no set bits remain
    NumType result = 1.0;
    for (size_t bit = 0; exp != 0 && bit < Powers.length; ++bit, exp >>= 1)
    {
        if (exp & 1)
            result *= Powers[bit];
    }
    return result;
}
584 
585 /******************************************************************************
586 
587  ******************************************************************************/
588 
// Manual smoke test, compiled only when the `Float` debug identifier is
// enabled. It prints a series of formatted values to the console for visual
// inspection; nothing is asserted.
debug (Float)
{
    import ocean.io.Console;

    void main()
    {
        // shared output buffer reused by every format() call below
        char[500] tmp;
        /+
            Cout (format(tmp, NumType.max)).newline;
        Cout (format(tmp, -NumType.nan)).newline;
        Cout (format(tmp, -NumType.infinity)).newline;
        Cout (format(tmp, toFloat("nan"w))).newline;
        Cout (format(tmp, toFloat("-nan"d))).newline;
        Cout (format(tmp, toFloat("inf"))).newline;
        Cout (format(tmp, toFloat("-inf"))).newline;
        +/
            // parsing round trips and small values with default settings
            Cout (format(tmp, toFloat ("0.000000e+00"))).newline;
        Cout (format(tmp, toFloat("0x8000000000000000"))).newline;
        Cout (format(tmp, 1)).newline;
        Cout (format(tmp, -0)).newline;
        Cout (format(tmp, 0.000001)).newline.newline;

        // positive values with varying decimals / exponent thresholds
        Cout (format(tmp, 3.14159, 6, 0)).newline;
        Cout (format(tmp, 3.0e10, 6, 3)).newline;
        Cout (format(tmp, 314159, 6)).newline;
        Cout (format(tmp, 314159123213, 6, 15)).newline;
        Cout (format(tmp, 3.14159, 6, 2)).newline;
        Cout (format(tmp, 3.14159, 3, 2)).newline;
        Cout (format(tmp, 0.00003333, 6, 2)).newline;
        Cout (format(tmp, 0.00333333, 6, 3)).newline;
        Cout (format(tmp, 0.03333333, 6, 2)).newline;
        Cout.newline;

        // the same set of values, negated
        Cout (format(tmp, -3.14159, 6, 0)).newline;
        Cout (format(tmp, -3e100, 6, 3)).newline;
        Cout (format(tmp, -314159, 6)).newline;
        Cout (format(tmp, -314159123213, 6, 15)).newline;
        Cout (format(tmp, -3.14159, 6, 2)).newline;
        Cout (format(tmp, -3.14159, 2, 2)).newline;
        Cout (format(tmp, -0.00003333, 6, 2)).newline;
        Cout (format(tmp, -0.00333333, 6, 3)).newline;
        Cout (format(tmp, -0.03333333, 6, 2)).newline;
        Cout.newline;

        // rounding at the boundary, extremes, and a format/parse round trip
        Cout (format(tmp, -0.9999999, 7, 3)).newline;
        Cout (format(tmp, -3.0e100, 6, 3)).newline;
        Cout ((format(tmp, 1.0, 6))).newline;
        Cout ((format(tmp, 30, 6))).newline;
        Cout ((format(tmp, 3.14159, 6, 0))).newline;
        Cout ((format(tmp, 3e100, 6, 3))).newline;
        Cout ((format(tmp, 314159, 6))).newline;
        Cout ((format(tmp, 314159123213.0, 3, 15))).newline;
        Cout ((format(tmp, 3.14159, 6, 2))).newline;
        Cout ((format(tmp, 3.14159, 4, 2))).newline;
        Cout ((format(tmp, 0.00003333, 6, 2))).newline;
        Cout ((format(tmp, 0.00333333, 6, 3))).newline;
        Cout ((format(tmp, 0.03333333, 6, 2))).newline;
        Cout (format(tmp, NumType.min, 6)).newline;
        Cout (format(tmp, -1)).newline;
        Cout (format(tmp, toFloat(format(tmp, -1)))).newline;
        Cout.newline;
    }
}