ocean.text.convert.Integer source code

1 /*******************************************************************************
2 
3     A set of functions for converting strings to integer values.
4 
5     This module is adapted from ocean.text.convert.Integer_tango. The functions have
6     been modified so that they do not throw exceptions, instead denoting errors
7     via their bool return value. This is more efficient and avoids the tango
8     style of always throwing new Exceptions upon error.
9 
10     Copyright:
11         Copyright (c) 2004 Kris Bell.
12         Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
13         All rights reserved.
14 
15     License:
16         Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
17         See LICENSE_TANGO.txt for details.
18 
19     Version:
20         Initial release: Nov 2005
21         Ocean adaptation: July 2012
22 
23     Authors: Kris Bell, Gavin Norman
24 
25 *******************************************************************************/
26 
27 module ocean.text.convert.Integer;
28 
29 import ocean.transition;
30 
31 import ocean.meta.traits.Basic;
32 
33 import ocean.core.array.Search : find;
34 
35 import ocean.math.Math;
36 
37 import ocean.core.Verify;
38 
39 
/*******************************************************************************

    Generic entry point: parses an integer of type T from a string.

    Signed types are handled by toSignedInteger and unsigned types by
    toUnsignedInteger. Both recognise leading whitespace, a sign and an
    optional radix prefix. If a non-zero radix is passed, a prefix is only
    consumed when it denotes that same radix. A radix of zero means "use the
    prefix if there is one, otherwise decimal".

    Params:
        C = character type of the string
        T = integer type to parse (one of byte, ubyte, short, ushort, int,
            uint, long or ulong; anything else is a compile-time error)
        digits = string to parse
        value = receives the parsed integer
        radix = radix to interpret the string in (0 = auto / decimal)

    Returns:
        true if parsing succeeded

*******************************************************************************/

public bool toInteger ( C, T ) ( C[] digits, out T value, uint radix = 0 )
{
    static if (is(T == byte) || is(T == short) || is(T == int)
        || is(T == long))
    {
        // All signed widths share one implementation
        return toSignedInteger(digits, value, radix);
    }
    else static if (is(T == ubyte) || is(T == ushort) || is(T == uint)
        || is(T == ulong))
    {
        // All unsigned widths share one implementation
        return toUnsignedInteger(digits, value, radix);
    }
    else
    {
        static assert(false, "toInteger: T must be one of {byte, ubyte, short, "
                    ~ "ushort, int, uint, long, ulong}, not " ~ T.stringof);
    }
}
103 
104 
/*******************************************************************************

    Fixed-type convenience wrappers: each parses a string into one specific
    integer type by forwarding to the shared signed or unsigned parser.

    Leading whitespace, a sign and an optional radix prefix are recognised. If
    a non-zero radix is passed, a prefix is only consumed when it denotes that
    same radix. A radix of zero means "use the prefix if there is one,
    otherwise decimal".

    Params:
        T = character type of the string
        digits = string to parse
        value = receives the parsed integer
        radix = radix to interpret the string in (0 = auto / decimal)

    Returns:
        true if parsing succeeded

*******************************************************************************/

public bool toByte ( T ) ( T[] digits, out byte value, uint radix = 0 )
{
    return toSignedInteger!(T, byte)(digits, value, radix);
}

/// Ditto
public bool toUbyte ( T ) ( T[] digits, out ubyte value, uint radix = 0 )
{
    return toUnsignedInteger!(T, ubyte)(digits, value, radix);
}

/// Ditto
public bool toShort ( T ) ( T[] digits, out short value, uint radix = 0 )
{
    return toSignedInteger!(T, short)(digits, value, radix);
}

/// Ditto
public bool toUshort ( T ) ( T[] digits, out ushort value, uint radix = 0 )
{
    return toUnsignedInteger!(T, ushort)(digits, value, radix);
}

/// Ditto
public bool toInt ( T ) ( T[] digits, out int value, uint radix = 0 )
{
    return toSignedInteger!(T, int)(digits, value, radix);
}

/// Ditto
public bool toUint ( T ) ( T[] digits, out uint value, uint radix = 0 )
{
    return toUnsignedInteger!(T, uint)(digits, value, radix);
}

/// Ditto
public bool toLong ( T ) ( T[] digits, out long value, uint radix = 0 )
{
    return toSignedInteger!(T, long)(digits, value, radix);
}

/// Ditto
public bool toUlong ( T ) ( T[] digits, out ulong value, uint radix = 0 )
{
    return toUnsignedInteger!(T, ulong)(digits, value, radix);
}
171 
172 
/*******************************************************************************

    Parses a non-negative decimal number given as a string ("123.456")
    directly into a fixed-point integer value.

    The result is the integer part multiplied by 10 ^^ decimal_points plus the
    first decimal_points digits of the fractional part (padded with zeros on
    the right when the string has fewer). For example:

    "1.123" -> 1123  (decimal_points = 3)
    "0.01"  ->   10  (decimal_points = 3)

    Fractional characters beyond the requested amount of decimal points are
    cut off without being inspected:

    "1.2345"  ->  123 (decimal_points = 2)
    "10.2030" -> 1020 (decimal_points = 2)

    "1.2345"  ->  1 (decimal_points = 0)
    "10.2030" -> 10 (decimal_points = 0)

    The string is split at the first '.'. The integer part is parsed with
    toUlong (so it must be present and non-negative). The fractional digits
    that are kept must all be decimal digits ('0' .. '9'): a sign, whitespace
    or a radix prefix after the decimal point makes parsing fail.

    Parsing also fails, rather than silently wrapping around, if the scaled
    result does not fit into ulong or into T.

    Params:
        T              = type of the integer
        float_str      = floating point number string to parse
        value          = out parameter containing the result; left at its
                         initial value (0) if parsing fails
        decimal_points = amount of decimal points to consider (at most 16,
                         enforced via verify)

    Returns:
        true if the parsing was successful, else false

*******************************************************************************/

public bool floatStringToInt ( T = ulong ) ( cstring float_str, out T value,
                                             size_t decimal_points = 0 )
{
    static immutable MaxDecimal = 16;

    verify(decimal_points <= MaxDecimal);

    // Split string at the first '.'; find() yields the length if absent
    auto idx = find(float_str, '.');

    cstring int_str = float_str[0 .. idx];
    cstring frac_str;

    if (idx < float_str.length)
        frac_str = float_str[idx + 1 .. $];

    // Cut off if too long
    if (frac_str.length > decimal_points)
        frac_str = frac_str[0 .. decimal_points];

    // All arithmetic is done in ulong and range-checked against T at the end,
    // so that the function also works for T other than ulong.
    ulong int_value;

    if (!toUlong(int_str, int_value))
        return false;

    // At most MaxDecimal (16) digits are accumulated here, which always fits
    // into a ulong.
    ulong frac_value = 0;

    foreach (c; frac_str)
    {
        if (c < '0' || c > '9')
            return false;

        frac_value = frac_value * 10 + cast(ulong) (c - '0');
    }

    // Fill with zeros if too short
    foreach (unused; frac_str.length .. decimal_points)
        frac_value *= 10;

    // 10 ^^ decimal_points; at most 10 ^^ 16, which fits into a ulong
    ulong multiplier = 1;

    foreach (unused; 0 .. decimal_points)
        multiplier *= 10;

    if (int_value > ulong.max / multiplier)
        return false; // multiplication overflow

    ulong total = int_value * multiplier;

    if (ulong.max - total < frac_value)
        return false; // addition overflow

    total += frac_value;

    if (total > cast(ulong) T.max)
        return false; // does not fit into the requested type

    value = cast(T) total;

    return true;
}
255 
256 version ( UnitTest )
257 {
258     import ocean.core.Test;
259 }
260 
// Checks floatStringToInt against a table of inputs that must parse to a known
// value, followed by a list of malformed inputs that must be rejected.
unittest
{
    static struct Case
    {
        cstring input;
        ulong   expected;
        size_t  decimals;
    }

    Case[] accepted = [
        // three decimal places
        Case("0.16",   160, 3),
        Case("0.59",   590, 3),
        Case("3.29", 3_290, 3),
        Case("0.16",   160, 3),
        Case("4.00", 4_000, 3),
        Case("3.5993754486719", 3_599, 3),
        Case("0.99322729901677", 993, 3),
        Case("1.05", 1_050, 3),
        Case("0.5",  500, 3),
        Case("2",    2_000, 3),

        // no decimal places: the fraction is dropped entirely
        Case("2",        2, 0),
        Case("2.1",      2, 0),
        Case("2.123",    2, 0),
        Case("2.123456", 2, 0),

        // two decimal places
        Case("0.1", 10, 2),
        Case("1.1", 110, 2),
        Case("1",  100, 2),
        Case("01", 100, 2),
        Case("10.10",  1010, 2),
        Case("225.04", 22504, 2),
        Case("225.100000000000004", 22510, 2),
        Case("225.000000000000004", 22500, 2),
        Case("225.009999", 22500, 2)
    ];

    foreach (c; accepted)
    {
        ulong parsed;
        test(floatStringToInt(c.input, parsed, c.decimals));
        test!("==")(parsed, c.expected);
    }

    // Second decimal point or ',' used as separator: must be rejected
    cstring[] rejected = [
        "225.0.09999",
        "10,10",
        "0,1",
        "1,1",
        "6,6"
    ];

    foreach (bad; rejected)
    {
        ulong parsed;
        test(!floatStringToInt(bad, parsed, 2));
    }
}
303 
304 
/*******************************************************************************

    Parse a signed integer value from the provided string.

    The string is inspected (via trim) for leading whitespace, a sign and an
    optional radix prefix. A radix may be provided as an argument instead,
    whereupon it must match the prefix (where present). When radix is set to
    zero and no prefix is present, conversion will default to decimal.

    Types narrower than long are parsed as long first and then range-checked.

    Parsing fails if the string contains no digits, if anything is left over
    after the digits, or if the value does not fit into I. A negative zero
    ("-0") is accepted and yields 0.

    Params:
        T = char type of string
        I = type of integer to extract
        digits = string to parse
        value = receives parsed integer
        radix = specifies which radix to interpret the string as

    Returns:
        true if parsing succeeded

*******************************************************************************/

private bool toSignedInteger ( T, I ) ( T[] digits, out I value, uint radix = 0 )
{
    static assert(isSignedIntegerType!(I), "I must be signed integer type.");
    static assert(I.max <= long.max, "I cannot be larger than long.");

    static if (I.max < long.max)
    {
        long long_value;
        if ( !toSignedInteger(digits, long_value, radix) )
        {
            return false;
        }

        if ( long_value > I.max || long_value < I.min )
        {
            return false;
        }

        value = cast(I) long_value;
        return true;
    }
    else
    {
        static assert(is(I == long),
                      "Largest signed integer type should be long.");

        // Largest magnitude a negative long can have: |long.min| == 2 ^^ 63
        static immutable ulong max_negative_magnitude =
            cast(ulong) long.max + 1;

        bool negative;
        uint len;
        ulong x;

        auto trimmed = trim(digits, negative, radix);

        // false means the magnitude does not even fit into a ulong
        if (!convert(digits[trimmed..$], x, len, radix))
        {
            return false;
        }

        // no digits at all, or trailing characters after the digits
        if (len == 0 || trimmed + len < digits.length)
        {
            return false;
        }

        // Compare magnitudes in the unsigned domain. (Comparing the negated
        // ulong against long.min would be an unsigned comparison as well and
        // would wrongly reject a magnitude of 0, i.e. "-0".)
        auto limit = negative ? max_negative_magnitude : cast(ulong) long.max;

        if (x > limit)
        {
            return false;
        }

        // Unsigned negation wraps; for x == 2 ^^ 63 the cast yields long.min
        value = cast(long)(negative ? -x : x);
        return true;
    }
}
373 
374 
/*******************************************************************************

    Parse an unsigned integer value from the provided string.

    Leading whitespace, a sign and an optional radix prefix are stripped by
    trim. A string carrying a '-' sign is always rejected. If a non-zero radix
    is passed, a prefix is only consumed when it denotes that same radix; a
    radix of zero means "use the prefix if there is one, otherwise decimal".

    Types narrower than ulong are parsed as ulong first and then checked
    against the upper limit of U.

    Params:
        T = char type of string
        U = type of unsigned integer to extract
        digits = string to parse
        value = receives parsed unsigned integer
        radix = specifies which radix to interpret the string as

    Returns:
        true if parsing succeeded

*******************************************************************************/

private bool toUnsignedInteger ( T, U ) ( T[] digits, out U value, uint radix = 0 )
{
    static assert(isUnsignedIntegerType!(U), "U must be unsigned integer type.");

    static if (U.max >= ulong.max)
    {
        static assert(is(U == ulong),
                      "Largest unsigned integer type should be ulong.");

        bool has_minus;
        auto skipped = trim(digits, has_minus, radix);

        if (has_minus)
            return false;

        uint consumed;
        ulong parsed;

        // The result of convert is not inspected: on overflow it stops before
        // counting the offending digit, which the length check below catches.
        convert(digits[skipped..$], parsed, consumed, radix);

        auto no_digits = consumed == 0;
        auto has_trailing = skipped + consumed < digits.length;

        if (no_digits || has_trailing)
            return false;

        value = parsed;
        return true;
    }
    else
    {
        // Narrow type: parse at full width, then check the upper bound (an
        // unsigned value can never fall below the lower bound of 0).
        ulong wide;

        if (!toUnsignedInteger(digits, wide, radix) || wide > U.max)
            return false;

        value = cast(U) wide;
        return true;
    }
}
441 
442 
/*******************************************************************************

    Accumulates the leading run of digits of 'digits' into an unsigned value,
    without looking for whitespace, a sign or a radix prefix.

    '0' .. '9' count as 0 .. 9, and letters of either case count as 10 .. 35.
    Scanning stops (successfully) at the first character which is not a valid
    digit in the given radix.

    Parsing fails (returning false) as soon as the accumulated value would no
    longer fit into a ulong; in that case the digit which caused the overflow
    is not counted in 'eaten'.

    Params:
        T = char type of string
        digits = string to parse
        value = receives parsed integer
        eaten = receives the number of characters parsed
        radix = specifies which radix to interpret the string as (defaults to
            decimal)

    Returns:
        true if no overflow occurred

*******************************************************************************/

private bool convert ( T ) ( T[] digits, out ulong value, out uint eaten,
    uint radix = 10 )
{
    foreach (ch; digits)
    {
        uint digit;

        // Map the character to its numeric digit value
        if (ch >= '0' && ch <= '9')
        {
            digit = cast(uint) (ch - '0');
        }
        else if (ch >= 'a' && ch <= 'z')
        {
            digit = cast(uint) (ch - 'a') + 10;
        }
        else if (ch >= 'A' && ch <= 'Z')
        {
            digit = cast(uint) (ch - 'A') + 10;
        }
        else
        {
            break; // not a digit in any radix
        }

        if (digit >= radix)
        {
            break; // not a digit in this radix
        }

        if (value > 0 && radix > ulong.max / value)
        {
            return false; // multiplication overflow
        }
        value *= radix;

        if (ulong.max - value < digit)
        {
            return false; // addition overflow
        }
        value += digit;

        ++eaten;
    }

    return true;
}
501 
502 
/*******************************************************************************

    Skips leading whitespace (' ' and '\t') and +/- sign characters, then
    inspects the string for an optional radix prefix ("0x", "0b" or "0o", in
    either case).

    Whitespace and sign characters may be mixed freely; the last sign seen
    determines 'negative'. 'negative' is left untouched if no sign is present.

    Radix handling:
        - radix is zero and a prefix is present: the prefix is consumed and
          radix is set to 16, 2 or 8 accordingly;
        - radix is zero and no prefix is present: radix is set to 10;
        - radix is non-zero and matches the prefix: the prefix is consumed;
        - radix is non-zero and does not match the prefix: the prefix is left
          unconsumed and radix is unchanged.

    For an empty string nothing is consumed and radix is left unchanged.

    The string is only ever accessed by index within its bounds; no element
    past the end is read, even if it consists solely of whitespace and signs.

    Params:
        T = char type of string
        digits = string to parse
        negative = set to true if the string indicates a negative number
        radix = receives the radix parsed from the string

    Returns:
        the number of characters consumed

*******************************************************************************/

private ptrdiff_t trim ( T ) ( T[] digits, ref bool negative, ref uint radix )
{
    if (digits.length == 0)
    {
        return 0;
    }

    size_t pos = 0;

    // strip off whitespace and sign characters
    scan: for (; pos < digits.length; ++pos)
    {
        switch (digits[pos])
        {
            case ' ':
            case '\t':
                break;

            case '-':
                negative = true;
                break;

            case '+':
                negative = false;
                break;

            default:
                break scan;
        }
    }

    // look for a radix specifier: '0' followed by one of x, b, o
    uint   prefix_radix = radix;
    size_t prefix_len = 0;

    if (digits.length - pos > 1 && digits[pos] == '0')
    {
        switch (digits[pos + 1])
        {
            case 'x':
            case 'X':
                prefix_radix = 16;
                prefix_len = 2;
                break;

            case 'b':
            case 'B':
                prefix_radix = 2;
                prefix_len = 2;
                break;

            case 'o':
            case 'O':
                prefix_radix = 8;
                prefix_len = 2;
                break;

            default:
                break;
        }
    }

    if (prefix_radix == 0)
    {
        // no explicit radix and no prefix: default the radix to 10
        radix = 10;
    }
    else if (radix == 0)
    {
        // no explicit radix: adopt the one given by the prefix
        radix = prefix_radix;
        pos += prefix_len;
    }
    else if (radix == prefix_radix)
    {
        // explicit radix agrees with the prefix (or there is no prefix, in
        // which case prefix_len is 0)
        pos += prefix_len;
    }
    // else: explicit radix conflicts with the prefix, which stays unconsumed

    // return number of characters eaten
    return cast(ptrdiff_t) pos;
}
588 
589 
590 
591 /*******************************************************************************
592 
593     Unit test
594 
595 *******************************************************************************/
596 
// Exercises all fixed-type parsers: plain values, wide character strings,
// signs, explicit radix, numerical limits, radix prefixes, whitespace trimming
// and rejection of malformed input.
unittest
{
    byte   i8;
    ubyte  u8;
    short  i16;
    ushort u16;
    int    i32;
    uint   u32;
    long   i64;
    ulong  u64;

    // basic functionality
    toByte("1", i8); test(i8 == 1);
    toUbyte("1", u8); test(u8 == 1);
    toShort("1", i16); test(i16 == 1);
    toUshort("1", u16); test(u16 == 1);
    toInt("1", i32); test(i32 == 1);
    toUint("1", u32); test(u32 == 1);
    toLong("1", i64); test(i64 == 1);
    toUlong("1", u64); test(u64 == 1);

    // basic functionality with wchar strings
    toByte("1"w, i8); test(i8 == 1);
    toUbyte("1"w, u8); test(u8 == 1);
    toShort("1"w, i16); test(i16 == 1);
    toUshort("1"w, u16); test(u16 == 1);
    toInt("1"w, i32); test(i32 == 1);
    toUint("1"w, u32); test(u32 == 1);
    toLong("1"w, i64); test(i64 == 1);
    toUlong("1"w, u64); test(u64 == 1);

    // basic functionality with dchar strings
    toByte("1"d, i8); test(i8 == 1);
    toUbyte("1"d, u8); test(u8 == 1);
    toShort("1"d, i16); test(i16 == 1);
    toUshort("1"d, u16); test(u16 == 1);
    toInt("1"d, i32); test(i32 == 1);
    toUint("1"d, u32); test(u32 == 1);
    toLong("1"d, i64); test(i64 == 1);
    toUlong("1"d, u64); test(u64 == 1);

    // explicit '+' sign is accepted by every type
    toByte("+1", i8); test(i8 == 1);
    toUbyte("+1", u8); test(u8 == 1);
    toShort("+1", i16); test(i16 == 1);
    toUshort("+1", u16); test(u16 == 1);
    toInt("+1", i32); test(i32 == 1);
    toUint("+1", u32); test(u32 == 1);
    toLong("+1", i64); test(i64 == 1);
    toUlong("+1", u64); test(u64 == 1);

    // '-' sign is accepted by signed types and rejected by unsigned ones
    toByte("-1", i8); test(i8 == -1);
    test(!toUbyte("-1", u8));
    toShort("-1", i16); test(i16 == -1);
    test(!toUshort("-1", u16));
    toInt("-1", i32); test(i32 == -1);
    test(!toUint("-1", u32));
    toLong("-1", i64); test(i64 == -1);
    test(!toUlong("-1", u64));

    // basic functionality + radix
    toByte("1", i8, 10); test(i8 == 1);
    toUbyte("1", u8, 10); test(u8 == 1);
    toShort("1", i16, 10); test(i16 == 1);
    toUshort("1", u16, 10); test(u16 == 1);
    toInt("1", i32, 10); test(i32 == 1);
    toUint("1", u32, 10); test(u32 == 1);
    toLong("1", i64, 10); test(i64 == 1);
    toUlong("1", u64, 10); test(u64 == 1);

    // numerical limits
    toByte("-128", i8); test(i8 == byte.min);
    toByte("127", i8); test(i8 == byte.max);
    toUbyte("255", u8); test(u8 == ubyte.max);
    toShort("-32768", i16); test(i16 == short.min);
    toShort("32767", i16); test(i16 == short.max);
    toUshort("65535", u16); test(u16 == ushort.max);
    toInt("-2147483648", i32); test(i32 == int.min);
    toInt("2147483647", i32); test(i32 == int.max);
    toUint("4294967295", u32); test(u32 == uint.max);
    toLong("-9223372036854775808", i64); test(i64 == long.min);
    toLong("9223372036854775807", i64); test(i64 == long.max);
    toUlong("18446744073709551615", u64); test(u64 == ulong.max);

    // beyond numerical limits
    test(!toByte("-129", i8));
    test(!toByte("128", i8));
    test(!toUbyte("256", u8));
    test(!toShort("-32769", i16));
    test(!toShort("32768", i16));
    test(!toUshort("65536", u16));
    test(!toInt("-2147483649", i32));
    test(!toInt("2147483648", i32));
    test(!toUint("4294967296", u32));
    test(!toLong("-9223372036854775809", i64));
    test(!toLong("9223372036854775808", i64));
    test(!toUlong("18446744073709551616", u64));

    // hex literals with 17 digits do not fit into 64 bits
    test(!toLong("-0x12345678123456789", i64));
    test(!toLong("0x12345678123456789", i64));
    test(!toUlong("0x12345678123456789", u64));

    // hex
    toInt("a", i32, 16); test(i32 == 0xa);
    toInt("b", i32, 16); test(i32 == 0xb);
    toInt("c", i32, 16); test(i32 == 0xc);
    toInt("d", i32, 16); test(i32 == 0xd);
    toInt("e", i32, 16); test(i32 == 0xe);
    toInt("f", i32, 16); test(i32 == 0xf);
    toInt("A", i32, 16); test(i32 == 0xa);
    toInt("B", i32, 16); test(i32 == 0xb);
    toInt("C", i32, 16); test(i32 == 0xc);
    toInt("D", i32, 16); test(i32 == 0xd);
    toInt("E", i32, 16); test(i32 == 0xe);
    toInt("F", i32, 16); test(i32 == 0xf);

    toUlong("FF", u64, 16); test(u64 == ubyte.max);
    toUlong("FFFF", u64, 16); test(u64 == ushort.max);
    toUlong("ffffFFFF", u64, 16); test(u64 == uint.max);
    toUlong("ffffFFFFffffFFFF", u64, 16); test(u64 == ulong.max);

    // oct
    toInt("55", i32, 8); test(i32 == 45);
    toInt("100", i32, 8); test(i32 == 64);

    // bin
    toInt("10000", i32, 2); test(i32 == 0b10000);

    // trim
    toInt("    \t20", i32); test(i32 == 20);
    toInt("    \t-20", i32); test(i32 == -20);
    toInt("-    \t 20", i32); test(i32 == -20);

    // recognise radix prefix
    toUlong("0xFFFF", u64); test(u64 == ushort.max);
    toUlong("0XffffFFFF", u64); test(u64 == uint.max);
    toUlong("0o55", u64); test(u64 == 45);
    toUlong("0O100", u64); test(u64 == 64);
    toUlong("0b10000", u64); test(u64 == 0b10000);
    toUlong("0B1010", u64); test(u64 == 0b1010);

    // recognise wrong radix prefix
    test(!toUlong("0x10", u64, 10));
    test(!toUlong("0b10", u64, 10));
    test(!toUlong("0o10", u64, 10));

    // empty string handling (parsing error)
    test(!toInt("", i32));
    test(!toUint("", u32));
    test(!toLong("", i64));
    test(!toUlong("", u64));
}