1 /*******************************************************************************
2
3 A set of functions for converting strings to integer values.
4
5 This module is adapted from ocean.text.convert.Integer_tango. The functions have
6 been modified so that they do not throw exceptions, instead denoting errors
7 via their bool return value. This is more efficient and avoids the tango
8 style of always throwing new Exceptions upon error.
9
10 Copyright:
11 Copyright (c) 2004 Kris Bell.
12 Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
13 All rights reserved.
14
15 License:
16 Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
17 See LICENSE_TANGO.txt for details.
18
19 Version:
20 Initial release: Nov 2005
21 Ocean adaptation: July 2012
22
23 Authors: Kris Bell, Gavin Norman
24
25 *******************************************************************************/
26
27 module ocean.text.convert.Integer;
28
29 import ocean.transition;
30
31 import ocean.meta.traits.Basic;
32
33 import ocean.core.array.Search : find;
34
35 import ocean.math.Math;
36
37 import ocean.core.Verify;
38
39
/*******************************************************************************

    Parses an integer of type T from the provided string, selecting the signed
    or the unsigned parser at compile time according to T.

    The string is inspected for a sign and an optional radix prefix. A radix may
    be provided as an argument instead, whereupon it must match the prefix
    (where present). When radix is set to zero, conversion will default to
    decimal.

    Params:
        C = char type of string
        T = type of integer to parse (must be byte, ubyte, short, ushort,
            int, uint, long or ulong; anything else is a compile-time error)
        digits = string to parse
        value = receives parsed integer
        radix = specifies which radix to interpret the string as

    Returns:
        true if parsing succeeded

*******************************************************************************/

public bool toInteger ( C, T ) ( C[] digits, out T value, uint radix = 0 )
{
    static if (is(T == byte) || is(T == short)
        || is(T == int) || is(T == long))
    {
        // All signed widths share one parser, which range-checks against T
        return toSignedInteger(digits, value, radix);
    }
    else static if (is(T == ubyte) || is(T == ushort)
        || is(T == uint) || is(T == ulong))
    {
        // All unsigned widths share one parser, which range-checks against T
        return toUnsignedInteger(digits, value, radix);
    }
    else
    {
        static assert(false, "toInteger: T must be one of {byte, ubyte, short, "
            ~ "ushort, int, uint, long, ulong}, not " ~ T.stringof);
    }
}
103
104
105 /*******************************************************************************
106
107 Parse an integer value from the provided string.
108
109 The string is inspected for a sign and an optional radix prefix. A radix may
110 be provided as an argument instead, whereupon it must match the prefix
111 (where present). When radix is set to zero, conversion will default to
112 decimal.
113
114 Params:
115 T = char type of string
116 digits = string to parse
117 value = receives parsed integer
118 radix = specifies which radix to interpret the string as
119
120 Returns:
121 true if parsing succeeded
122
123 *******************************************************************************/
124
public bool toByte ( T ) ( T[] digits, out byte value, uint radix = 0 )
{
    // Forward to the shared signed parser, instantiated explicitly for byte;
    // it range-checks the parsed value against byte.min .. byte.max
    return toSignedInteger!(T, byte)(digits, value, radix);
}
129
130 /// Ditto
public bool toUbyte ( T ) ( T[] digits, out ubyte value, uint radix = 0 )
{
    // Forward to the shared unsigned parser, instantiated explicitly for
    // ubyte; it rejects negative input and values above ubyte.max
    return toUnsignedInteger!(T, ubyte)(digits, value, radix);
}
135
136 /// Ditto
public bool toShort ( T ) ( T[] digits, out short value, uint radix = 0 )
{
    // Forward to the shared signed parser, instantiated explicitly for short;
    // it range-checks the parsed value against short.min .. short.max
    return toSignedInteger!(T, short)(digits, value, radix);
}
141
142 /// Ditto
public bool toUshort ( T ) ( T[] digits, out ushort value, uint radix = 0 )
{
    // Forward to the shared unsigned parser, instantiated explicitly for
    // ushort; it rejects negative input and values above ushort.max
    return toUnsignedInteger!(T, ushort)(digits, value, radix);
}
147
148 /// Ditto
public bool toInt ( T ) ( T[] digits, out int value, uint radix = 0 )
{
    // Forward to the shared signed parser, instantiated explicitly for int;
    // it range-checks the parsed value against int.min .. int.max
    return toSignedInteger!(T, int)(digits, value, radix);
}
153
154 /// Ditto
public bool toUint ( T ) ( T[] digits, out uint value, uint radix = 0 )
{
    // Forward to the shared unsigned parser, instantiated explicitly for uint;
    // it rejects negative input and values above uint.max
    return toUnsignedInteger!(T, uint)(digits, value, radix);
}
159
160 /// Ditto
public bool toLong ( T ) ( T[] digits, out long value, uint radix = 0 )
{
    // Forward to the shared signed parser, instantiated explicitly for long
    // (the widest signed type it supports)
    return toSignedInteger!(T, long)(digits, value, radix);
}
165
166 /// Ditto
public bool toUlong ( T ) ( T[] digits, out ulong value, uint radix = 0 )
{
    // Forward to the shared unsigned parser, instantiated explicitly for ulong
    // (the widest unsigned type it supports)
    return toUnsignedInteger!(T, ulong)(digits, value, radix);
}
171
172
/*******************************************************************************

    Parses a non-negative floating point number represented as a string
    directly to an integer value (fixed-point representation).

    To represent the fractional part we multiply the value by 10 to the power
    of the requested amount of decimal points and add the fraction. For
    example:

        "1.123" -> 1123 (decimal_points = 3)
        "0.01"  -> 10   (decimal_points = 3)

    Any fractional digits beyond the requested amount of decimal points are
    cut off (not rounded), without being inspected:

        "1.2345"  -> 123  (decimal_points = 2)
        "10.2030" -> 1020 (decimal_points = 2)

        "1.2345"  -> 1    (decimal_points = 0)
        "10.2030" -> 10   (decimal_points = 0)

    The part before the '.' is parsed with toUlong, so it follows that
    function's rules (in particular, a '-' sign is rejected). The retained
    fractional characters must all be decimal digits.

    Parsing fails, returning false, if
        * the integer part is not accepted by toUlong (this includes an empty
          integer part, as in ".5"),
        * a retained fractional character is not a decimal digit (sign,
          whitespace, second '.', radix prefix, ...),
        * the scaled result does not fit into T.

    On failure value is left at 0.

    Params:
        T = type of the integer
        float_str = floating point number string to parse
        value = out parameter containing the result
        decimal_points = amount of decimal points to consider (at most 16,
            enforced via verify)

    Returns:
        true if the parsing was successful, else false

*******************************************************************************/

public bool floatStringToInt ( T = ulong ) ( cstring float_str, out T value,
                                             size_t decimal_points = 0 )
{
    static assert(isSignedIntegerType!(T) || isUnsignedIntegerType!(T),
        "floatStringToInt: T must be an integer type, not " ~ T.stringof);

    static immutable MaxDecimal = 16;

    // Largest result T can hold, widened so that every range check below is
    // done in ulong arithmetic regardless of T
    static immutable ulong MaxValue = T.max;

    verify(decimal_points <= MaxDecimal);

    // 10 ^ decimal_points; at most 10 ^ 16, which fits into a ulong
    ulong multiplier = pow(cast(ulong) 10, decimal_points);

    // Scratch buffer of '0' characters, used to right-pad a short fraction
    char[MaxDecimal] zeros_suffix_buf = '0';
    char[] zeros_suffix = zeros_suffix_buf[0 .. decimal_points];

    cstring int_str;
    cstring frac_str;

    // Split string at the first '.'
    auto idx = find(float_str, '.');

    if (idx == float_str.length)
    {
        // No '.': the fraction is all zeros
        int_str = float_str;
        frac_str = zeros_suffix;
    }
    else
    {
        int_str = float_str[0 .. idx];
        frac_str = float_str[idx + 1 .. $];
    }

    // Cut off if too long
    if (frac_str.length > decimal_points)
    {
        frac_str = frac_str[0 .. decimal_points];
    }

    // Fill with zeros if too short
    if (frac_str.length < decimal_points)
    {
        zeros_suffix[0 .. frac_str.length] = frac_str[];
        frac_str = zeros_suffix;
    }

    // The fraction is positional: a sign, whitespace or radix prefix would be
    // accepted by the general-purpose parser but yield a wrong result here, so
    // only plain decimal digits are allowed
    foreach (c; frac_str)
    {
        if (c < '0' || c > '9')
        {
            return false;
        }
    }

    ulong int_part;

    if (!toUlong(int_str, int_part))
    {
        return false;
    }

    ulong frac_part;

    if (frac_str.length > 0 && !toUlong(frac_str, frac_part))
    {
        return false;
    }

    // int_part * multiplier must not exceed MaxValue (multiplier is >= 1)
    if (int_part > MaxValue / multiplier)
    {
        return false;
    }

    ulong scaled = int_part * multiplier;

    // scaled + frac_part must not exceed MaxValue
    if (frac_part > MaxValue - scaled)
    {
        return false;
    }

    value = cast(T) (scaled + frac_part);

    return true;
}
255
256 version ( UnitTest )
257 {
258 import ocean.core.Test;
259 }
260
// Checks floatStringToInt against well-formed and malformed inputs
unittest
{
    // Expects `input` to parse successfully to exactly `expected` when
    // `precision` decimal points are requested
    void expectParsed ( cstring input, ulong expected, size_t precision )
    {
        ulong parsed;
        test(floatStringToInt(input, parsed, precision));
        test!("==")(parsed, expected);
    }

    // Expects parsing of `input` to fail
    void expectRejected ( cstring input, size_t precision )
    {
        ulong ignored;
        test(!floatStringToInt(input, ignored, precision));
    }

    // three decimal points: padding and truncation of the fraction
    expectParsed("0.16", 160, 3);
    expectParsed("0.59", 590, 3);
    expectParsed("3.29", 3_290, 3);
    expectParsed("0.16", 160, 3);
    expectParsed("4.00", 4_000, 3);
    expectParsed("3.5993754486719", 3_599, 3);
    expectParsed("0.99322729901677", 993, 3);
    expectParsed("1.05", 1_050, 3);
    expectParsed("0.5", 500, 3);
    expectParsed("2", 2_000, 3);

    // no decimal points: the fraction is dropped entirely
    expectParsed("2", 2, 0);
    expectParsed("2.1", 2, 0);
    expectParsed("2.123", 2, 0);
    expectParsed("2.123456", 2, 0);

    // two decimal points
    expectParsed("0.1", 10, 2);
    expectParsed("1.1", 110, 2);
    expectParsed("1", 100, 2);
    expectParsed("01", 100, 2);
    expectParsed("10.10", 1010, 2);
    expectParsed("225.04", 22504, 2);
    expectParsed("225.100000000000004", 22510, 2);
    expectParsed("225.000000000000004", 22500, 2);
    expectParsed("225.009999", 22500, 2);

    // malformed input: second '.', or ',' used as separator
    expectRejected("225.0.09999", 2);
    expectRejected("10,10", 2);
    expectRejected("0,1", 2);
    expectRejected("1,1", 2);
    expectRejected("6,6", 2);
}
303
304
/*******************************************************************************

    Parse a signed integer value from the provided string.

    The string is inspected for a sign and an optional radix prefix. A radix may
    be provided as an argument instead, whereupon it must match the prefix
    (where present). When radix is set to zero, conversion will default to
    decimal.

    Types narrower than long are parsed as long first and then range-checked
    against I. For long itself the digits are accumulated as an unsigned
    magnitude, which is then checked against the asymmetric signed range (the
    magnitude of long.min is one greater than long.max). A negative zero
    ("-0") is accepted and yields 0.

    Parsing fails if the string contains no digits, if any character is left
    over after the digits, or if the value does not fit into I.

    Params:
        T = char type of string
        I = type of integer to extract
        digits = string to parse
        value = receives parsed integer
        radix = specifies which radix to interpret the string as

    Returns:
        true if parsing succeeded

*******************************************************************************/

private bool toSignedInteger ( T, I ) ( T[] digits, out I value, uint radix = 0 )
{
    static assert(isSignedIntegerType!(I), "I must be signed integer type.");
    static assert(I.max <= long.max, "I cannot be larger than long.");

    static if (I.max < long.max)
    {
        // Narrow type: parse at full width, then range-check against I
        long long_value;
        if ( !toSignedInteger(digits, long_value, radix) )
        {
            return false;
        }

        if ( long_value > I.max || long_value < I.min )
        {
            return false;
        }

        value = cast(I) long_value;
        return true;
    }
    else
    {
        static assert(is(I == long),
            "Largest signed integer type should be long.");

        // Largest magnitudes representable with each sign. Comparing unsigned
        // magnitudes avoids mixing signed and unsigned operands, which would
        // silently convert long.min to a large unsigned number.
        static immutable ulong MaxPositive = long.max;
        static immutable ulong MaxNegativeMagnitude = MaxPositive + 1;

        bool negative;
        uint len;
        ulong magnitude;

        auto trimmed = trim(digits, negative, radix);

        if ( !convert(digits[trimmed .. $], magnitude, len, radix) )
        {
            return false; // magnitude does not even fit into a ulong
        }

        // No digits at all, or trailing characters after the digits
        if ( len == 0 || trimmed + len < digits.length )
        {
            return false;
        }

        if ( negative )
        {
            if ( magnitude > MaxNegativeMagnitude )
            {
                return false;
            }

            // Unsigned wrap-around negation: maps 0 to 0 and
            // MaxNegativeMagnitude to the bit pattern of long.min
            value = cast(long) (0 - magnitude);
        }
        else
        {
            if ( magnitude > MaxPositive )
            {
                return false;
            }

            value = cast(long) magnitude;
        }

        return true;
    }
}
373
374
/*******************************************************************************

    Parse an unsigned integer value from the provided string.

    The string is inspected for a sign and an optional radix prefix. A radix may
    be provided as an argument instead, whereupon it must match the prefix
    (where present). When radix is set to zero, conversion will default to
    decimal.

    Types narrower than ulong are parsed as ulong first and then checked
    against U.max.

    Parsing fails if the string is flagged as negative, if it contains no
    digits, if any character is left over after the digits, or if the value
    does not fit into U.

    Params:
        T = char type of string
        U = type of unsigned integer to extract
        digits = string to parse
        value = receives parsed unsigned integer
        radix = specifies which radix to interpret the string as

    Returns:
        true if parsing succeeded

*******************************************************************************/

private bool toUnsignedInteger ( T, U ) ( T[] digits, out U value, uint radix = 0 )
{
    static assert(isUnsignedIntegerType!(U), "U must be unsigned integer type.");

    static if (U.max < ulong.max)
    {
        // Narrow type: parse at full width, then range-check against U
        ulong long_value;
        if ( !toUnsignedInteger(digits, long_value, radix) )
        {
            return false;
        }

        // Only the upper bound can be violated: an unsigned value is never
        // below U.min (0)
        if ( long_value > U.max )
        {
            return false;
        }

        value = cast(U) long_value;
        return true;
    }
    else
    {
        static assert(is(U == ulong),
            "Largest unsigned integer type should be ulong.");

        bool negative;
        uint len;
        ulong x;

        auto trimmed = trim(digits, negative, radix);
        if ( negative )
        {
            return false;
        }

        if ( !convert(digits[trimmed .. $], x, len, radix) )
        {
            return false; // value does not fit into a ulong
        }

        // No digits at all, or trailing characters after the digits
        if ( len == 0 || trimmed + len < digits.length )
        {
            return false;
        }

        value = x;
        return true;
    }
}
441
442
/*******************************************************************************

    Convert the leading run of digits in 'digits' into an integer value,
    without checking for a sign or radix prefix. The radix defaults to decimal
    (10).

    The characters '0'-'9' stand for the digit values 0-9 and the letters
    'a'-'z' / 'A'-'Z' for the digit values 10-35. Conversion stops, without
    failing, at the first character which is not such a character or whose
    digit value is not below the radix.

    Parsing fails (returning false) only if the digits represent an integer of
    greater magnitude than a ulong can store. Note that true is returned even
    if no character was consumed; callers must inspect 'eaten'.

    Params:
        T = char type of string
        digits = string to parse
        value = receives parsed integer
        eaten = receives the number of characters parsed
        radix = specifies which radix to interpret the string as

    Returns:
        true if parsing succeeded

*******************************************************************************/

private bool convert ( T ) ( T[] digits, out ulong value, out uint eaten,
                             uint radix = 10 )
{
    foreach (c; digits)
    {
        uint digit;

        // Map the character to its digit value, or stop at a non-digit
        if (c >= '0' && c <= '9')
        {
            digit = cast(uint) (c - '0');
        }
        else if (c >= 'a' && c <= 'z')
        {
            digit = cast(uint) (c - 'a') + 10;
        }
        else if (c >= 'A' && c <= 'Z')
        {
            digit = cast(uint) (c - 'A') + 10;
        }
        else
        {
            break;
        }

        // A digit which is not valid in this radix ends the number
        if (digit >= radix)
        {
            break;
        }

        if ( value > 0 && radix > value.max / value )
        {
            return false; // multiplication overflow
        }
        value *= radix;

        if ( (value.max - value) < digit )
        {
            return false; // addition overflow
        }
        value += digit;

        ++eaten;
    }

    return true;
}
501
502
/*******************************************************************************

    Strip leading whitespace (spaces and tabs), extract optional +/- signs, and
    an optional radix prefix ("0x", "0b" or "0o", in either case). If the radix
    value matches an optional prefix, or the radix is zero, the prefix will be
    consumed and assigned. Where the radix is non zero and does not match an
    explicit prefix, the latter will remain unconsumed. Otherwise, radix will
    default to 10.

    Whitespace and sign characters may be interleaved; the last sign
    encountered decides the value of 'negative'. If no sign is present,
    'negative' is left untouched. For an empty string neither 'negative' nor
    'radix' is modified.

    The string is only ever accessed through bounds-checked indexing, so a
    string consisting solely of whitespace / sign characters is never read past
    its end.

    Params:
        T = char type of string
        digits = string to parse
        negative = set to true if the string indicates a negative number
        radix = receives the radix parsed from the string

    Returns:
        the number of characters consumed

*******************************************************************************/

private ptrdiff_t trim ( T ) ( T[] digits, ref bool negative, ref uint radix )
{
    // Nothing to inspect: leave negative and radix exactly as passed in
    if (digits.length == 0)
    {
        return 0;
    }

    size_t pos = 0;

    // strip off whitespace and sign characters; the bound is tested before
    // every read
    for (; pos < digits.length; ++pos)
    {
        auto c = digits[pos];

        if (c is ' ' || c is '\t')
        {}
        else if (c is '-')
        {
            negative = true;
        }
        else if (c is '+')
        {
            negative = false;
        }
        else
        {
            break;
        }
    }

    // strip off a radix specifier also? (needs '0' plus at least one more
    // character)
    auto prefix_radix = radix;

    if (digits.length - pos > 1 && digits[pos] is '0')
    {
        switch (digits[pos + 1])
        {
            case 'x':
            case 'X':
                prefix_radix = 16;
                pos += 2;
                break;

            case 'b':
            case 'B':
                prefix_radix = 2;
                pos += 2;
                break;

            case 'o':
            case 'O':
                prefix_radix = 8;
                pos += 2;
                break;

            default:
                break;
        }
    }

    if (prefix_radix is 0)
    {
        // neither an explicit radix nor a prefix: default the radix to 10
        radix = 10;
    }
    else if (radix != prefix_radix)
    {
        // only reachable when a prefix was consumed above
        if (radix)
        {
            // explicit radix must match (optional) prefix: on a mismatch the
            // two prefix characters are handed back to the caller
            pos -= 2;
        }
        else
        {
            radix = prefix_radix;
        }
    }

    // return number of characters eaten
    return cast(ptrdiff_t) pos;
}
588
589
590
/*******************************************************************************

    Unit test for the to* parsing functions. Every conversion which is expected
    to succeed is checked both for its return value and for the parsed result.

*******************************************************************************/

unittest
{
    byte b;
    ubyte ub;
    short s;
    ushort us;
    int i;
    uint ui;
    long l;
    ulong ul;

    // basic functionality
    test(toByte("1", b)); test(b == 1);
    test(toUbyte("1", ub)); test(ub == 1);
    test(toShort("1", s)); test(s == 1);
    test(toUshort("1", us)); test(us == 1);
    test(toInt("1", i)); test(i == 1);
    test(toUint("1", ui)); test(ui == 1);
    test(toLong("1", l)); test(l == 1);
    test(toUlong("1", ul)); test(ul == 1);

    // basic functionality with wide chars (wchar strings)
    test(toByte("1"w, b)); test(b == 1);
    test(toUbyte("1"w, ub)); test(ub == 1);
    test(toShort("1"w, s)); test(s == 1);
    test(toUshort("1"w, us)); test(us == 1);
    test(toInt("1"w, i)); test(i == 1);
    test(toUint("1"w, ui)); test(ui == 1);
    test(toLong("1"w, l)); test(l == 1);
    test(toUlong("1"w, ul)); test(ul == 1);

    // basic functionality with dchar strings
    test(toByte("1"d, b)); test(b == 1);
    test(toUbyte("1"d, ub)); test(ub == 1);
    test(toShort("1"d, s)); test(s == 1);
    test(toUshort("1"d, us)); test(us == 1);
    test(toInt("1"d, i)); test(i == 1);
    test(toUint("1"d, ui)); test(ui == 1);
    test(toLong("1"d, l)); test(l == 1);
    test(toUlong("1"d, ul)); test(ul == 1);

    // basic signed functionality
    test(toByte("+1", b)); test(b == 1);
    test(toUbyte("+1", ub)); test(ub == 1);
    test(toShort("+1", s)); test(s == 1);
    test(toUshort("+1", us)); test(us == 1);
    test(toInt("+1", i)); test(i == 1);
    test(toUint("+1", ui)); test(ui == 1);
    test(toLong("+1", l)); test(l == 1);
    test(toUlong("+1", ul)); test(ul == 1);

    // negative values are only accepted by the signed types
    test(toByte("-1", b)); test(b == -1);
    test(!toUbyte("-1", ub));
    test(toShort("-1", s)); test(s == -1);
    test(!toUshort("-1", us));
    test(toInt("-1", i)); test(i == -1);
    test(!toUint("-1", ui));
    test(toLong("-1", l)); test(l == -1);
    test(!toUlong("-1", ul));

    // basic functionality + radix
    test(toByte("1", b, 10)); test(b == 1);
    test(toUbyte("1", ub, 10)); test(ub == 1);
    test(toShort("1", s, 10)); test(s == 1);
    test(toUshort("1", us, 10)); test(us == 1);
    test(toInt("1", i, 10)); test(i == 1);
    test(toUint("1", ui, 10)); test(ui == 1);
    test(toLong("1", l, 10)); test(l == 1);
    test(toUlong("1", ul, 10)); test(ul == 1);

    // numerical limits
    test(toByte("-128", b)); test(b == byte.min);
    test(toByte("127", b)); test(b == byte.max);
    test(toUbyte("255", ub)); test(ub == ubyte.max);
    test(toShort("-32768", s)); test(s == short.min);
    test(toShort("32767", s)); test(s == short.max);
    test(toUshort("65535", us)); test(us == ushort.max);
    test(toInt("-2147483648", i)); test(i == int.min);
    test(toInt("2147483647", i)); test(i == int.max);
    test(toUint("4294967295", ui)); test(ui == uint.max);
    test(toLong("-9223372036854775808", l)); test(l == long.min);
    test(toLong("9223372036854775807", l)); test(l == long.max);
    test(toUlong("18446744073709551615", ul)); test(ul == ulong.max);

    // beyond numerical limits
    test(!toByte("-129", b));
    test(!toByte("128", b));
    test(!toUbyte("256", ub));
    test(!toShort("-32769", s));
    test(!toShort("32768", s));
    test(!toUshort("65536", us));
    test(!toInt("-2147483649", i));
    test(!toInt("2147483648", i));
    test(!toUint("4294967296", ui));
    test(!toLong("-9223372036854775809", l));
    test(!toLong("9223372036854775808", l));
    test(!toUlong("18446744073709551616", ul));

    // beyond the limits of a 64-bit accumulator (17 hex digits)
    test(!toLong("-0x12345678123456789", l));
    test(!toLong("0x12345678123456789", l));
    test(!toUlong("0x12345678123456789", ul));

    // hex
    test(toInt("a", i, 16)); test(i == 0xa);
    test(toInt("b", i, 16)); test(i == 0xb);
    test(toInt("c", i, 16)); test(i == 0xc);
    test(toInt("d", i, 16)); test(i == 0xd);
    test(toInt("e", i, 16)); test(i == 0xe);
    test(toInt("f", i, 16)); test(i == 0xf);
    test(toInt("A", i, 16)); test(i == 0xa);
    test(toInt("B", i, 16)); test(i == 0xb);
    test(toInt("C", i, 16)); test(i == 0xc);
    test(toInt("D", i, 16)); test(i == 0xd);
    test(toInt("E", i, 16)); test(i == 0xe);
    test(toInt("F", i, 16)); test(i == 0xf);

    test(toUlong("FF", ul, 16)); test(ul == ubyte.max);
    test(toUlong("FFFF", ul, 16)); test(ul == ushort.max);
    test(toUlong("ffffFFFF", ul, 16)); test(ul == uint.max);
    test(toUlong("ffffFFFFffffFFFF", ul, 16)); test(ul == ulong.max);

    // oct
    test(toInt("55", i, 8)); test(i == 45);
    test(toInt("100", i, 8)); test(i == 64);

    // bin
    test(toInt("10000", i, 2)); test(i == 0b10000);

    // trim
    test(toInt(" \t20", i)); test(i == 20);
    test(toInt(" \t-20", i)); test(i == -20);
    test(toInt("- \t 20", i)); test(i == -20);

    // recognise radix prefix
    test(toUlong("0xFFFF", ul)); test(ul == ushort.max);
    test(toUlong("0XffffFFFF", ul)); test(ul == uint.max);
    test(toUlong("0o55", ul)); test(ul == 45);
    test(toUlong("0O100", ul)); test(ul == 64);
    test(toUlong("0b10000", ul)); test(ul == 0b10000);
    test(toUlong("0B1010", ul)); test(ul == 0b1010);

    // recognise wrong radix prefix
    test(!toUlong("0x10", ul, 10));
    test(!toUlong("0b10", ul, 10));
    test(!toUlong("0o10", ul, 10));

    // empty string handling (parsing error)
    test(!toInt("", i));
    test(!toUint("", ui));
    test(!toLong("", l));
    test(!toUlong("", ul));
}