1 /******************************************************************************* 2 3 A set of functions for converting strings to integer values. 4 5 This module is adapted from ocean.text.convert.Integer_tango. The functions have 6 been modified so that they do not throw exceptions, instead denoting errors 7 via their bool return value. This is more efficient and avoids the tango 8 style of always throwing new Exceptions upon error. 9 10 Copyright: 11 Copyright (c) 2004 Kris Bell. 12 Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH. 13 All rights reserved. 14 15 License: 16 Tango Dual License: 3-Clause BSD License / Academic Free License v3.0. 17 See LICENSE_TANGO.txt for details. 18 19 Version: 20 Initial release: Nov 2005 21 Ocean adaptation: July 2012 22 23 Authors: Kris Bell, Gavin Norman 24 25 *******************************************************************************/ 26 27 module ocean.text.convert.Integer; 28 29 import ocean.meta.types.Qualifiers; 30 31 import ocean.meta.traits.Basic; 32 33 import ocean.core.array.Search : find; 34 35 import ocean.math.Math; 36 37 import ocean.core.Verify; 38 39 40 /******************************************************************************* 41 42 Parse an integer value from the provided string. The exact type of integer 43 parsed is determined by the template parameter T (see below). 44 45 The string is inspected for a sign and an optional radix prefix. A radix may 46 be provided as an argument instead, whereupon it must match the prefix 47 (where present). When radix is set to zero, conversion will default to 48 decimal. 
Params:
    C = char type of string
    T = type of integer to parse (must be byte, ubyte, short, ushort,
        int, uint, long or ulong; anything else is rejected at compile
        time)
    digits = string to parse
    value = receives parsed integer
    radix = specifies which radix to interpret the string as

Returns:
    true if parsing succeeded

*******************************************************************************/

public bool toInteger ( C, T ) ( C[] digits, out T value, uint radix = 0 )
{
    // Classify the target type once. Each supported type is served by one of
    // the two generic parsers, which range-check against the width of T.
    enum signed_target = is(T == byte) || is(T == short)
        || is(T == int) || is(T == long);
    enum unsigned_target = is(T == ubyte) || is(T == ushort)
        || is(T == uint) || is(T == ulong);

    static if (signed_target)
    {
        return toSignedInteger(digits, value, radix);
    }
    else static if (unsigned_target)
    {
        return toUnsignedInteger(digits, value, radix);
    }
    else
    {
        static assert(false, "toInteger: T must be one of {byte, ubyte, short, "
            ~ "ushort, int, uint, long, ulong}, not " ~ T.stringof);
    }
}


/*******************************************************************************

Parse an integer value from the provided string.

The string is inspected for a sign and an optional radix prefix. A radix may
be provided as an argument instead, whereupon it must match the prefix
(where present). When radix is set to zero, conversion will default to
decimal.
Params:
    T = char type of string
    digits = string to parse
    value = receives parsed integer
    radix = specifies which radix to interpret the string as

Returns:
    true if parsing succeeded

*******************************************************************************/

public bool toByte ( T ) ( T[] digits, out byte value, uint radix = 0 )
{
    return toSignedInteger!(T, byte)(digits, value, radix);
}

/// Ditto
public bool toUbyte ( T ) ( T[] digits, out ubyte value, uint radix = 0 )
{
    return toUnsignedInteger!(T, ubyte)(digits, value, radix);
}

/// Ditto
public bool toShort ( T ) ( T[] digits, out short value, uint radix = 0 )
{
    return toSignedInteger!(T, short)(digits, value, radix);
}

/// Ditto
public bool toUshort ( T ) ( T[] digits, out ushort value, uint radix = 0 )
{
    return toUnsignedInteger!(T, ushort)(digits, value, radix);
}

/// Ditto
public bool toInt ( T ) ( T[] digits, out int value, uint radix = 0 )
{
    return toSignedInteger!(T, int)(digits, value, radix);
}

/// Ditto
public bool toUint ( T ) ( T[] digits, out uint value, uint radix = 0 )
{
    return toUnsignedInteger!(T, uint)(digits, value, radix);
}

/// Ditto
public bool toLong ( T ) ( T[] digits, out long value, uint radix = 0 )
{
    return toSignedInteger!(T, long)(digits, value, radix);
}

/// Ditto
public bool toUlong ( T ) ( T[] digits, out ulong value, uint radix = 0 )
{
    return toUnsignedInteger!(T, ulong)(digits, value, radix);
}


/*******************************************************************************

Parses a floating point number represented as a string directly to an
integer value.

To represent the fractional part we multiply the value by the requested
amount of decimal points and add it up.
For example:

    "1.123" -> 1123 (decimal_points = 3)
    "0.01" -> 10 (decimal_points = 3)

Any characters longer than the requested amount of decimal points will be
cut off (truncated, not rounded):

    "1.2345" -> 123 (decimal_points = 2)
    "10.2030" -> 1020 (decimal_points = 2)

    "1.2345" -> 1 (decimal_points = 0)
    "10.2030" -> 10 (decimal_points = 0)

Everything after the first '.' must consist of decimal digits only, even
the part that is cut off; signs, whitespace, radix prefixes or a second '.'
in the fractional part make parsing fail. The part before the '.' is parsed
by toUlong() and therefore must be a non-negative integer.

Parsing also fails, instead of silently wrapping around, if the scaled
result does not fit into T.

Params:
    T = type of the integer
    float_str = floating point number string to parse
    value = out parameter containing the result; only assigned on success
        (it keeps its initial value of T.init when false is returned)
    decimal_points = amount of decimal points to consider (at most 16)

Returns:
    true if the parsing was successful, else false

*******************************************************************************/

public bool floatStringToInt ( T = ulong ) ( cstring float_str, out T value,
                                             size_t decimal_points = 0 )
{
    static immutable MaxDecimal = 16;

    // Largest result that can be stored in T
    static immutable ulong Limit = T.max;

    verify(decimal_points <= MaxDecimal);

    // Split string at the first '.'; idx equals float_str.length if there is
    // no '.', in which case the fractional part stays empty
    auto idx = find(float_str, '.');

    cstring int_part = float_str[0 .. idx];
    cstring frac_part = null;

    if (idx < float_str.length)
    {
        frac_part = float_str[idx + 1 .. $];
    }

    ulong int_value;

    if (!toUlong(int_part, int_value))
    {
        return false;
    }

    // Accumulate the first decimal_points fractional digits. The remaining
    // characters are discarded but still have to be digits, so malformed
    // input is rejected no matter where the cut-off happens.
    ulong frac_value = 0;

    foreach (i, c; frac_part)
    {
        if (c < '0' || c > '9')
        {
            return false;
        }

        if (i < decimal_points)
        {
            frac_value = frac_value * 10 + (c - '0');
        }
    }

    // Fill with zeros if too short
    for (size_t i = frac_part.length; i < decimal_points; ++i)
    {
        frac_value *= 10;
    }

    // At most 10^16, which fits into a ulong
    ulong multiplier = pow(cast(ulong) 10, decimal_points);

    if (int_value > Limit / multiplier)
    {
        return false; // multiplication overflow
    }

    ulong scaled = int_value * multiplier;

    if (frac_value > Limit - scaled)
    {
        return false; // addition overflow
    }

    value = cast(T) (scaled + frac_value);

    return true;
}

version (unittest)
{
    import ocean.core.Test;
}

unittest
{
    void testWith ( cstring str, ulong result, size_t dec_points )
    {
        ulong ret;
        test(floatStringToInt(str, ret, dec_points));
        test!("==")(ret, result);
    }

    testWith("0.16", 160, 3);
    testWith("0.59", 590, 3);
    testWith("3.29", 3_290, 3);
    testWith("0.16", 160, 3);
    testWith("4.00", 4_000, 3);
    testWith("3.5993754486719", 3_599, 3);
    testWith("0.99322729901677", 993, 3);
    testWith("1.05", 1_050, 3);
    testWith("0.5", 500, 3);
    testWith("2", 2_000, 3);

    testWith("2", 2, 0);
    testWith("2.1", 2, 0);
    testWith("2.123", 2, 0);
    testWith("2.123456", 2, 0);

    testWith("0.1", 10, 2);
    testWith("1.1", 110, 2);
    testWith("1", 100, 2);
    testWith("01", 100, 2);
    testWith("10.10", 1010, 2);
    testWith("225.04", 22504, 2);
    testWith("225.100000000000004", 22510, 2);
    testWith("225.000000000000004", 22500, 2);
    testWith("225.009999", 22500, 2);

    // trailing '.' without fractional digits
    testWith("7.", 700, 2);

    // largest representable result
    testWith("18446744073709551.615", ulong.max, 3);

    ulong result;
    test(!floatStringToInt("225.0.09999", result, 2));
    test(!floatStringToInt("10,10", result, 2));
    test(!floatStringToInt("0,1", result, 2));
    test(!floatStringToInt("1,1", result, 2));
    test(!floatStringToInt("6,6", result, 2));

    // malformed fractional part, also beyond the cut-off
    test(!floatStringToInt("225.00.9999", result, 2));
    test(!floatStringToInt("1.2x", result, 1));
    test(!floatStringToInt("1.+5", result, 2));
    test(!floatStringToInt("1.0b1", result, 3));

    // overflow is reported instead of wrapping around
    test(!floatStringToInt("18446744073709551615", result, 1));
    test(!floatStringToInt("18446744073709551.616", result, 3));

    // narrower result types
    ubyte small;
    test(floatStringToInt("2.5", small, 1));
    test!("==")(small, 25);
    test(!floatStringToInt("25.6", small, 1));
}


/*******************************************************************************

Parse a signed integer value from the provided string.

The string is inspected for a sign and an optional radix prefix. A radix may
be provided as an argument instead, whereupon it must match the prefix
(where present).
When radix is set to zero, conversion will default to
decimal.

Parsing succeeds only if the whole string is consumed and the value fits
into I. A negative zero ("-0") is accepted and yields 0.

Params:
    T = char type of string
    I = type of integer to extract
    digits = string to parse
    value = receives parsed integer
    radix = specifies which radix to interpret the string as

Returns:
    true if parsing succeeded

*******************************************************************************/

private bool toSignedInteger ( T, I ) ( T[] digits, out I value, uint radix = 0 )
{
    static assert(isSignedIntegerType!(I), "I must be signed integer type.");
    static assert(I.max <= long.max, "I cannot be larger than long.");

    static if (I.max < long.max)
    {
        // Narrow types: parse as long, then range-check against I
        long long_value;
        if ( !toSignedInteger(digits, long_value, radix) )
        {
            return false;
        }

        if ( long_value > I.max || long_value < I.min )
        {
            return false;
        }

        value = cast(I) long_value;
        return true;
    }
    else
    {
        static assert(is(I == long),
            "Largest signed integer type should be long.");

        // Magnitude of long.min; one more than long.max
        static immutable ulong MinMagnitude = cast(ulong) long.max + 1;

        bool negative;
        uint len;
        ulong x;

        auto trimmed = trim(digits, negative, radix);

        // convert() reports magnitudes that do not even fit into a ulong
        if ( !convert(digits[trimmed..$], x, len, radix) )
        {
            return false;
        }

        // Nothing parsed, or trailing characters left over
        if (len == 0 || trimmed + len < digits.length)
        {
            return false;
        }

        // Compare magnitudes in unsigned arithmetic. Negating x before the
        // comparison would wrap around and reject x == 0 ("-0").
        if (x > (negative ? MinMagnitude : cast(ulong) long.max))
        {
            return false;
        }

        // For x == MinMagnitude the negation wraps to the bit pattern of
        // long.min, which is exactly the intended value
        value = negative ? cast(long)(-x) : cast(long) x;
        return true;
    }
}


/*******************************************************************************

Parse an unsigned integer value from the provided string.

The string is inspected for a sign and an optional radix prefix. A radix may
be provided as an argument instead, whereupon it must match the prefix
(where present). When radix is set to zero, conversion will default to
decimal.
Parsing succeeds only if the whole string is consumed, no '-' sign is
present and the value fits into U.

Params:
    T = char type of string
    U = type of unsigned integer to extract
    digits = string to parse
    value = receives parsed unsigned integer
    radix = specifies which radix to interpret the string as

Returns:
    true if parsing succeeded

*******************************************************************************/

private bool toUnsignedInteger ( T, U ) ( T[] digits, out U value, uint radix = 0 )
{
    static assert(isUnsignedIntegerType!(U), "U must be unsigned integer type.");

    static if (U.max < ulong.max)
    {
        // Narrow types: parse as ulong, then range-check against U
        ulong long_value;
        if (!toUnsignedInteger(digits, long_value, radix))
        {
            return false;
        }

        // Only the upper bound can be violated; the lower bound of an
        // unsigned type is zero
        if (long_value > U.max)
        {
            return false;
        }

        value = cast(U) long_value;
        return true;
    }
    else
    {
        static assert(is(U == ulong),
            "Largest unsigned integer type should be ulong.");

        bool negative;
        uint len;
        ulong x;

        auto trimmed = trim(digits, negative, radix);
        if ( negative )
        {
            return false;
        }

        // convert() reports magnitudes that do not fit into a ulong
        if ( !convert(digits[trimmed..$], x, len, radix) )
        {
            return false;
        }

        // Nothing parsed, or trailing characters left over
        if (len == 0 || trimmed + len < digits.length)
        {
            return false;
        }

        value = x;
        return true;
    }
}


/*******************************************************************************

Convert the provided 'digits' into an integer value, without checking for a
sign or radix. The radix defaults to decimal (10).

Parsing fails (returning false) if 'digits' represents an integer of greater
magnitude than a ulong can store.
Conversion stops, without failing, at the first character that is not a
digit of the given radix; letters (either case) stand for the digit values
10 and above.

Params:
    T = char type of string
    digits = string to parse
    value = receives parsed integer
    eaten = receives the number of characters parsed
    radix = specifies which radix to interpret the string as

Returns:
    true if parsing succeeded

*******************************************************************************/

private bool convert ( T ) ( T[] digits, out ulong value, out uint eaten,
                             uint radix = 10 )
{
    foreach (Unqual!(T) ch; digits)
    {
        uint digit;

        // Map the character to its digit value; anything else ends parsing
        if (ch >= '0' && ch <= '9')
        {
            digit = ch - '0';
        }
        else if (ch >= 'a' && ch <= 'z')
        {
            digit = ch - 'a' + 10;
        }
        else if (ch >= 'A' && ch <= 'Z')
        {
            digit = ch - 'A' + 10;
        }
        else
        {
            break;
        }

        // Not a digit of this radix
        if (digit >= radix)
        {
            break;
        }

        // radix is at least 1 here, because digit < radix
        if (value > ulong.max / radix)
        {
            return false; // multiplication overflow
        }
        value *= radix;

        if (ulong.max - value < digit)
        {
            return false; // addition overflow
        }
        value += digit;

        ++eaten;
    }

    return true;
}


/*******************************************************************************

Strip leading whitespace, extract an optional +/- sign, and an optional
radix prefix. If the radix value matches an optional prefix, or the radix is
zero, the prefix will be consumed and assigned. Where the radix is non zero
and does not match an explicit prefix, the latter will remain unconsumed.
Otherwise, radix will default to 10.
Recognised prefixes are "0x", "0b" and "0o" (either case). An empty string
leaves both negative and radix untouched.

Params:
    T = char type of string
    digits = string to parse
    negative = set to true if the string indicates a negative number
    radix = receives the radix parsed from the string

Returns:
    the number of characters consumed

*******************************************************************************/

private ptrdiff_t trim ( T ) ( T[] digits, ref bool negative, ref uint radix )
{
    // Nothing to inspect: leave negative and radix untouched
    if (digits.length == 0)
    {
        return 0;
    }

    size_t pos = 0;

    // strip off whitespace and sign characters; the bound is checked before
    // every element access, so a string consisting only of such characters
    // never causes a read past the end of digits
    for (; pos < digits.length; ++pos)
    {
        auto c = digits[pos];

        if (c == ' ' || c == '\t')
        {}
        else
        if (c == '-')
            negative = true;
        else
        if (c == '+')
            negative = false;
        else
            break;
    }

    // strip off a radix specifier also?
    uint prefix_radix = 0;

    if (digits.length - pos > 1 && digits[pos] == '0')
    {
        switch (digits[pos + 1])
        {
            case 'x':
            case 'X':
                prefix_radix = 16;
                break;

            case 'b':
            case 'B':
                prefix_radix = 2;
                break;

            case 'o':
            case 'O':
                prefix_radix = 8;
                break;

            default:
                break;
        }
    }

    if (prefix_radix != 0 && (radix == 0 || radix == prefix_radix))
    {
        // the prefix is consumed only if it agrees with the requested radix
        // (or none was requested); otherwise it is left for the caller
        radix = prefix_radix;
        pos += 2;
    }
    else
    if (radix == 0)
    {
        // default the radix to 10
        radix = 10;
    }

    // return number of characters eaten
    return cast(ptrdiff_t) pos;
}



/*******************************************************************************

Unit test

*******************************************************************************/

unittest
{
    byte b;
    ubyte ub;
    short s;
    ushort us;
    int i;
    uint ui;
    long l;
    ulong ul;

    // basic functionality
    toByte("1", b); test(b == 1);
    toUbyte("1", ub); test(ub == 1);
    toShort("1", s); test(s == 1);
    toUshort("1", us); test(us == 1);
    toInt("1", i); test(i == 1);
    toUint("1", ui); test(ui == 1);
    toLong("1", l); test(l == 1);
    toUlong("1", ul); test(ul == 1);

    // basic functionality with wide chars
    toByte("1"w, b); test(b == 1);
    toUbyte("1"w, ub); test(ub == 1);
    toShort("1"w, s); test(s == 1);
    toUshort("1"w, us); test(us == 1);
    toInt("1"w, i); test(i == 1);
    toUint("1"w, ui); test(ui == 1);
    toLong("1"w, l); test(l == 1);
    toUlong("1"w, ul); test(ul == 1);

    // basic functionality with double chars
    toByte("1"d, b); test(b == 1);
    toUbyte("1"d, ub); test(ub == 1);
    toShort("1"d, s); test(s == 1);
    toUshort("1"d, us); test(us == 1);
    toInt("1"d, i); test(i == 1);
    toUint("1"d, ui); test(ui == 1);
    toLong("1"d, l); test(l == 1);
    toUlong("1"d, ul); test(ul == 1);

    // basic signed functionality
    toByte("+1", b); test(b == 1);
    toUbyte("+1", ub); test(ub == 1);
    toShort("+1", s); test(s == 1);
    toUshort("+1", us); test(us == 1);
    toInt("+1", i); test(i == 1);
    toUint("+1", ui); test(ui == 1);
    toLong("+1", l); test(l == 1);
    toUlong("+1", ul); test(ul == 1);

    toByte("-1", b); test(b == -1);
    test(!toUbyte("-1", ub));
    toShort("-1", s); test(s == -1);
    test(!toUshort("-1", us));
    toInt("-1", i); test(i == -1);
    test(!toUint("-1", ui));
    toLong("-1", l); test(l == -1);
    test(!toUlong("-1", ul));

    // basic functionality + radix
    toByte("1", b, 10); test(b == 1);
    toUbyte("1", ub, 10); test(ub == 1);
    toShort("1", s, 10); test(s == 1);
    toUshort("1", us, 10); test(us == 1);
    toInt("1", i, 10); test(i == 1);
    toUint("1", ui, 10); test(ui == 1);
    toLong("1", l, 10); test(l == 1);
    toUlong("1", ul, 10); test(ul == 1);

    // numerical limits
    toByte("-128", b); test(b == byte.min);
    toByte("127", b); test(b == byte.max);
    toUbyte("255", ub); test(ub == ubyte.max);
    toShort("-32768", s); test(s == short.min);
    toShort("32767", s); test(s == short.max);
    toUshort("65535", us); test(us == ushort.max);
    toInt("-2147483648", i); test(i == int.min);
    toInt("2147483647", i); test(i == int.max);
    toUint("4294967295", ui); test(ui == uint.max);
    toLong("-9223372036854775808", l); test(l == long.min);
    toLong("9223372036854775807", l); test(l == long.max);
    toUlong("18446744073709551615", ul); test(ul == ulong.max);

    // beyond numerical limits
    test(!toByte("-129", b));
    test(!toByte("128", b));
    test(!toUbyte("256", ub));
    test(!toShort("-32769", s));
    test(!toShort("32768", s));
    test(!toUshort("65536", us));
    test(!toInt("-2147483649", i));
    test(!toInt("2147483648", i));
    test(!toUint("4294967296", ui));
    test(!toLong("-9223372036854775809", l));
    test(!toLong("9223372036854775808", l));
    test(!toUlong("18446744073709551616", ul));

    test(!toLong("-0x12345678123456789", l));
    test(!toLong("0x12345678123456789", l));
    test(!toUlong("0x12345678123456789", ul));

    // hex
    toInt("a", i, 16); test(i == 0xa);
    toInt("b", i, 16); test(i == 0xb);
    toInt("c", i, 16); test(i == 0xc);
    toInt("d", i, 16); test(i == 0xd);
    toInt("e", i, 16); test(i == 0xe);
    toInt("f", i, 16); test(i == 0xf);
    toInt("A", i, 16); test(i == 0xa);
    toInt("B", i, 16); test(i == 0xb);
    toInt("C", i, 16); test(i == 0xc);
    toInt("D", i, 16); test(i == 0xd);
    toInt("E", i, 16); test(i == 0xe);
    toInt("F", i, 16); test(i == 0xf);

    toUlong("FF", ul, 16); test(ul == ubyte.max);
    toUlong("FFFF", ul, 16); test(ul == ushort.max);
    toUlong("ffffFFFF", ul, 16); test(ul == uint.max);
    toUlong("ffffFFFFffffFFFF", ul, 16); test(ul == ulong.max);

    // oct
    toInt("55", i, 8); test(i == 45);
    toInt("100", i, 8); test(i == 64);

    // bin
    toInt("10000", i, 2); test(i == 0b10000);

    // trim
    toInt(" \t20", i); test(i == 20);
    toInt(" \t-20", i); test(i == -20);
    toInt("- \t 20", i); test(i == -20);

    // recognise radix prefix
    toUlong("0xFFFF", ul); test(ul == ushort.max);
    toUlong("0XffffFFFF", ul); test(ul == uint.max);
    toUlong("0o55", ul); test(ul == 45);
    toUlong("0O100", ul); test(ul == 64);
    toUlong("0b10000", ul); test(ul == 0b10000);
    toUlong("0B1010", ul); test(ul == 0b1010);

    // radix prefix matching the explicit radix is consumed
    test(toUlong("0x10", ul, 16)); test(ul == 0x10);

    // recognise wrong radix prefix
    test(!toUlong("0x10", ul, 10));
    test(!toUlong("0b10", ul, 10));
    test(!toUlong("0o10", ul, 10));

    // empty string handling (parsing error)
    test(!toInt("", i));
    test(!toUint("", ui));
    test(!toLong("", l));
    test(!toUlong("", ul));

    // strings made up of whitespace / sign characters only (parsing error)
    test(!toInt(" ", i));
    test(!toInt(" \t", i));
    test(!toInt("-", i));
    test(!toInt("+", i));
    test(!toUlong("+", ul));

    // a lone radix prefix without digits (parsing error)
    test(!toUlong("0x", ul));
}