/*******************************************************************************

    A set of functions for converting between string and integer
    values.

    Applying the D "import alias" mechanism to this module is highly
    recommended, in order to limit namespace pollution:
    ---
    import Integer = ocean.text.convert.Integer_tango;

    auto i = Integer.parse ("32767");
    ---

    Copyright:
        Copyright (c) 2004 Kris Bell.
        Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
        See LICENSE_TANGO.txt for details.

    Version: Initial release: Nov 2005

    Authors: Kris

*******************************************************************************/

module ocean.text.convert.Integer_tango;

import ocean.meta.types.Qualifiers;
import ocean.core.ExceptionDefinitions;
import ocean.core.Verify;
import ocean.meta.traits.Basic;

/******************************************************************************

    Parse an integer value from the provided 'digits' string.

    The string is inspected for a sign and an optional radix
    prefix. A radix may be provided as an argument instead,
    whereupon it must match the prefix (where present). When
    radix is set to zero, conversion will default to decimal.

    Throws: IllegalArgumentException where the input text is not parsable
    in its entirety, or where the parsed value does not fit into an `int`
    (in either direction).

    See_also: the low level functions parse() and convert()

******************************************************************************/

int toInt(T) (T[] digits, uint radix=0)
{
    auto x = toLong (digits, radix);
    // Both bounds must be checked: the cast below would otherwise silently
    // truncate values smaller than int.min.
    if (x > int.max || x < int.min)
        throw new IllegalArgumentException ("Integer.toInt :: integer overflow");
    return cast(int) x;
}

/******************************************************************************

    Parse an integer value from the provided 'digits' string.

    The string is inspected for a sign and an optional radix
    prefix. A radix may be provided as an argument instead,
    whereupon it must match the prefix (where present). When
    radix is set to zero, conversion will default to decimal.

    Throws: IllegalArgumentException where the input text is not parsable
    in its entirety. Input without any digit (including the empty string)
    is rejected, consistently with toUlong().

    See_also: the low level functions parse() and convert()

******************************************************************************/

long toLong(T) (T[] digits, uint radix=0)
{
    uint len;

    auto x = parse (digits, radix, &len);
    // len == 0 means that no digit was consumed at all (e.g. "" or "-")
    if (len == 0 || len < digits.length)
        throw new IllegalArgumentException ("Integer.toLong :: invalid literal");
    return x;
}

/******************************************************************************

    Parse an unsigned integer value from the provided 'digits' string.

    The string is inspected for an optional radix prefix. A
    radix may be provided as an argument instead, whereupon
    it must match the prefix (where present). When radix is
    set to zero, conversion will default to decimal.

    Throws: IllegalArgumentException where the input text is not parsable
    in its entirety.

    See_also: the low level functions parse() and convert()

******************************************************************************/

ulong toUlong(T) (T[] digits, uint radix=0)
{
    bool sign = false;

    auto eaten = trim (digits, sign, radix);
    // a '-' sign is not acceptable for an unsigned value
    if (sign)
        throw new IllegalArgumentException ("Integer.toUlong :: invalid literal");

    uint len = 0;
    auto value = convert (digits[eaten..$], radix, &len);
    // reject input without digits as well as trailing garbage
    if (len == 0 || eaten + len < digits.length)
        throw new IllegalArgumentException ("Integer.toUlong :: invalid literal");

    return value;
}

/******************************************************************************

    Wrapper to make life simpler. Returns a text version
    of the provided value.

    The text is formatted into a stack buffer and then duplicated, so the
    returned array is owned by the caller.

    See format() for details

******************************************************************************/

char[] toString (long i, char[] fmt = null)
{
    char[66] tmp = void;
    return format (tmp, i, fmt).dup;
}

/******************************************************************************

    Wrapper to make life simpler. Returns a text version
    of the provided value.

    The text is formatted into a stack buffer and then duplicated, so the
    returned array is owned by the caller.

    See format() for details

******************************************************************************/

wchar[] toString16 (long i, wchar[] fmt = null)
{
    wchar[66] tmp = void;
    return format (tmp, i, fmt).dup;
}

/******************************************************************************

    Wrapper to make life simpler. Returns a text version
    of the provided value.

    The text is formatted into a stack buffer and then duplicated, so the
    returned array is owned by the caller.

    See format() for details

******************************************************************************/

dchar[] toString32 (long i, dchar[] fmt = null)
{
    dchar[66] tmp = void;
    return format (tmp, i, fmt).dup;
}

/*******************************************************************************

    Supports format specifications via an array, where format follows
    the notation given below:
    ---
    type width prefix
    ---

    Type is one of [d, g, u, b, x, o] or uppercase equivalent, and
    dictates the conversion radix or other semantics.

    Width is optional and indicates a minimum width for zero-padding,
    while the optional prefix is one of ['#', ' ', '+'] and indicates
    what variety of prefix should be placed in the output. e.g.
    ---
    "d"     => integer
    "u"     => unsigned
    "o"     => octal
    "b"     => binary
    "x"     => hexadecimal
    "X"     => hexadecimal uppercase

    "d+"    => integer prefixed with "+"
    "b#"    => binary prefixed with "0b"
    "x#"    => hexadecimal prefixed with "0x"
    "X#"    => hexadecimal prefixed with "0X"

    "d8"    => decimal padded to 8 places as required
    "b8"    => binary padded to 8 places as required
    "b8#"   => binary padded to 8 places and prefixed with "0b"
    ---

    Note that the specified width is exclusive of the prefix, though
    the width padding will be shrunk as necessary in order to ensure
    a requested prefix can be inserted into the provided output.

    Params:
        dst = output buffer; the result is written at its end
        i   = integer value to format
        fmt = format specification, `null` or empty means "d"

    Returns:
        a slice of `dst` holding the formatted text, or a literal error
        message if the type is unknown or `dst` is too small

*******************************************************************************/

const(T)[] format(T, N) (T[] dst, N i, in T[] fmt = null)
{
    static assert(isIntegerType!(N),
                  "Integer_tango.format only supports integers");

    char    pre,
            type;
    int     width;

    decode (fmt, type, pre, width);
    return formatter (dst, i, type, pre, width);
}

/*******************************************************************************

    Splits a format specification into its components.

    The first character is the type. In the remainder, decimal digits are
    accumulated into `width` and any other character is stored in `pre`
    (the last one wins).

    Params:
        fmt   = format specification; if empty, `type` is set to 'd'
        type  = receives the type character
        pre   = receives the prefix character (`char.init` if none)
        width = receives the padding width (0 if none)

*******************************************************************************/

private void decode(T) (T[] fmt, ref char type, out char pre, out int width)
{
    if (fmt.length is 0)
        type = 'd';
    else
    {
        type = cast(char) fmt[0];
        if (fmt.length > 1)
        {
            auto p = &fmt[1];
            for (int j=1; j < fmt.length; ++j, ++p)
            {
                if (*p >= '0' && *p <= '9')
                    width = width * 10 + (*p - '0');
                else
                    pre = cast(char) *p;
            }
        }
    }
}

/// One row of the table used by formatter(): conversion radix (negative for
/// negative decimal values), prefix text and digit alphabet.
private struct _FormatterInfo(T)
{
    byte    radix;
    T[]     prefix;
    T[]     numbers;
}

/*******************************************************************************

    Low level worker of format(): writes `i_` right-aligned into `dst`
    according to the already decoded format components.

    Params:
        dst   = output buffer
        i_    = integer value to format
        type  = type character, see format()
        pre   = prefix character, see format()
        width = minimum number of digits (zero-padded), 0 for none

    Returns:
        a slice of `dst` holding the formatted text, or a literal error
        message if the type is unknown or `dst` is too small

*******************************************************************************/

const(T)[] formatter(T, N) (T[] dst, N i_, char type, char pre, int width)
{
    static assert(isIntegerType!(N),
                  "Integer_tango.formatter only supports integers");
    Unqual!(N) i = i_;


    static immutable immutable(T)[] lower = "0123456789abcdef";
    static immutable immutable(T)[] upper = "0123456789ABCDEF";

    alias _FormatterInfo!(immutable(T)) Info;

    // Indexed by `index` below. Rows 1-3 are the signed decimal variants.
    static immutable Info[] formats = [
        { 10, null, lower},
        { -10, "-" , lower},
        { 10, " " , lower},
        { 10, "+" , lower},
        { 2, "0b", lower},
        { 8, "0o", lower},
        { 16, "0x", lower},
        { 16, "0X", upper},
    ];

    ubyte index;
    int len = cast(int) dst.length;

    if (len)
    {
        switch (type)
        {
            case 'd':
            case 'D':
            case 'g':
            case 'G':
                if (i < 0)
                    index = 1;
                else
                    if (pre is ' ')
                        index = 2;
                    else
                        if (pre is '+')
                            index = 3;
                goto case;
            case 'u':
            case 'U':
                // decimal output always emits the prefix of the selected
                // row (which is null for row 0)
                pre = '#';
                break;

            case 'b':
            case 'B':
                index = 4;
                break;

            case 'o':
            case 'O':
                index = 5;
                break;

            case 'x':
                index = 6;
                break;

            case 'X':
                index = 7;
                break;

            default:
                return cast(T[])"{unknown format '"~cast(T)type~"'}";
        }

        auto info = &formats[index];
        auto numbers = info.numbers;
        auto radix = info.radix;

        // convert number to text, writing digits backwards from the end
        auto p = dst.ptr + len;


        // Base 10 formatting
        if (index <= 3 && index)
        {
            verify((i >= 0 && radix > 0) || (i < 0 && radix < 0));

            do
                *--p = numbers[abs(i % radix)];
            while ((i /= radix) && --len);
        }
        else // Those numbers are not signed
        {
            ulong v = reinterpretInteger!(ulong)(i);
            do
                *--p = numbers[v % radix];
            while ((v /= radix) && --len);
        }

        auto prefix = (pre is '#') ? info.prefix : null;
        // len == 0 here means the digits did not fit into dst
        if (len > prefix.length)
        {
            len -= prefix.length + 1;

            // prefix number with zeros?
            if (width)
            {
                width = cast(int) (dst.length - width - prefix.length);
                while (len > width && len > 0)
                {
                    *--p = '0';
                    --len;
                }
            }
            // write optional prefix string ...
            dst [len .. len + prefix.length] = prefix;

            // return slice of provided output buffer
            return dst [len .. $];
        }
    }

    return "{output width too small}";
}

/******************************************************************************

    Parse an integer value from the provided 'digits' string.

    The string is inspected for a sign and an optional radix
    prefix. A radix may be provided as an argument instead,
    whereupon it must match the prefix (where present). When
    radix is set to zero, conversion will default to decimal.

    A non-null 'ate' will return the number of characters used
    to construct the returned value. It is zero if no digit was found.

    Overflow of the parsed value is not detected.

    Throws: none. The 'ate' param should be checked for valid input.

******************************************************************************/

long parse(T) (T[] digits, uint radix=0, uint* ate=null)
{
    bool sign;

    auto eaten = trim (digits, sign, radix);
    auto value = convert (digits[eaten..$], radix, ate);

    // check *ate > 0 to make sure we don't parse "-" as 0.
    if (ate && *ate > 0)
        *ate += eaten;

    return cast(long) (sign ? -value : value);
}

/******************************************************************************

    Convert the provided 'digits' into an integer value,
    without checking for a sign or radix. The radix defaults
    to decimal (10).

    Letters (either case) stand for the digit values 10 and up. Conversion
    stops at the first character that is not a digit of the given radix.

    Returns the value and updates 'ate' with the number of
    characters consumed. Overflow of the value is not detected.

    Throws: none. The 'ate' param should be checked for valid input.

******************************************************************************/

ulong convert(T) (T[] digits, uint radix=10, uint* ate=null)
{
    uint  eaten;
    ulong value;

    foreach (Unqual!(T) c; digits)
    {
        // map 'a'..'z' and 'A'..'Z' right behind '9', so that subtracting
        // '0' below yields the digit value
        if (c >= '0' && c <= '9')
            {}
        else
            if (c >= 'a' && c <= 'z')
                c -= 39;
            else
                if (c >= 'A' && c <= 'Z')
                    c -= 7;
                else
                    break;

        if ((c -= '0') < radix)
        {
            value = value * radix + c;
            ++eaten;
        }
        else
            break;
    }

    if (ate)
        *ate = eaten;

    return value;
}

/******************************************************************************

    Strip leading whitespace, extract an optional +/- sign,
    and an optional radix prefix. If the radix value matches
    an optional prefix, or the radix is zero, the prefix will
    be consumed and assigned. Where the radix is non zero and
    does not match an explicit prefix, the latter will remain
    unconsumed. Otherwise, radix will default to 10.

    For empty input, 'sign' and 'radix' are left untouched.

    Returns the number of characters consumed.

******************************************************************************/

uint trim(T) (T[] digits, ref bool sign, ref uint radix)
{
    size_t pos = 0;

    if (digits.length)
    {
        // strip off whitespace and sign characters; the index is checked
        // before each read so that input consisting solely of such
        // characters never causes a read past the end
        for (; pos < digits.length; ++pos)
        {
            auto c = digits[pos];

            if (c is ' ' || c is '\t')
                {}
            else
                if (c is '-')
                    sign = true;
                else
                    if (c is '+')
                        sign = false;
                    else
                        break;
        }

        // strip off a radix specifier also? It requires a '0' followed by
        // at least one more character.
        auto r = radix;
        size_t prefix_len = 0;
        if (digits.length - pos > 1 && digits[pos] is '0')
        {
            switch (digits[pos + 1])
            {
                case 'x':
                case 'X':
                    r = 16;
                    prefix_len = 2;
                    break;

                case 'b':
                case 'B':
                    r = 2;
                    prefix_len = 2;
                    break;

                case 'o':
                case 'O':
                    r = 8;
                    prefix_len = 2;
                    break;

                default:
                    break;
            }
        }

        if (r is 0)
        {
            // neither an explicit radix nor a prefix: default to 10
            radix = 10;
        }
        else if (radix == r)
        {
            // explicit radix agrees with the prefix (if there is one)
            pos += prefix_len;
        }
        else if (radix is 0)
        {
            // no explicit radix: adopt the one given by the prefix
            radix = r;
            pos += prefix_len;
        }
        // else: explicit radix must match the prefix; on mismatch the
        // prefix stays unconsumed
    }

    // return number of characters eaten
    return cast(uint) pos;
}

/******************************************************************************

    quick & dirty text-to-unsigned int converter. Use only when you
    know what the content is, or use parse() or convert() instead.

    Only the characters '0'..'9' are recognised, whatever the radix;
    conversion stops at the first other character. Overflow is not detected.

    Return the parsed uint

******************************************************************************/

uint atoi(T) (T[] s, int radix = 10)
{
    uint value;

    foreach (c; s)
        if (c >= '0' && c <= '9')
            value = value * radix + (c - '0');
        else
            break;
    return value;
}


/******************************************************************************

    quick & dirty unsigned to text converter, where the provided output
    must be large enough to house the result (10 digits in the largest
    case). For mainstream use, consider utilizing format() instead.

    The output size is not checked. Digit values are emitted as an offset
    from '0', so a radix above 10 does not produce letters.

    Returns a populated slice of the provided output

******************************************************************************/

T[] itoa(T) (T[] output, uint value, int radix = 10)
{
    T* p = output.ptr + output.length;

    // digits are written backwards, starting at the end of the buffer
    do {
        *--p = cast(T)(value % radix + '0');
    } while (value /= radix);
    return output[cast(size_t) (p-output.ptr) .. $];
}

/******************************************************************************

    Consume a number from the input without converting it. Argument
    'fp' enables floating-point consumption. Supports hex input for
    numbers which are prefixed appropriately

    Returns a slice of 'src' starting at its beginning (thus including any
    leading whitespace, sign and radix prefix) and ending after the
    consumed number, or null if nothing follows the part stripped by trim().

    Since version 0.99.9

******************************************************************************/

T[] consume(T) (T[] src, bool fp=false)
{
    bool sign;
    uint radix;

    // remove leading space, and sign
    size_t pos = trim (src, sign, radix);
    auto start = pos;

    // bail out if the string is empty
    if (pos >= src.length)
        return null;

    // Every read below is preceded by a bounds check, so the scan can
    // neither read nor advance beyond the end of 'src'.

    // read leading digits
    while (pos < src.length
           && ((src[pos] >= '0' && src[pos] <= '9')
               || (radix is 16
                   && ((src[pos] >= 'a' && src[pos] <= 'f')
                       || (src[pos] >= 'A' && src[pos] <= 'F')))))
        ++pos;

    if (fp)
    {
        // gobble up a point
        if (pos < src.length && src[pos] is '.')
            ++pos;

        // read fractional digits
        while (pos < src.length && src[pos] >= '0' && src[pos] <= '9')
            ++pos;

        // did we consume anything?
        if (pos > start)
        {
            // consume exponent?
            if (pos < src.length && (src[pos] is 'e' || src[pos] is 'E'))
            {
                ++pos;
                if (pos < src.length && (src[pos] is '+' || src[pos] is '-'))
                    ++pos;
                while (pos < src.length && src[pos] >= '0' && src[pos] <= '9')
                    ++pos;
            }
        }
    }
    return src [0 .. pos];
}

/*******************************************************************************

    Get the absolute value of a number

    The number should not be == `T.min` if `T` is a signed number.
    Since signed numbers use the two's complement, `-T.min` cannot be
    represented: It would be `T.max + 1`.
    Trying to calculate `-T.min` causes an integer overflow and results in
    `T.min`.

    Params:
        x = A value between `T.min` (exclusive for signed number) and `T.max`

    Returns:
        The absolute value of `x` (`|x|`)

*******************************************************************************/

private T abs (T) (T x)
{
    static if (T.min < 0)
    {
        verify(x != T.min,
               "abs cannot be called with x == " ~ T.stringof ~ ".min");
    }
    return x >= 0 ? x : -x;
}


/*******************************************************************************

    Truncates or zero-extend a value of type `From` to fit into `To`.

    Getting the same binary representation of a number in a larger type can be
    quite tedious, especially when it comes to negative numbers.
    For example, turning `byte(-1)` into `long` or `ulong` gives different
    result.
    This functions allows to get the same exact binary representation of an
    integral type into another. If the representation is truncating, it is
    just a cast. If it is widening, it zero extends `val`.

    Params:
        To   = Type to convert to
        From = Type to convert from. If not specified, it is inferred from
               val, so it will be an `int` when passing a literal.
        val  = Value to reinterpret

    Returns:
        Binary representation of `val` typed as `To`

*******************************************************************************/

private To reinterpretInteger (To, From) (From val)
{
    static if (From.sizeof >= To.sizeof)
        return cast(To) val;
    else
    {
        static assert(To.sizeof <= ulong.sizeof,
                      "reinterpretInteger supports targets up to 64 bits");

        // Widening to ulong may sign-extend; masking off everything above
        // the low `From.sizeof` bytes turns that into a zero extension.
        // This does not depend on the byte order of the platform.
        // The shift is below 64 as From.sizeof < To.sizeof <= 8.
        enum ulong mask = (1UL << (From.sizeof * 8)) - 1;
        return cast(To) (cast(ulong) val & mask);
    }
}


/******************************************************************************

    Manual test program, only compiled with -debug=Integer

******************************************************************************/

debug (Integer)
{
    import ocean.io.Stdout;

    void main()
    {
        char[8] tmp;

        Stdout.formatln ("d '{}'", format(tmp, 10));
        Stdout.formatln ("d '{}'", format(tmp, -10));

        Stdout.formatln ("u '{}'", format(tmp, 10L, "u"));
        Stdout.formatln ("U '{}'", format(tmp, 10L, "U"));
        Stdout.formatln ("g '{}'", format(tmp, 10L, "g"));
        Stdout.formatln ("G '{}'", format(tmp, 10L, "G"));
        Stdout.formatln ("o '{}'", format(tmp, 10L, "o"));
        Stdout.formatln ("O '{}'", format(tmp, 10L, "O"));
        Stdout.formatln ("b '{}'", format(tmp, 10L, "b"));
        Stdout.formatln ("B '{}'", format(tmp, 10L, "B"));
        Stdout.formatln ("x '{}'", format(tmp, 10L, "x"));
        Stdout.formatln ("X '{}'", format(tmp, 10L, "X"));

        Stdout.formatln ("d+ '{}'", format(tmp, 10L, "d+"));
        Stdout.formatln ("ds '{}'", format(tmp, 10L, "d "));
        Stdout.formatln ("d# '{}'", format(tmp, 10L, "d#"));
        Stdout.formatln ("x# '{}'", format(tmp, 10L, "x#"));
        Stdout.formatln ("X# '{}'", format(tmp, 10L, "X#"));
        Stdout.formatln ("b# '{}'", format(tmp, 10L, "b#"));
        Stdout.formatln ("o# '{}'", format(tmp, 10L, "o#"));

        Stdout.formatln ("d1 '{}'", format(tmp, 10L, "d1"));
        Stdout.formatln ("d8 '{}'", format(tmp, 10L, "d8"));
        Stdout.formatln ("x8 '{}'", format(tmp, 10L, "x8"));
        Stdout.formatln ("X8 '{}'", format(tmp, 10L, "X8"));
        Stdout.formatln ("b8 '{}'", format(tmp, 10L, "b8"));
        Stdout.formatln ("o8 '{}'", format(tmp, 10L, "o8"));

        Stdout.formatln ("d1# '{}'", format(tmp, 10L, "d1#"));
        Stdout.formatln ("d6# '{}'", format(tmp, 10L, "d6#"));
        Stdout.formatln ("x6# '{}'", format(tmp, 10L, "x6#"));
        Stdout.formatln ("X6# '{}'", format(tmp, 10L, "X6#"));

        Stdout.formatln ("b12# '{}'", format(tmp, 10L, "b12#"));
        Stdout.formatln ("o12# '{}'", format(tmp, 10L, "o12#")).newline;

        Stdout.formatln (consume("10"));
        Stdout.formatln (consume("0x1f"));
        Stdout.formatln (consume("0.123"));
        Stdout.formatln (consume("0.123", true));
        Stdout.formatln (consume("0.123e-10", true)).newline;

        Stdout.formatln (consume("10 s"));
        Stdout.formatln (consume("0x1f s"));
        Stdout.formatln (consume("0.123 s"));
        Stdout.formatln (consume("0.123 s", true));
        Stdout.formatln (consume("0.123e-10 s", true)).newline;
    }
}