/******************************************************************************

    C string and character tool functions

    C string and character tool functions and null terminator utilities

    Copyright:
        Copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
        Alternatively, this file may be distributed under the terms of the Tango
        3-Clause BSD License (see LICENSE_BSD.txt for details).

 ******************************************************************************/

module ocean.text.util.StringSearch;


import ocean.transition;
import ocean.core.Verify;

import c_stddef = core.stdc.stddef: wchar_t;
import c_wctype = core.stdc.wctype;
import c_ctype = core.stdc.ctype;
import c_string = ocean.stdc.string;

import ocean.math.Math: min;

version (UnitTest)
{
    import ocean.core.Test;
}


/++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    /**************************************************************************

        Descriptions for public alias methods

     **************************************************************************/

    /**
     * Returns the length of "str" without null terminator.
     *
     * Params:
     *      str = string (must be null terminated)
     *
     * Returns:
     *      length of "str" without null terminator
     */
    size_t lengthOf ( Char* str );


    /**
     * Tells whether "chr" is
     *  isCntrl    -- a control character or
     *  isSpace    -- whitespace or
     *  isGraph    -- a character with a graphical representation or
     *  isPrint    -- printable or
     *  isPunct    -- a punctuation character or
     *  isAlpha    -- a letter or
     *  isLower    -- a lower case letter or
     *  isUpper    -- an upper case letter or
     *  isAlNum    -- a letter or a decimal digit or
     *  isDigit    -- a decimal digit or
     *  isHexDigit -- a hexadecimal digit.
     *
     * Params:
     *      chr = character to identify
     * Returns:
     *      true if the character is of the specified class or false otherwise
     */
    bool isCntrl ( Char chr );
    bool isSpace ( Char chr );

    bool isGraph ( Char chr );
    bool isPrint ( Char chr );
    bool isPunct ( Char chr );

    bool isAlpha ( Char chr );
    bool isAlNum ( Char chr );
    bool isDigit ( Char chr );
    bool isHexDigit ( Char chr );


    bool isLower ( Char chr );
    bool isUpper ( Char chr );


    /**
     * Converts "chr"
     *  toLower -- to lower case or
     *  toUpper -- to upper case.
     *
     * Params:
     *      chr = character to convert
     *
     * Returns:
     *      converted character
     */
    Char toLower ( Char chr );
    Char toUpper ( Char chr );


    /**************************************************************************

        Explanations for private alias methods

     **************************************************************************/

    /**
     * Returns the index of the first occurrence of one of the characters in
     * "charset" in "str".
     *
     * Params:
     *     str     = string to scan for characters in "charset"
     *     charset = search character set
     * Returns:
     *     the index of the first character of "str" that is in "charset", or
     *     the length of "str" if there is none
     */
    size_t pLocateFirstInSet ( Char* str, Char* charset );


    /**
     * Returns a pointer to the first occurrence of "pattern" in "str".
     *
     * Params:
     *     str     = string to scan for "pattern"
     *     pattern = search pattern
     * Returns:
     *     a pointer to the first occurrence of "pattern" in "str" or null if
     *     "pattern" was not found
     */
    Char* pLocatePattern ( Char* str, Char* pattern );


    /**
     * Moves src[0 .. n] to dst[0 .. n]. "src" and "dst" may overlap.
     *
     * Params:
     *     dst = pointer to destination
     *     src = pointer to source
     *     n   = number of elements to move
     * Returns:
     *     dst
     */
    Char* pMemMove ( Char* dst, Char* src, size_t n );


    /**
     * Returns a pointer to the first occurrence of "chr" within the first "n"
     * elements of "str".
151 * 152 * Params: 153 * str = string to scan for "chr" 154 * chr = search character 155 * n = number of characters to scan for "chr" 156 * Returns: 157 */ 158 Char* pLocateBinChar ( Char* str, Char chr, size_t n ); 159 160 161 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++/ 162 163 /****************************************************************************** 164 165 StringSearch structure 166 167 ******************************************************************************/ 168 169 struct StringSearch ( bool wide_char = false ) 170 { 171 alias c_stddef.wchar_t WcharT; 172 173 static if (wide_char) 174 { 175 alias WcharT Char; 176 177 alias c_wctype.iswcntrl isCntrl; 178 alias c_wctype.iswspace isSpace; 179 180 alias c_wctype.iswgraph isGraph; 181 alias c_wctype.iswprint isPrint; 182 alias c_wctype.iswpunct isPunct; 183 184 alias c_wctype.iswalpha isAlpha; 185 alias c_wctype.iswalnum isAlNum; 186 alias c_wctype.iswdigit isDigit; 187 alias c_wctype.iswxdigit isHexDigit; 188 189 alias c_wctype.iswlower isLower; 190 alias c_wctype.iswupper isUpper; 191 192 alias c_wctype.towlower toLower; 193 alias c_wctype.towupper toUpper; 194 195 alias c_string.wcslen lengthOf; 196 197 alias c_string.wmemchr pLocateBinChar; 198 199 alias c_string.wcsstr pLocatePattern; 200 alias c_string.wmemmove pMemMove; 201 alias c_string.wcscspn pLocateFirstInSet; 202 203 alias c_string.wcstok pSplit; 204 } 205 else 206 { 207 alias char Char; 208 209 alias c_ctype.iscntrl isCntrl; 210 alias c_ctype.isspace isSpace; 211 212 alias c_ctype.isgraph isGraph; 213 alias c_ctype.isprint isPrint; 214 alias c_ctype.ispunct isPunct; 215 216 alias c_ctype.isalpha isAlpha; 217 alias c_ctype.isalnum isAlNum; 218 alias c_ctype.isdigit isDigit; 219 alias c_ctype.isxdigit isHexDigit; 220 221 alias c_ctype.islower isLower; 222 alias c_ctype.isupper isUpper; 223 224 alias c_ctype.tolower toLower; 225 alias c_ctype.toupper toUpper; 226 227 alias c_string.strlen lengthOf; 228 229 
alias c_string.memchr pLocateBinChar; 230 231 alias c_string.strstr pLocatePattern; 232 alias c_string.memmove pMemMove; 233 alias c_string.strcspn pLocateFirstInSet; 234 235 alias c_string.strtok pSplit; 236 237 } 238 239 static: 240 241 enum Char TERM = '\0'; 242 243 /** 244 * Locates the first occurence of value within the first length characters 245 * of str. If greater, length is truncated to the length of str. 246 * 247 * Params: 248 * str = string to search for value 249 * value = element value to find 250 * start = start index 251 * length = number of elements to examine 252 * 253 * Returns: 254 * the index of the first element with value "value" or the index of 255 * the last examined element + 1 256 */ 257 size_t locateChar ( in Char[] str, Char value, size_t start, size_t length ) 258 { 259 verify (start <= str.length, "locateChar: start index out of range"); 260 261 length = min(length, str.length); 262 263 Const!(void)* item = pLocateBinChar(str.ptr + start, value, length - start); 264 Const!(void)* pstr = str.ptr; 265 266 return item? (item - pstr) : length; 267 } 268 269 /// 270 unittest 271 { 272 test!("==")(StringSearch!().locateChar("Hello", 'l', 5, size_t.max), 5); 273 test!("==")(StringSearch!().locateChar("Hello", 'l', 2, size_t.max), 2); 274 test!("==")(StringSearch!().locateChar("Hello", 'l', 3, size_t.max), 3); 275 test!("==")(StringSearch!().locateChar("Hello", 'o', 5, size_t.max), 5); 276 test!("==")(StringSearch!().locateChar("Hello", 'o', 4, size_t.max), 4); 277 test!("==")(StringSearch!().locateChar("Hello", 'o', 0, size_t.max), 4); 278 // Test searches in a limited region of the input string 279 test!("==")(StringSearch!().locateChar("Hello", 'l', 0, 0), 0); 280 test!("==")(StringSearch!().locateChar("Hello", 'l', 0, 2), 2); 281 test!("==")(StringSearch!().locateChar("Hello", 'l', 0, 3), 2); 282 } 283 284 285 /** 286 * Locates the first occurence of value within str. 
287 * 288 * Params: 289 * str = string to search for "value" 290 * value = element value to find 291 * start = start index 292 * 293 * Returns: 294 * the index of the first element with value "value" or the index of 295 * the last examined element + 1 296 */ 297 size_t locateChar ( in Char[] str, Char value, size_t start = 0 ) 298 { 299 return locateChar(str, value, start, size_t.max); 300 } 301 302 /** 303 * Tells whether the first length characters of str, starting fromo start, 304 * contain value. If greater, length is truncated to the length of str. 305 * 306 * Params: 307 * str = string to search for value 308 * value = value to search for 309 * start = start index 310 * length = number of elements to examine 311 * 312 * Returns: 313 * true if str contains value or false otherwise 314 */ 315 bool containsChar ( in Char[] str, Char value, size_t start, size_t length ) 316 { 317 verify (start <= str.length, "containsChar: start index out of range"); 318 319 length = min(length, str.length); 320 321 return !!pLocateBinChar(str.ptr + start, value, length - start); 322 } 323 324 /// 325 unittest 326 { 327 test(!StringSearch!().containsChar("Hello", 'l', 5, size_t.max)); 328 test(StringSearch!().containsChar("Hello", 'l', 2, size_t.max)); 329 test(StringSearch!().containsChar("Hello", 'l', 3, size_t.max)); 330 test(!StringSearch!().containsChar("Hello", 'o', 5, size_t.max)); 331 test(StringSearch!().containsChar("Hello", 'o', 4, size_t.max)); 332 test(StringSearch!().containsChar("Hello", 'o', 0, size_t.max)); 333 334 test(!StringSearch!().containsChar("Hello", 'l', 0, 0)); 335 test(!StringSearch!().containsChar("Hello", 'l', 0, 2)); 336 test(StringSearch!().containsChar("Hello", 'l', 0, 3)); 337 } 338 339 bool containsChar ( in Char[] str, Char value, size_t start = 0 ) 340 { 341 return containsChar(str, value, start, size_t.max); 342 } 343 344 345 /** 346 * Scans "str" for "pattern" and returns the index of the first occurrence 347 * if found. 
348 * 349 * Params: 350 * str = string to scan 351 * pattern = search pattern 352 * start = start location to start searching 353 * 354 * Returns: 355 * If found, the index of the first occurrence, or the length of "str" 356 * otherwise. 357 */ 358 size_t locatePattern ( in Char[] str, in Char[] pattern, size_t start = 0 ) 359 { 360 if (str.length) 361 { 362 start = min(start, str.length - 1); 363 } 364 365 auto str_search = str[start .. $] ~ TERM; 366 367 Const!(Char)* item = pLocatePattern(str_search.ptr, (pattern ~ TERM).ptr); 368 369 return item? ((item - str_search.ptr) + start) : str.length; 370 } 371 372 /// 373 unittest 374 { 375 test!("==")(StringSearch!().locatePattern("Hello World!", "World", 0), 6); 376 test!("==")(StringSearch!().locatePattern("[Hello]", "[", 1), "[Hello]".length); 377 test!("==")(StringSearch!().locatePattern("[Hello]", "[", 256), "[Hello]".length); 378 // Crazy/inconsistent behavior: It should return 1 379 test!("==")(StringSearch!().locatePattern("[", "[", 1), 0); 380 test!("==")(StringSearch!().locatePattern("[", "[", 256), 0); 381 } 382 383 384 /** 385 * Scans "str" for "pattern" and returns the index of the first occurrence 386 * if found. 387 * 388 * Params: 389 * str = string to scan 390 * pattern = search pattern 391 * start = index to start searching from 392 * 393 * Returns: 394 * If found, the index of the first occurrence, or the length of "str" 395 * otherwise. 396 */ 397 size_t locatePatternT ( istring pattern ) ( in Char[] str, size_t start = 0 ) 398 { 399 verify (start <= str.length, 400 "locatePatternT: start index out of range"); 401 if (str.length) 402 { 403 start = min(start, str.length - 1); 404 } 405 406 auto str_search = str[start .. $] ~ TERM; 407 408 auto item = pLocatePattern(str_search.ptr, pattern.ptr); 409 410 return item? 
((item - str_search.ptr) + start) : str.length; 411 } 412 413 /// 414 unittest 415 { 416 test!("==")(StringSearch!().locatePatternT!("World")("Hello World!", 0), 6); 417 test!("==")(StringSearch!().locatePatternT!("[")("[Hello]", 1), "[Hello]".length); 418 // Crazy/inconsistent behavior: It should return 1 419 test!("==")(StringSearch!().locatePatternT!("[")("[", 1), 0); 420 // Fail unittests, because reasons 421 //test!("==")(StringSearch!().locatePattern("[", "[", 256), 0); 422 } 423 424 425 /************************************************************************** 426 427 Tells whether str contains pattern 428 429 Params: 430 str = string to scan 431 pattern = search pattern 432 start = search start index 433 434 Returns: 435 true if str contains pattern or false otherwise 436 437 **************************************************************************/ 438 439 bool containsPattern ( in Char[] str, in Char[] pattern, size_t start = 0 ) 440 { 441 verify (start <= str.length, 442 "containsPattern: start index out of range"); 443 444 return !!pLocatePattern((str ~ TERM).ptr + start, (pattern ~ TERM).ptr); 445 } 446 447 /// 448 unittest 449 { 450 test(!StringSearch!().containsPattern("Hello", "ll", 5)); 451 test(StringSearch!().containsPattern("Hello", "ll", 2)); 452 test(StringSearch!().containsPattern("Hello", "lo", 3)); 453 test(!StringSearch!().containsPattern("Hello", "lo", 4)); 454 test(StringSearch!().containsPattern("Hello", "lo", 3)); 455 test(StringSearch!().containsPattern("Hello", "lo", 0)); 456 } 457 458 459 /************************************************************************** 460 461 Locates the first occurrence of any of the characters of charset in str. 
462 463 Params: 464 str = string to scan 465 charset = set of characters to look for 466 start = search start index 467 468 Returns: 469 index of first occurrence of any of the characters of charset in 470 str 471 472 **************************************************************************/ 473 474 size_t locateCharSet ( in Char[] str, in Char[] charset, size_t start = 0 ) 475 { 476 verify(start <= str.length, 477 "locateCharSet: start index out of range"); 478 479 size_t item = pLocateFirstInSet((str ~ TERM).ptr + start, (charset ~ TERM).ptr); 480 481 return item + start; 482 } 483 484 485 /************************************************************************** 486 487 Locates the first occurrence of any of the characters of charset in str. 488 Passing charset as template parameter makes this method somewhat more 489 efficient when used very frequently. 490 491 Params: 492 str = string to scan 493 start = search start index 494 495 Returns: 496 index of first occurrence of any of the characters of charset in 497 str 498 499 **************************************************************************/ 500 501 size_t locateCharSetT ( istring charset ) ( in Char[] str, size_t start = 0 ) 502 { 503 verify (start <= str.length, 504 "locateCharSetT: start index out of range"); 505 return pLocateFirstInSet((str ~ TERM).ptr + start, charset.ptr); 506 } 507 508 509 /************************************************************************** 510 511 Shifts "length" characters inside "string" from "src_pos" to "dst_pos". 512 This effectively does the same thing as 513 514 --- 515 string[src_pos .. src_pos + length] = string[dst_pos .. dst_pos + length]; 516 --- 517 518 but allows overlapping ranges. 
519 520 Params: 521 str = string to process 522 dst_pos = destination start position (index) 523 src_pos = source start position (index) 524 length = number of array elements to shift 525 526 **************************************************************************/ 527 528 Char[] shiftString ( ref Char[] str, size_t dst_pos, size_t src_pos, size_t length ) 529 { 530 enum PREFIX = "shiftString(): "; 531 532 verify (src_pos <= str.length, PREFIX ~ "source start out of range"); 533 verify (dst_pos <= str.length, PREFIX ~ "destination start out of range"); 534 verify (src_pos + length <= str.length, PREFIX ~ "source end out of range"); 535 verify (dst_pos + length <= str.length, PREFIX ~ "destination end out of range"); 536 537 pMemMove(str.ptr + dst_pos, str.ptr + src_pos, length); 538 539 return str; 540 } 541 542 543 544 /************************************************************************** 545 546 Returns the length of "str" without null terminator. 547 548 Params: 549 str = input string (may or may not be null terminated) 550 551 Returns: 552 the length of the string of this segment 553 554 **************************************************************************/ 555 size_t lengthOf ( in Char[] str ) 556 { 557 return str.length? (str[$ - 1]? str.length : lengthOf(str.ptr)) : 0; 558 } 559 560 561 562 /************************************************************************** 563 564 Asserts that "str" is null-terminated. 565 566 Params: 567 str = input string 568 569 ***************************************************************************/ 570 void assertTerm ( istring func ) ( in Char[] str ) 571 { 572 verify (hasTerm(str), msgFunc!(func) ~ ": unterminated string"); 573 } 574 575 576 577 /************************************************************************** 578 579 Adds a '\0' terminator to "str" if not present. 
580 581 Params: 582 str = string to '\0'-terminate 583 584 Returns: 585 true if the string did not have a '\0'-terminator and therefore was 586 changed, or false otherwise. 587 588 **************************************************************************/ 589 590 bool appendTerm ( ref Char[] str ) 591 { 592 bool terminated = str.length? !str[$ - 1] : false; 593 594 if (!terminated) 595 { 596 str ~= TERM; 597 } 598 599 return !terminated; 600 } 601 602 603 /************************************************************************** 604 605 Strips the null terminator from str, if any. 606 607 Params: 608 str = input to '\0'-unterminate 609 610 Returns: 611 true if the string had a '\0'-terminator and therefore was changed, 612 or false otherwise. 613 614 **************************************************************************/ 615 bool stripTerm ( ref Char[] str ) 616 { 617 bool terminated = str.length? !str[$ - 1] : false; 618 619 if (terminated) 620 { 621 str = str[0 .. lengthOf(str)]; 622 } 623 624 return terminated; 625 } 626 627 628 629 /************************************************************************** 630 631 Tells whether "str" is null-terminated. 632 633 Params: 634 str = input string 635 636 Returns: 637 true if "str" is null-terminated or false otherwise 638 639 **************************************************************************/ 640 bool hasTerm ( in Char[] str ) 641 { 642 return str.length? !str[$ - 1] : false; 643 } 644 645 646 647 /************************************************************************** 648 649 Tells whether "str" and "pattern" are equal regardless of null 650 terminators. 
651 652 Params: 653 str = str to compare to "pattern" 654 pattern = comparison pattern for "str" 655 656 Returns: 657 true on match or false otherwise 658 659 **************************************************************************/ 660 bool matches ( Char[] str, Char[] pattern ) 661 { 662 return (stripTerm(str) == stripTerm(pattern)); 663 } 664 665 666 667 /*************************************************************************** 668 669 Trims white space from "str". 670 671 Params: 672 str = input string 673 terminate = set to true to null-terminate the resulting string if 674 the input string is null-terminated 675 676 Returns: 677 the resulting string 678 679 ***************************************************************************/ 680 TChar[] trim (TChar) ( TChar[] str, bool terminate = false ) 681 { 682 static assert (is(Unqual!(TChar) == Char)); 683 684 terminate &= hasTerm(str); 685 686 foreach_reverse (i, c; str[0 .. lengthOf(str)]) 687 { 688 if (!isSpace(c)) 689 { 690 str = str[0 .. i + terminate + 1]; 691 break; 692 } 693 } 694 695 foreach (i, c; str) 696 { 697 if (!isSpace(c)) 698 { 699 return str[i .. $]; 700 } 701 } 702 703 return null; 704 } 705 706 /// 707 unittest 708 { 709 test!("==")(StringSearch!().trim("trim"), "trim"[]); 710 711 test!("==")(StringSearch!().trim(" trim"), "trim"[]); 712 test!("==")(StringSearch!().trim("\ttrim"), "trim"[]); 713 test!("==")(StringSearch!().trim(" \t trim"), "trim"[]); 714 715 test!("==")(StringSearch!().trim("trim "), "trim"[]); 716 test!("==")(StringSearch!().trim("trim\t"), "trim"[]); 717 test!("==")(StringSearch!().trim("trim \t "), "trim"[]); 718 719 test!("==")(StringSearch!().trim(" trim "), "trim"[]); 720 test!("==")(StringSearch!().trim("\ttrim\t"), "trim"[]); 721 test!("==")(StringSearch!().trim("\t \ttrim \t "), "trim"[]); 722 } 723 724 725 /************************************************************************** 726 727 Converts each character of str in-place using convert. 
convert must be 728 a function that takes a character in the first argument and returns the 729 converted character. 730 731 Params: 732 str = string to convert 733 734 Returns: 735 converted string 736 737 **************************************************************************/ 738 739 Char[] charConv ( alias convert ) ( ref Char[] str ) 740 { 741 foreach (ref c; str) 742 { 743 c = cast(Char) convert(c); 744 } 745 746 return str; 747 } 748 749 /************************************************************************** 750 751 Checks if all symbols of `str` are not modified by predicate 752 `convert`, creates a duplicate otherwise. 753 754 Params 755 str = string to check/convert 756 757 Returns: 758 `str` untouched if all symbols are already converted, duplicated 759 and converted string otherwise 760 761 **************************************************************************/ 762 763 Const!(Char)[] charConvDup ( alias convert ) ( Const!(Char)[] str ) 764 { 765 foreach (Char c; str) 766 { 767 if (c != cast(Char) convert(c)) 768 { 769 auto newstr = str.dup; 770 foreach (ref Char c2; newstr) 771 c2 = cast(Char) convert(c2); 772 return newstr; 773 } 774 } 775 776 return str; 777 } 778 779 /************************************************************************** 780 781 Converts "str" in-place to lower case. 782 783 Params: 784 str = string to convert 785 786 Returns: 787 converted string 788 789 **************************************************************************/ 790 791 alias charConv!(toLower) strToLower; 792 793 /************************************************************************** 794 795 Ensures "str" is all lower case, allocates new copy and converts it 796 otherwise. 
797 798 Params: 799 str = string to check 800 801 Returns: 802 converted/verified string 803 804 **************************************************************************/ 805 806 alias charConvDup!(toLower) strEnsureLower; 807 808 /************************************************************************** 809 810 Converts "str" in-place to upper case. 811 812 Params: 813 str = string to convert 814 815 Returns: 816 converted string 817 818 **************************************************************************/ 819 820 alias charConv!(toUpper) strToUpper; 821 822 823 824 /************************************************************************** 825 826 Tells if all letter characters in "str" match the condition checked by 827 "check". "check" must be something that takes a character in the first 828 argument and returns an integer type where a value different from 0 means 829 that the condition is satisfied. 830 831 Params: 832 str = string to convert 833 834 Returns: 835 true if all letter characters match the the condition checked by 836 "check" or false otherwise 837 838 **************************************************************************/ 839 bool caseCheck ( alias check ) ( in Char[] str ) 840 { 841 bool result = true; 842 843 foreach (c; str) 844 { 845 result &= (!isAlpha(c) || !!check(c)); 846 } 847 848 return result; 849 } 850 851 852 /************************************************************************** 853 854 Checks if all letter characters in "str" are lower case. 855 856 Params: 857 str = string to check 858 859 Returns: 860 true if all letter characters in "str" are lower case or false 861 otherwise 862 863 **************************************************************************/ 864 865 alias caseCheck!(isLower) strIsLower; 866 867 868 869 /************************************************************************** 870 871 Checks if all letter characters in "str" are upper case. 
872 873 Params: 874 str = string to check 875 876 Returns: 877 true if all letter characters in "str" are upper case or false 878 otherwise 879 880 **************************************************************************/ 881 alias caseCheck!(isUpper) strIsUpper; 882 883 /************************************************************************** 884 885 Splits str into at most n slices on each occurrence of delim. collapse 886 indicates whether to collapse consecutive occurrences to a single one 887 to prevent producing empty slices. 888 889 Params: 890 slices = resulting slices buffer 891 str = input string 892 delim = delimiter character 893 n = maximum number of slices; set to 0 to indicate no limit 894 collapse = set to true to collapse consecutive occurrences to 895 prevent producing empty "slices" 896 897 Returns: 898 the resulting slices 899 900 **************************************************************************/ 901 902 TElem[] split (TElem) ( ref TElem[] slices, TElem str, Char delim, uint n = 0, 903 bool collapse = false ) 904 { 905 return split_!(Char, TElem)(slices, str, delim, &locateChar, n, collapse); 906 } 907 908 /// 909 unittest 910 { 911 cstring[] slices; 912 913 test!("==")(StringSearch!().split(slices, "a;b;c", ';'), 914 ["a", "b", "c"][]); 915 test!("==")(StringSearch!().split(slices, "a;b;c", '.'), 916 ["a;b;c"][]); 917 test!("==")(StringSearch!().split(slices, "abc;", ';'), 918 ["abc", ""][]); 919 test!("==")(StringSearch!().split(slices, ";abc;", ';'), 920 ["", "abc", ""][]); 921 test!("==")(StringSearch!().split(slices, "a;;bc", ';'), 922 ["a", "", "bc"][]); 923 924 925 test!("==")(StringSearch!().split(slices, "a;b;c", ';', 2), 926 ["a", "b"][]); 927 928 test!("==")(StringSearch!().split(slices, "abc;", ';', 0, true), 929 ["abc"][]); 930 test!("==")(StringSearch!().split(slices, ";abc;", ';', 0, true), 931 ["abc"][]); 932 test!("==")(StringSearch!().split(slices, "a;;bc", ';', 0, true), 933 ["a", "bc"][]); 934 935 mstring[] 
mslices; 936 test!("==")(StringSearch!().split(slices, "a;b;c".dup, ';'), 937 ["a", "b", "c"][]); 938 } 939 940 941 /************************************************************************** 942 943 Splits str on each occurrence of delim. collapse indicates whether to 944 collapse consecutive occurrences to a single one to prevent producing 945 empty slices. 946 947 Params: 948 slices = array to put the resulting slices 949 str = input string 950 delim = delimiter character 951 n = maximum number of slices; set to 0 to indicate no limit 952 953 Returns: 954 the resulting slices 955 956 **************************************************************************/ 957 958 TElem[] splitCollapse (TElem) ( ref TElem[] slices, TElem str, Char delim, 959 uint n = 0 ) 960 { 961 return split(slices, str, delim, n, true); 962 } 963 964 965 /************************************************************************** 966 967 Splits str into at most n slices on each occurrence of any character in 968 delims. collapse indicates whether to collapse consecutive occurrences 969 to a single one to prevent producing empty slices. 970 971 Params: 972 slices = destination array of slices 973 str = input string 974 delims = delimiter character 975 n = maximum number of slices; set to 0 to indicate no limit 976 collapse = set to true to collapse consecutive occurrences to 977 prevent producing empty "slices" 978 979 **************************************************************************/ 980 981 TElem[] split (TElem) ( ref TElem[] slices, TElem str, in Char[] delims, 982 uint n = 0, bool collapse = false ) 983 { 984 return split_!(Char[], TElem)(slices, str, delims, &locateCharSet, n, 985 collapse); 986 } 987 988 989 /************************************************************************** 990 991 Splits str on each occurrence of any character in delims. collapse 992 indicates whether to collapse consecutive occurrences to a single one to 993 prevent producing empty slices. 
994 995 Params: 996 str = input string 997 delim = delimiter character 998 slices = destination array of slices 999 n = maximum number of slices; set to 0 to indicate no limit 1000 1001 Returns: 1002 the resulting slices 1003 1004 **************************************************************************/ 1005 1006 TElem[] splitCollapse (TElem) ( ref TElem[] slices, in TElem str, 1007 in Char[] delim, uint n = 0 ) 1008 { 1009 return split(slices, str, delim, n, true); 1010 } 1011 1012 1013 /************************************************************************** 1014 1015 Locate delimiter function definition template. LocateDelimDg is the type 1016 of the function callback used by split_(). 1017 1018 LocateDelimDg params: 1019 str = string to search for delim 1020 delim = search pattern of arbitrary type: single character, set of 1021 characters, search string, ... 1022 start = search start start index 1023 1024 LocateDelimDg shall return: 1025 index of first occurrence of delim in str, starting from start 1026 1027 **************************************************************************/ 1028 1029 template LocateDelimDg ( T ) 1030 { 1031 alias size_t function ( in Char[] str, T delim, size_t start ) LocateDelimDg; 1032 } 1033 1034 /************************************************************************** 1035 1036 Splits str into at most n slices on each occurrence reported by 1037 locateDelim. collapse indicates whether to collapse consecutive 1038 occurrences to a single one to prevent producing empty slices. 
1039 1040 Params: 1041 slices = destination array of slices 1042 str = input string 1043 delim = delimiter(s), depending on locateDelim 1044 locateDelim = callback function which shall locate the 1045 occurrence of delim in str; see LocateDelimDg 1046 1047 collapse = set to true to collapse consecutive occurrences to 1048 prevent producing empty "slices" 1049 1050 **************************************************************************/ 1051 1052 private TElem[] split_ ( T , TElem ) ( ref TElem[] slices, TElem str, 1053 T delim, LocateDelimDg!(T) locateDelim, 1054 uint n, bool collapse ) 1055 { 1056 static if (is(Unqual!(TElem) E : E[])) 1057 { 1058 static assert (is (Unqual!(E) == Char), 1059 "TElem should be [const] Char[], not : " 1060 ~ TElem.stringof); 1061 } 1062 else 1063 { 1064 static assert (false, "TElem should be [const] Char[], not : " 1065 ~ TElem.stringof); 1066 } 1067 uint i = 0; 1068 1069 size_t start = collapse? skipLeadingDelims(str, delim) : 0; 1070 1071 size_t pos = locateDelim(str, delim, start); 1072 1073 slices.length = 0; 1074 enableStomping(slices); 1075 1076 while ((pos < str.length) && (!n || (i < n))) 1077 { 1078 if (!((pos == start) && collapse)) 1079 { 1080 slices ~= str[start .. pos]; 1081 1082 i++; 1083 } 1084 1085 start = pos + 1; 1086 1087 pos = locateDelim(str, delim, start); 1088 } 1089 1090 if ((!n || (i < n)) && (!((start == str.length) && collapse))) 1091 { 1092 slices ~= str[start .. $]; // append tail 1093 } 1094 1095 return slices; 1096 } 1097 1098 /************************************************************************** 1099 1100 Skips leading occurrences of delim in string. 
1101 1102 Params: 1103 str = input string 1104 delim = delimiter character 1105 1106 Returns: 1107 index of character in str after skipping leading occurrences of 1108 delim (length of str if str consists of delim characters) 1109 1110 **************************************************************************/ 1111 1112 private size_t skipLeadingDelims ( T ) ( in Char[] str, T delim ) 1113 { 1114 foreach (i, c; str) 1115 { 1116 bool found; 1117 1118 static if (is (T U : U[])) 1119 { 1120 found = containsChar(delim, c); 1121 } 1122 else static if (is (T : Char)) 1123 { 1124 found = c == delim; 1125 } 1126 else static assert (false, "skipLeadingDelims: delim must be of type '" ~ 1127 Char.stringof ~ "' or '" ~ (Char[]).stringof ~ 1128 "', not '" ~ T.stringof ~ '\''); 1129 1130 1131 1132 if (!found) return i; 1133 } 1134 1135 return str.length; 1136 } 1137 }