/******************************************************************************

    C string and character tool functions

    C string and character tool functions and null terminator utilities

    Copyright:
        Copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
        Alternatively, this file may be distributed under the terms of the Tango
        3-Clause BSD License (see LICENSE_BSD.txt for details).

******************************************************************************/

module ocean.text.util.StringSearch;


import ocean.meta.types.Qualifiers;
import ocean.core.Verify;

import c_stddef = core.stdc.stddef: wchar_t;
import c_wctype = core.stdc.wctype;
import c_ctype  = core.stdc.ctype;
import c_wchar  = core.stdc.wchar_;
import c_string = core.stdc.string;

import ocean.math.Math: min;

version (unittest)
{
    import ocean.core.Test;
}


/++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    /**************************************************************************

        Descriptions for public alias methods

     **************************************************************************/

    /**
     * Returns the length of "str" without null terminator.
     *
     * Params:
     *      str = string (must be null terminated)
     *
     * Returns:
     *      length of "str" without null terminator
     */
    size_t lengthOf ( Char* str );


    /**
     * Tells whether "chr" is
     *  isCntrl    -- a control character or
     *  isSpace    -- whitespace or
     *  isGraph    -- a character associated with a graph or
     *  isPrint    -- printable or
     *  isPunct    -- a punctuation character or
     *  isAlpha    -- a letter or
     *  isLower    -- a lower case letter or
     *  isUpper    -- an upper case letter or
     *  isAlNum    -- a letter or a decimal digit or
     *  isDigit    -- a decimal digit or
     *  isHexDigit -- a hexadecimal digit.
     *
     * Params:
     *      chr = character to identify
     * Returns:
     *      true if the character is of the specified class or false otherwise
     */
    bool isCntrl ( Char chr );
    bool isSpace ( Char chr );

    bool isGraph ( Char chr );
    bool isPrint ( Char chr );
    bool isPunct ( Char chr );

    bool isAlpha ( Char chr );
    bool isAlNum ( Char chr );
    bool isDigit ( Char chr );
    bool isHexDigit ( Char chr );


    bool isLower ( Char chr );
    bool isUpper ( Char chr );


    /**
     * Converts "chr"
     *  toLower -- to lower case or
     *  toUpper -- to upper case.
     *
     * Params:
     *      chr = character to convert
     *
     * Returns:
     *      converted character
     */
    Char toLower ( Char chr );
    Char toUpper ( Char chr );


    /**************************************************************************

        Explanations for private alias methods

     **************************************************************************/

    /**
     * Returns the index of the first occurrence of one of the characters in
     * "charset" in "str".
     *
     * Params:
     *     str     = string to scan for characters in "charset"
     *     charset = search character set
     * Returns:
     *     the index, relative to "str", of the first character that is in
     *     "charset", or the length of "str" if there is none
     */
    size_t pLocateFirstInSet ( Char* str, Char* charset );


    /**
     * Returns a pointer to the first occurrence of "pattern" in "str".
     *
     * Params:
     *     str     = string to scan for "pattern"
     *     pattern = search pattern
     * Returns:
     *     a pointer to the first occurrence of "pattern" in "str" or null if
     *     "str" does not contain "pattern"
     */
    Char* pLocatePattern ( Char* str, Char* pattern );


    /**
     * Moves src[0 .. n] to dst[0 .. n]. "src" and "dst" may overlap.
     *
     * Params:
     *     dst = pointer to destination
     *     src = pointer to source
     *     n   = number of elements to move
     * Returns:
     *     dst
     */
    Char* pMemMove ( Char* dst, Char* src, size_t n );


    /**
     * Returns a pointer to the first occurrence of "chr" within the first "n"
     * elements of "str".
     *
     * Params:
     *     str = string to scan for "chr"
     *     chr = search character
     *     n   = number of characters to scan for "chr"
     * Returns:
     *     a pointer to the first occurrence of "chr" or null if "chr" is not
     *     among the first "n" elements of "str"
     */
    Char* pLocateBinChar ( Char* str, Char chr, size_t n );


 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++/

/******************************************************************************

    StringSearch structure

    Namespace-like struct template bundling the C library string and character
    functions for either `char` (wide_char = false) or `wchar_t`
    (wide_char = true) behind one set of names, plus array based convenience
    wrappers. All members are static.

******************************************************************************/

struct StringSearch ( bool wide_char = false )
{
    alias c_stddef.wchar_t WcharT;

    static if (wide_char)
    {
        alias WcharT            Char;

        alias c_wctype.iswcntrl  isCntrl;
        alias c_wctype.iswspace  isSpace;

        alias c_wctype.iswgraph  isGraph;
        alias c_wctype.iswprint  isPrint;
        alias c_wctype.iswpunct  isPunct;

        alias c_wctype.iswalpha  isAlpha;
        alias c_wctype.iswalnum  isAlNum;
        alias c_wctype.iswdigit  isDigit;
        alias c_wctype.iswxdigit isHexDigit;

        alias c_wctype.iswlower  isLower;
        alias c_wctype.iswupper  isUpper;

        alias c_wctype.towlower  toLower;
        alias c_wctype.towupper  toUpper;

        alias c_wchar.wcslen     lengthOf;

        alias c_wchar.wmemchr    pLocateBinChar;

        alias c_wchar.wcsstr     pLocatePattern;
        alias c_wchar.wmemmove   pMemMove;
        alias c_wchar.wcscspn    pLocateFirstInSet;

        alias c_wchar.wcstok     pSplit;
    }
    else
    {
        alias char              Char;

        alias c_ctype.iscntrl    isCntrl;
        alias c_ctype.isspace    isSpace;

        alias c_ctype.isgraph    isGraph;
        alias c_ctype.isprint    isPrint;
        alias c_ctype.ispunct    isPunct;

        alias c_ctype.isalpha    isAlpha;
        alias c_ctype.isalnum    isAlNum;
        alias c_ctype.isdigit    isDigit;
        alias c_ctype.isxdigit   isHexDigit;

        alias c_ctype.islower    isLower;
        alias c_ctype.isupper    isUpper;

        alias c_ctype.tolower    toLower;
        alias c_ctype.toupper    toUpper;

        alias c_string.strlen    lengthOf;

        alias c_string.memchr    pLocateBinChar;

        alias c_string.strstr    pLocatePattern;
        alias c_string.memmove   pMemMove;
        alias c_string.strcspn   pLocateFirstInSet;

        alias c_string.strtok    pSplit;

    }

    static:

    /// The null terminator character.
    enum Char TERM = '\0';

    /**
     * Locates the first occurrence of value in str[start .. length]. If
     * greater, length is truncated to the length of str.
     *
     * Params:
     *      str    = string to search for value
     *      value  = element value to find
     *      start  = start index, must be at most str.length
     *      length = end index of the examined region (exclusive)
     *
     * Returns:
     *      the index of the first element with value "value" or, if there is
     *      none (including the case that start is not below the truncated
     *      length so that nothing is examined), the truncated length
     */
    size_t locateChar ( in Char[] str, Char value, size_t start, size_t length )
    {
        verify (start <= str.length, "locateChar: start index out of range");

        length = min(length, str.length);

        // Nothing to examine. Without this guard "length - start" would wrap
        // around for start > length and the scan would run past the end of
        // str.
        if (start >= length)
        {
            return length;
        }

        // Cast to a typed pointer so that the subtraction below yields an
        // element index, not a byte offset, for wide characters as well.
        auto item = cast(const(Char)*) pLocateBinChar(str.ptr + start, value,
                                                      length - start);

        return item? cast(size_t) (item - str.ptr) : length;
    }

    ///
    unittest
    {
        test!("==")(StringSearch!().locateChar("Hello", 'l', 5, size_t.max), 5);
        test!("==")(StringSearch!().locateChar("Hello", 'l', 2, size_t.max), 2);
        test!("==")(StringSearch!().locateChar("Hello", 'l', 3, size_t.max), 3);
        test!("==")(StringSearch!().locateChar("Hello", 'o', 5, size_t.max), 5);
        test!("==")(StringSearch!().locateChar("Hello", 'o', 4, size_t.max), 4);
        test!("==")(StringSearch!().locateChar("Hello", 'o', 0, size_t.max), 4);
        // Test searches in a limited region of the input string
        test!("==")(StringSearch!().locateChar("Hello", 'l', 0, 0), 0);
        test!("==")(StringSearch!().locateChar("Hello", 'l', 0, 2), 2);
        test!("==")(StringSearch!().locateChar("Hello", 'l', 0, 3), 2);
        // Start index beyond the end of the examined region
        test!("==")(StringSearch!().locateChar("Hello", 'l', 3, 2), 2);
    }


    /**
     * Locates the first occurrence of value within str.
     *
     * Params:
     *      str    = string to search for "value"
     *      value  = element value to find
     *      start  = start index
     *
     * Returns:
     *      the index of the first element with value "value" or the length of
     *      str if there is none
     */
    size_t locateChar ( in Char[] str, Char value, size_t start = 0 )
    {
        return locateChar(str, value, start, size_t.max);
    }

    /**
     * Tells whether str[start .. length] contains value. If greater, length
     * is truncated to the length of str.
     *
     * Params:
     *     str    = string to search for value
     *     value  = value to search for
     *     start  = start index, must be at most str.length
     *     length = end index of the examined region (exclusive)
     *
     * Returns:
     *      true if the examined region contains value or false otherwise
     */
    bool containsChar ( in Char[] str, Char value, size_t start, size_t length )
    {
        verify (start <= str.length, "containsChar: start index out of range");

        length = min(length, str.length);

        // An empty region cannot contain anything; the guard also prevents
        // "length - start" from wrapping around for start > length.
        if (start >= length)
        {
            return false;
        }

        return !!pLocateBinChar(str.ptr + start, value, length - start);
    }

    ///
    unittest
    {
        test(!StringSearch!().containsChar("Hello", 'l', 5, size_t.max));
        test(StringSearch!().containsChar("Hello", 'l', 2, size_t.max));
        test(StringSearch!().containsChar("Hello", 'l', 3, size_t.max));
        test(!StringSearch!().containsChar("Hello", 'o', 5, size_t.max));
        test(StringSearch!().containsChar("Hello", 'o', 4, size_t.max));
        test(StringSearch!().containsChar("Hello", 'o', 0, size_t.max));

        test(!StringSearch!().containsChar("Hello", 'l', 0, 0));
        test(!StringSearch!().containsChar("Hello", 'l', 0, 2));
        test(StringSearch!().containsChar("Hello", 'l', 0, 3));
        // Start index beyond the end of the examined region
        test(!StringSearch!().containsChar("Hello", 'l', 3, 2));
    }

    /**
     * Tells whether str, starting from start, contains value.
     *
     * Params:
     *     str    = string to search for value
     *     value  = value to search for
     *     start  = start index
     *
     * Returns:
     *      true if str[start .. $] contains value or false otherwise
     */
    bool containsChar ( in Char[] str, Char value, size_t start = 0 )
    {
        return containsChar(str, value, start, size_t.max);
    }


    /**
     * Scans "str" for "pattern" and returns the index of the first occurrence
     * if found.
     *
     * A null-terminated copy of str[start .. $] and of pattern is created on
     * each call.
     *
     * Note that, if str is not empty, start is clamped to the index of the
     * last element of str, so a start index at or beyond the end of str still
     * examines the last element (see the unit tests below).
     *
     * Params:
     *      str     = string to scan
     *      pattern = search pattern
     *      start   = start location to start searching
     *
     * Returns:
     *      If found, the index of the first occurrence, or the length of "str"
     *      otherwise.
     */
    size_t locatePattern ( in Char[] str, in Char[] pattern, size_t start = 0 )
    {
        if (str.length)
        {
            start = min(start, str.length - 1);
        }

        auto str_search = str[start .. $] ~ TERM;

        const(Char)* item = pLocatePattern(str_search.ptr, (pattern ~ TERM).ptr);

        return item? ((item - str_search.ptr) + start) : str.length;
    }

    ///
    unittest
    {
        test!("==")(StringSearch!().locatePattern("Hello World!", "World", 0), 6);
        test!("==")(StringSearch!().locatePattern("[Hello]", "[", 1), "[Hello]".length);
        test!("==")(StringSearch!().locatePattern("[Hello]", "[", 256), "[Hello]".length);
        // Crazy/inconsistent behavior: It should return 1
        test!("==")(StringSearch!().locatePattern("[", "[", 1), 0);
        test!("==")(StringSearch!().locatePattern("[", "[", 256), 0);
    }


    /**
     * Scans "str" for "pattern" and returns the index of the first occurrence
     * if found. The pattern is passed as template parameter so that no
     * null-terminated copy of it needs to be created.
     *
     * As with locatePattern(), if str is not empty, start is clamped to the
     * index of the last element of str.
     *
     * Params:
     *      pattern = search pattern (template parameter)
     *      str     = string to scan
     *      start   = index to start searching from, must be at most str.length
     *
     * Returns:
     *      If found, the index of the first occurrence, or the length of "str"
     *      otherwise.
     */
    size_t locatePatternT ( istring pattern ) ( in Char[] str, size_t start = 0 )
    {
        verify (start <= str.length,
            "locatePatternT: start index out of range");
        if (str.length)
        {
            start = min(start, str.length - 1);
        }

        auto str_search = str[start .. $] ~ TERM;

        auto item = pLocatePattern(str_search.ptr, pattern.ptr);

        return item? ((item - str_search.ptr) + start) : str.length;
    }

    ///
    unittest
    {
        test!("==")(StringSearch!().locatePatternT!("World")("Hello World!", 0), 6);
        test!("==")(StringSearch!().locatePatternT!("[")("[Hello]", 1), "[Hello]".length);
        // Crazy/inconsistent behavior: It should return 1
        test!("==")(StringSearch!().locatePatternT!("[")("[", 1), 0);
        // Fail unittests, because reasons
        //test!("==")(StringSearch!().locatePattern("[", "[", 256), 0);
    }


    /**************************************************************************

        Tells whether str contains pattern

        Params:
            str     = string to scan
            pattern = search pattern
            start   = search start index, must be at most str.length

        Returns:
            true if str contains pattern or false otherwise

     **************************************************************************/

    bool containsPattern ( in Char[] str, in Char[] pattern, size_t start = 0 )
    {
        verify (start <= str.length,
            "containsPattern: start index out of range");

        return !!pLocatePattern((str ~ TERM).ptr + start, (pattern ~ TERM).ptr);
    }

    ///
    unittest
    {
        test(!StringSearch!().containsPattern("Hello", "ll", 5));
        test(StringSearch!().containsPattern("Hello", "ll", 2));
        test(StringSearch!().containsPattern("Hello", "lo", 3));
        test(!StringSearch!().containsPattern("Hello", "lo", 4));
        test(StringSearch!().containsPattern("Hello", "lo", 3));
        test(StringSearch!().containsPattern("Hello", "lo", 0));
    }


    /**************************************************************************

        Locates the first occurrence of any of the characters of charset in str.

        Params:
            str     = string to scan
            charset = set of characters to look for
            start   = search start index, must be at most str.length

        Returns:
            index of first occurrence of any of the characters of charset in
            str

     **************************************************************************/

    size_t locateCharSet ( in Char[] str, in Char[] charset, size_t start = 0 )
    {
        verify(start <= str.length,
            "locateCharSet: start index out of range");

        size_t item = pLocateFirstInSet((str ~ TERM).ptr + start, (charset ~ TERM).ptr);

        return item + start;
    }


    /**************************************************************************

        Locates the first occurrence of any of the characters of charset in str.
        Passing charset as template parameter makes this method somewhat more
        efficient when used very frequently.

        Params:
            charset = set of characters to look for (template parameter)
            str     = string to scan
            start   = search start index, must be at most str.length

        Returns:
            index of first occurrence of any of the characters of charset in
            str

     **************************************************************************/

    size_t locateCharSetT ( istring charset ) ( in Char[] str, size_t start = 0 )
    {
        verify (start <= str.length,
            "locateCharSetT: start index out of range");

        // The scan result is relative to the scan start; add start to obtain
        // an index into str, as locateCharSet() does.
        return pLocateFirstInSet((str ~ TERM).ptr + start, charset.ptr) + start;
    }

    ///
    unittest
    {
        test!("==")(StringSearch!().locateCharSetT!(";,")("a;b,c", 0), 1);
        test!("==")(StringSearch!().locateCharSetT!(";,")("a;b,c", 2), 3);
        test!("==")(StringSearch!().locateCharSetT!(";,")("a;b,c", 4), 5);
    }


    /**************************************************************************

        Shifts "length" characters inside "string" from "src_pos" to "dst_pos".
        This effectively does the same thing as

        ---
             string[dst_pos .. dst_pos + length] = string[src_pos .. src_pos + length];
        ---

        but allows overlapping ranges.

        Params:
            str     = string to process
            dst_pos = destination start position (index)
            src_pos = source start position (index)
            length  = number of array elements to shift

        Returns:
            str

     **************************************************************************/

    Char[] shiftString ( ref Char[] str, size_t dst_pos, size_t src_pos, size_t length )
    {
        enum PREFIX = "shiftString(): ";

        verify (src_pos <= str.length, PREFIX ~ "source start out of range");
        verify (dst_pos <= str.length, PREFIX ~ "destination start out of range");

        // Compare against the remaining space rather than adding the length
        // to the position: the sum could overflow and pass the check.
        verify (length <= str.length - src_pos, PREFIX ~ "source end out of range");
        verify (length <= str.length - dst_pos, PREFIX ~ "destination end out of range");

        pMemMove(str.ptr + dst_pos, str.ptr + src_pos, length);

        return str;
    }



    /**************************************************************************

        Returns the length of "str" without null terminator.

        Params:
            str = input string (may or may not be null terminated)

        Returns:
            the length of str if its last element is not a null terminator,
            otherwise the number of elements before the first null terminator
            in str; 0 if str is empty

     **************************************************************************/
    size_t lengthOf ( in Char[] str )
    {
        return str.length? (str[$ - 1]? str.length : lengthOf(str.ptr)) : 0;
    }



    /**************************************************************************

        Asserts that "str" is null-terminated.

        Params:
            func = name of the calling function, used as the prefix of the
                   failure message (template parameter)
            str  = input string

     ***************************************************************************/
    void assertTerm ( istring func ) ( in Char[] str )
    {
        verify (hasTerm(str), func ~ ": unterminated string");
    }



    /**************************************************************************

        Adds a '\0' terminator to "str" if not present.

        Params:
            str = string to '\0'-terminate

        Returns:
            true if the string did not have a '\0'-terminator and therefore was
            changed, or false otherwise.

     **************************************************************************/

    bool appendTerm ( ref Char[] str )
    {
        bool terminated = str.length? !str[$ - 1] : false;

        if (!terminated)
        {
            str ~= TERM;
        }

        return !terminated;
    }


    /**************************************************************************

        Strips the null terminator from str, if any.

        Params:
            str = input to '\0'-unterminate

        Returns:
            true if the string had a '\0'-terminator and therefore was changed,
            or false otherwise.

     **************************************************************************/
    bool stripTerm ( ref Char[] str )
    {
        bool terminated = str.length? !str[$ - 1] : false;

        if (terminated)
        {
            str = str[0 .. lengthOf(str)];
        }

        return terminated;
    }



    /**************************************************************************

        Tells whether "str" is null-terminated.

        Params:
            str = input string

        Returns:
            true if "str" is null-terminated or false otherwise

     **************************************************************************/
    bool hasTerm ( in Char[] str )
    {
        return str.length? !str[$ - 1] : false;
    }



    /**************************************************************************

        Tells whether "str" and "pattern" are equal regardless of null
        terminators.

        Params:
            str     = str to compare to "pattern"
            pattern = comparison pattern for "str"

        Returns:
            true on match or false otherwise

     **************************************************************************/
    bool matches ( Char[] str, Char[] pattern )
    {
        // stripTerm() returns whether a terminator was removed, not the
        // stripped string, so strip the local slices first and compare the
        // slices afterwards.
        stripTerm(str);
        stripTerm(pattern);

        return str == pattern;
    }

    ///
    unittest
    {
        test(StringSearch!().matches("abc".dup, "abc".dup));
        test(StringSearch!().matches("abc\0".dup, "abc".dup));
        test(StringSearch!().matches("abc".dup, "abc\0".dup));
        test(!StringSearch!().matches("abc".dup, "xyz".dup));
        test(!StringSearch!().matches("abc\0".dup, "xyz\0".dup));
    }



    /***************************************************************************

        Trims white space from "str".

        Params:
            str       = input string
            terminate = set to true to null-terminate the resulting string if
                        the input string is null-terminated

        Returns:
            the resulting string, a slice to str, or null if no element that
            is not white space was found

     ***************************************************************************/
    TChar[] trim (TChar) ( TChar[] str, bool terminate = false )
    {
        static assert (is(Unqual!(TChar) == Char));

        terminate &= hasTerm(str);

        // Cut off trailing white space, keeping the element after the last
        // non-space character (the terminator position) if requested.
        foreach_reverse (i, c; str[0 .. lengthOf(str)])
        {
            if (!isSpace(c))
            {
                str = str[0 .. i + terminate + 1];
                break;
            }
        }

        // Cut off leading white space.
        foreach (i, c; str)
        {
            if (!isSpace(c))
            {
                return str[i .. $];
            }
        }

        return null;
    }

    ///
    unittest
    {
        test!("==")(StringSearch!().trim("trim"), "trim"[]);

        test!("==")(StringSearch!().trim("  trim"), "trim"[]);
        test!("==")(StringSearch!().trim("\ttrim"), "trim"[]);
        test!("==")(StringSearch!().trim(" \t trim"), "trim"[]);

        test!("==")(StringSearch!().trim("trim  "), "trim"[]);
        test!("==")(StringSearch!().trim("trim\t"), "trim"[]);
        test!("==")(StringSearch!().trim("trim \t "), "trim"[]);

        test!("==")(StringSearch!().trim("  trim  "), "trim"[]);
        test!("==")(StringSearch!().trim("\ttrim\t"), "trim"[]);
        test!("==")(StringSearch!().trim("\t \ttrim \t "), "trim"[]);
    }


    /**************************************************************************

        Converts each character of str in-place using convert. convert must be
        a function that takes a character in the first argument and returns the
        converted character.

        Params:
            str = string to convert

        Returns:
            converted string

     **************************************************************************/

    Char[] charConv ( alias convert ) ( ref Char[] str )
    {
        foreach (ref c; str)
        {
            c = cast(Char) convert(c);
        }

        return str;
    }

    /**************************************************************************

        Checks if all symbols of `str` are not modified by predicate
        `convert`, creates a duplicate otherwise.

        Params:
            str = string to check/convert

        Returns:
            `str` untouched if all symbols are already converted, duplicated
            and converted string otherwise

     **************************************************************************/

    const(Char)[] charConvDup ( alias convert ) ( const(Char)[] str )
    {
        foreach (Char c; str)
        {
            if (c != cast(Char) convert(c))
            {
                auto newstr = str.dup;
                foreach (ref Char c2; newstr)
                    c2 = cast(Char) convert(c2);
                return newstr;
            }
        }

        return str;
    }

    /**************************************************************************

        Converts "str" in-place to lower case.

        Params:
            str = string to convert

        Returns:
            converted string

     **************************************************************************/

    alias charConv!(toLower) strToLower;

    /**************************************************************************

        Ensures "str" is all lower case, allocates new copy and converts it
        otherwise.

        Params:
            str = string to check

        Returns:
            converted/verified string

     **************************************************************************/

    alias charConvDup!(toLower) strEnsureLower;

    /**************************************************************************

        Converts "str" in-place to upper case.

        Params:
            str = string to convert

        Returns:
            converted string

     **************************************************************************/

    alias charConv!(toUpper) strToUpper;



    /**************************************************************************

        Tells if all letter characters in "str" match the condition checked by
        "check". "check" must be something that takes a character in the first
        argument and returns an integer type where a value different from 0 means
        that the condition is satisfied.

        Params:
            str = string to check

        Returns:
            true if all letter characters match the condition checked by
            "check" or false otherwise

     **************************************************************************/
    bool caseCheck ( alias check ) ( in Char[] str )
    {
        foreach (c; str)
        {
            // Only letters are subject to the check; the first failing letter
            // decides the result.
            if (isAlpha(c) && !check(c))
            {
                return false;
            }
        }

        return true;
    }


    /**************************************************************************

        Checks if all letter characters in "str" are lower case.

        Params:
            str = string to check

        Returns:
            true if all letter characters in "str" are lower case or false
            otherwise

     **************************************************************************/

    alias caseCheck!(isLower) strIsLower;



    /**************************************************************************

        Checks if all letter characters in "str" are upper case.

        Params:
            str = string to check

        Returns:
            true if all letter characters in "str" are upper case or false
            otherwise

     **************************************************************************/
    alias caseCheck!(isUpper) strIsUpper;

    /**************************************************************************

        Splits str into at most n slices on each occurrence of delim. collapse
        indicates whether to collapse consecutive occurrences to a single one
        to prevent producing empty slices.

        Params:
            slices   = resulting slices buffer
            str      = input string
            delim    = delimiter character
            n        = maximum number of slices; set to 0 to indicate no limit
            collapse = set to true to collapse consecutive occurrences to
                       prevent producing empty "slices"

        Returns:
            the resulting slices

     **************************************************************************/

    TElem[] split (TElem) ( ref TElem[] slices, TElem str, Char delim, uint n = 0,
                       bool collapse = false )
    {
        return split_!(Char, TElem)(slices, str, delim, &locateChar, n, collapse);
    }

    ///
    unittest
    {
        cstring[] slices;

        test!("==")(StringSearch!().split(slices, "a;b;c", ';'),
                    ["a", "b", "c"][]);
        test!("==")(StringSearch!().split(slices, "a;b;c", '.'),
                    ["a;b;c"][]);
        test!("==")(StringSearch!().split(slices, "abc;", ';'),
                    ["abc", ""][]);
        test!("==")(StringSearch!().split(slices, ";abc;", ';'),
                    ["", "abc", ""][]);
        test!("==")(StringSearch!().split(slices, "a;;bc", ';'),
                    ["a", "", "bc"][]);


        test!("==")(StringSearch!().split(slices, "a;b;c", ';', 2),
                    ["a", "b"][]);

        test!("==")(StringSearch!().split(slices, "abc;", ';', 0, true),
                    ["abc"][]);
        test!("==")(StringSearch!().split(slices, ";abc;", ';', 0, true),
                    ["abc"][]);
        test!("==")(StringSearch!().split(slices, "a;;bc", ';', 0, true),
                    ["a", "bc"][]);

        // NOTE(review): this call passes "slices", not "mslices", so the
        // mutable slices buffer declared here is never used -- confirm whether
        // "mslices" was intended.
        mstring[] mslices;
        test!("==")(StringSearch!().split(slices, "a;b;c".dup, ';'),
                    ["a", "b", "c"][]);
    }


    /**************************************************************************

        Splits str into at most n slices on each occurrence of delim,
        collapsing consecutive occurrences to a single one to prevent
        producing empty slices.

        Params:
            slices   = array to put the resulting slices
            str      = input string
            delim    = delimiter character
            n        = maximum number of slices; set to 0 to indicate no limit

        Returns:
            the resulting slices

     **************************************************************************/

    TElem[] splitCollapse (TElem) ( ref TElem[] slices, TElem str, Char delim,
                                    uint n = 0 )
    {
        return split(slices, str, delim, n, true);
    }


    /**************************************************************************

        Splits str into at most n slices on each occurrence of any character in
        delims. collapse indicates whether to collapse consecutive occurrences
        to a single one to prevent producing empty slices.

        Params:
            slices   = destination array of slices
            str      = input string
            delims   = set of delimiter characters
            n        = maximum number of slices; set to 0 to indicate no limit
            collapse = set to true to collapse consecutive occurrences to
                       prevent producing empty "slices"

        Returns:
            the resulting slices

     **************************************************************************/

    TElem[] split (TElem) ( ref TElem[] slices, TElem str, in Char[] delims,
                       uint n = 0, bool collapse = false )
    {
        return split_!(Char[], TElem)(slices, str, delims, &locateCharSet, n,
                                      collapse);
    }


    /**************************************************************************

        Splits str into at most n slices on each occurrence of any character
        in delim, collapsing consecutive occurrences to a single one to
        prevent producing empty slices.

        Params:
            slices   = destination array of slices
            str      = input string
            delim    = set of delimiter characters
            n        = maximum number of slices; set to 0 to indicate no limit

        Returns:
            the resulting slices

     **************************************************************************/

    TElem[] splitCollapse (TElem) ( ref TElem[] slices, in TElem str,
                                    in Char[] delim, uint n = 0 )
    {
        return split(slices, str, delim, n, true);
    }


    /**************************************************************************

        Locate delimiter function definition template. LocateDelimDg is the type
        of the function callback used by split_().

        LocateDelimDg params:
            str   = string to search for delim
            delim = search pattern of arbitrary type: single character, set of
                    characters, search string, ...
            start = search start index

        LocateDelimDg shall return:
            index of first occurrence of delim in str, starting from start

     **************************************************************************/

    template LocateDelimDg ( T )
    {
        alias size_t function ( in Char[] str, T delim, size_t start ) LocateDelimDg;
    }

    /**************************************************************************

        Splits str into at most n slices on each occurrence reported by
        locateDelim. collapse indicates whether to collapse consecutive
        occurrences to a single one to prevent producing empty slices.

        Params:
            slices      = destination array of slices; its length is reset to
                          0 before the resulting slices are appended
            str         = input string
            delim       = delimiter(s), depending on locateDelim
            locateDelim = callback function which shall locate the
                          occurrence of delim in str; see LocateDelimDg
            n           = maximum number of slices; set to 0 to indicate no
                          limit
            collapse    = set to true to collapse consecutive occurrences to
                          prevent producing empty "slices"

        Returns:
            the resulting slices

     **************************************************************************/

    private TElem[] split_  ( T , TElem ) ( ref TElem[] slices, TElem str,
                                            T delim, LocateDelimDg!(T) locateDelim,
                                            uint n, bool collapse )
    {
        static if (is(Unqual!(TElem) E : E[]))
        {
            static assert (is (Unqual!(E) == Char),
                           "TElem should be [const] Char[], not : "
                           ~ TElem.stringof);
        }
        else
        {
            static assert (false, "TElem should be [const] Char[], not : "
                           ~ TElem.stringof);
        }
        uint   i     = 0;

        size_t start = collapse? skipLeadingDelims(str, delim) : 0;

        size_t pos   = locateDelim(str, delim, start);

        slices.length = 0;
        assumeSafeAppend(slices);

        while ((pos < str.length) && (!n || (i < n)))
        {
            // When collapsing, a delimiter found right at the start of the
            // current segment would produce an empty slice: skip it.
            if (!((pos == start) && collapse))
            {
                slices ~= str[start .. pos];

                i++;
            }

            start = pos + 1;

            pos = locateDelim(str, delim, start);
        }

        if ((!n || (i < n)) && (!((start == str.length) && collapse)))
        {
            slices ~= str[start .. $];                       // append tail
        }

        return slices;
    }

    /**************************************************************************

        Skips leading occurrences of delim in string.

        Params:
            str   = input string
            delim = delimiter character or set of delimiter characters

        Returns:
            index of character in str after skipping leading occurrences of
            delim (length of str if str consists of delim characters)

     **************************************************************************/

    private size_t skipLeadingDelims ( T ) ( in Char[] str, T delim )
    {
        foreach (i, c; str)
        {
            bool found;

            static if (is (T U : U[]))
            {
                found = containsChar(delim, c);
            }
            else static if (is (T : Char))
            {
                found = c == delim;
            }
            else static assert (false, "skipLeadingDelims: delim must be of type '" ~
                                Char.stringof ~ "' or '" ~ (Char[]).stringof ~
                                "', not '" ~ T.stringof ~ '\'');



            if (!found) return i;
        }

        return str.length;
    }
}