1 /******************************************************************************
2 
3     C string and character tool functions
4 
5     C string and character tool functions and null terminator utilities
6 
7     Copyright:
8         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
9         All rights reserved.
10 
11     License:
12         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
13         Alternatively, this file may be distributed under the terms of the Tango
14         3-Clause BSD License (see LICENSE_BSD.txt for details).
15 
16  ******************************************************************************/
17 
18 module ocean.text.util.StringSearch;
19 
20 
21 import ocean.meta.types.Qualifiers;
22 import ocean.core.Verify;
23 
// C standard library bindings, each imported under a short alias so that the
// StringSearch aliases below can refer to them unambiguously.
import c_stddef = core.stdc.stddef: wchar_t;
import c_wctype = core.stdc.wctype;
import c_ctype  = core.stdc.ctype;
import c_wchar  = core.stdc.wchar_;
import c_string = core.stdc.string;
29 
30 import ocean.math.Math:   min;
31 
32 version (unittest)
33 {
34     import ocean.core.Test;
35 }
36 
37 
38 /++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
39 
40     /**************************************************************************
41 
42         Descriptions for public alias methods
43 
44      **************************************************************************/
45 
46     /**
47      * Returns the length of "str" without null terminator.
48      *
49      * Params:
50      *      str = string (must be null terminated)
51      *
52      * Returns:
53      *      length of "str" without null terminator
54      */
55     size_t lengthOf ( Char* str );
56 
57 
58     /**
     * Tells whether "chr" is
     *  isCntrl -- a control character or
     *  isSpace -- whitespace or
     *  isGraph -- a printable character other than space or
     *  isPrint -- printable or
     *  isPunct -- a punctuation character or
     *  isAlpha -- a letter or
     *  isLower -- a lower case letter or
     *  isUpper -- an upper case letter or
     *  isAlNum -- a letter or a decimal digit or
     *  isDigit -- a decimal digit or
     *  isHexDigit -- a hexadecimal digit.
70      *
71      * Params:
72      *      chr = character to identify
73      * Returns:
74      *      true if the character is of the specified class or false otherwise
75      */
76     bool isCntrl ( Char chr );
77     bool isSpace ( Char chr );
78 
79     bool isGraph ( Char chr );
80     bool isPrint ( Char chr );
81     bool isPunct ( Char chr );
82 
83     bool isAlpha ( Char chr );
84     bool isAlNum ( Char chr );
85     bool isDigit ( Char chr );
86     bool isHexDigit ( Char chr );
87 
88 
89     bool isLower ( Char chr );
90     bool isUpper ( Char chr );
91 
92 
93     /**
94      * Converts "chr"
95      *  toLower -- to lower case or
96      *  toUpper -- to upper case.
97      *
98      * Params:
99      *      chr = character to convert
100      *
101      * Returns:
102      *      converted character
103      */
104     Char toLower ( Char chr );
105     Char toUpper ( Char chr );
106 
107 
108     /**************************************************************************
109 
110         Explanations for private alias methods
111 
112      **************************************************************************/
113 
114     /**
115      * Returns the index of the first occurrence of one of the characters in
116      * "charset" in "str".
117      *
118      * Params:
119      *     str =     string to scan for characters in "charset"
120      *     charset = search character set
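     * Returns:
     *     the index of the first character in "str" that is contained in
     *     "charset", or the length of "str" if there is none
     */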
123     size_t pLocateFirstInSet ( Char* str, Char* charset );
124 
125 
126     /**
127      * Returns a pointer to the first occurrence of "pattern" in "str".
128      *
129      * Params:
130      *     str = string to scan for "pattern"
131      *     pattern = search pattern
     * Returns:
     *     a pointer to the first occurrence of "pattern" in "str", or null if
     *     "pattern" does not occur in "str"
     */
134     Char* pLocatePattern ( Char* str, Char* pattern );
135 
136 
137     /**
138      * Moves src[0 .. n] to dst[0 .. n]. "src" and "dst" may overlap.
139      *
140      * Params:
141      *     dst = pointer to destination
142      *     src = pointer to source
143      *     n   = number of elements to move
     * Returns:
     *     dst
     */
146     Char* pMemMove ( Char* dst, Char* src, size_t n );
147 
148 
149     /**
150      * Returns a pointer to the first occurrence of "chr" within the first "n"
151      * elements of "str".
152      *
153      * Params:
154      *     str = string to scan for "chr"
155      *     chr = search character
156      *     n =   number of characters to scan for "chr"
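     * Returns:
     *     a pointer to the first occurrence of "chr" within the first "n"
     *     elements of "str", or null if there is none
     */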
159     Char* pLocateBinChar ( Char* str, Char chr, size_t n );
160 
161 
162  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++/
163 
164 /******************************************************************************
165 
166     StringSearch structure
167 
168  ******************************************************************************/
169 
170 struct StringSearch ( bool wide_char = false )
171 {
172     alias c_stddef.wchar_t WcharT;
173 
174     static if (wide_char)
175     {
176         alias WcharT            Char;
177 
178         alias c_wctype.iswcntrl  isCntrl;
179         alias c_wctype.iswspace  isSpace;
180 
181         alias c_wctype.iswgraph  isGraph;
182         alias c_wctype.iswprint  isPrint;
183         alias c_wctype.iswpunct  isPunct;
184 
185         alias c_wctype.iswalpha  isAlpha;
186         alias c_wctype.iswalnum  isAlNum;
187         alias c_wctype.iswdigit  isDigit;
188         alias c_wctype.iswxdigit isHexDigit;
189 
190         alias c_wctype.iswlower  isLower;
191         alias c_wctype.iswupper  isUpper;
192 
193         alias c_wctype.towlower  toLower;
194         alias c_wctype.towupper  toUpper;
195 
196         alias c_wchar.wcslen     lengthOf;
197 
198         alias c_wchar.wmemchr    pLocateBinChar;
199 
200         alias c_wchar.wcsstr     pLocatePattern;
201         alias c_wchar.wmemmove   pMemMove;
202         alias c_wchar.wcscspn    pLocateFirstInSet;
203 
204         alias c_wchar.wcstok     pSplit;
205     }
206     else
207     {
208         alias char              Char;
209 
210         alias c_ctype.iscntrl    isCntrl;
211         alias c_ctype.isspace    isSpace;
212 
213         alias c_ctype.isgraph    isGraph;
214         alias c_ctype.isprint    isPrint;
215         alias c_ctype.ispunct    isPunct;
216 
217         alias c_ctype.isalpha    isAlpha;
218         alias c_ctype.isalnum    isAlNum;
219         alias c_ctype.isdigit    isDigit;
220         alias c_ctype.isxdigit   isHexDigit;
221 
222         alias c_ctype.islower    isLower;
223         alias c_ctype.isupper    isUpper;
224 
225         alias c_ctype.tolower    toLower;
226         alias c_ctype.toupper    toUpper;
227 
228         alias c_string.strlen    lengthOf;
229 
230         alias c_string.memchr    pLocateBinChar;
231 
232         alias c_string.strstr    pLocatePattern;
233         alias c_string.memmove   pMemMove;
234         alias c_string.strcspn   pLocateFirstInSet;
235 
236         alias c_string.strtok    pSplit;
237 
238     }
239 
240     static:
241 
242     enum Char TERM = '\0';
243 
244     /**
     * Locates the first occurrence of value within the first length characters
     * of str. If greater, length is truncated to the length of str.
247      *
248      * Params:
249      *      str    = string to search for value
250      *      value  = element value to find
251      *      start  = start index
252      *      length = number of elements to examine
253      *
254      * Returns:
255      *      the index of the first element with value "value" or the index of
256      *      the last examined element + 1
257      */
258     size_t locateChar ( in Char[] str, Char value, size_t start, size_t length )
259     {
260         verify (start <= str.length, "locateChar: start index out of range");
261 
262         length = min(length, str.length);
263 
264         const(void)* item = pLocateBinChar(str.ptr + start, value, length - start);
265         const(void)* pstr = str.ptr;
266 
267         return item? (item - pstr) : length;
268     }
269 
270     ///
271     unittest
272     {
273         test!("==")(StringSearch!().locateChar("Hello", 'l', 5, size_t.max), 5);
274         test!("==")(StringSearch!().locateChar("Hello", 'l', 2, size_t.max), 2);
275         test!("==")(StringSearch!().locateChar("Hello", 'l', 3, size_t.max), 3);
276         test!("==")(StringSearch!().locateChar("Hello", 'o', 5, size_t.max), 5);
277         test!("==")(StringSearch!().locateChar("Hello", 'o', 4, size_t.max), 4);
278         test!("==")(StringSearch!().locateChar("Hello", 'o', 0, size_t.max), 4);
279         // Test searches in a limited region of the input string
280         test!("==")(StringSearch!().locateChar("Hello", 'l', 0, 0), 0);
281         test!("==")(StringSearch!().locateChar("Hello", 'l', 0, 2), 2);
282         test!("==")(StringSearch!().locateChar("Hello", 'l', 0, 3), 2);
283     }
284 
285 
286     /**
     * Locates the first occurrence of value within str.
288      *
289      * Params:
290      *      str    = string to search for "value"
291      *      value  = element value to find
292      *      start  = start index
293      *
294      * Returns:
295      *      the index of the first element with value "value" or the index of
296      *      the last examined element + 1
297      */
298     size_t locateChar ( in Char[] str, Char value, size_t start = 0 )
299     {
300         return locateChar(str, value, start, size_t.max);
301     }
302 
303     /**
     * Tells whether the first length characters of str, starting from start,
     * contain value. If greater, length is truncated to the length of str.
306      *
307      * Params:
308      *     str    = string to search for value
309      *     value  = value to search for
310      *     start  = start index
311      *     length = number of elements to examine
312      *
313      * Returns:
314      *      true if str contains value or false otherwise
315      */
316     bool containsChar ( in Char[] str, Char value, size_t start, size_t length )
317     {
318         verify (start <= str.length, "containsChar: start index out of range");
319 
320         length = min(length, str.length);
321 
322         return !!pLocateBinChar(str.ptr + start, value, length - start);
323     }
324 
325     ///
326     unittest
327     {
328         test(!StringSearch!().containsChar("Hello", 'l', 5, size_t.max));
329         test(StringSearch!().containsChar("Hello", 'l', 2, size_t.max));
330         test(StringSearch!().containsChar("Hello", 'l', 3, size_t.max));
331         test(!StringSearch!().containsChar("Hello", 'o', 5, size_t.max));
332         test(StringSearch!().containsChar("Hello", 'o', 4, size_t.max));
333         test(StringSearch!().containsChar("Hello", 'o', 0, size_t.max));
334 
335         test(!StringSearch!().containsChar("Hello", 'l', 0, 0));
336         test(!StringSearch!().containsChar("Hello", 'l', 0, 2));
337         test(StringSearch!().containsChar("Hello", 'l', 0, 3));
338     }
339 
340     bool containsChar ( in Char[] str, Char value, size_t start = 0 )
341     {
342         return containsChar(str, value, start, size_t.max);
343     }
344 
345 
346     /**
347      * Scans "str" for "pattern" and returns the index of the first occurrence
348      * if found.
349      *
350      * Params:
351      *      str     = string to scan
352      *      pattern = search pattern
353      *      start   = start location to start searching
354      *
355      * Returns:
356      *      If found, the index of the first occurrence, or the length of "str"
357      *      otherwise.
358      */
359     size_t locatePattern ( in Char[] str, in Char[] pattern, size_t start = 0 )
360     {
361         if (str.length)
362         {
363             start = min(start, str.length - 1);
364         }
365 
366         auto str_search = str[start .. $] ~ TERM;
367 
368         const(Char)* item = pLocatePattern(str_search.ptr, (pattern ~ TERM).ptr);
369 
370         return item? ((item - str_search.ptr) + start) : str.length;
371     }
372 
373     ///
374     unittest
375     {
376         test!("==")(StringSearch!().locatePattern("Hello World!", "World", 0), 6);
377         test!("==")(StringSearch!().locatePattern("[Hello]", "[", 1), "[Hello]".length);
378         test!("==")(StringSearch!().locatePattern("[Hello]", "[", 256), "[Hello]".length);
379         // Crazy/inconsistent behavior: It should return 1
380         test!("==")(StringSearch!().locatePattern("[", "[", 1), 0);
381         test!("==")(StringSearch!().locatePattern("[", "[", 256), 0);
382     }
383 
384 
385     /**
386      * Scans "str" for "pattern" and returns the index of the first occurrence
387      * if found.
388      *
389      * Params:
390      *      str     = string to scan
391      *      pattern = search pattern
392      *      start   = index to start searching from
393      *
394      * Returns:
395      *      If found, the index of the first occurrence, or the length of "str"
396      *      otherwise.
397      */
398     size_t locatePatternT ( istring pattern ) ( in Char[] str, size_t start = 0 )
399     {
400         verify (start <= str.length,
401             "locatePatternT: start index out of range");
402         if (str.length)
403         {
404             start = min(start, str.length - 1);
405         }
406 
407         auto str_search = str[start .. $] ~ TERM;
408 
409         auto item = pLocatePattern(str_search.ptr, pattern.ptr);
410 
411         return item? ((item - str_search.ptr) + start) : str.length;
412     }
413 
414     ///
415     unittest
416     {
417         test!("==")(StringSearch!().locatePatternT!("World")("Hello World!", 0), 6);
418         test!("==")(StringSearch!().locatePatternT!("[")("[Hello]", 1), "[Hello]".length);
419         // Crazy/inconsistent behavior: It should return 1
420         test!("==")(StringSearch!().locatePatternT!("[")("[", 1), 0);
421         // Fail unittests, because reasons
422         //test!("==")(StringSearch!().locatePattern("[", "[", 256), 0);
423     }
424 
425 
426     /**************************************************************************
427 
428          Tells whether str contains pattern
429 
430          Params:
431               str     = string to scan
432               pattern = search pattern
433               start   = search start index
434 
435          Returns:
436               true if str contains pattern or false otherwise
437 
438      **************************************************************************/
439 
440     bool containsPattern ( in Char[] str, in Char[] pattern, size_t start = 0 )
441     {
442         verify (start <= str.length,
443             "containsPattern: start index out of range");
444 
445         return !!pLocatePattern((str ~ TERM).ptr + start, (pattern ~ TERM).ptr);
446     }
447 
448     ///
449     unittest
450     {
451         test(!StringSearch!().containsPattern("Hello", "ll", 5));
452         test(StringSearch!().containsPattern("Hello", "ll", 2));
453         test(StringSearch!().containsPattern("Hello", "lo", 3));
454         test(!StringSearch!().containsPattern("Hello", "lo", 4));
455         test(StringSearch!().containsPattern("Hello", "lo", 3));
456         test(StringSearch!().containsPattern("Hello", "lo", 0));
457     }
458 
459 
460     /**************************************************************************
461 
462         Locates the first occurrence of any of the characters of charset in str.
463 
464         Params:
465              str     = string to scan
466              charset = set of characters to look for
467              start   = search start index
468 
469         Returns:
470              index of first occurrence of any of the characters of charset in
471              str
472 
473     **************************************************************************/
474 
475     size_t locateCharSet ( in Char[] str, in Char[] charset, size_t start = 0 )
476     {
477         verify(start <= str.length,
478             "locateCharSet: start index out of range");
479 
480         size_t item = pLocateFirstInSet((str ~ TERM).ptr + start, (charset ~ TERM).ptr);
481 
482         return item + start;
483     }
484 
485 
486     /**************************************************************************
487 
488         Locates the first occurrence of any of the characters of charset in str.
489         Passing charset as template parameter makes this method somewhat more
490         efficient when used very frequently.
491 
492         Params:
493              str     = string to scan
494              start   = search start index
495 
496         Returns:
497              index of first occurrence of any of the characters of charset in
498              str
499 
500     **************************************************************************/
501 
502     size_t locateCharSetT ( istring charset ) ( in Char[] str, size_t start = 0 )
503     {
504         verify (start <= str.length,
505             "locateCharSetT: start index out of range");
506         return pLocateFirstInSet((str ~ TERM).ptr + start, charset.ptr);
507     }
508 
509 
510     /**************************************************************************
511 
512          Shifts "length" characters inside "string" from "src_pos" to "dst_pos".
513          This effectively does the same thing as
514 
515          ---
              string[dst_pos .. dst_pos + length] =  string[src_pos .. src_pos + length];
517          ---
518 
519          but allows overlapping ranges.
520 
521          Params:
522              str     = string to process
523              dst_pos = destination start position (index)
524              src_pos = source start position (index)
525              length  = number of array elements to shift
526 
527      **************************************************************************/
528 
529     Char[] shiftString ( ref Char[] str, size_t dst_pos, size_t src_pos, size_t length )
530     {
531         enum PREFIX = "shiftString(): ";
532 
533         verify (src_pos <= str.length, PREFIX ~ "source start out of range");
534         verify (dst_pos <= str.length, PREFIX ~ "destination start out of range");
535         verify (src_pos + length <= str.length, PREFIX ~ "source end out of range");
536         verify (dst_pos + length <= str.length, PREFIX ~ "destination end out of range");
537 
538         pMemMove(str.ptr + dst_pos, str.ptr + src_pos, length);
539 
540         return str;
541     }
542 
543 
544 
545     /**************************************************************************
546 
547          Returns the length of "str" without null terminator.
548 
549          Params:
550               str = input string (may or may not be null terminated)
551 
552          Returns:
553               the length of the string of this segment
554 
555      **************************************************************************/
556     size_t lengthOf ( in Char[] str )
557     {
558         return str.length? (str[$ - 1]? str.length : lengthOf(str.ptr)) : 0;
559     }
560 
561 
562 
563     /**************************************************************************
564 
565          Asserts that "str" is null-terminated.
566 
567          Params:
568              str = input string
569 
570      ***************************************************************************/
571     void assertTerm ( istring func ) ( in Char[] str )
572     {
573         verify (hasTerm(str), msgFunc!(func) ~ ": unterminated string");
574     }
575 
576 
577 
578     /**************************************************************************
579 
580         Adds a '\0' terminator to "str" if not present.
581 
582         Params:
583              str = string to '\0'-terminate
584 
585         Returns:
586              true if the string did not have a '\0'-terminator and therefore was
587              changed, or false otherwise.
588 
589      **************************************************************************/
590 
591     bool appendTerm ( ref Char[] str )
592     {
593         bool terminated = str.length? !str[$ - 1] : false;
594 
595         if (!terminated)
596         {
597             str ~= TERM;
598         }
599 
600         return !terminated;
601     }
602 
603 
604     /**************************************************************************
605 
606         Strips the null terminator from str, if any.
607 
608         Params:
609              str = input to '\0'-unterminate
610 
611         Returns:
612              true if the string had a '\0'-terminator and therefore was changed,
613              or false otherwise.
614 
615      **************************************************************************/
616     bool stripTerm ( ref Char[] str )
617     {
618         bool terminated = str.length? !str[$ - 1] : false;
619 
620         if (terminated)
621         {
622             str = str[0 .. lengthOf(str)];
623         }
624 
625         return terminated;
626     }
627 
628 
629 
630     /**************************************************************************
631 
632          Tells whether "str" is null-terminated.
633 
634          Params:
635               str = input string
636 
637          Returns:
638               true if "str" is null-terminated or false otherwise
639 
640      **************************************************************************/
641     bool hasTerm ( in Char[] str )
642     {
643         return str.length? !str[$ - 1] : false;
644     }
645 
646 
647 
648     /**************************************************************************
649 
650          Tells whether "str" and "pattern" are equal regardless of null
651          terminators.
652 
653          Params:
654               str     = str to compare to "pattern"
655               pattern = comparison pattern for "str"
656 
657          Returns:
658               true on match or false otherwise
659 
660      **************************************************************************/
661     bool matches ( Char[] str, Char[] pattern )
662     {
663         return (stripTerm(str) == stripTerm(pattern));
664     }
665 
666 
667 
668    /***************************************************************************
669 
670         Trims white space from "str".
671 
672         Params:
673              str       = input string
674              terminate = set to true to null-terminate the resulting string if
675                          the input string is null-terminated
676 
677         Returns:
678              the resulting string
679 
680     ***************************************************************************/
681     TChar[] trim (TChar) ( TChar[] str, bool terminate = false )
682     {
683         static assert (is(Unqual!(TChar) == Char));
684 
685         terminate &= hasTerm(str);
686 
687         foreach_reverse (i, c; str[0 .. lengthOf(str)])
688         {
689             if (!isSpace(c))
690             {
691                 str = str[0 .. i + terminate + 1];
692                 break;
693             }
694         }
695 
696         foreach (i, c; str)
697         {
698             if (!isSpace(c))
699             {
700                 return str[i .. $];
701             }
702         }
703 
704         return null;
705     }
706 
707     ///
708     unittest
709     {
710         test!("==")(StringSearch!().trim("trim"), "trim"[]);
711 
712         test!("==")(StringSearch!().trim("  trim"), "trim"[]);
713         test!("==")(StringSearch!().trim("\ttrim"), "trim"[]);
714         test!("==")(StringSearch!().trim(" \t trim"), "trim"[]);
715 
716         test!("==")(StringSearch!().trim("trim  "), "trim"[]);
717         test!("==")(StringSearch!().trim("trim\t"), "trim"[]);
718         test!("==")(StringSearch!().trim("trim \t "), "trim"[]);
719 
720         test!("==")(StringSearch!().trim("  trim  "), "trim"[]);
721         test!("==")(StringSearch!().trim("\ttrim\t"), "trim"[]);
722         test!("==")(StringSearch!().trim("\t \ttrim \t "), "trim"[]);
723     }
724 
725 
726     /**************************************************************************
727 
728          Converts each character of str in-place using convert. convert must be
729          a function that takes a character in the first argument and returns the
730          converted character.
731 
732          Params:
733               str = string to convert
734 
735          Returns:
736               converted string
737 
738      **************************************************************************/
739 
740     Char[] charConv ( alias convert ) ( ref Char[] str )
741     {
742         foreach (ref c; str)
743         {
744             c = cast(Char) convert(c);
745         }
746 
747         return str;
748     }
749 
750     /**************************************************************************
751 
752         Checks if all symbols of `str` are not modified by predicate
753         `convert`, creates a duplicate otherwise.
754 
        Params:
756             str = string to check/convert
757 
758         Returns:
759             `str` untouched if all symbols are already converted, duplicated
760             and converted string otherwise
761 
762      **************************************************************************/
763 
764     const(Char)[] charConvDup ( alias convert ) ( const(Char)[] str )
765     {
766         foreach (Char c; str)
767         {
768             if (c != cast(Char) convert(c))
769             {
770                 auto newstr = str.dup;
771                 foreach (ref Char c2; newstr)
772                     c2 = cast(Char) convert(c2);
773                 return newstr;
774             }
775         }
776 
777         return str;
778     }
779 
780     /**************************************************************************
781 
782          Converts "str" in-place to lower case.
783 
784          Params:
785               str = string to convert
786 
787          Returns:
788               converted string
789 
790      **************************************************************************/
791 
792     alias charConv!(toLower) strToLower;
793 
794     /**************************************************************************
795 
796          Ensures "str" is all lower case, allocates new copy and converts it
797          otherwise.
798 
799          Params:
800               str = string to check
801 
802          Returns:
803               converted/verified string
804 
805      **************************************************************************/
806 
807     alias charConvDup!(toLower) strEnsureLower;
808 
809     /**************************************************************************
810 
811          Converts "str" in-place to upper case.
812 
813          Params:
814               str = string to convert
815 
816          Returns:
817               converted string
818 
819      **************************************************************************/
820 
821     alias charConv!(toUpper) strToUpper;
822 
823 
824 
825     /**************************************************************************
826 
827          Tells if all letter characters in "str" match the condition checked by
828          "check". "check" must be something that takes a character in the first
829          argument and returns an integer type where a value different from 0 means
830          that the condition is satisfied.
831 
832          Params:
              str = string to check

         Returns:
              true if all letter characters match the condition checked by
              "check" or false otherwise
838 
839      **************************************************************************/
840     bool caseCheck ( alias check ) ( in Char[] str )
841     {
842         bool result = true;
843 
844         foreach (c; str)
845         {
846             result &= (!isAlpha(c) || !!check(c));
847         }
848 
849         return result;
850     }
851 
852 
853     /**************************************************************************
854 
855          Checks if all letter characters in "str" are lower case.
856 
857          Params:
858               str = string to check
859 
860          Returns:
861               true if all letter characters in "str" are lower case or false
862               otherwise
863 
864      **************************************************************************/
865 
866     alias caseCheck!(isLower) strIsLower;
867 
868 
869 
870     /**************************************************************************
871 
872      Checks if all letter characters in "str" are upper case.
873 
874      Params:
875           str = string to check
876 
877      Returns:
878           true if all letter characters in "str" are upper case or false
879           otherwise
880 
881      **************************************************************************/
882     alias caseCheck!(isUpper) strIsUpper;
883 
884     /**************************************************************************
885 
886         Splits str into at most n slices on each occurrence of delim. collapse
887         indicates whether to collapse consecutive occurrences  to a single one
888         to prevent producing empty slices.
889 
890         Params:
891              slices   = resulting slices buffer
892              str      = input string
893              delim    = delimiter character
894              n        = maximum number of slices; set to 0 to indicate no limit
895              collapse = set to true to collapse consecutive occurrences to
896                         prevent producing empty "slices"
897 
898         Returns:
899              the resulting slices
900 
901      **************************************************************************/
902 
903     TElem[] split (TElem) ( ref TElem[] slices, TElem str, Char delim, uint n = 0,
904                      bool collapse = false )
905     {
906         return split_!(Char, TElem)(slices, str, delim, &locateChar, n, collapse);
907     }
908 
909     ///
910     unittest
911     {
912         cstring[] slices;
913 
914         test!("==")(StringSearch!().split(slices, "a;b;c", ';'),
915                     ["a", "b", "c"][]);
916         test!("==")(StringSearch!().split(slices, "a;b;c", '.'),
917                     ["a;b;c"][]);
918         test!("==")(StringSearch!().split(slices, "abc;", ';'),
919                     ["abc", ""][]);
920         test!("==")(StringSearch!().split(slices, ";abc;", ';'),
921                     ["", "abc", ""][]);
922         test!("==")(StringSearch!().split(slices, "a;;bc", ';'),
923                     ["a", "", "bc"][]);
924 
925 
926         test!("==")(StringSearch!().split(slices, "a;b;c", ';', 2),
927                     ["a", "b"][]);
928 
929         test!("==")(StringSearch!().split(slices, "abc;", ';', 0, true),
930                     ["abc"][]);
931         test!("==")(StringSearch!().split(slices, ";abc;", ';', 0, true),
932                     ["abc"][]);
933         test!("==")(StringSearch!().split(slices, "a;;bc", ';', 0, true),
934                     ["a", "bc"][]);
935 
936         mstring[] mslices;
937         test!("==")(StringSearch!().split(slices, "a;b;c".dup, ';'),
938                     ["a", "b", "c"][]);
939     }
940 
941 
942     /**************************************************************************
943 
        Splits str into at most n slices on each occurrence of delim.
        Consecutive occurrences of delim are collapsed to a single one so that
        no empty slices are produced.
947 
948         Params:
949              slices   = array to put the resulting slices
950              str      = input string
951              delim    = delimiter character
952              n = maximum number of slices; set to 0 to indicate no limit
953 
954         Returns:
955              the resulting slices
956 
957      **************************************************************************/
958 
959     TElem[] splitCollapse (TElem) ( ref TElem[] slices, TElem str, Char delim,
960                                     uint n = 0 )
961     {
962         return split(slices,  str, delim, n, true);
963     }
964 
965 
966     /**************************************************************************
967 
968         Splits str into at most n slices on each occurrence of any character in
969         delims. collapse indicates whether to collapse consecutive occurrences
970         to a single one to prevent producing empty slices.
971 
        Params:
             slices   = destination array of slices
             str      = input string
             delims   = set of delimiter characters
             n        = maximum number of slices; set to 0 to indicate no limit
             collapse = set to true to collapse consecutive occurrences to
                        prevent producing empty "slices"

        Returns:
             the resulting slices
979 
980      **************************************************************************/
981 
982     TElem[] split (TElem) ( ref TElem[] slices, TElem str, in Char[] delims,
983                             uint n = 0, bool collapse = false )
984     {
985         return split_!(Char[], TElem)(slices, str, delims, &locateCharSet, n,
986                                       collapse);
987     }
988 
989 
990     /**************************************************************************
991 
        Splits str on each occurrence of any character in delim. Consecutive
        occurrences are always collapsed to a single one, so no empty slices
        are produced.

        Params:
             slices   = destination array of slices
             str      = input string
             delim    = set of delimiter characters
             n        = maximum number of slices; set to 0 to indicate no limit
1001 
1002         Returns:
1003              the resulting slices
1004 
1005      **************************************************************************/
1006 
1007     TElem[] splitCollapse (TElem) ( ref TElem[] slices, in TElem str,
1008                                      in Char[] delim, uint n = 0 )
1009     {
1010         return split(slices, str, delim, n, true);
1011     }
1012 
1013 
1014     /**************************************************************************
1015 
1016         Locate delimiter function definition template. LocateDelimDg is the type
1017         of the function callback used by split_().
1018 
1019         LocateDelimDg params:
1020             str   = string to search for delim
1021             delim = search pattern of arbitrary type: single character, set of
1022                     characters, search string, ...
            start = search start index
1024 
1025         LocateDelimDg shall return:
1026             index of first occurrence of delim in str, starting from start
1027 
1028      **************************************************************************/
1029 
1030     template LocateDelimDg ( T )
1031     {
1032         alias size_t function ( in Char[] str, T delim, size_t start ) LocateDelimDg;
1033     }
1034 
1035     /**************************************************************************
1036 
1037         Splits str into at most n slices on each occurrence reported by
1038         locateDelim. collapse indicates whether to collapse consecutive
1039         occurrences to a single one to prevent producing empty slices.
1040 
        Params:
             slices      = destination array of slices
             str         = input string
             delim       = delimiter(s), depending on locateDelim
             locateDelim = callback function which shall locate the
                           occurrence of delim in str; see LocateDelimDg
             n           = maximum number of slices; set to 0 to indicate no
                           limit
             collapse    = set to true to collapse consecutive occurrences to
                           prevent producing empty "slices"

        Returns:
             the resulting slices
1050 
1051      **************************************************************************/
1052 
1053     private TElem[] split_  ( T , TElem ) ( ref TElem[] slices, TElem str,
1054                                             T delim, LocateDelimDg!(T) locateDelim,
1055                                             uint n, bool collapse )
1056     {
1057         static if (is(Unqual!(TElem) E : E[]))
1058         {
1059             static assert (is (Unqual!(E) == Char),
1060                            "TElem should be [const] Char[], not : "
1061                            ~ TElem.stringof);
1062         }
1063         else
1064         {
1065             static assert (false, "TElem should be [const] Char[], not : "
1066                            ~ TElem.stringof);
1067         }
1068         uint   i     = 0;
1069 
1070         size_t start = collapse? skipLeadingDelims(str, delim) : 0;
1071 
1072         size_t pos   = locateDelim(str, delim, start);
1073 
1074         slices.length = 0;
1075         assumeSafeAppend(slices);
1076 
1077         while ((pos < str.length) && (!n || (i < n)))
1078         {
1079             if (!((pos == start) && collapse))
1080             {
1081                 slices ~= str[start .. pos];
1082 
1083                 i++;
1084             }
1085 
1086             start = pos + 1;
1087 
1088             pos = locateDelim(str, delim, start);
1089         }
1090 
1091         if ((!n || (i < n)) && (!((start == str.length) && collapse)))
1092         {
1093             slices ~= str[start .. $];                                          // append tail
1094         }
1095 
1096         return slices;
1097     }
1098 
1099     /**************************************************************************
1100 
1101         Skips leading occurrences of delim in string.
1102 
1103         Params:
1104              str      = input string
             delim    = delimiter character or set of delimiter characters
1106 
1107         Returns:
1108              index of character in str after skipping leading occurrences of
1109              delim (length of str if str consists of delim characters)
1110 
1111      **************************************************************************/
1112 
1113     private size_t skipLeadingDelims ( T ) ( in Char[] str, T delim )
1114     {
1115         foreach (i, c; str)
1116         {
1117             bool found;
1118 
1119             static if (is (T U : U[]))
1120             {
1121                 found = containsChar(delim, c);
1122             }
1123             else static if (is (T : Char))
1124             {
1125                 found = c == delim;
1126             }
1127             else static assert (false, "skipLeadingDelims: delim must be of type '" ~
1128                                        Char.stringof ~ "' or '" ~ (Char[]).stringof ~
1129                                        "', not '" ~ T.stringof ~ '\'');
1130 
1131 
1132 
1133             if (!found) return i;
1134         }
1135 
1136         return str.length;
1137     }
1138 }