ocean.text.util.StringSearch source code

1 /******************************************************************************
2 
3     C string and character tool functions
4 
5     C string and character tool functions and null terminator utilities
6 
7     Copyright:
8         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
9         All rights reserved.
10 
11     License:
12         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
13         Alternatively, this file may be distributed under the terms of the Tango
14         3-Clause BSD License (see LICENSE_BSD.txt for details).
15 
16  ******************************************************************************/
17 
18 module ocean.text.util.StringSearch;
19 
20 
21 import ocean.transition;
22 import ocean.core.Verify;
23 
24 import c_stddef = core.stdc.stddef: wchar_t;
25 import c_wctype = core.stdc.wctype;
26 import c_ctype  = core.stdc.ctype;
27 import c_string = ocean.stdc.string;
28 
29 import ocean.math.Math:   min;
30 
31 version (UnitTest)
32 {
33     import ocean.core.Test;
34 }
35 
36 
37 /++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
38 
39     /**************************************************************************
40 
41         Descriptions for public alias methods
42 
43      **************************************************************************/
44 
45     /**
46      * Returns the length of "str" without null terminator.
47      *
48      * Params:
49      *      str = string (must be null terminated)
50      *
51      * Returns:
52      *      length of "str" without null terminator
53      */
54     size_t lengthOf ( Char* str );
55 
56 
57     /**
58      * Tells whether "chr" is
59      *  isCntrl -- a control character or
60      *  isSpace -- whitespace or
61      *  isGraph -- a character associated with a graph or
62      *  isPrint -- printable or
63      *  isAlpha -- a letter or
64      *  isLower -- a lower case letter or
65      *  isUpper -- an upper case letter or
66      *  isAlNum -- a letter or a decimal digit or
67      *  isDigit -- a decimal digit or
68      *  isHexDigit -- a hexadecimal digit.
69      *
70      * Params:
71      *      chr = character to identify
72      * Returns:
73      *      true if the character is of the specified class or false otherwise
74      */
75     bool isCntrl ( Char chr );
76     bool isSpace ( Char chr );
77 
78     bool isGraph ( Char chr );
79     bool isPrint ( Char chr );
80     bool isPunct ( Char chr );
81 
82     bool isAlpha ( Char chr );
83     bool isAlNum ( Char chr );
84     bool isDigit ( Char chr );
85     bool isHexDigit ( Char chr );
86 
87 
88     bool isLower ( Char chr );
89     bool isUpper ( Char chr );
90 
91 
92     /**
93      * Converts "chr"
94      *  toLower -- to lower case or
95      *  toUpper -- to upper case.
96      *
97      * Params:
98      *      chr = character to convert
99      *
100      * Returns:
101      *      converted character
102      */
103     Char toLower ( Char chr );
104     Char toUpper ( Char chr );
105 
106 
107     /**************************************************************************
108 
109         Explanations for private alias methods
110 
111      **************************************************************************/
112 
113     /**
114      * Returns the index of the first occurrence of one of the characters in
115      * "charset" in "str".
116      *
117      * Params:
118      *     str =     string to scan for characters in "charset"
119      *     charset = search character set
120      * Returns: the index of the first such character, or the length of "str" if there is none
121      */
122     size_t pLocateFirstInSet ( Char* str, Char* charset );
123 
124 
125     /**
126      * Returns a pointer to the first occurrence of "pattern" in "str".
127      *
128      * Params:
129      *     str = string to scan for "pattern"
130      *     pattern = search pattern
131      * Returns: a pointer to the first occurrence, or null if "pattern" does not occur in "str"
132      */
133     Char* pLocatePattern ( Char* str, Char* pattern );
134 
135 
136     /**
137      * Moves src[0 .. n] to dst[0 .. n]. "src" and "dst" may overlap.
138      *
139      * Params:
140      *     dst = pointer to destination
141      *     src = pointer to source
142      *     n   = number of elements to move
143      * Returns: "dst"
144      */
145     Char* pMemMove ( Char* dst, Char* src, size_t n );
146 
147 
148     /**
149      * Returns a pointer to the first occurrence of "chr" within the first "n"
150      * elements of "str".
151      *
152      * Params:
153      *     str = string to scan for "chr"
154      *     chr = search character
155      *     n =   number of characters to scan for "chr"
156      * Returns: a pointer to the first occurrence, or null if "chr" is not among the first "n" elements
157      */
158     Char* pLocateBinChar ( Char* str, Char chr, size_t n );
159 
160 
161  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++/
162 
163 /******************************************************************************
164 
165     StringSearch structure
166 
167  ******************************************************************************/
168 
169 struct StringSearch ( bool wide_char = false )
170 {
    // C wide character type, taken from core.stdc.stddef.
    alias c_stddef.wchar_t WcharT;

    // Select the character type and the matching set of C library functions
    // at compile time. The aliases in both branches have the same names so
    // that the methods below are written once against Char, isSpace(),
    // pLocateBinChar() etc.
    static if (wide_char)
    {
        // Character type: wchar_t.
        alias WcharT            Char;

        // Character classification (core.stdc.wctype).
        alias c_wctype.iswcntrl  isCntrl;
        alias c_wctype.iswspace  isSpace;

        alias c_wctype.iswgraph  isGraph;
        alias c_wctype.iswprint  isPrint;
        alias c_wctype.iswpunct  isPunct;

        alias c_wctype.iswalpha  isAlpha;
        alias c_wctype.iswalnum  isAlNum;
        alias c_wctype.iswdigit  isDigit;
        alias c_wctype.iswxdigit isHexDigit;

        alias c_wctype.iswlower  isLower;
        alias c_wctype.iswupper  isUpper;

        // Case conversion (core.stdc.wctype).
        alias c_wctype.towlower  toLower;
        alias c_wctype.towupper  toUpper;

        // Length of a null-terminated string.
        alias c_string.wcslen    lengthOf;

        // Character search within a given number of elements.
        alias c_string.wmemchr   pLocateBinChar;

        // Pattern search, overlapping move and character set search.
        alias c_string.wcsstr    pLocatePattern;
        alias c_string.wmemmove  pMemMove;
        alias c_string.wcscspn   pLocateFirstInSet;

        // Tokenizer.
        alias c_string.wcstok    pSplit;
    }
    else
    {
        // Character type: char.
        alias char              Char;

        // Character classification (core.stdc.ctype).
        alias c_ctype.iscntrl    isCntrl;
        alias c_ctype.isspace    isSpace;

        alias c_ctype.isgraph    isGraph;
        alias c_ctype.isprint    isPrint;
        alias c_ctype.ispunct    isPunct;

        alias c_ctype.isalpha    isAlpha;
        alias c_ctype.isalnum    isAlNum;
        alias c_ctype.isdigit    isDigit;
        alias c_ctype.isxdigit   isHexDigit;

        alias c_ctype.islower    isLower;
        alias c_ctype.isupper    isUpper;

        // Case conversion (core.stdc.ctype).
        alias c_ctype.tolower    toLower;
        alias c_ctype.toupper    toUpper;

        // Length of a null-terminated string.
        alias c_string.strlen    lengthOf;

        // Character search within a given number of elements.
        alias c_string.memchr    pLocateBinChar;

        // Pattern search, overlapping move and character set search.
        alias c_string.strstr    pLocatePattern;
        alias c_string.memmove   pMemMove;
        alias c_string.strcspn   pLocateFirstInSet;

        // Tokenizer.
        alias c_string.strtok    pSplit;

    }

    // Everything declared from here on is a static member.
    static:

    // Null terminator of the selected character type.
    enum Char TERM = '\0';
242 
243     /**
244      * Locates the first occurence of value within the first length characters
245      * of str. If greater, length is truncated to the length of str.
246      *
247      * Params:
248      *      str    = string to search for value
249      *      value  = element value to find
250      *      start  = start index
251      *      length = number of elements to examine
252      *
253      * Returns:
254      *      the index of the first element with value "value" or the index of
255      *      the last examined element + 1
256      */
257     size_t locateChar ( in Char[] str, Char value, size_t start, size_t length )
258     {
259         verify (start <= str.length, "locateChar: start index out of range");
260 
261         length = min(length, str.length);
262 
263         Const!(void)* item = pLocateBinChar(str.ptr + start, value, length - start);
264         Const!(void)* pstr = str.ptr;
265 
266         return item? (item - pstr) : length;
267     }
268 
269     ///
    unittest
    {
        // start == str.length: no element is examined, the length is returned
        test!("==")(StringSearch!().locateChar("Hello", 'l', 5, size_t.max), 5);
        // The element at the start index itself matches
        test!("==")(StringSearch!().locateChar("Hello", 'l', 2, size_t.max), 2);
        test!("==")(StringSearch!().locateChar("Hello", 'l', 3, size_t.max), 3);
        test!("==")(StringSearch!().locateChar("Hello", 'o', 5, size_t.max), 5);
        test!("==")(StringSearch!().locateChar("Hello", 'o', 4, size_t.max), 4);
        // Match after the start index
        test!("==")(StringSearch!().locateChar("Hello", 'o', 0, size_t.max), 4);
        // Test searches in a limited region of the input string
        // Not found within the limit: the limit is returned
        test!("==")(StringSearch!().locateChar("Hello", 'l', 0, 0), 0);
        test!("==")(StringSearch!().locateChar("Hello", 'l', 0, 2), 2);
        // Found within the limit
        test!("==")(StringSearch!().locateChar("Hello", 'l', 0, 3), 2);
    }
283 
284 
    /**
     * Locates the first occurrence of value within str, starting from start.
     *
     * Forwards to the four-argument overload with size_t.max as length, so
     * the search range extends to the end of str.
     *
     * Params:
     *      str    = string to search for "value"
     *      value  = element value to find
     *      start  = start index
     *
     * Returns:
     *      the index of the first element with value "value" or the index of
     *      the last examined element + 1
     */
    size_t locateChar ( in Char[] str, Char value, size_t start = 0 )
    {
        return locateChar(str, value, start, size_t.max);
    }
301 
302     /**
303      * Tells whether the first length characters of str, starting fromo start,
304      * contain value. If greater, length is truncated to the length of str.
305      *
306      * Params:
307      *     str    = string to search for value
308      *     value  = value to search for
309      *     start  = start index
310      *     length = number of elements to examine
311      *
312      * Returns:
313      *      true if str contains value or false otherwise
314      */
315     bool containsChar ( in Char[] str, Char value, size_t start, size_t length )
316     {
317         verify (start <= str.length, "containsChar: start index out of range");
318 
319         length = min(length, str.length);
320 
321         return !!pLocateBinChar(str.ptr + start, value, length - start);
322     }
323 
324     ///
    unittest
    {
        // start == str.length: empty search range
        test(!StringSearch!().containsChar("Hello", 'l', 5, size_t.max));
        // The element at the start index itself matches
        test(StringSearch!().containsChar("Hello", 'l', 2, size_t.max));
        test(StringSearch!().containsChar("Hello", 'l', 3, size_t.max));
        test(!StringSearch!().containsChar("Hello", 'o', 5, size_t.max));
        test(StringSearch!().containsChar("Hello", 'o', 4, size_t.max));
        test(StringSearch!().containsChar("Hello", 'o', 0, size_t.max));

        // Searches in a limited region of the input string: the first 'l' is
        // at index 2, so it is only found if the limit is greater than 2
        test(!StringSearch!().containsChar("Hello", 'l', 0, 0));
        test(!StringSearch!().containsChar("Hello", 'l', 0, 2));
        test(StringSearch!().containsChar("Hello", 'l', 0, 3));
    }
338 
    /**
     * Tells whether str contains value at or after the index start.
     *
     * Forwards to the four-argument overload with size_t.max as length, so
     * the search range extends to the end of str.
     *
     * Params:
     *     str    = string to search for value
     *     value  = value to search for
     *     start  = start index
     *
     * Returns:
     *      true if str contains value or false otherwise
     */
    bool containsChar ( in Char[] str, Char value, size_t start = 0 )
    {
        return containsChar(str, value, start, size_t.max);
    }
343 
344 
345     /**
346      * Scans "str" for "pattern" and returns the index of the first occurrence
347      * if found.
348      *
349      * Params:
350      *      str     = string to scan
351      *      pattern = search pattern
352      *      start   = start location to start searching
353      *
354      * Returns:
355      *      If found, the index of the first occurrence, or the length of "str"
356      *      otherwise.
357      */
358     size_t locatePattern ( in Char[] str, in Char[] pattern, size_t start = 0 )
359     {
360         if (str.length)
361         {
362             start = min(start, str.length - 1);
363         }
364 
365         auto str_search = str[start .. $] ~ TERM;
366 
367         Const!(Char)* item = pLocatePattern(str_search.ptr, (pattern ~ TERM).ptr);
368 
369         return item? ((item - str_search.ptr) + start) : str.length;
370     }
371 
372     ///
    unittest
    {
        test!("==")(StringSearch!().locatePattern("Hello World!", "World", 0), 6);
        // Not found at or after the start index: the string length is returned
        test!("==")(StringSearch!().locatePattern("[Hello]", "[", 1), "[Hello]".length);
        // A start index beyond the end is accepted
        test!("==")(StringSearch!().locatePattern("[Hello]", "[", 256), "[Hello]".length);
        // Crazy/inconsistent behavior: It should return 1
        // (the start index is clamped to the last element, which is then
        // searched and matches)
        test!("==")(StringSearch!().locatePattern("[", "[", 1), 0);
        test!("==")(StringSearch!().locatePattern("[", "[", 256), 0);
    }
382 
383 
384     /**
385      * Scans "str" for "pattern" and returns the index of the first occurrence
386      * if found.
387      *
388      * Params:
389      *      str     = string to scan
390      *      pattern = search pattern
391      *      start   = index to start searching from
392      *
393      * Returns:
394      *      If found, the index of the first occurrence, or the length of "str"
395      *      otherwise.
396      */
397     size_t locatePatternT ( istring pattern ) ( in Char[] str, size_t start = 0 )
398     {
399         verify (start <= str.length,
400             "locatePatternT: start index out of range");
401         if (str.length)
402         {
403             start = min(start, str.length - 1);
404         }
405 
406         auto str_search = str[start .. $] ~ TERM;
407 
408         auto item = pLocatePattern(str_search.ptr, pattern.ptr);
409 
410         return item? ((item - str_search.ptr) + start) : str.length;
411     }
412 
413     ///
    unittest
    {
        test!("==")(StringSearch!().locatePatternT!("World")("Hello World!", 0), 6);
        // Not found at or after the start index: the string length is returned
        test!("==")(StringSearch!().locatePatternT!("[")("[Hello]", 1), "[Hello]".length);
        // Crazy/inconsistent behavior: It should return 1
        test!("==")(StringSearch!().locatePatternT!("[")("[", 1), 0);
        // Fail unittests, because reasons
        // NOTE(review): the disabled line below calls locatePattern, not
        // locatePatternT; presumably the locatePatternT variant was meant,
        // which rejects a start index greater than the string length --
        // confirm.
        //test!("==")(StringSearch!().locatePattern("[", "[", 256), 0);
    }
423 
424 
425     /**************************************************************************
426 
427          Tells whether str contains pattern
428 
429          Params:
430               str     = string to scan
431               pattern = search pattern
432               start   = search start index
433 
434          Returns:
435               true if str contains pattern or false otherwise
436 
437      **************************************************************************/
438 
439     bool containsPattern ( in Char[] str, in Char[] pattern, size_t start = 0 )
440     {
441         verify (start <= str.length,
442             "containsPattern: start index out of range");
443 
444         return !!pLocatePattern((str ~ TERM).ptr + start, (pattern ~ TERM).ptr);
445     }
446 
447     ///
    unittest
    {
        // start == str.length: only the empty tail is searched
        test(!StringSearch!().containsPattern("Hello", "ll", 5));
        // The pattern begins exactly at the start index
        test(StringSearch!().containsPattern("Hello", "ll", 2));
        test(StringSearch!().containsPattern("Hello", "lo", 3));
        // The pattern begins before the start index
        test(!StringSearch!().containsPattern("Hello", "lo", 4));
        test(StringSearch!().containsPattern("Hello", "lo", 3));
        // The pattern begins after the start index
        test(StringSearch!().containsPattern("Hello", "lo", 0));
    }
457 
458 
459     /**************************************************************************
460 
461         Locates the first occurrence of any of the characters of charset in str.
462 
463         Params:
464              str     = string to scan
465              charset = set of characters to look for
466              start   = search start index
467 
468         Returns:
469              index of first occurrence of any of the characters of charset in
470              str
471 
472     **************************************************************************/
473 
474     size_t locateCharSet ( in Char[] str, in Char[] charset, size_t start = 0 )
475     {
476         verify(start <= str.length,
477             "locateCharSet: start index out of range");
478 
479         size_t item = pLocateFirstInSet((str ~ TERM).ptr + start, (charset ~ TERM).ptr);
480 
481         return item + start;
482     }
483 
484 
485     /**************************************************************************
486 
487         Locates the first occurrence of any of the characters of charset in str.
488         Passing charset as template parameter makes this method somewhat more
489         efficient when used very frequently.
490 
491         Params:
492              str     = string to scan
493              start   = search start index
494 
495         Returns:
496              index of first occurrence of any of the characters of charset in
497              str
498 
499     **************************************************************************/
500 
501     size_t locateCharSetT ( istring charset ) ( in Char[] str, size_t start = 0 )
502     {
503         verify (start <= str.length,
504             "locateCharSetT: start index out of range");
505         return pLocateFirstInSet((str ~ TERM).ptr + start, charset.ptr);
506     }
507 
508 
509     /**************************************************************************
510 
511          Shifts "length" characters inside "string" from "src_pos" to "dst_pos".
512          This effectively does the same thing as
513 
514          ---
515               string[src_pos .. src_pos + length] =  string[dst_pos .. dst_pos + length];
516          ---
517 
518          but allows overlapping ranges.
519 
520          Params:
521              str     = string to process
522              dst_pos = destination start position (index)
523              src_pos = source start position (index)
524              length  = number of array elements to shift
525 
526      **************************************************************************/
527 
528     Char[] shiftString ( ref Char[] str, size_t dst_pos, size_t src_pos, size_t length )
529     {
530         enum PREFIX = "shiftString(): ";
531 
532         verify (src_pos <= str.length, PREFIX ~ "source start out of range");
533         verify (dst_pos <= str.length, PREFIX ~ "destination start out of range");
534         verify (src_pos + length <= str.length, PREFIX ~ "source end out of range");
535         verify (dst_pos + length <= str.length, PREFIX ~ "destination end out of range");
536 
537         pMemMove(str.ptr + dst_pos, str.ptr + src_pos, length);
538 
539         return str;
540     }
541 
542 
543 
544     /**************************************************************************
545 
546          Returns the length of "str" without null terminator.
547 
548          Params:
549               str = input string (may or may not be null terminated)
550 
551          Returns:
552               the length of the string of this segment
553 
554      **************************************************************************/
555     size_t lengthOf ( in Char[] str )
556     {
557         return str.length? (str[$ - 1]? str.length : lengthOf(str.ptr)) : 0;
558     }
559 
560 
561 
562     /**************************************************************************
563 
564          Asserts that "str" is null-terminated.
565 
566          Params:
567              str = input string
568 
569      ***************************************************************************/
570     void assertTerm ( istring func ) ( in Char[] str )
571     {
572         verify (hasTerm(str), msgFunc!(func) ~ ": unterminated string");
573     }
574 
575 
576 
577     /**************************************************************************
578 
579         Adds a '\0' terminator to "str" if not present.
580 
581         Params:
582              str = string to '\0'-terminate
583 
584         Returns:
585              true if the string did not have a '\0'-terminator and therefore was
586              changed, or false otherwise.
587 
588      **************************************************************************/
589 
590     bool appendTerm ( ref Char[] str )
591     {
592         bool terminated = str.length? !str[$ - 1] : false;
593 
594         if (!terminated)
595         {
596             str ~= TERM;
597         }
598 
599         return !terminated;
600     }
601 
602 
603     /**************************************************************************
604 
605         Strips the null terminator from str, if any.
606 
607         Params:
608              str = input to '\0'-unterminate
609 
610         Returns:
611              true if the string had a '\0'-terminator and therefore was changed,
612              or false otherwise.
613 
614      **************************************************************************/
615     bool stripTerm ( ref Char[] str )
616     {
617         bool terminated = str.length? !str[$ - 1] : false;
618 
619         if (terminated)
620         {
621             str = str[0 .. lengthOf(str)];
622         }
623 
624         return terminated;
625     }
626 
627 
628 
629     /**************************************************************************
630 
631          Tells whether "str" is null-terminated.
632 
633          Params:
634               str = input string
635 
636          Returns:
637               true if "str" is null-terminated or false otherwise
638 
639      **************************************************************************/
640     bool hasTerm ( in Char[] str )
641     {
642         return str.length? !str[$ - 1] : false;
643     }
644 
645 
646 
647     /**************************************************************************
648 
649          Tells whether "str" and "pattern" are equal regardless of null
650          terminators.
651 
652          Params:
653               str     = str to compare to "pattern"
654               pattern = comparison pattern for "str"
655 
656          Returns:
657               true on match or false otherwise
658 
659      **************************************************************************/
660     bool matches ( Char[] str, Char[] pattern )
661     {
662         return (stripTerm(str) == stripTerm(pattern));
663     }
664 
665 
666 
667    /***************************************************************************
668 
669         Trims white space from "str".
670 
671         Params:
672              str       = input string
673              terminate = set to true to null-terminate the resulting string if
674                          the input string is null-terminated
675 
676         Returns:
677              the resulting string
678 
679     ***************************************************************************/
680     TChar[] trim (TChar) ( TChar[] str, bool terminate = false )
681     {
682         static assert (is(Unqual!(TChar) == Char));
683 
684         terminate &= hasTerm(str);
685 
686         foreach_reverse (i, c; str[0 .. lengthOf(str)])
687         {
688             if (!isSpace(c))
689             {
690                 str = str[0 .. i + terminate + 1];
691                 break;
692             }
693         }
694 
695         foreach (i, c; str)
696         {
697             if (!isSpace(c))
698             {
699                 return str[i .. $];
700             }
701         }
702 
703         return null;
704     }
705 
706     ///
    unittest
    {
        // Nothing to trim
        test!("==")(StringSearch!().trim("trim"), "trim"[]);

        // Leading white space only
        test!("==")(StringSearch!().trim("  trim"), "trim"[]);
        test!("==")(StringSearch!().trim("\ttrim"), "trim"[]);
        test!("==")(StringSearch!().trim(" \t trim"), "trim"[]);

        // Trailing white space only
        test!("==")(StringSearch!().trim("trim  "), "trim"[]);
        test!("==")(StringSearch!().trim("trim\t"), "trim"[]);
        test!("==")(StringSearch!().trim("trim \t "), "trim"[]);

        // White space on both sides
        test!("==")(StringSearch!().trim("  trim  "), "trim"[]);
        test!("==")(StringSearch!().trim("\ttrim\t"), "trim"[]);
        test!("==")(StringSearch!().trim("\t \ttrim \t "), "trim"[]);
    }
723 
724 
725     /**************************************************************************
726 
727          Converts each character of str in-place using convert. convert must be
728          a function that takes a character in the first argument and returns the
729          converted character.
730 
731          Params:
732               str = string to convert
733 
734          Returns:
735               converted string
736 
737      **************************************************************************/
738 
739     Char[] charConv ( alias convert ) ( ref Char[] str )
740     {
741         foreach (ref c; str)
742         {
743             c = cast(Char) convert(c);
744         }
745 
746         return str;
747     }
748 
749     /**************************************************************************
750 
751         Checks if all symbols of `str` are not modified by predicate
752         `convert`, creates a duplicate otherwise.
753 
754         Params
755             str = string to check/convert
756 
757         Returns:
758             `str` untouched if all symbols are already converted, duplicated
759             and converted string otherwise
760 
761      **************************************************************************/
762 
763     Const!(Char)[] charConvDup ( alias convert ) ( Const!(Char)[] str )
764     {
765         foreach (Char c; str)
766         {
767             if (c != cast(Char) convert(c))
768             {
769                 auto newstr = str.dup;
770                 foreach (ref Char c2; newstr)
771                     c2 = cast(Char) convert(c2);
772                 return newstr;
773             }
774         }
775 
776         return str;
777     }
778 
    /**************************************************************************

         Converts "str" in-place to lower case, using charConv() with
         toLower().

         Params:
              str = string to convert

         Returns:
              converted string

     **************************************************************************/

    alias charConv!(toLower) strToLower;

    /**************************************************************************

         Ensures "str" is all lower case, allocates new copy and converts it
         otherwise. Uses charConvDup() with toLower(), so "str" itself is
         never modified.

         Params:
              str = string to check

         Returns:
              "str" itself if toLower() leaves all of its characters
              unchanged, a converted copy otherwise

     **************************************************************************/

    alias charConvDup!(toLower) strEnsureLower;

    /**************************************************************************

         Converts "str" in-place to upper case, using charConv() with
         toUpper().

         Params:
              str = string to convert

         Returns:
              converted string

     **************************************************************************/

    alias charConv!(toUpper) strToUpper;
821 
822 
823 
824     /**************************************************************************
825 
826          Tells if all letter characters in "str" match the condition checked by
827          "check". "check" must be something that takes a character in the first
828          argument and returns an integer type where a value different from 0 means
829          that the condition is satisfied.
830 
831          Params:
832               str = string to convert
833 
834          Returns:
835               true if all letter characters match the the condition checked by
836               "check" or false otherwise
837 
838      **************************************************************************/
839     bool caseCheck ( alias check ) ( in Char[] str )
840     {
841         bool result = true;
842 
843         foreach (c; str)
844         {
845             result &= (!isAlpha(c) || !!check(c));
846         }
847 
848         return result;
849     }
850 
851 
    /**************************************************************************

         Checks if all letter characters in "str" are lower case, using
         caseCheck() with isLower().

         Params:
              str = string to check

         Returns:
              true if all letter characters in "str" are lower case or false
              otherwise

     **************************************************************************/

    alias caseCheck!(isLower) strIsLower;



    /**************************************************************************

         Checks if all letter characters in "str" are upper case, using
         caseCheck() with isUpper().

         Params:
              str = string to check

         Returns:
              true if all letter characters in "str" are upper case or false
              otherwise

     **************************************************************************/
    alias caseCheck!(isUpper) strIsUpper;
882 
    /**************************************************************************

        Splits str into at most n slices on each occurrence of delim. collapse
        indicates whether to collapse consecutive occurrences to a single one
        to prevent producing empty slices.

        Forwards to split_() with locateChar() as the function that locates
        the delimiter.

        Params:
             slices   = resulting slices buffer
             str      = input string
             delim    = delimiter character
             n        = maximum number of slices; set to 0 to indicate no limit
             collapse = set to true to collapse consecutive occurrences to
                        prevent producing empty "slices"

        Returns:
             the resulting slices

     **************************************************************************/

    TElem[] split (TElem) ( ref TElem[] slices, TElem str, Char delim, uint n = 0,
                     bool collapse = false )
    {
        return split_!(Char, TElem)(slices, str, delim, &locateChar, n, collapse);
    }
907 
908     ///
    unittest
    {
        cstring[] slices;

        // Plain split
        test!("==")(StringSearch!().split(slices, "a;b;c", ';'),
                    ["a", "b", "c"][]);
        // Delimiter not present: the whole string is the only slice
        test!("==")(StringSearch!().split(slices, "a;b;c", '.'),
                    ["a;b;c"][]);
        // Leading, trailing and consecutive delimiters produce empty slices
        test!("==")(StringSearch!().split(slices, "abc;", ';'),
                    ["abc", ""][]);
        test!("==")(StringSearch!().split(slices, ";abc;", ';'),
                    ["", "abc", ""][]);
        test!("==")(StringSearch!().split(slices, "a;;bc", ';'),
                    ["a", "", "bc"][]);


        // Limited number of slices
        test!("==")(StringSearch!().split(slices, "a;b;c", ';', 2),
                    ["a", "b"][]);

        // With collapsing no empty slices are produced
        test!("==")(StringSearch!().split(slices, "abc;", ';', 0, true),
                    ["abc"][]);
        test!("==")(StringSearch!().split(slices, ";abc;", ';', 0, true),
                    ["abc"][]);
        test!("==")(StringSearch!().split(slices, "a;;bc", ';', 0, true),
                    ["a", "bc"][]);

        // Mutable input string
        // NOTE(review): mslices is declared but not used; the call below
        // passes slices. Presumably mslices was meant -- confirm.
        mstring[] mslices;
        test!("==")(StringSearch!().split(slices, "a;b;c".dup, ';'),
                    ["a", "b", "c"][]);
    }
939 
940 
    /**************************************************************************

        Splits str on each occurrence of delim, collapsing consecutive
        occurrences to a single one to prevent producing empty slices.

        Same as split() with collapse set to true.

        Params:
             slices   = array to put the resulting slices
             str      = input string
             delim    = delimiter character
             n = maximum number of slices; set to 0 to indicate no limit

        Returns:
             the resulting slices

     **************************************************************************/

    TElem[] splitCollapse (TElem) ( ref TElem[] slices, TElem str, Char delim,
                                    uint n = 0 )
    {
        return split(slices,  str, delim, n, true);
    }
963 
964 
965     /**************************************************************************
966 
967         Splits str into at most n slices on each occurrence of any character in
968         delims. collapse indicates whether to collapse consecutive occurrences
969         to a single one to prevent producing empty slices.
970 
971         Params:
972              slices   = destination array of slices
973              str      = input string
             delims   = set of delimiter characters
975              n        = maximum number of slices; set to 0 to indicate no limit
976              collapse = set to true to collapse consecutive occurrences to
977                         prevent producing empty "slices"
978 
979      **************************************************************************/
980 
    TElem[] split (TElem) ( ref TElem[] slices, TElem str, in Char[] delims,
                            uint n = 0, bool collapse = false )
    {
        // Delegates to split_(), passing locateCharSet as the delimiter
        // locator callback.
        // NOTE(review): delims is declared `in` (const) here while split_ is
        // instantiated with T = Char[], a mutable array type -- confirm that
        // this instantiation compiles when building as D2.
        return split_!(Char[], TElem)(slices, str, delims, &locateCharSet, n,
                                      collapse);
    }
987 
988 
989     /**************************************************************************
990 
        Splits str into at most n slices on each occurrence of any character in
        delim. Consecutive occurrences are always collapsed to a single one so
        that no empty slices are produced.
994 
995         Params:
996              str      = input string
             delim    = set of delimiter characters
998              slices   = destination array of slices
999              n        = maximum number of slices; set to 0 to indicate no limit
1000 
1001         Returns:
1002              the resulting slices
1003 
1004      **************************************************************************/
1005 
1006     TElem[] splitCollapse (TElem) ( ref TElem[] slices, in TElem str,
1007                                      in Char[] delim, uint n = 0 )
1008     {
1009         return split(slices, str, delim, n, true);
1010     }
1011 
1012 
1013     /**************************************************************************
1014 
1015         Locate delimiter function definition template. LocateDelimDg is the type
1016         of the function callback used by split_().
1017 
1018         LocateDelimDg params:
1019             str   = string to search for delim
1020             delim = search pattern of arbitrary type: single character, set of
1021                     characters, search string, ...
            start = search start index
1023 
1024         LocateDelimDg shall return:
            index of first occurrence of delim in str, starting from start;
            split_() treats a returned index >= str.length as "not found"
1026 
1027      **************************************************************************/
1028 
    template LocateDelimDg ( T )
    {
        // Function pointer type: takes the string to search, the delimiter of
        // type T and the start index, and returns a size_t index.
        alias size_t function ( in Char[] str, T delim, size_t start ) LocateDelimDg;
    }
1033 
1034     /**************************************************************************
1035 
1036         Splits str into at most n slices on each occurrence reported by
1037         locateDelim. collapse indicates whether to collapse consecutive
1038         occurrences to a single one to prevent producing empty slices.
1039 
1040         Params:
1041              slices      = destination array of slices
1042              str         = input string
1043              delim       = delimiter(s), depending on locateDelim
             locateDelim = callback function which shall locate the
                           occurrence of delim in str; see LocateDelimDg
             n           = maximum number of slices; set to 0 to indicate no
                           limit
1047              collapse = set to true to collapse consecutive occurrences to
1048                         prevent producing empty "slices"
1049 
1050      **************************************************************************/
1051 
1052     private TElem[] split_  ( T , TElem ) ( ref TElem[] slices, TElem str,
1053                                             T delim, LocateDelimDg!(T) locateDelim,
1054                                             uint n, bool collapse )
1055     {
1056         static if (is(Unqual!(TElem) E : E[]))
1057         {
1058             static assert (is (Unqual!(E) == Char),
1059                            "TElem should be [const] Char[], not : "
1060                            ~ TElem.stringof);
1061         }
1062         else
1063         {
1064             static assert (false, "TElem should be [const] Char[], not : "
1065                            ~ TElem.stringof);
1066         }
1067         uint   i     = 0;
1068 
1069         size_t start = collapse? skipLeadingDelims(str, delim) : 0;
1070 
1071         size_t pos   = locateDelim(str, delim, start);
1072 
1073         slices.length = 0;
1074         enableStomping(slices);
1075 
1076         while ((pos < str.length) && (!n || (i < n)))
1077         {
1078             if (!((pos == start) && collapse))
1079             {
1080                 slices ~= str[start .. pos];
1081 
1082                 i++;
1083             }
1084 
1085             start = pos + 1;
1086 
1087             pos = locateDelim(str, delim, start);
1088         }
1089 
1090         if ((!n || (i < n)) && (!((start == str.length) && collapse)))
1091         {
1092             slices ~= str[start .. $];                                          // append tail
1093         }
1094 
1095         return slices;
1096     }
1097 
1098     /**************************************************************************
1099 
1100         Skips leading occurrences of delim in string.
1101 
1102         Params:
1103              str      = input string
             delim    = delimiter character or set of delimiter characters
1105 
1106         Returns:
1107              index of character in str after skipping leading occurrences of
1108              delim (length of str if str consists of delim characters)
1109 
1110      **************************************************************************/
1111 
1112     private size_t skipLeadingDelims ( T ) ( in Char[] str, T delim )
1113     {
1114         foreach (i, c; str)
1115         {
1116             bool found;
1117 
1118             static if (is (T U : U[]))
1119             {
1120                 found = containsChar(delim, c);
1121             }
1122             else static if (is (T : Char))
1123             {
1124                 found = c == delim;
1125             }
1126             else static assert (false, "skipLeadingDelims: delim must be of type '" ~
1127                                        Char.stringof ~ "' or '" ~ (Char[]).stringof ~
1128                                        "', not '" ~ T.stringof ~ '\'');
1129 
1130 
1131 
1132             if (!found) return i;
1133         }
1134 
1135         return str.length;
1136     }
1137 }