ocean.text.Unicode source code

1 /*******************************************************************************
2 
3         Provides case mapping Functions for Unicode Strings. As of now it is
4         only 99 % complete, because it does not take into account Conditional
5         case mappings. This means the Greek Letter Sigma will not be correctly
6         case mapped at the end of a Word, and the Locales Lithuanian, Turkish
7         and Azeri are not taken into account during Case Mappings. This means
8         all in all around 12 Characters will not be mapped correctly under
9         some circumstances.
10 
11         ICU4j also does not handle these cases at the moment.
12 
13         Unittests are written against output from ICU4j
14 
15         This Module tries to minimize Memory allocation and usage. You can
16         always pass the output buffer that should be used to the case mapping
17         function, which will be resized if necessary.
18 
19         Copyright:
20             Copyright (c) 2007 Peter Triller.
21             Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
22             All rights reserved.
23 
24         License:
25             Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
26             See LICENSE_TANGO.txt for details.
27 
28         Version: Initial release: Sept 2007
29 
30         Authors: Peter
31 
32 *******************************************************************************/
33 
34 module ocean.text.Unicode;
35 
36 import ocean.meta.types.Qualifiers;
37 import ocean.text.UnicodeData;
38 import ocean.text.convert.Utf;
39 import ocean.core.Verify;
40 
41 version (unittest) import ocean.core.Test;
42 
43 
/**
 * Upper-cases a UTF-8 string.
 *
 * Every decoded code point is replaced by its special upper case mapping if
 * one is registered, otherwise by its simple upper case mapping. Code points
 * for which no Unicode data record is found are copied through unchanged.
 *
 * Params:
 *     input = UTF-8 text to be case mapped
 *     output = optional destination buffer; it is enlarged (and may thereby
 *              be reallocated) whenever it turns out to be too small
 * Returns: the slice of the destination buffer holding the mapped text
 */
char[] toUpper(const(char)[] input, char[] output = null) {

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    // Most common case: the result has the same length as the input
    if (input.length > output.length)
        output.length = input.length;

    foreach (dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(ch);
        if (d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(ch);
            verify(s !is null);
            if (s.upperCaseMapping !is null) {
                // Reserve the worst case of 4 UTF-8 bytes per mapped code
                // point so that toString never has to relocate the buffer
                auto worstCase = s.upperCaseMapping.length * 4;
                if (written + worstCase >= output.length)
                    output.length = output.length + output.length / 2 + worstCase;
                auto tail = output[written .. output.length];
                auto encoded = toString(s.upperCaseMapping, tail, &consumed);
                verify(consumed == s.upperCaseMapping.length);
                verify(encoded.ptr == tail.ptr);
                written += encoded.length;
                continue;
            }
        }

        // Simple mapping: again keep room for 4 bytes so toString stays in place
        if (written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;
        if (d is null)
            single[0] = ch;
        else
            single[0] = d.simpleUpperCaseMapping;
        auto tail = output[written .. output.length];
        auto encoded = toString(single, tail, &consumed);
        verify(consumed == 1);
        verify(encoded.ptr == tail.ptr);
        written += encoded.length;
    }
    return output[0 .. written];
}
92 
93 
/**
 * Upper-cases a UTF-16 string.
 *
 * Every decoded code point is replaced by its special upper case mapping if
 * one is registered, otherwise by its simple upper case mapping. Code points
 * for which no Unicode data record is found are copied through unchanged.
 *
 * Params:
 *     input = UTF-16 text to be case mapped
 *     output = optional destination buffer; it is enlarged (and may thereby
 *              be reallocated) whenever it turns out to be too small
 * Returns: the slice of the destination buffer holding the mapped text
 */
wchar[] toUpper(const(wchar)[] input, wchar[] output = null) {

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    // Most common case: the result has the same length as the input
    if (input.length > output.length)
        output.length = input.length;

    foreach (dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(ch);
        if (d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(ch);
            verify(s !is null);
            if (s.upperCaseMapping !is null) {
                // Pre-size the buffer generously so that toString16 never
                // has to relocate it
                auto mappedLen = s.upperCaseMapping.length;
                if (written + mappedLen * 2 >= output.length)
                    output.length = output.length + output.length / 2 + mappedLen * 3;
                auto tail = output[written .. output.length];
                auto encoded = toString16(s.upperCaseMapping, tail, &consumed);
                verify(consumed == s.upperCaseMapping.length);
                verify(encoded.ptr == tail.ptr);
                written += encoded.length;
                continue;
            }
        }

        // Simple mapping: keep spare room so toString16 stays in place
        if (written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;
        if (d is null)
            single[0] = ch;
        else
            single[0] = d.simpleUpperCaseMapping;
        auto tail = output[written .. output.length];
        auto encoded = toString16(single, tail, &consumed);
        verify(consumed == 1);
        verify(encoded.ptr == tail.ptr);
        written += encoded.length;
    }
    return output[0 .. written];
}
141 
/**
 * Converts an Utf32 String to Upper case
 *
 * A code point that has a special upper case mapping is replaced by that
 * (possibly multi code point) mapping. Every other code point is replaced by
 * its simple upper case mapping, or copied unchanged when no Unicode data
 * record exists for it. This matches the behaviour of the Utf8 and Utf16
 * overloads.
 *
 * Params:
 *     input = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
dchar[] toUpper(const(dchar)[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    size_t produced = 0;
    foreach (dchar orig; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(orig);
        if (d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(orig);
            verify(s !is null);
            if (s.upperCaseMapping !is null) {
                auto count = s.upperCaseMapping.length;
                // Better resize strategy ???
                if (produced + count > output.length)
                    output.length = output.length + output.length / 2 + count;
                output[produced .. produced + count] = s.upperCaseMapping[];
                produced += count;
                continue;
            }
            // The special casing entry carries no upper case mapping:
            // fall through to the simple mapping instead of dropping the
            // code point from the result.
        }
        // "+ 1" guarantees that the buffer really grows, whatever its size
        if (produced >= output.length)
            output.length = output.length + output.length / 2 + 1;
        output[produced++] = d is null ? orig : d.simpleUpperCaseMapping;
    }
    return output[0 .. produced];
}
180 
181 
/**
 * Lower-cases a UTF-8 string.
 *
 * Every decoded code point is replaced by its special lower case mapping if
 * one is registered, otherwise by its simple lower case mapping. Code points
 * for which no Unicode data record is found are copied through unchanged.
 *
 * Params:
 *     input = UTF-8 text to be case mapped
 *     output = optional destination buffer; it is enlarged (and may thereby
 *              be reallocated) whenever it turns out to be too small
 * Returns: the slice of the destination buffer holding the mapped text
 */
char[] toLower(const(char)[] input, char[] output = null) {

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    // Most common case: the result has the same length as the input
    if (input.length > output.length)
        output.length = input.length;

    foreach (dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(ch);
        if (d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(ch);
            verify(s !is null);
            if (s.lowerCaseMapping !is null) {
                // Reserve the worst case of 4 UTF-8 bytes per mapped code
                // point so that toString never has to relocate the buffer
                auto worstCase = s.lowerCaseMapping.length * 4;
                if (written + worstCase >= output.length)
                    output.length = output.length + output.length / 2 + worstCase;
                auto tail = output[written .. output.length];
                auto encoded = toString(s.lowerCaseMapping, tail, &consumed);
                verify(consumed == s.lowerCaseMapping.length);
                verify(encoded.ptr == tail.ptr);
                written += encoded.length;
                continue;
            }
        }

        // Simple mapping: again keep room for 4 bytes so toString stays in place
        if (written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;
        if (d is null)
            single[0] = ch;
        else
            single[0] = d.simpleLowerCaseMapping;
        auto tail = output[written .. output.length];
        auto encoded = toString(single, tail, &consumed);
        verify(consumed == 1);
        verify(encoded.ptr == tail.ptr);
        written += encoded.length;
    }
    return output[0 .. written];
}
230 
231 
/**
 * Lower-cases a UTF-16 string.
 *
 * Every decoded code point is replaced by its special lower case mapping if
 * one is registered, otherwise by its simple lower case mapping. Code points
 * for which no Unicode data record is found are copied through unchanged.
 *
 * Params:
 *     input = UTF-16 text to be case mapped
 *     output = optional destination buffer; it is enlarged (and may thereby
 *              be reallocated) whenever it turns out to be too small
 * Returns: the slice of the destination buffer holding the mapped text
 */
wchar[] toLower(const(wchar)[] input, wchar[] output = null) {

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    // Most common case: the result has the same length as the input
    if (input.length > output.length)
        output.length = input.length;

    foreach (dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(ch);
        if (d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(ch);
            verify(s !is null);
            if (s.lowerCaseMapping !is null) {
                // Pre-size the buffer generously so that toString16 never
                // has to relocate it
                auto mappedLen = s.lowerCaseMapping.length;
                if (written + mappedLen * 2 >= output.length)
                    output.length = output.length + output.length / 2 + mappedLen * 3;
                auto tail = output[written .. output.length];
                auto encoded = toString16(s.lowerCaseMapping, tail, &consumed);
                verify(consumed == s.lowerCaseMapping.length);
                verify(encoded.ptr == tail.ptr);
                written += encoded.length;
                continue;
            }
        }

        // Simple mapping: keep spare room so toString16 stays in place
        if (written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;
        if (d is null)
            single[0] = ch;
        else
            single[0] = d.simpleLowerCaseMapping;
        auto tail = output[written .. output.length];
        auto encoded = toString16(single, tail, &consumed);
        verify(consumed == 1);
        verify(encoded.ptr == tail.ptr);
        written += encoded.length;
    }
    return output[0 .. written];
}
279 
280 
/**
 * Converts an Utf32 String to Lower case
 *
 * A code point that has a special lower case mapping is replaced by that
 * (possibly multi code point) mapping. Every other code point is replaced by
 * its simple lower case mapping, or copied unchanged when no Unicode data
 * record exists for it. This matches the behaviour of the Utf8 and Utf16
 * overloads.
 *
 * Params:
 *     input = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
dchar[] toLower(const(dchar)[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    size_t produced = 0;
    foreach (dchar orig; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(orig);
        if (d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(orig);
            verify(s !is null);
            if (s.lowerCaseMapping !is null) {
                auto count = s.lowerCaseMapping.length;
                // Better resize strategy ???
                if (produced + count > output.length)
                    output.length = output.length + output.length / 2 + count;
                output[produced .. produced + count] = s.lowerCaseMapping[];
                produced += count;
                continue;
            }
            // The special casing entry carries no lower case mapping:
            // fall through to the simple mapping instead of dropping the
            // code point from the result.
        }
        // "+ 1" guarantees that the buffer really grows, whatever its size
        if (produced >= output.length)
            output.length = output.length + output.length / 2 + 1;
        output[produced++] = d is null ? orig : d.simpleLowerCaseMapping;
    }
    return output[0 .. produced];
}
319 
/**
 * Case-folds a UTF-8 string.
 * Folded strings are meant for case insensitive comparisons.
 *
 * Every decoded code point that has a folding entry is replaced by that
 * entry's mapping; all other code points are copied through unchanged.
 *
 * Params:
 *     input = UTF-8 text to be case folded
 *     output = optional destination buffer; it is enlarged (and may thereby
 *              be reallocated) whenever it turns out to be too small
 * Returns: the slice of the destination buffer holding the folded text
 */
char[] toFold(const(char)[] input, char[] output = null) {

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    // Most common case: the result has the same length as the input
    if (input.length > output.length)
        output.length = input.length;

    foreach (dchar ch; input) {
        FoldingCaseData *s = getFoldingCaseData(ch);
        if (s is null) {
            // No folding entry: re-encode the code point as it is, keeping
            // room for 4 bytes so that toString never relocates the buffer
            if (written + 4 >= output.length)
                output.length = output.length + output.length / 2 + 4;
            single[0] = ch;
            auto tail = output[written .. output.length];
            auto encoded = toString(single, tail, &consumed);
            verify(consumed == 1);
            verify(encoded.ptr == tail.ptr);
            written += encoded.length;
        } else {
            // Reserve the worst case of 4 UTF-8 bytes per code point of the
            // folding mapping so that toString never relocates the buffer
            auto worstCase = s.mapping.length * 4;
            if (written + worstCase >= output.length)
                output.length = output.length + output.length / 2 + worstCase;
            auto tail = output[written .. output.length];
            auto encoded = toString(s.mapping, tail, &consumed);
            verify(consumed == s.mapping.length);
            verify(encoded.ptr == tail.ptr);
            written += encoded.length;
        }
    }
    return output[0 .. written];
}
364 
/**
 * Case-folds a UTF-16 string.
 * Folded strings are meant for case insensitive comparisons.
 *
 * Every decoded code point that has a folding entry is replaced by that
 * entry's mapping; all other code points are copied through unchanged.
 *
 * Params:
 *     input = UTF-16 text to be case folded
 *     output = optional destination buffer; it is enlarged (and may thereby
 *              be reallocated) whenever it turns out to be too small
 * Returns: the slice of the destination buffer holding the folded text
 */
wchar[] toFold(const(wchar)[] input, wchar[] output = null) {

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    // Most common case: the result has the same length as the input
    if (input.length > output.length)
        output.length = input.length;

    foreach (dchar ch; input) {
        FoldingCaseData *s = getFoldingCaseData(ch);
        if (s is null) {
            // No folding entry: re-encode the code point as it is, keeping
            // spare room so that toString16 never relocates the buffer
            if (written + 4 >= output.length)
                output.length = output.length + output.length / 2 + 3;
            single[0] = ch;
            auto tail = output[written .. output.length];
            auto encoded = toString16(single, tail, &consumed);
            verify(consumed == 1);
            verify(encoded.ptr == tail.ptr);
            written += encoded.length;
        } else {
            // Pre-size the buffer generously so that toString16 never
            // has to relocate it
            auto mappedLen = s.mapping.length;
            if (written + mappedLen * 2 >= output.length)
                output.length = output.length + output.length / 2 + mappedLen * 3;
            auto tail = output[written .. output.length];
            auto encoded = toString16(s.mapping, tail, &consumed);
            verify(consumed == s.mapping.length);
            verify(encoded.ptr == tail.ptr);
            written += encoded.length;
        }
    }
    return output[0 .. written];
}
408 
/**
 * Case-folds a UTF-32 string.
 * Folded strings are meant for case insensitive comparisons.
 *
 * Every code point that has a folding entry is replaced by that entry's
 * mapping; all other code points are copied through unchanged.
 *
 * Params:
 *     input = UTF-32 text to be case folded
 *     output = optional destination buffer; it is enlarged (and may thereby
 *              be reallocated) whenever it turns out to be too small
 * Returns: the slice of the destination buffer holding the folded text
 */
dchar[] toFold(const(dchar)[] input, dchar[] output = null) {

    uint written = 0;

    // Most common case: the result has the same length as the input
    if (output.length < input.length)
        output.length = input.length;

    foreach (dchar orig; input) {
        FoldingCaseData *d = getFoldingCaseData(orig);
        if (d is null) {
            // No folding entry: copy the code point as it is
            if (written >= output.length)
                output.length = output.length + output.length / 2;
            output[written] = orig;
            ++written;
        } else {
            // Better resize strategy ???
            if (written + d.mapping.length > output.length)
                output.length = output.length + output.length / 2 + d.mapping.length;
            foreach (folded; d.mapping) {
                output[written] = folded;
                ++written;
            }
        }
    }
    return output[0 .. written];
}
443 
444 
/**
 * Tells whether a character is a decimal digit, i.e. whether its general
 * category is Nd. Characters without a Unicode data record are not digits.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if ch is a decimal digit
 */
bool isDigit(dchar ch) {
    UnicodeData *record = getUnicodeData(ch);
    if (record is null)
        return false;
    return (record.generalCategory & UnicodeData.GeneralCategory.Nd) != 0;
}
456 
457 
/**
 * Tells whether a character is a letter, i.e. whether its general category
 * is one of Lu, Ll, Lt, Lm or Lo. Characters without a Unicode data record
 * are not letters.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if ch is a letter
 */
bool isLetter(int ch) {
    enum letterCategories =
          UnicodeData.GeneralCategory.Lu
        | UnicodeData.GeneralCategory.Ll
        | UnicodeData.GeneralCategory.Lt
        | UnicodeData.GeneralCategory.Lm
        | UnicodeData.GeneralCategory.Lo;

    UnicodeData *record = getUnicodeData(ch);
    if (record is null)
        return false;
    return (record.generalCategory & letterCategories) != 0;
}
473 
/**
 * Tells whether a character is a letter or a decimal digit, i.e. whether
 * its general category is one of Lu, Ll, Lt, Lm, Lo or Nd. Characters
 * without a Unicode data record are neither.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if ch is a letter or a decimal digit
 */
bool isLetterOrDigit(int ch) {
    enum alphanumericCategories =
          UnicodeData.GeneralCategory.Lu
        | UnicodeData.GeneralCategory.Ll
        | UnicodeData.GeneralCategory.Lt
        | UnicodeData.GeneralCategory.Lm
        | UnicodeData.GeneralCategory.Lo
        | UnicodeData.GeneralCategory.Nd;

    UnicodeData *record = getUnicodeData(ch);
    if (record is null)
        return false;
    return (record.generalCategory & alphanumericCategories) != 0;
}
491 
/**
 * Tells whether a character is a lower case letter, i.e. whether its
 * general category is Ll. Characters without a Unicode data record are not.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if ch is a lower case letter
 */
bool isLower(dchar ch) {
    UnicodeData *record = getUnicodeData(ch);
    if (record is null)
        return false;
    return (record.generalCategory & UnicodeData.GeneralCategory.Ll) != 0;
}
501 
/**
 * Tells whether a character is a title case letter, i.e. whether its
 * general category is Lt. Characters without a Unicode data record are not.
 *
 * Title case letters are combined letters of which only the first part is
 * upper case and the second part is lower case. Some of these special
 * characters can be found in the Croatian and Greek languages.
 * See_Also: http://en.wikipedia.org/wiki/Capitalization
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if ch is a title case letter
 */
bool isTitle(dchar ch) {
    UnicodeData *record = getUnicodeData(ch);
    if (record is null)
        return false;
    return (record.generalCategory & UnicodeData.GeneralCategory.Lt) != 0;
}
514 
/**
 * Tells whether a character is an upper case letter, i.e. whether its
 * general category is Lu. Characters without a Unicode data record are not.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if ch is an upper case letter
 */
bool isUpper(dchar ch) {
    UnicodeData *record = getUnicodeData(ch);
    if (record is null)
        return false;
    return (record.generalCategory & UnicodeData.GeneralCategory.Lu) != 0;
}
524 
/**
 * Tells whether a character is a whitespace character.
 *
 * Whitespace characters are
 * $(UL
 *   $(LI the control characters U+0009 to U+000D (TAB, LF, VT, FF, CR))
 *   $(LI the control characters U+001C to U+001F (FS, GS, RS, US))
 *   $(LI all characters of the general categories Zs, Zl and Zp, except
 *        U+00A0 (NBSP), U+202F (NARROW NBSP) and U+FEFF (ZERO WIDTH NBSP))
 * )
 *
 * WARNING: have a look at isSpace as well, that function may be closer to
 *          what you expect.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if ch is a whitespace character
 */
bool isWhitespace(dchar ch) {
    enum separatorCategories =
          UnicodeData.GeneralCategory.Zs
        | UnicodeData.GeneralCategory.Zl
        | UnicodeData.GeneralCategory.Zp;

    // Control characters that are used as spaces
    immutable spacingControl = (ch >= 0x0009 && ch <= 0x000D)
                            || (ch >= 0x001C && ch <= 0x001F);
    if (spacingControl)
        return true;

    UnicodeData *record = getUnicodeData(ch);
    if (record is null)
        return false;
    if (!(record.generalCategory & separatorCategories))
        return false;

    // The no-break spaces do not count as whitespace
    immutable noBreak = ch == 0x00A0  // NBSP
                     || ch == 0x202F  // NARROW NBSP
                     || ch == 0xFEFF; // ZERO WIDTH NBSP
    return !noBreak;
}
551 
/**
 * Tells whether a character is a space character as specified in the
 * Unicode Standard, i.e. whether its general category is one of Zs, Zl or
 * Zp. Characters without a Unicode data record are not.
 *
 * WARNING: have a look at isWhitespace as well, that function may be closer
 *          to what you expect.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if ch is a space character
 */
bool isSpace(dchar ch) {
    enum separatorCategories =
          UnicodeData.GeneralCategory.Zs
        | UnicodeData.GeneralCategory.Zl
        | UnicodeData.GeneralCategory.Zp;

    UnicodeData *record = getUnicodeData(ch);
    if (record is null)
        return false;
    return (record.generalCategory & separatorCategories) != 0;
}
569 
570 
/**
 * Tells whether a character is printable, i.e. whether a Unicode data
 * record exists for it and its general category is none of Cn, Cc, Cf, Co
 * and Cs.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if ch is a printable character
 */
bool isPrintable(dchar ch) {
    enum nonPrintableCategories =
          UnicodeData.GeneralCategory.Cn
        | UnicodeData.GeneralCategory.Cc
        | UnicodeData.GeneralCategory.Cf
        | UnicodeData.GeneralCategory.Co
        | UnicodeData.GeneralCategory.Cs;

    UnicodeData *record = getUnicodeData(ch);
    if (record is null)
        return false;
    return (record.generalCategory & nonPrintableCategories) == 0;
}
587 
// toUpper: all three encodings, with and without a caller supplied buffer
unittest {
    // Lower case umlauts and their upper case forms (no special casing)
    immutable(char)[]  umlauts8  = "\u00E4\u00F6\u00FC";
    immutable(wchar)[] umlauts16 = "\u00E4\u00F6\u00FC";
    immutable(dchar)[] umlauts32 = "\u00E4\u00F6\u00FC";
    immutable(char)[]  upperUmlauts8  = "\u00C4\u00D6\u00DC";
    immutable(wchar)[] upperUmlauts16 = "\u00C4\u00D6\u00DC";
    immutable(dchar)[] upperUmlauts32 = "\u00C4\u00D6\u00DC";

    // Ligatures whose upper case forms expand to several letters each
    immutable(char)[]  ligatures8  = "\uFB03\uFB04\uFB05";
    immutable(wchar)[] ligatures16 = "\uFB03\uFB04\uFB05";
    immutable(dchar)[] ligatures32 = "\uFB03\uFB04\uFB05";
    immutable(char)[]  upperLigatures8  = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    immutable(wchar)[] upperLigatures16 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    immutable(dchar)[] upperLigatures32 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";

    // Buffers that are large enough / too small for the results
    char[60]  roomy8;
    wchar[30] roomy16;
    dchar[30] roomy32;
    char[5]  tight8;
    wchar[2] tight16;
    dchar[2] tight32;

    // 1) No Buffer passed, no resize, no SpecialCase
    {
        char[] out8 = toUpper(umlauts8);
        test(out8 == upperUmlauts8);
        wchar[] out16 = toUpper(umlauts16);
        test(out16 == upperUmlauts16);
        dchar[] out32 = toUpper(umlauts32);
        test(out32 == upperUmlauts32);
    }

    // 2) Buffer passed, no resize, no SpecialCase
    {
        char[] out8 = toUpper(umlauts8, roomy8);
        test(out8.ptr == roomy8.ptr);
        test(out8 == upperUmlauts8);
        wchar[] out16 = toUpper(umlauts16, roomy16);
        test(out16.ptr == roomy16.ptr);
        test(out16 == upperUmlauts16);
        dchar[] out32 = toUpper(umlauts32, roomy32);
        test(out32.ptr == roomy32.ptr);
        test(out32 == upperUmlauts32);
    }

    // 3) Buffer passed, resize necessary, no SpecialCase
    {
        char[] out8 = toUpper(umlauts8, tight8);
        test(out8.ptr != tight8.ptr);
        test(out8 == upperUmlauts8);
        wchar[] out16 = toUpper(umlauts16, tight16);
        test(out16.ptr != tight16.ptr);
        test(out16 == upperUmlauts16);
        dchar[] out32 = toUpper(umlauts32, tight32);
        test(out32.ptr != tight32.ptr);
        test(out32 == upperUmlauts32);
    }

    // 4) Buffer passed, resize necessary, extensive SpecialCase
    {
        char[] out8 = toUpper(ligatures8, tight8);
        test(out8.ptr != tight8.ptr);
        test(out8 == upperLigatures8);
        wchar[] out16 = toUpper(ligatures16, tight16);
        test(out16.ptr != tight16.ptr);
        test(out16 == upperLigatures16);
        dchar[] out32 = toUpper(ligatures32, tight32);
        test(out32.ptr != tight32.ptr);
        test(out32 == upperLigatures32);
    }
}
653 
654 
// toLower: all three encodings, with and without a caller supplied buffer
unittest {
    // Upper case umlauts and their lower case forms (no special casing)
    immutable(char)[]  umlauts8  = "\u00C4\u00D6\u00DC";
    immutable(wchar)[] umlauts16 = "\u00C4\u00D6\u00DC";
    immutable(dchar)[] umlauts32 = "\u00C4\u00D6\u00DC";
    immutable(char)[]  lowerUmlauts8  = "\u00E4\u00F6\u00FC";
    immutable(wchar)[] lowerUmlauts16 = "\u00E4\u00F6\u00FC";
    immutable(dchar)[] lowerUmlauts32 = "\u00E4\u00F6\u00FC";

    // U+0130, whose lower case form expands to two code points
    immutable(char)[]  dotted8  = "\u0130\u0130\u0130";
    immutable(wchar)[] dotted16 = "\u0130\u0130\u0130";
    immutable(dchar)[] dotted32 = "\u0130\u0130\u0130";
    immutable(char)[]  lowerDotted8  = "\u0069\u0307\u0069\u0307\u0069\u0307";
    immutable(wchar)[] lowerDotted16 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    immutable(dchar)[] lowerDotted32 = "\u0069\u0307\u0069\u0307\u0069\u0307";

    // Buffers that are large enough / too small for the results
    char[60]  roomy8;
    wchar[30] roomy16;
    dchar[30] roomy32;
    char[5]  tight8;
    wchar[2] tight16;
    dchar[2] tight32;

    // 1) No Buffer passed, no resize, no SpecialCase
    {
        char[] out8 = toLower(umlauts8);
        test(out8 == lowerUmlauts8);
        wchar[] out16 = toLower(umlauts16);
        test(out16 == lowerUmlauts16);
        dchar[] out32 = toLower(umlauts32);
        test(out32 == lowerUmlauts32);
    }

    // 2) Buffer passed, no resize, no SpecialCase
    {
        char[] out8 = toLower(umlauts8, roomy8);
        test(out8.ptr == roomy8.ptr);
        test(out8 == lowerUmlauts8);
        wchar[] out16 = toLower(umlauts16, roomy16);
        test(out16.ptr == roomy16.ptr);
        test(out16 == lowerUmlauts16);
        dchar[] out32 = toLower(umlauts32, roomy32);
        test(out32.ptr == roomy32.ptr);
        test(out32 == lowerUmlauts32);
    }

    // 3) Buffer passed, resize necessary, no SpecialCase
    {
        char[] out8 = toLower(umlauts8, tight8);
        test(out8.ptr != tight8.ptr);
        test(out8 == lowerUmlauts8);
        wchar[] out16 = toLower(umlauts16, tight16);
        test(out16.ptr != tight16.ptr);
        test(out16 == lowerUmlauts16);
        dchar[] out32 = toLower(umlauts32, tight32);
        test(out32.ptr != tight32.ptr);
        test(out32 == lowerUmlauts32);
    }

    // 4) Buffer passed, resize necessary, extensive SpecialCase
    {
        char[] out8 = toLower(dotted8, tight8);
        test(out8.ptr != tight8.ptr);
        test(out8 == lowerDotted8);
        wchar[] out16 = toLower(dotted16, tight16);
        test(out16.ptr != tight16.ptr);
        test(out16 == lowerDotted16);
        dchar[] out32 = toLower(dotted32, tight32);
        test(out32.ptr != tight32.ptr);
        test(out32 == lowerDotted32);
    }
}
718 
// toFold: two strings that differ only in case, and in whether U+0390 is
// written precomposed or as U+03B9 U+0308 U+0301, must fold to the same
// text in every encoding.
unittest {
    // UTF-8
    immutable(char)[] testString1utf8 = "?!Mädchen \u0390\u0390,;";
    immutable(char)[] testString2utf8 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    test(toFold(testString1utf8) == toFold(testString2utf8));

    // UTF-16
    immutable(wchar)[] testString1utf16 = "?!Mädchen \u0390\u0390,;";
    immutable(wchar)[] testString2utf16 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    test(toFold(testString1utf16) == toFold(testString2utf16));

    // UTF-32: the strings have to be dchar based, otherwise the UTF-16
    // overload of toFold is tested a second time and the UTF-32 overload
    // not at all
    immutable(dchar)[] testString1utf32 = "?!Mädchen \u0390\u0390,;";
    immutable(dchar)[] testString2utf32 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    test(toFold(testString1utf32) == toFold(testString2utf32));
}