1 /*******************************************************************************
2
3 Provides case mapping Functions for Unicode Strings. As of now it is
4 only 99 % complete, because it does not take into account Conditional
5 case mappings. This means the Greek Letter Sigma will not be correctly
6 case mapped at the end of a Word, and the Locales Lithuanian, Turkish
7 and Azeri are not taken into account during Case Mappings. This means
8 all in all around 12 Characters will not be mapped correctly under
9 some circumstances.
10
11 ICU4j also does not handle these cases at the moment.
12
13 Unittests are written against output from ICU4j
14
15 This Module tries to minimize Memory allocation and usage. You can
16 always pass the output buffer that should be used to the case mapping
17 function, which will be resized if necessary.
18
19 Copyright:
20 Copyright (c) 2007 Peter Triller.
21 Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
22 All rights reserved.
23
24 License:
25 Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
26 See LICENSE_TANGO.txt for details.
27
28 Version: Initial release: Sept 2007
29
30 Authors: Peter
31
32 *******************************************************************************/
33
34 module ocean.text.Unicode;
35
36 import ocean.meta.types.Qualifiers;
37 import ocean.text.UnicodeData;
38 import ocean.text.convert.Utf;
39 import ocean.core.Verify;
40
41 version (unittest) import ocean.core.Test;
42
43
/**
 * Maps a UTF-8 encoded string to upper case.
 *
 * A code point flagged as having a special mapping is replaced by its
 * (possibly multi-character) special upper case mapping when one exists.
 * Any other code point is replaced by its simple upper case mapping, or
 * copied unchanged when no Unicode data is available for it.
 * Conditional mappings are not applied.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write the result into; it is enlarged when it is
 *              too small
 * Returns: the case mapped string, as a slice of the buffer that was used
 */
char[] toUpper(const(char)[] input, char[] output = null) {

    // Most common case: the result is exactly as long as the input
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    foreach (dchar codepoint; input) {
        // TODO Conditional Case Mapping
        UnicodeData* data = getUnicodeData(codepoint);
        immutable hasSpecial = data !is null
            && (data.generalCategory & UnicodeData.GeneralCategory.SpecialMapping);

        if (hasSpecial) {
            SpecialCaseData* special = getSpecialCaseData(codepoint);
            verify(special !is null);
            auto mapping = special.upperCaseMapping;
            if (mapping !is null) {
                // Reserve the worst case of 4 code units per mapped code
                // point up front so that toString never has to relocate
                // the slice it is handed.
                immutable worstCase = mapping.length * 4;
                if (written + worstCase >= output.length)
                    output.length = output.length + output.length / 2 + worstCase;
                auto tail = output[written .. $];
                auto encoded = toString(mapping, tail, &consumed);
                verify(consumed == mapping.length);
                verify(encoded.ptr == tail.ptr);
                written += encoded.length;
                continue;
            }
        }

        // Simple mapping: again keep 4 code units free for toString
        if (written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;
        if (data is null)
            single[0] = codepoint;
        else
            single[0] = data.simpleUpperCaseMapping;
        auto tail = output[written .. $];
        auto encoded = toString(single, tail, &consumed);
        verify(consumed == 1);
        verify(encoded.ptr == tail.ptr);
        written += encoded.length;
    }
    return output[0 .. written];
}
92
93
/**
 * Maps a UTF-16 encoded string to upper case.
 *
 * A code point flagged as having a special mapping is replaced by its
 * (possibly multi-character) special upper case mapping when one exists.
 * Any other code point is replaced by its simple upper case mapping, or
 * copied unchanged when no Unicode data is available for it.
 * Conditional mappings are not applied.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write the result into; it is enlarged when it is
 *              too small
 * Returns: the case mapped string, as a slice of the buffer that was used
 */
wchar[] toUpper(const(wchar)[] input, wchar[] output = null) {

    // Most common case: the result is exactly as long as the input
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    foreach (dchar codepoint; input) {
        // TODO Conditional Case Mapping
        UnicodeData* data = getUnicodeData(codepoint);
        immutable hasSpecial = data !is null
            && (data.generalCategory & UnicodeData.GeneralCategory.SpecialMapping);

        if (hasSpecial) {
            SpecialCaseData* special = getSpecialCaseData(codepoint);
            verify(special !is null);
            auto mapping = special.upperCaseMapping;
            if (mapping !is null) {
                // Reserve room up front (2 code units per mapped code
                // point) so that toString16 never has to relocate the
                // slice it is handed.
                if (written + mapping.length * 2 >= output.length)
                    output.length = output.length + output.length / 2 + mapping.length * 3;
                auto tail = output[written .. $];
                auto encoded = toString16(mapping, tail, &consumed);
                verify(consumed == mapping.length);
                verify(encoded.ptr == tail.ptr);
                written += encoded.length;
                continue;
            }
        }

        // Simple mapping: keep spare room so toString16 does not relocate
        if (written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;
        if (data is null)
            single[0] = codepoint;
        else
            single[0] = data.simpleUpperCaseMapping;
        auto tail = output[written .. $];
        auto encoded = toString16(single, tail, &consumed);
        verify(consumed == 1);
        verify(encoded.ptr == tail.ptr);
        written += encoded.length;
    }
    return output[0 .. written];
}
141
/**
 * Converts an Utf32 String to Upper case
 *
 * A code point flagged as having a special mapping is replaced by its
 * (possibly multi-character) special upper case mapping when one exists.
 * Any other code point, including one that is flagged but has no special
 * upper case mapping, is replaced by its simple upper case mapping, or
 * copied unchanged when no Unicode data is available for it.
 * Conditional mappings are not applied.
 *
 * Params:
 *     input = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
dchar[] toUpper(const(dchar)[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    size_t produced = 0;
    foreach (dchar orig; input) {
        // TODO Conditional Case Mapping
        UnicodeData* d = getUnicodeData(orig);
        if (d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData* s = getSpecialCaseData(orig);
            verify(s !is null);
            if (s.upperCaseMapping !is null) {
                // Better resize strategy ???
                if (produced + s.upperCaseMapping.length > output.length)
                    output.length = output.length + output.length / 2 + s.upperCaseMapping.length;
                foreach (ch; s.upperCaseMapping) {
                    output[produced++] = ch;
                }
                // Skip the simple mapping only when the special mapping was
                // actually emitted. A code point whose special-case entry
                // has no upper case mapping must fall through, as it does
                // in the Utf8 and Utf16 overloads; otherwise it would be
                // dropped from the result.
                continue;
            }
        }
        // Always grow by at least one element so the write below is in bounds
        if (produced >= output.length)
            output.length = output.length + output.length / 2 + 1;
        output[produced++] = d is null ? orig : d.simpleUpperCaseMapping;
    }
    return output[0 .. produced];
}
180
181
/**
 * Maps a UTF-8 encoded string to lower case.
 *
 * A code point flagged as having a special mapping is replaced by its
 * (possibly multi-character) special lower case mapping when one exists.
 * Any other code point is replaced by its simple lower case mapping, or
 * copied unchanged when no Unicode data is available for it.
 * Conditional mappings are not applied.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write the result into; it is enlarged when it is
 *              too small
 * Returns: the case mapped string, as a slice of the buffer that was used
 */
char[] toLower(const(char)[] input, char[] output = null) {

    // Most common case: the result is exactly as long as the input
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    foreach (dchar codepoint; input) {
        // TODO Conditional Case Mapping
        UnicodeData* data = getUnicodeData(codepoint);
        immutable hasSpecial = data !is null
            && (data.generalCategory & UnicodeData.GeneralCategory.SpecialMapping);

        if (hasSpecial) {
            SpecialCaseData* special = getSpecialCaseData(codepoint);
            verify(special !is null);
            auto mapping = special.lowerCaseMapping;
            if (mapping !is null) {
                // Reserve the worst case of 4 code units per mapped code
                // point up front so that toString never has to relocate
                // the slice it is handed.
                immutable worstCase = mapping.length * 4;
                if (written + worstCase >= output.length)
                    output.length = output.length + output.length / 2 + worstCase;
                auto tail = output[written .. $];
                auto encoded = toString(mapping, tail, &consumed);
                verify(consumed == mapping.length);
                verify(encoded.ptr == tail.ptr);
                written += encoded.length;
                continue;
            }
        }

        // Simple mapping: again keep 4 code units free for toString
        if (written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;
        if (data is null)
            single[0] = codepoint;
        else
            single[0] = data.simpleLowerCaseMapping;
        auto tail = output[written .. $];
        auto encoded = toString(single, tail, &consumed);
        verify(consumed == 1);
        verify(encoded.ptr == tail.ptr);
        written += encoded.length;
    }
    return output[0 .. written];
}
230
231
/**
 * Maps a UTF-16 encoded string to lower case.
 *
 * A code point flagged as having a special mapping is replaced by its
 * (possibly multi-character) special lower case mapping when one exists.
 * Any other code point is replaced by its simple lower case mapping, or
 * copied unchanged when no Unicode data is available for it.
 * Conditional mappings are not applied.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write the result into; it is enlarged when it is
 *              too small
 * Returns: the case mapped string, as a slice of the buffer that was used
 */
wchar[] toLower(const(wchar)[] input, wchar[] output = null) {

    // Most common case: the result is exactly as long as the input
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    foreach (dchar codepoint; input) {
        // TODO Conditional Case Mapping
        UnicodeData* data = getUnicodeData(codepoint);
        immutable hasSpecial = data !is null
            && (data.generalCategory & UnicodeData.GeneralCategory.SpecialMapping);

        if (hasSpecial) {
            SpecialCaseData* special = getSpecialCaseData(codepoint);
            verify(special !is null);
            auto mapping = special.lowerCaseMapping;
            if (mapping !is null) {
                // Reserve room up front (2 code units per mapped code
                // point) so that toString16 never has to relocate the
                // slice it is handed.
                if (written + mapping.length * 2 >= output.length)
                    output.length = output.length + output.length / 2 + mapping.length * 3;
                auto tail = output[written .. $];
                auto encoded = toString16(mapping, tail, &consumed);
                verify(consumed == mapping.length);
                verify(encoded.ptr == tail.ptr);
                written += encoded.length;
                continue;
            }
        }

        // Simple mapping: keep spare room so toString16 does not relocate
        if (written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;
        if (data is null)
            single[0] = codepoint;
        else
            single[0] = data.simpleLowerCaseMapping;
        auto tail = output[written .. $];
        auto encoded = toString16(single, tail, &consumed);
        verify(consumed == 1);
        verify(encoded.ptr == tail.ptr);
        written += encoded.length;
    }
    return output[0 .. written];
}
279
280
/**
 * Converts an Utf32 String to Lower case
 *
 * A code point flagged as having a special mapping is replaced by its
 * (possibly multi-character) special lower case mapping when one exists.
 * Any other code point, including one that is flagged but has no special
 * lower case mapping, is replaced by its simple lower case mapping, or
 * copied unchanged when no Unicode data is available for it.
 * Conditional mappings are not applied.
 *
 * Params:
 *     input = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
dchar[] toLower(const(dchar)[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    size_t produced = 0;
    foreach (dchar orig; input) {
        // TODO Conditional Case Mapping
        UnicodeData* d = getUnicodeData(orig);
        if (d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData* s = getSpecialCaseData(orig);
            verify(s !is null);
            if (s.lowerCaseMapping !is null) {
                // Better resize strategy ???
                if (produced + s.lowerCaseMapping.length > output.length)
                    output.length = output.length + output.length / 2 + s.lowerCaseMapping.length;
                foreach (ch; s.lowerCaseMapping) {
                    output[produced++] = ch;
                }
                // Skip the simple mapping only when the special mapping was
                // actually emitted. A code point whose special-case entry
                // has no lower case mapping must fall through, as it does
                // in the Utf8 and Utf16 overloads; otherwise it would be
                // dropped from the result.
                continue;
            }
        }
        // Always grow by at least one element so the write below is in bounds
        if (produced >= output.length)
            output.length = output.length + output.length / 2 + 1;
        output[produced++] = d is null ? orig : d.simpleLowerCaseMapping;
    }
    return output[0 .. produced];
}
319
/**
 * Maps a UTF-8 encoded string to its case folded form.
 * Case folding is used for case insensitive comparisons.
 *
 * A code point that has folding data is replaced by its (possibly
 * multi-character) folding; every other code point is copied unchanged.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write the result into; it is enlarged when it is
 *              too small
 * Returns: the case mapped string, as a slice of the buffer that was used
 */
char[] toFold(const(char)[] input, char[] output = null) {

    // Most common case: the result is exactly as long as the input
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    foreach (dchar codepoint; input) {
        FoldingCaseData* folding = getFoldingCaseData(codepoint);

        if (folding is null) {
            // No folding data: copy the code point as is, keeping 4 code
            // units free so that toString never has to relocate the slice
            if (written + 4 >= output.length)
                output.length = output.length + output.length / 2 + 4;
            single[0] = codepoint;
            auto tail = output[written .. $];
            auto encoded = toString(single, tail, &consumed);
            verify(consumed == 1);
            verify(encoded.ptr == tail.ptr);
            written += encoded.length;
        } else {
            // Reserve the worst case of 4 code units per folded code point
            // up front so that toString never has to relocate the slice
            immutable worstCase = folding.mapping.length * 4;
            if (written + worstCase >= output.length)
                output.length = output.length + output.length / 2 + worstCase;
            auto tail = output[written .. $];
            auto encoded = toString(folding.mapping, tail, &consumed);
            verify(consumed == folding.mapping.length);
            verify(encoded.ptr == tail.ptr);
            written += encoded.length;
        }
    }
    return output[0 .. written];
}
364
/**
 * Maps a UTF-16 encoded string to its case folded form.
 * Case folding is used for case insensitive comparisons.
 *
 * A code point that has folding data is replaced by its (possibly
 * multi-character) folding; every other code point is copied unchanged.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write the result into; it is enlarged when it is
 *              too small
 * Returns: the case mapped string, as a slice of the buffer that was used
 */
wchar[] toFold(const(wchar)[] input, wchar[] output = null) {

    // Most common case: the result is exactly as long as the input
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    foreach (dchar codepoint; input) {
        FoldingCaseData* folding = getFoldingCaseData(codepoint);

        if (folding is null) {
            // No folding data: copy the code point as is, keeping spare
            // room so that toString16 never has to relocate the slice
            if (written + 4 >= output.length)
                output.length = output.length + output.length / 2 + 3;
            single[0] = codepoint;
            auto tail = output[written .. $];
            auto encoded = toString16(single, tail, &consumed);
            verify(consumed == 1);
            verify(encoded.ptr == tail.ptr);
            written += encoded.length;
        } else {
            // Reserve room up front (2 code units per folded code point)
            // so that toString16 never has to relocate the slice
            if (written + folding.mapping.length * 2 >= output.length)
                output.length = output.length + output.length / 2 + folding.mapping.length * 3;
            auto tail = output[written .. $];
            auto encoded = toString16(folding.mapping, tail, &consumed);
            verify(consumed == folding.mapping.length);
            verify(encoded.ptr == tail.ptr);
            written += encoded.length;
        }
    }
    return output[0 .. written];
}
408
/**
 * Maps a UTF-32 encoded string to its case folded form.
 * Case folding is used for case insensitive comparisons.
 *
 * A code point that has folding data is replaced by its (possibly
 * multi-character) folding; every other code point is copied unchanged.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write the result into; it is enlarged when it is
 *              too small
 * Returns: the case mapped string, as a slice of the buffer that was used
 */
dchar[] toFold(const(dchar)[] input, dchar[] output = null) {

    // Most common case: the result is exactly as long as the input
    if (output.length < input.length)
        output.length = input.length;

    uint filled = 0;
    foreach (dchar codepoint; input) {
        FoldingCaseData* folding = getFoldingCaseData(codepoint);

        if (folding is null) {
            // No folding data: copy the code point unchanged
            if (filled >= output.length)
                output.length = output.length + output.length / 2;
            output[filled++] = codepoint;
        } else {
            // Make room for the complete folding before copying it over
            if (filled + folding.mapping.length > output.length)
                output.length = output.length + output.length / 2 + folding.mapping.length;
            foreach (folded; folding.mapping)
                output[filled++] = folded;
        }
    }
    return output[0 .. filled];
}
443
444
/**
 * Tells whether a character is a decimal digit, i.e. whether its general
 * category is Nd. Characters without Unicode data are not digits.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is a decimal digit
 */
bool isDigit(dchar ch) {
    UnicodeData* data = getUnicodeData(ch);
    if (data is null)
        return false;
    return (data.generalCategory & UnicodeData.GeneralCategory.Nd) != 0;
}
456
457
/**
 * Tells whether a character is a letter, i.e. whether its general category
 * is one of Lu, Ll, Lt, Lm or Lo. Characters without Unicode data are not
 * letters.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is a letter
 */
bool isLetter(int ch) {
    UnicodeData* data = getUnicodeData(ch);
    if (data is null)
        return false;
    return (data.generalCategory &
        ( UnicodeData.GeneralCategory.Lu
        | UnicodeData.GeneralCategory.Ll
        | UnicodeData.GeneralCategory.Lt
        | UnicodeData.GeneralCategory.Lm
        | UnicodeData.GeneralCategory.Lo)) != 0;
}
473
/**
 * Tells whether a character is a letter or a decimal digit, i.e. whether
 * its general category is one of Lu, Ll, Lt, Lm, Lo or Nd. Characters
 * without Unicode data are neither.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is a letter or a decimal digit
 */
bool isLetterOrDigit(int ch) {
    UnicodeData* data = getUnicodeData(ch);
    if (data is null)
        return false;
    return (data.generalCategory &
        ( UnicodeData.GeneralCategory.Lu
        | UnicodeData.GeneralCategory.Ll
        | UnicodeData.GeneralCategory.Lt
        | UnicodeData.GeneralCategory.Lm
        | UnicodeData.GeneralCategory.Lo
        | UnicodeData.GeneralCategory.Nd)) != 0;
}
491
/**
 * Tells whether a character is a lower case letter, i.e. whether its
 * general category is Ll. Characters without Unicode data are not.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is a lower case letter
 */
bool isLower(dchar ch) {
    UnicodeData* data = getUnicodeData(ch);
    if (data is null)
        return false;
    return (data.generalCategory & UnicodeData.GeneralCategory.Ll) != 0;
}
501
/**
 * Tells whether a character is a title case letter, i.e. whether its
 * general category is Lt. Characters without Unicode data are not.
 *
 * In a title case letter made of combined letters, only the first one is
 * upper case and the second one is lower case. Some of these special
 * characters can be found in the Croatian and Greek languages.
 *
 * See_Also: http://en.wikipedia.org/wiki/Capitalization
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is a title case letter
 */
bool isTitle(dchar ch) {
    UnicodeData* data = getUnicodeData(ch);
    if (data is null)
        return false;
    return (data.generalCategory & UnicodeData.GeneralCategory.Lt) != 0;
}
514
/**
 * Tells whether a character is an upper case letter, i.e. whether its
 * general category is Lu. Characters without Unicode data are not.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is an upper case letter
 */
bool isUpper(dchar ch) {
    UnicodeData* data = getUnicodeData(ch);
    if (data is null)
        return false;
    return (data.generalCategory & UnicodeData.GeneralCategory.Lu) != 0;
}
524
/**
 * Tells whether a character is a whitespace character.
 *
 * Whitespace characters are:
 *   - the control characters U+0009 to U+000D and U+001C to U+001F, which
 *     are used as spaces (TAB LF VT FF CR FS GS RS US);
 *   - the characters in the general categories Zs, Zl and Zp, except for
 *     the no-break spaces U+00A0, U+202F and U+FEFF.
 *
 * WARNING: have a look at isSpace, maybe that function is closer to what
 * you expect.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is a whitespace character
 */
bool isWhitespace(dchar ch) {
    // Control characters that are used as spaces
    immutable isControlSpace =
        (0x0009 <= ch && ch <= 0x000D) || (0x001C <= ch && ch <= 0x001F);
    if (isControlSpace)
        return true;

    UnicodeData* data = getUnicodeData(ch);
    if (data is null)
        return false;

    immutable isSeparator = (data.generalCategory &
        ( UnicodeData.GeneralCategory.Zs
        | UnicodeData.GeneralCategory.Zl
        | UnicodeData.GeneralCategory.Zp)) != 0;
    if (!isSeparator)
        return false;

    // No-break spaces do not count as whitespace
    switch (ch) {
        case 0x00A0: // NBSP
        case 0x202F: // NARROW NBSP
        case 0xFEFF: // ZERO WIDTH NBSP
            return false;
        default:
            return true;
    }
}
551
/**
 * Tells whether a character is a space character as specified in the
 * Unicode Standard, i.e. whether its general category is one of Zs, Zl or
 * Zp. Characters without Unicode data are not.
 *
 * WARNING: have a look at isWhitespace, maybe that function is closer to
 * what you expect.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is a space character
 */
bool isSpace(dchar ch) {
    UnicodeData* data = getUnicodeData(ch);
    if (data is null)
        return false;
    return (data.generalCategory &
        ( UnicodeData.GeneralCategory.Zs
        | UnicodeData.GeneralCategory.Zl
        | UnicodeData.GeneralCategory.Zp)) != 0;
}
569
570
/**
 * Tells whether a character is a printable character as specified in the
 * Unicode Standard: it must have Unicode data and its general category
 * must be none of Cn, Cc, Cf, Co and Cs.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is printable
 */
bool isPrintable(dchar ch) {
    UnicodeData* data = getUnicodeData(ch);
    if (data is null)
        return false;
    return (data.generalCategory &
        ( UnicodeData.GeneralCategory.Cn
        | UnicodeData.GeneralCategory.Cc
        | UnicodeData.GeneralCategory.Cf
        | UnicodeData.GeneralCategory.Co
        | UnicodeData.GeneralCategory.Cs)) == 0;
}
587
// toUpper: all three encodings, with and without a caller supplied buffer
unittest {
    // Lower case umlauts, which map one-to-one, and their upper case form
    immutable(char)[] simple8 = "\u00E4\u00F6\u00FC";
    immutable(wchar)[] simple16 = "\u00E4\u00F6\u00FC";
    immutable(dchar)[] simple32 = "\u00E4\u00F6\u00FC";
    immutable(char)[] simpleUpper8 = "\u00C4\u00D6\u00DC";
    immutable(wchar)[] simpleUpper16 = "\u00C4\u00D6\u00DC";
    immutable(dchar)[] simpleUpper32 = "\u00C4\u00D6\u00DC";

    // Ligatures, which expand to several characters, and their upper case form
    immutable(char)[] special8 = "\uFB03\uFB04\uFB05";
    immutable(wchar)[] special16 = "\uFB03\uFB04\uFB05";
    immutable(dchar)[] special32 = "\uFB03\uFB04\uFB05";
    immutable(char)[] specialUpper8 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    immutable(wchar)[] specialUpper16 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    immutable(dchar)[] specialUpper32 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";

    // Buffers that are large enough for the result ...
    char[60] roomy8;
    wchar[30] roomy16;
    dchar[30] roomy32;
    // ... and buffers that are too small
    char[5] tight8;
    wchar[2] tight16;
    dchar[2] tight32;

    // 1) No buffer passed, no resize, no special case
    char[] got8 = toUpper(simple8);
    test(got8 == simpleUpper8);
    wchar[] got16 = toUpper(simple16);
    test(got16 == simpleUpper16);
    dchar[] got32 = toUpper(simple32);
    test(got32 == simpleUpper32);

    // 2) Buffer passed, no resize, no special case: the buffer is reused
    got8 = toUpper(simple8, roomy8);
    test(got8.ptr == roomy8.ptr);
    test(got8 == simpleUpper8);
    got16 = toUpper(simple16, roomy16);
    test(got16.ptr == roomy16.ptr);
    test(got16 == simpleUpper16);
    got32 = toUpper(simple32, roomy32);
    test(got32.ptr == roomy32.ptr);
    test(got32 == simpleUpper32);

    // 3) Buffer passed, resize necessary, no special case: the result
    //    lives in a new buffer
    got8 = toUpper(simple8, tight8);
    test(got8.ptr != tight8.ptr);
    test(got8 == simpleUpper8);
    got16 = toUpper(simple16, tight16);
    test(got16.ptr != tight16.ptr);
    test(got16 == simpleUpper16);
    got32 = toUpper(simple32, tight32);
    test(got32.ptr != tight32.ptr);
    test(got32 == simpleUpper32);

    // 4) Buffer passed, resize necessary, extensive special casing
    got8 = toUpper(special8, tight8);
    test(got8.ptr != tight8.ptr);
    test(got8 == specialUpper8);
    got16 = toUpper(special16, tight16);
    test(got16.ptr != tight16.ptr);
    test(got16 == specialUpper16);
    got32 = toUpper(special32, tight32);
    test(got32.ptr != tight32.ptr);
    test(got32 == specialUpper32);
}
653
654
// toLower: all three encodings, with and without a caller supplied buffer
unittest {
    // Upper case umlauts, which map one-to-one, and their lower case form
    immutable(char)[] simple8 = "\u00C4\u00D6\u00DC";
    immutable(wchar)[] simple16 = "\u00C4\u00D6\u00DC";
    immutable(dchar)[] simple32 = "\u00C4\u00D6\u00DC";
    immutable(char)[] simpleLower8 = "\u00E4\u00F6\u00FC";
    immutable(wchar)[] simpleLower16 = "\u00E4\u00F6\u00FC";
    immutable(dchar)[] simpleLower32 = "\u00E4\u00F6\u00FC";

    // Capital I with dot above, which expands to two characters each
    immutable(char)[] special8 = "\u0130\u0130\u0130";
    immutable(wchar)[] special16 = "\u0130\u0130\u0130";
    immutable(dchar)[] special32 = "\u0130\u0130\u0130";
    immutable(char)[] specialLower8 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    immutable(wchar)[] specialLower16 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    immutable(dchar)[] specialLower32 = "\u0069\u0307\u0069\u0307\u0069\u0307";

    // Buffers that are large enough for the result ...
    char[60] roomy8;
    wchar[30] roomy16;
    dchar[30] roomy32;
    // ... and buffers that are too small
    char[5] tight8;
    wchar[2] tight16;
    dchar[2] tight32;

    // 1) No buffer passed, no resize, no special case
    char[] got8 = toLower(simple8);
    test(got8 == simpleLower8);
    wchar[] got16 = toLower(simple16);
    test(got16 == simpleLower16);
    dchar[] got32 = toLower(simple32);
    test(got32 == simpleLower32);

    // 2) Buffer passed, no resize, no special case: the buffer is reused
    got8 = toLower(simple8, roomy8);
    test(got8.ptr == roomy8.ptr);
    test(got8 == simpleLower8);
    got16 = toLower(simple16, roomy16);
    test(got16.ptr == roomy16.ptr);
    test(got16 == simpleLower16);
    got32 = toLower(simple32, roomy32);
    test(got32.ptr == roomy32.ptr);
    test(got32 == simpleLower32);

    // 3) Buffer passed, resize necessary, no special case: the result
    //    lives in a new buffer
    got8 = toLower(simple8, tight8);
    test(got8.ptr != tight8.ptr);
    test(got8 == simpleLower8);
    got16 = toLower(simple16, tight16);
    test(got16.ptr != tight16.ptr);
    test(got16 == simpleLower16);
    got32 = toLower(simple32, tight32);
    test(got32.ptr != tight32.ptr);
    test(got32 == simpleLower32);

    // 4) Buffer passed, resize necessary, extensive special casing
    got8 = toLower(special8, tight8);
    test(got8.ptr != tight8.ptr);
    test(got8 == specialLower8);
    got16 = toLower(special16, tight16);
    test(got16.ptr != tight16.ptr);
    test(got16 == specialLower16);
    got32 = toLower(special32, tight32);
    test(got32.ptr != tight32.ptr);
    test(got32 == specialLower32);
}
718
// toFold: two strings that differ only in case, and in whether U+0390 is
// written precomposed or as its three-character folding, must fold to the
// same result in every encoding.
unittest {
    immutable(char)[] testString1utf8 = "?!Mädchen \u0390\u0390,;";
    immutable(char)[] testString2utf8 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    test(toFold(testString1utf8) == toFold(testString2utf8));

    immutable(wchar)[] testString1utf16 = "?!Mädchen \u0390\u0390,;";
    immutable(wchar)[] testString2utf16 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    test(toFold(testString1utf16) == toFold(testString2utf16));

    // These must be dchar strings; declared as wchar they would select the
    // Utf16 overload again and leave the Utf32 overload untested.
    immutable(dchar)[] testString1utf32 = "?!Mädchen \u0390\u0390,;";
    immutable(dchar)[] testString2utf32 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    test(toFold(testString1utf32) == toFold(testString2utf32));
}