/*******************************************************************************

    Provides case mapping functions for Unicode strings. As of now it is
    only 99 % complete, because it does not take into account conditional
    case mappings. This means the Greek letter Sigma will not be correctly
    case mapped at the end of a word, and the locales Lithuanian, Turkish
    and Azeri are not taken into account during case mappings. This means
    all in all around 12 characters will not be mapped correctly under
    some circumstances.

    ICU4j also does not handle these cases at the moment.

    Unittests are written against output from ICU4j

    This module tries to minimize memory allocation and usage. You can
    always pass the output buffer that should be used to the case mapping
    function, which will be resized if necessary.

    Copyright:
        Copyright (c) 2007 Peter Triller.
        Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
        See LICENSE_TANGO.txt for details.

    Version: Initial release: Sept 2007

    Authors: Peter

*******************************************************************************/

module ocean.text.Unicode;

import ocean.meta.types.Qualifiers;
import ocean.text.UnicodeData;
import ocean.text.convert.Utf;
import ocean.core.Verify;

version (unittest) import ocean.core.Test;


/**
 * Converts an Utf8 String to Upper case
 *
 * Characters flagged with a special (one-to-many) mapping use that mapping
 * when an upper case entry exists; every other character uses its simple
 * upper case mapping, or is copied unchanged when no Unicode data is known
 * for it.
 *
 * Params:
 *    input = String to be case mapped
 *    output = this output buffer will be used unless too small
 * Returns: the case mapped string (a slice of the output buffer, or of a
 *          newly allocated buffer if the passed one had to be resized)
 */
char[] toUpper(const(char)[] input, char[] output = null) {

    dchar[1] buf;
    // assume most common case: String stays the same length
    if (output.length < input.length)
        output.length = input.length;

    size_t produced = 0;
    size_t ate;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(ch);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(ch);
            verify(s !is null);
            if(s.upperCaseMapping !is null) {
                // To speed up, use the worst case for memory preallocation:
                // every code point of the mapping needs at most 4 UTF-8
                // code units. This makes sure no relocation is needed
                // inside toString.
                // better allocation algorithm ?
                if(produced + s.upperCaseMapping.length * 4 >= output.length)
                    output.length = output.length + output.length / 2 + s.upperCaseMapping.length * 4;
                auto res = toString(s.upperCaseMapping, output[produced..output.length], &ate);
                // the whole mapping must have been consumed ...
                verify(ate == s.upperCaseMapping.length);
                // ... and written in place into our buffer
                verify(res.ptr == output[produced..output.length].ptr);
                produced += res.length;
                continue;
            }
        }
        // Make sure no relocation is needed inside toString
        if(produced + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;
        buf[0] = d is null ? ch:d.simpleUpperCaseMapping;
        auto res = toString(buf, output[produced..output.length], &ate);
        verify(ate == 1);
        verify(res.ptr == output[produced..output.length].ptr);
        produced += res.length;
    }
    return output[0..produced];
}


/**
 * Converts an Utf16 String to Upper case
 *
 * Characters flagged with a special (one-to-many) mapping use that mapping
 * when an upper case entry exists; every other character uses its simple
 * upper case mapping, or is copied unchanged when no Unicode data is known
 * for it.
 *
 * Params:
 *    input = String to be case mapped
 *    output = this output buffer will be used unless too small
 * Returns: the case mapped string (a slice of the output buffer, or of a
 *          newly allocated buffer if the passed one had to be resized)
 */
wchar[] toUpper(const(wchar)[] input, wchar[] output = null) {

    dchar[1] buf;
    // assume most common case: String stays the same length
    if (output.length < input.length)
        output.length = input.length;

    size_t produced = 0;
    size_t ate;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(ch);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(ch);
            verify(s !is null);
            if(s.upperCaseMapping !is null) {
                // To speed up, use the worst case for memory preallocation:
                // every code point of the mapping needs at most 2 UTF-16
                // code units. This makes sure no relocation is needed
                // inside toString16.
                // better allocation algorithm ?
                if(produced + s.upperCaseMapping.length * 2 >= output.length)
                    output.length = output.length + output.length / 2 + s.upperCaseMapping.length * 3;
                auto res = toString16(s.upperCaseMapping, output[produced..output.length], &ate);
                // the whole mapping must have been consumed ...
                verify(ate == s.upperCaseMapping.length);
                // ... and written in place into our buffer
                verify(res.ptr == output[produced..output.length].ptr);
                produced += res.length;
                continue;
            }
        }
        // Make sure no relocation is needed inside toString16
        if(produced + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;
        buf[0] = d is null ? ch:d.simpleUpperCaseMapping;
        auto res = toString16(buf, output[produced..output.length], &ate);
        verify(ate == 1);
        verify(res.ptr == output[produced..output.length].ptr);
        produced += res.length;
    }
    return output[0..produced];
}

/**
 * Converts an Utf32 String to Upper case
 *
 * Characters flagged with a special (one-to-many) mapping use that mapping
 * when an upper case entry exists; every other character uses its simple
 * upper case mapping, or is copied unchanged when no Unicode data is known
 * for it. This matches the behaviour of the Utf8 and Utf16 overloads.
 *
 * Params:
 *    input = String to be case mapped
 *    output = this output buffer will be used unless too small
 * Returns: the case mapped string (a slice of the output buffer, or of a
 *          newly allocated buffer if the passed one had to be resized)
 */
dchar[] toUpper(const(dchar)[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    size_t produced = 0;
    foreach(dchar orig; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(orig);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(orig);
            verify(s !is null);
            if(s.upperCaseMapping !is null) {
                // Better resize strategy ???
                if(produced + s.upperCaseMapping.length > output.length)
                    output.length = output.length + output.length / 2 + s.upperCaseMapping.length;
                foreach(ch; s.upperCaseMapping) {
                    output[produced++] = ch;
                }
                // Only skip the simple mapping when a special upper case
                // mapping was actually emitted; otherwise the character
                // would silently vanish from the output.
                continue;
            }
        }
        // "+ 1" guarantees progress even for buffers of length 0 or 1
        if(produced >= output.length)
            output.length = output.length + output.length / 2 + 1;
        output[produced++] = d is null ? orig:d.simpleUpperCaseMapping;
    }
    return output[0..produced];
}


/**
 * Converts an Utf8 String to Lower case
 *
 * Characters flagged with a special (one-to-many) mapping use that mapping
 * when a lower case entry exists; every other character uses its simple
 * lower case mapping, or is copied unchanged when no Unicode data is known
 * for it.
 *
 * Params:
 *    input = String to be case mapped
 *    output = this output buffer will be used unless too small
 * Returns: the case mapped string (a slice of the output buffer, or of a
 *          newly allocated buffer if the passed one had to be resized)
 */
char[] toLower(const(char)[] input, char[] output = null) {

    dchar[1] buf;
    // assume most common case: String stays the same length
    if (output.length < input.length)
        output.length = input.length;

    size_t produced = 0;
    size_t ate;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(ch);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(ch);
            verify(s !is null);
            if(s.lowerCaseMapping !is null) {
                // To speed up, use the worst case for memory preallocation:
                // every code point of the mapping needs at most 4 UTF-8
                // code units. This makes sure no relocation is needed
                // inside toString.
                // better allocation algorithm ?
                if(produced + s.lowerCaseMapping.length * 4 >= output.length)
                    output.length = output.length + output.length / 2 + s.lowerCaseMapping.length * 4;
                auto res = toString(s.lowerCaseMapping, output[produced..output.length], &ate);
                // the whole mapping must have been consumed ...
                verify(ate == s.lowerCaseMapping.length);
                // ... and written in place into our buffer
                verify(res.ptr == output[produced..output.length].ptr);
                produced += res.length;
                continue;
            }
        }
        // Make sure no relocation is needed inside toString
        if(produced + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;
        buf[0] = d is null ? ch:d.simpleLowerCaseMapping;
        auto res = toString(buf, output[produced..output.length], &ate);
        verify(ate == 1);
        verify(res.ptr == output[produced..output.length].ptr);
        produced += res.length;
    }
    return output[0..produced];
}


/**
 * Converts an Utf16 String to Lower case
 *
 * Characters flagged with a special (one-to-many) mapping use that mapping
 * when a lower case entry exists; every other character uses its simple
 * lower case mapping, or is copied unchanged when no Unicode data is known
 * for it.
 *
 * Params:
 *    input = String to be case mapped
 *    output = this output buffer will be used unless too small
 * Returns: the case mapped string (a slice of the output buffer, or of a
 *          newly allocated buffer if the passed one had to be resized)
 */
wchar[] toLower(const(wchar)[] input, wchar[] output = null) {

    dchar[1] buf;
    // assume most common case: String stays the same length
    if (output.length < input.length)
        output.length = input.length;

    size_t produced = 0;
    size_t ate;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(ch);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(ch);
            verify(s !is null);
            if(s.lowerCaseMapping !is null) {
                // To speed up, use the worst case for memory preallocation:
                // every code point of the mapping needs at most 2 UTF-16
                // code units. This makes sure no relocation is needed
                // inside toString16.
                // better allocation algorithm ?
                if(produced + s.lowerCaseMapping.length * 2 >= output.length)
                    output.length = output.length + output.length / 2 + s.lowerCaseMapping.length * 3;
                auto res = toString16(s.lowerCaseMapping, output[produced..output.length], &ate);
                // the whole mapping must have been consumed ...
                verify(ate == s.lowerCaseMapping.length);
                // ... and written in place into our buffer
                verify(res.ptr == output[produced..output.length].ptr);
                produced += res.length;
                continue;
            }
        }
        // Make sure no relocation is needed inside toString16
        if(produced + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;
        buf[0] = d is null ? ch:d.simpleLowerCaseMapping;
        auto res = toString16(buf, output[produced..output.length], &ate);
        verify(ate == 1);
        verify(res.ptr == output[produced..output.length].ptr);
        produced += res.length;
    }
    return output[0..produced];
}


/**
 * Converts an Utf32 String to Lower case
 *
 * Characters flagged with a special (one-to-many) mapping use that mapping
 * when a lower case entry exists; every other character uses its simple
 * lower case mapping, or is copied unchanged when no Unicode data is known
 * for it. This matches the behaviour of the Utf8 and Utf16 overloads.
 *
 * Params:
 *    input = String to be case mapped
 *    output = this output buffer will be used unless too small
 * Returns: the case mapped string (a slice of the output buffer, or of a
 *          newly allocated buffer if the passed one had to be resized)
 */
dchar[] toLower(const(dchar)[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    size_t produced = 0;
    foreach(dchar orig; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(orig);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(orig);
            verify(s !is null);
            if(s.lowerCaseMapping !is null) {
                // Better resize strategy ???
                if(produced + s.lowerCaseMapping.length > output.length)
                    output.length = output.length + output.length / 2 + s.lowerCaseMapping.length;
                foreach(ch; s.lowerCaseMapping) {
                    output[produced++] = ch;
                }
                // Only skip the simple mapping when a special lower case
                // mapping was actually emitted; otherwise the character
                // would silently vanish from the output.
                continue;
            }
        }
        // "+ 1" guarantees progress even for buffers of length 0 or 1
        if(produced >= output.length)
            output.length = output.length + output.length / 2 + 1;
        output[produced++] = d is null ? orig:d.simpleLowerCaseMapping;
    }
    return output[0..produced];
}

/**
 * Converts an Utf8 String to Folding case
 * Folding case is used for case insensitive comparisons.
 *
 * Characters with a folding entry are replaced by that entry's mapping;
 * all other characters are copied unchanged.
 *
 * Params:
 *    input = String to be case mapped
 *    output = this output buffer will be used unless too small
 * Returns: the case mapped string (a slice of the output buffer, or of a
 *          newly allocated buffer if the passed one had to be resized)
 */
char[] toFold(const(char)[] input, char[] output = null) {

    dchar[1] buf;
    // assume most common case: String stays the same length
    if (output.length < input.length)
        output.length = input.length;

    size_t produced = 0;
    size_t ate;
    foreach(dchar ch; input) {
        FoldingCaseData *s = getFoldingCaseData(ch);
        if(s !is null) {
            // To speed up, use the worst case for memory preallocation:
            // every code point of the folding mapping needs at most 4
            // UTF-8 code units. This makes sure no relocation is needed
            // inside toString.
            // better allocation algorithm ?
            if(produced + s.mapping.length * 4 >= output.length)
                output.length = output.length + output.length / 2 + s.mapping.length * 4;
            auto res = toString(s.mapping, output[produced..output.length], &ate);
            // the whole mapping must have been consumed ...
            verify(ate == s.mapping.length);
            // ... and written in place into our buffer
            verify(res.ptr == output[produced..output.length].ptr);
            produced += res.length;
            continue;
        }
        // Make sure no relocation is needed inside toString
        if(produced + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;
        buf[0] = ch;
        auto res = toString(buf, output[produced..output.length], &ate);
        verify(ate == 1);
        verify(res.ptr == output[produced..output.length].ptr);
        produced += res.length;
    }
    return output[0..produced];
}

/**
 * Converts an Utf16 String to Folding case
 * Folding case is used for case insensitive comparisons.
 *
 * Characters with a folding entry are replaced by that entry's mapping;
 * all other characters are copied unchanged.
 *
 * Params:
 *    input = String to be case mapped
 *    output = this output buffer will be used unless too small
 * Returns: the case mapped string (a slice of the output buffer, or of a
 *          newly allocated buffer if the passed one had to be resized)
 */
wchar[] toFold(const(wchar)[] input, wchar[] output = null) {

    dchar[1] buf;
    // assume most common case: String stays the same length
    if (output.length < input.length)
        output.length = input.length;

    size_t produced = 0;
    size_t ate;
    foreach(dchar ch; input) {
        FoldingCaseData *s = getFoldingCaseData(ch);
        if(s !is null) {
            // To speed up, use the worst case for memory preallocation:
            // every code point of the folding mapping needs at most 2
            // UTF-16 code units. This makes sure no relocation is needed
            // inside toString16.
            // better allocation algorithm ?
            if(produced + s.mapping.length * 2 >= output.length)
                output.length = output.length + output.length / 2 + s.mapping.length * 3;
            auto res = toString16(s.mapping, output[produced..output.length], &ate);
            // the whole mapping must have been consumed ...
            verify(ate == s.mapping.length);
            // ... and written in place into our buffer
            verify(res.ptr == output[produced..output.length].ptr);
            produced += res.length;
            continue;
        }
        // Make sure no relocation is needed inside toString16
        if(produced + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;
        buf[0] = ch;
        auto res = toString16(buf, output[produced..output.length], &ate);
        verify(ate == 1);
        verify(res.ptr == output[produced..output.length].ptr);
        produced += res.length;
    }
    return output[0..produced];
}

/**
 * Converts an Utf32 String to Folding case
 * Folding case is used for case insensitive comparisons.
 *
 * Characters with a folding entry are replaced by that entry's mapping;
 * all other characters are copied unchanged.
 *
 * Params:
 *    input = String to be case mapped
 *    output = this output buffer will be used unless too small
 * Returns: the case mapped string (a slice of the output buffer, or of a
 *          newly allocated buffer if the passed one had to be resized)
 */
dchar[] toFold(const(dchar)[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    size_t produced = 0;
    foreach(dchar orig; input) {
        FoldingCaseData *d = getFoldingCaseData(orig);
        if(d !is null ) {
            // Better resize strategy ???
            if(produced + d.mapping.length > output.length)
                output.length = output.length + output.length / 2 + d.mapping.length;
            foreach(ch; d.mapping) {
                output[produced++] = ch;
            }
            continue;
        }
        // "+ 1" guarantees progress even for buffers of length 0 or 1
        if(produced >= output.length)
            output.length = output.length + output.length / 2 + 1;
        output[produced++] = orig;
    }
    return output[0..produced];
}


/**
 * Determines if a character is a digit. It returns true for decimal
 * digits only.
 *
 * Params:
 *    ch = the character to be inspected
 * Returns: true if Unicode data exists for ch and its general category
 *          is Nd
 */
bool isDigit(dchar ch) {
    UnicodeData *d = getUnicodeData(ch);
    return (d !is null) && (d.generalCategory & UnicodeData.GeneralCategory.Nd);
}


/**
 * Determines if a character is a letter.
 *
 * Params:
 *    ch = the character to be inspected
 * Returns: true if Unicode data exists for ch and its general category
 *          is one of Lu, Ll, Lt, Lm or Lo
 */
bool isLetter(int ch) {
    UnicodeData *d = getUnicodeData(ch);
    return (d !is null) && (d.generalCategory &
        ( UnicodeData.GeneralCategory.Lu
        | UnicodeData.GeneralCategory.Ll
        | UnicodeData.GeneralCategory.Lt
        | UnicodeData.GeneralCategory.Lm
        | UnicodeData.GeneralCategory.Lo));
}

/**
 * Determines if a character is a letter or a
 * decimal digit.
 *
 * Params:
 *    ch = the character to be inspected
 * Returns: true if Unicode data exists for ch and its general category
 *          is one of Lu, Ll, Lt, Lm, Lo or Nd
 */
bool isLetterOrDigit(int ch) {
    UnicodeData *d = getUnicodeData(ch);
    return (d !is null) && (d.generalCategory &
        ( UnicodeData.GeneralCategory.Lu
        | UnicodeData.GeneralCategory.Ll
        | UnicodeData.GeneralCategory.Lt
        | UnicodeData.GeneralCategory.Lm
        | UnicodeData.GeneralCategory.Lo
        | UnicodeData.GeneralCategory.Nd));
}

/**
 * Determines if a character is a lower case letter.
 * Params:
 *    ch = the character to be inspected
 * Returns: true if Unicode data exists for ch and its general category
 *          is Ll
 */
bool isLower(dchar ch) {
    UnicodeData *d = getUnicodeData(ch);
    return (d !is null) && (d.generalCategory & UnicodeData.GeneralCategory.Ll);
}

/**
 * Determines if a character is a title case letter.
 * In case of combined letters, only the first is upper and the second is lower.
 * Some of these special characters can be found in the Croatian and Greek language.
 * See_Also: http://en.wikipedia.org/wiki/Capitalization
 * Params:
 *    ch = the character to be inspected
 * Returns: true if Unicode data exists for ch and its general category
 *          is Lt
 */
bool isTitle(dchar ch) {
    UnicodeData *d = getUnicodeData(ch);
    return (d !is null) && (d.generalCategory & UnicodeData.GeneralCategory.Lt);
}

/**
 * Determines if a character is an upper case letter.
 * Params:
 *    ch = the character to be inspected
 * Returns: true if Unicode data exists for ch and its general category
 *          is Lu
 */
bool isUpper(dchar ch) {
    UnicodeData *d = getUnicodeData(ch);
    return (d !is null) && (d.generalCategory & UnicodeData.GeneralCategory.Lu);
}

/**
 * Determines if a character is a Whitespace character.
 * Whitespace characters are characters in the
 * General Categories Zs, Zl, Zp without the No Break
 * spaces plus the control characters out of the ASCII
 * range, that are used as spaces:
 * TAB VT LF FF CR FS GS RS US NL
 *
 * WARNING: look at isSpace, maybe that function does
 * more what you expect.
 *
 * NOTE(review): only U+00A0, U+202F and U+FEFF are excluded here. U+2007
 * (FIGURE SPACE) is also commonly treated as a no-break space but is not
 * excluded, and the U+FEFF test is presumably redundant since that code
 * point is not in a Z* category -- confirm before changing behaviour.
 *
 * Params:
 *    ch = the character to be inspected
 * Returns: true for the listed ASCII control characters and for Zs/Zl/Zp
 *          characters other than the excluded no-break spaces
 */
bool isWhitespace(dchar ch) {
    // ASCII control characters used as spaces: U+0009..U+000D, U+001C..U+001F
    if((ch >= 0x0009 && ch <= 0x000D) || (ch >= 0x001C && ch <= 0x001F))
        return true;
    UnicodeData *d = getUnicodeData(ch);
    return (d !is null) && (d.generalCategory &
        ( UnicodeData.GeneralCategory.Zs
        | UnicodeData.GeneralCategory.Zl
        | UnicodeData.GeneralCategory.Zp))
        && ch != 0x00A0 // NBSP
        && ch != 0x202F // NARROW NBSP
        && ch != 0xFEFF; // ZERO WIDTH NBSP
}

/**
 * Determines if a character is a Space character as
 * specified in the Unicode Standard.
 *
 * WARNING: look at isWhitespace, maybe that function does
 * more what you expect.
 *
 * Params:
 *    ch = the character to be inspected
 * Returns: true if Unicode data exists for ch and its general category
 *          is one of Zs, Zl or Zp
 */
bool isSpace(dchar ch) {
    UnicodeData *d = getUnicodeData(ch);
    return (d !is null) && (d.generalCategory &
        ( UnicodeData.GeneralCategory.Zs
        | UnicodeData.GeneralCategory.Zl
        | UnicodeData.GeneralCategory.Zp));
}


/**
 * Determines if a character is a printable character as
 * specified in the Unicode Standard.
 *
 * Params:
 *    ch = the character to be inspected
 * Returns: true if Unicode data exists for ch and its general category
 *          is none of Cn, Cc, Cf, Co or Cs
 */
bool isPrintable(dchar ch) {
    UnicodeData *d = getUnicodeData(ch);
    return (d !is null) && !(d.generalCategory &
        ( UnicodeData.GeneralCategory.Cn
        | UnicodeData.GeneralCategory.Cc
        | UnicodeData.GeneralCategory.Cf
        | UnicodeData.GeneralCategory.Co
        | UnicodeData.GeneralCategory.Cs));
}

// toUpper: all three encodings, with and without caller supplied buffers
unittest {
    // 1) No Buffer passed, no resize, no SpecialCase

    immutable(char)[] testString1utf8 = "\u00E4\u00F6\u00FC";
    immutable(wchar)[] testString1utf16 = "\u00E4\u00F6\u00FC";
    immutable(dchar)[] testString1utf32 = "\u00E4\u00F6\u00FC";
    immutable(char)[] refString1utf8 = "\u00C4\u00D6\u00DC";
    immutable(wchar)[] refString1utf16 = "\u00C4\u00D6\u00DC";
    immutable(dchar)[] refString1utf32 = "\u00C4\u00D6\u00DC";
    char[] resultString1utf8 = toUpper(testString1utf8);
    test(resultString1utf8 == refString1utf8);
    wchar[] resultString1utf16 = toUpper(testString1utf16);
    test(resultString1utf16 == refString1utf16);
    dchar[] resultString1utf32 = toUpper(testString1utf32);
    test(resultString1utf32 == refString1utf32);

    // 2) Buffer passed, no resize, no SpecialCase
    char[60] buffer1utf8;
    wchar[30] buffer1utf16;
    dchar[30] buffer1utf32;
    resultString1utf8 = toUpper(testString1utf8,buffer1utf8);
    test(resultString1utf8.ptr == buffer1utf8.ptr);
    test(resultString1utf8 == refString1utf8);
    resultString1utf16 = toUpper(testString1utf16,buffer1utf16);
    test(resultString1utf16.ptr == buffer1utf16.ptr);
    test(resultString1utf16 == refString1utf16);
    resultString1utf32 = toUpper(testString1utf32,buffer1utf32);
    test(resultString1utf32.ptr == buffer1utf32.ptr);
    test(resultString1utf32 == refString1utf32);

    // 3) Buffer passed, resize necessary, no Special case

    char[5] buffer2utf8;
    wchar[2] buffer2utf16;
    dchar[2] buffer2utf32;
    resultString1utf8 = toUpper(testString1utf8,buffer2utf8);
    test(resultString1utf8.ptr != buffer2utf8.ptr);
    test(resultString1utf8 == refString1utf8);
    resultString1utf16 = toUpper(testString1utf16,buffer2utf16);
    test(resultString1utf16.ptr != buffer2utf16.ptr);
    test(resultString1utf16 == refString1utf16);
    resultString1utf32 = toUpper(testString1utf32,buffer2utf32);
    test(resultString1utf32.ptr != buffer2utf32.ptr);
    test(resultString1utf32 == refString1utf32);

    // 4) Buffer passed, resize necessary, extensive SpecialCase


    immutable(char)[] testString2utf8 = "\uFB03\uFB04\uFB05";
    immutable(wchar)[] testString2utf16 = "\uFB03\uFB04\uFB05";
    immutable(dchar)[] testString2utf32 = "\uFB03\uFB04\uFB05";
    immutable(char)[] refString2utf8 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    immutable(wchar)[] refString2utf16 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    immutable(dchar)[] refString2utf32 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    resultString1utf8 = toUpper(testString2utf8,buffer2utf8);
    test(resultString1utf8.ptr != buffer2utf8.ptr);
    test(resultString1utf8 == refString2utf8);
    resultString1utf16 = toUpper(testString2utf16,buffer2utf16);
    test(resultString1utf16.ptr != buffer2utf16.ptr);
    test(resultString1utf16 == refString2utf16);
    resultString1utf32 = toUpper(testString2utf32,buffer2utf32);
    test(resultString1utf32.ptr != buffer2utf32.ptr);
    test(resultString1utf32 == refString2utf32);

}


// toLower: all three encodings, with and without caller supplied buffers
unittest {
    // 1) No Buffer passed, no resize, no SpecialCase

    immutable(char)[] testString1utf8 = "\u00C4\u00D6\u00DC";
    immutable(wchar)[] testString1utf16 = "\u00C4\u00D6\u00DC";
    immutable(dchar)[] testString1utf32 = "\u00C4\u00D6\u00DC";
    immutable(char)[] refString1utf8 = "\u00E4\u00F6\u00FC";
    immutable(wchar)[] refString1utf16 = "\u00E4\u00F6\u00FC";
    immutable(dchar)[] refString1utf32 = "\u00E4\u00F6\u00FC";
    char[] resultString1utf8 = toLower(testString1utf8);
    test(resultString1utf8 == refString1utf8);
    wchar[] resultString1utf16 = toLower(testString1utf16);
    test(resultString1utf16 == refString1utf16);
    dchar[] resultString1utf32 = toLower(testString1utf32);
    test(resultString1utf32 == refString1utf32);

    // 2) Buffer passed, no resize, no SpecialCase
    char[60] buffer1utf8;
    wchar[30] buffer1utf16;
    dchar[30] buffer1utf32;
    resultString1utf8 = toLower(testString1utf8,buffer1utf8);
    test(resultString1utf8.ptr == buffer1utf8.ptr);
    test(resultString1utf8 == refString1utf8);
    resultString1utf16 = toLower(testString1utf16,buffer1utf16);
    test(resultString1utf16.ptr == buffer1utf16.ptr);
    test(resultString1utf16 == refString1utf16);
    resultString1utf32 = toLower(testString1utf32,buffer1utf32);
    test(resultString1utf32.ptr == buffer1utf32.ptr);
    test(resultString1utf32 == refString1utf32);

    // 3) Buffer passed, resize necessary, no Special case

    char[5] buffer2utf8;
    wchar[2] buffer2utf16;
    dchar[2] buffer2utf32;
    resultString1utf8 = toLower(testString1utf8,buffer2utf8);
    test(resultString1utf8.ptr != buffer2utf8.ptr);
    test(resultString1utf8 == refString1utf8);
    resultString1utf16 = toLower(testString1utf16,buffer2utf16);
    test(resultString1utf16.ptr != buffer2utf16.ptr);
    test(resultString1utf16 == refString1utf16);
    resultString1utf32 = toLower(testString1utf32,buffer2utf32);
    test(resultString1utf32.ptr != buffer2utf32.ptr);
    test(resultString1utf32 == refString1utf32);

    // 4) Buffer passed, resize necessary, extensive SpecialCase

    immutable(char)[] testString2utf8 = "\u0130\u0130\u0130";
    immutable(wchar)[] testString2utf16 = "\u0130\u0130\u0130";
    immutable(dchar)[] testString2utf32 = "\u0130\u0130\u0130";
    immutable(char)[] refString2utf8 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    immutable(wchar)[] refString2utf16 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    immutable(dchar)[] refString2utf32 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    resultString1utf8 = toLower(testString2utf8,buffer2utf8);
    test(resultString1utf8.ptr != buffer2utf8.ptr);
    test(resultString1utf8 == refString2utf8);
    resultString1utf16 = toLower(testString2utf16,buffer2utf16);
    test(resultString1utf16.ptr != buffer2utf16.ptr);
    test(resultString1utf16 == refString2utf16);
    resultString1utf32 = toLower(testString2utf32,buffer2utf32);
    test(resultString1utf32.ptr != buffer2utf32.ptr);
    test(resultString1utf32 == refString2utf32);
}

// toFold: strings differing only in case must fold to the same result
unittest {
    immutable(char)[] testString1utf8 = "?!Mädchen \u0390\u0390,;";
    immutable(char)[] testString2utf8 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    test(toFold(testString1utf8) == toFold(testString2utf8));
    immutable(wchar)[] testString1utf16 = "?!Mädchen \u0390\u0390,;";
    immutable(wchar)[] testString2utf16 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    test(toFold(testString1utf16) == toFold(testString2utf16));
    // dchar strings, so that the Utf32 overload of toFold is exercised
    immutable(dchar)[] testString1utf32 = "?!Mädchen \u0390\u0390,;";
    immutable(dchar)[] testString2utf32 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    test(toFold(testString1utf32) == toFold(testString2utf32));
}