1 /******************************************************************************* 2 3 Contains utility functions for working with unicode strings. Contains a 4 function to return the length of a UTF-8 string, a method to truncate a 5 UTF-8 string to the nearest whitespace character that is less than a maximum 6 length parameter, and a method to truncate a UTF-8 string and append a set 7 ending to it. 8 9 Example usage: 10 11 --- 12 13 char[] utf = ...; // some UTF-8 character sequence 14 15 // using the default unicode error handler 16 size_t len1 = utf8Length(utf); 17 18 // using a custom error handler 19 // which takes the index of the string as a parameter 20 size_t len2 = utf8Length(utf, (size_t i){ // error handling code... }); 21 22 --- 23 24 Copyright: 25 Copyright (c) 2009-2016 dunnhumby Germany GmbH. 26 All rights reserved. 27 28 License: 29 Boost Software License Version 1.0. See LICENSE_BOOST.txt for details. 30 Alternatively, this file may be distributed under the terms of the Tango 31 3-Clause BSD License (see LICENSE_BSD.txt for details). 32 33 *******************************************************************************/ 34 35 module ocean.text.utf.UtfUtil; 36 37 import core.exception: onUnicodeError; 38 39 import ocean.transition; 40 41 import ocean.stdc.string: memrchr; 42 43 import ocean.core.Array: append, copy; 44 import ocean.core.Verify; 45 46 import ocean.math.IEEE: isNaN; 47 48 import ocean.text.Unicode : isSpace; 49 50 import ocean.text.utf.c.glib_unicode; 51 52 import ocean.core.Test; 53 54 55 /******************************************************************************* 56 57 UTF-8 representation of "…". 
*******************************************************************************/

public istring ellipsis = "\xE2\x80\xA6"; // The char '…' (three UTF-8 bytes)


/*******************************************************************************

    This array gives the length of a UTF-8 sequence indexed by the value
    of the leading byte. An FF (ubyte.max) represents an illegal starting value
    of a UTF-8 sequence.
    FF is used instead of 0 to avoid having loops hang.

    Layout of the table, by leading byte:

        0x00 .. 0x7F = 1
        0x80 .. 0xBF = ubyte.max (illegal as a starting value)
        0xC0 .. 0xDF = 2
        0xE0 .. 0xEF = 3
        0xF0 .. 0xF7 = 4
        0xF8 .. 0xFB = 5
        0xFC .. 0xFD = 6
        0xFE .. 0xFF = ubyte.max (illegal as a starting value)

*******************************************************************************/

private static immutable ubyte[char.max + 1] utf8_stride =
[
    // 0x00 .. 0x7F
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    // 0x80 .. 0xBF
    ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,
    ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,
    ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,
    ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,ubyte.max,
    // 0xC0 .. 0xDF
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    // 0xE0 .. 0xEF
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
    // 0xF0 .. 0xFF
    4,4,4,4,4,4,4,4,5,5,5,5,6,6,ubyte.max,ubyte.max,
];


/*******************************************************************************

    Calculates the number of UTF8 code points in a UTF8-encoded string.
    Calls the standard unicode error handler on error,
    which throws a new UnicodeException.
99 100 Params: 101 str = The string to calculate the length of. 102 103 Returns: 104 The length of the given string. 105 106 Throws: 107 UnicodeException if an invalid UTF8 code unit is detected. 108 109 *******************************************************************************/ 110 111 public size_t utf8Length ( cstring str ) 112 { 113 void error ( size_t i ) 114 { 115 onUnicodeError("invalid UTF-8 sequence", i); 116 } 117 118 return utf8Length(str, &error); 119 } 120 121 122 /******************************************************************************* 123 124 Calculates the number of UTF8 code points in a UTF8-encoded string. 125 Calls error_dg if an invalid UTF8 code unit is detected, 126 which may throw an exception to abort processing. 127 128 Params: 129 str = The string to calculate the length of. 130 error_dg = The error delegate to call upon finding an invalid code unit. 131 Takes a size_t parameter representing the index of the current 132 code point in the string. 133 134 Returns: 135 The length of the given string. 
*******************************************************************************/

public size_t utf8Length ( cstring str, scope void delegate ( size_t ) error_dg )
{
    size_t code_points = 0;
    size_t pos = 0;
    size_t step = 0;

    while ( pos < str.length )
    {
        // The leading byte tells how many bytes the current sequence
        // occupies, i.e. how far the index has to advance.
        step = utf8_stride[str[pos]];

        if ( step == ubyte.max )
        {
            error_dg(pos);
        }

        ++code_points;
        pos += step;
    }

    // Overshooting the end means the last sequence claimed more bytes than
    // the string holds: step back to where it started and report it.
    if ( pos > str.length )
    {
        verify(pos >= step, "i should be stride or greater");
        pos -= step;
        verify(pos < str.length, "i - stride should be less than str.length");
        error_dg(pos);
    }

    return code_points;
}

unittest
{
    test(utf8Length(null) == 0,
        "the length of a null string should be 0");

    test(utf8Length("") == 0,
        "the length of an empty string should be 0");

    test(utf8Length("foo bar baz xyzzy") == 17,
        "the length of \"foo bar baz xyzzy\" should be 17");

    test(utf8Length("ðäß ßøø+ ì$ æ ¢ööđ µøvi€ →→→") == 28,
        "the length of \"ðäß ßøø+ ì$ æ ¢ööđ µøvi€ →→→\" should be 28");

    // the error delegate must be called for an invalid string
    bool error_caught = false;
    static immutable istring error_str = "error in " ~ char.init ~ " the middle";
    utf8Length(error_str, ( size_t i ) { error_caught = true; });
    test(error_caught,
        "the call to utf8Length should have caught an error");

    // the error delegate must NOT be called for a valid string
    error_caught = false;
    static immutable istring valid_str = "There are no errors in this string!";
    utf8Length(valid_str, ( size_t i ) { error_caught = true; });
    test(!error_caught,
        "the call to utf8Length should not have caught an error");
}


/*******************************************************************************

    Limits str to a length of n UTF-8 code points, cutting
off on the last
    space, if found. If str is not valid UTF-8, str.length is assumed to be the
    number of code points.

    Only the ASCII space character ' ' is treated as a word break. str is
    modified in place (its length is reduced).

    Params:
        str = string to limit the length
        n = maximum number of code points in the resulting string

    Out:
        The maximum number of code points in str is n.

    Returns:
        The truncated string for method chaining

*******************************************************************************/

public mstring truncateAtWordBreak ( ref mstring str, size_t n )
out (result)
{
    if (result.length > n)
    {
        assert(g_utf8_validate(result.ptr, result.length, null));
        assert(g_utf8_strlen(result.ptr, result.length) <= n);
    }
}
body
{
    if (n < str.length)
    {
        bool valid_utf8 = g_utf8_validate(str.ptr, str.length, null);

        auto utf8_len = valid_utf8 ? utf8Length(str) : str.length;

        if (n < utf8_len)
        {
            // Byte offset of the n-th code point (or simply n bytes if the
            // string is not valid UTF-8).
            size_t last = n;

            if (valid_utf8)
            {
                last = g_utf8_offset_to_pointer(str.ptr, last) - str.ptr;
            }

            // Search backwards for the last ' ' within the first `last` bytes.
            void* result = memrchr(str.ptr, ' ', last);
            char* c = cast(char*) result;
            if (c)
            {
                // Skip consecutive ' ' characters.
                while (*c == ' ' && c > str.ptr)
                {
                    c--;
                }

                // c now points either at the last non-space character before
                // the break, or at a space at the very start of the string.
                // Decide on the character itself, not on the pointer: a
                // single non-space character at index 0 must be kept.
                if (*c == ' ')
                {
                    str.length = 0;
                }
                else
                {
                    str.length = c - str.ptr + 1;
                }
            }
            else
            {
                // If no ' ' is found to break on, set the break to the maximum
                // number of code points
                str.length = last;
            }
        }
    }

    return str;
}

unittest
{
    void doTest ( cstring input, cstring expected_output, int length, int line = __LINE__ )
    {
        mstring buffer;
        buffer.copy(input);
        test!("==")(truncateAtWordBreak(buffer, length), expected_output, __FILE__, line);
    }

    doTest("Hello World!", "Hello World!", "Hello World!".length);

    doTest("Hello World!", "Hello World!", "Hello World!".length + 5);

    doTest("Hello World!", "Hello", 9);

    doTest("Hällö World!", "Hällö", 9);

    doTest("äöü", "äöü", 3);

    doTest("Hello World!", "Hello", 9);

    // A one-character first word must survive the truncation
    doTest("a bcdef", "a", 4);

    // Nothing but a leading space before the break: empty result
    doTest(" abcdef", "", 4);
}


/*******************************************************************************

    Truncate the length of a UTF-8 string and append a set ending. The string
    is initially truncated so that it is of maximum length n (this includes
    the extra ending parameter so the string is truncated to position
    n - ending.length).

    NOTE(review): `ending.length` is a byte count while n is a code point
    count; for an ending containing multi-byte characters the string is
    truncated further than strictly necessary.

    Params:
        str = string to truncate and append the ending to
        n = maximum number of code points in the resulting string
        ending = the ending to append to the string, defaults to "..."

    In:
        n must be at least `ending.length`

    Returns:
        The truncated and appended string for method chaining

*******************************************************************************/

public mstring truncateAppendEnding ( ref mstring str, size_t n, cstring ending = "...")
{
    verify (n >= ending.length);

    bool valid_utf8 = g_utf8_validate(str.ptr, str.length, null);

    auto utf8_len = valid_utf8 ? utf8Length(str) : str.length;

    // Only touch the string if it is actually too long
    if (n < utf8_len)
    {
        truncateAtWordBreak(str, (n - ending.length));
        str.append(ending);
    }

    return str;
}

unittest
{
    mstring buffer;

    void doTest ( cstring input, cstring expected_output, int length,
        cstring ending = "..." , int line = __LINE__ )
    {
        buffer.copy(input);
        test!("==")(truncateAppendEnding(buffer, length, ending),
            expected_output, __FILE__, line);
    }

    doTest("Hello World!", "Hello World!", "Hello World!".length);

    doTest("Hello World!", "Hello World!", "Hello World!".length + 5);

    doTest("Hello World!", "Hello...", 9);

    doTest("Hällö World!", "Hällö...", 9);

    doTest("äöü äöü", "ä...", 4);

    doTest("Hello World!", "Hello...", 9);

    doTest("HelloW" ~ cast (char) 0x81 ~ "rld!",
        "HelloW" ~ cast (char) 0x81 ~ "...", 10);

    doTest("HelloWörld!", "HelloWörl+", 10, "+");

    doTest("Designstarker Couchtisch in hochwertiger Holznachbildung. Mit "
        ~ "praktischem Ablagebogen in Kernnussbaumfarben oder Schwarz. "
        ~ "Winkelfüße mit Alukante. B", "Designstarker Couchtisch in hochwertiger"
        ~ " Holznachbildung. Mit praktischem Ablagebogen...", 90);

    // A one-character first word must be kept in front of the ending
    doTest("a bcdefgh", "a...", 7);
}


/*******************************************************************************

    Limits the length of a UTF-8 string, to at most the specified number of
    bytes.

    This is conceptually equal to str[0..max_len], except that we take care to
    avoid chopping a multi-byte UTF-8 character in half.

    Params:
        str = the string to be sliced
        max_len = the maximum allowable length (in bytes) of the string

    Returns:
        a slice of the original string, of length at most max_len.
*******************************************************************************/

public Inout!(mstring) limitStringLength ( Inout!(mstring) str, size_t max_len )
{
    // Short enough already: hand the input back untouched.
    if ( max_len >= str.length )
    {
        return str;
    }

    // Never cut through a multi-byte character. Every UTF-8 continuation
    // byte has the form 0b10xxxxxx, so while the cut position lands on such
    // a byte it is moved back towards the start of the string.
    size_t cut = max_len;

    for ( ; cut != 0; --cut )
    {
        if ( (str[cut] & 0xC0) != 0x80 )
        {
            break;
        }
    }

    return str[0 .. cut];
}


unittest
{
    // String ending with a 1-byte character

    test!("==")(limitStringLength("abc", 5), "abc");
    test!("==")(limitStringLength("abc", 2), "ab");

    // String ending with a 2-byte character

    test!("==")(limitStringLength("ÜÄ", 5), "ÜÄ");
    test!("==")(limitStringLength("ÜÄ", 4), "ÜÄ");
    test!("==")(limitStringLength("ÜÄ", 3), "Ü");
    test!("==")(limitStringLength("ÜÄ", 2), "Ü");
    test!("==")(limitStringLength("ÜÄ", 1), "");

    // String ending with a 3-byte character

    test!("==")(limitStringLength("Ü眼", 6), "Ü眼");
    test!("==")(limitStringLength("Ü眼", 5), "Ü眼");
    test!("==")(limitStringLength("Ü眼", 4), "Ü");

    // Ensure it compiles with an mstring

    mstring x = "abcd".dup;
    mstring y = limitStringLength(x, 2);
}


/*******************************************************************************

    Truncates a string at the last space before the n-th Unicode character or,
    if the resulting string is too short, at the n-th Unicode character.
    The string should be a valid UTF-8 (the caller should have validated it
    before calling this function).

    If a string is truncated before the end, then the final Unicode character
    is made an ending. Trailing space is removed before the ending is added.
    The returned string will always be no more than n Unicode characters
    (including the ending).

    The basic algorithm is to walk through src keeping track of how many
    bytes needed to be sliced at any particular time until we know when
    we need to end. Because we don't know till the end if we need an
    ending we need to keep track of one Unicode character behind as well as the
    position of the Unicode character before the last space. We have to be
    careful we never point at spaces.

    Important points when reading the algorithm:

    1) Unicode character != byte
    2) i == the number of bytes required to include the _previous_
        Unicode character (i.e. the number of bytes to the start of c)

    Params:
        src        = the string to truncate (must be UTF-8 encoded)
        n          = the maximum number of Unicode characters allowed in the
                     returned string
        buffer     = a buffer to be used to store the result in (may be
                     resized). The buffer is required because "ending" may
                     contain Unicode characters taking more bytes than the
                     Unicode characters in src they replace, thus leading to a
                     string with fewer Unicode characters but more bytes.
        ending     = These Unicode characters will be appended when "src" needs
                     to be truncated.
        fill_ratio = if cutting the string in the last space would make its
                     Unicode character length smaller than "n*fill_ratio",
                     then we cut it on the n-th Unicode character

    In:
        n must be greater than the number of Unicode characters in ending;
        fill_ratio must not be NaN and must lie within [0, 1].

    Returns:
        buffer

*******************************************************************************/

public mstring truncateAtN(cstring src, size_t n, ref mstring buffer,
    cstring ending = ellipsis, float fill_ratio = 0.75)
out (result)
{
    // Count the Unicode characters of the result (decoding via foreach/dchar)
    size_t result_length = 0;
    foreach ( dchar c; result )
    {
        ++result_length;
    }

    assert(result_length <= n);
}
body
{
    // Argument checks, kept in their own scope
    {
        size_t ending_length = 0;   // Ending's number of Unicode characters
        foreach ( dchar c; ending )
        {
            ++ending_length;
        }

        verify(n > ending_length);

        verify(!isNaN(fill_ratio));
        verify(fill_ratio>=0 && fill_ratio<=1);
    }

    // NOTE(review): this repeats the count already done for the checks above
    // (the loop index `i` is unused); the two could share one computation.
    size_t ending_length = 0;   // Ending's number of Unicode characters
    foreach ( size_t i, dchar c; ending )
    {
        ++ending_length;
    }

    size_t net_length = n - ending_length;  // The maximum number of Unicode
                                            // characters that can be kept, if
                                            // ending is used.

    size_t code_point_count;    // Which Unicode character are we up to.
    size_t bytes_needed = 0;    // Number of bytes needed to include the last
                                // valid looking Unicode character.
    size_t last_space_bytes_net = 0; // Number of bytes needed to include the
                                     // last valid Unicode character which is
                                     // before the last known space, if ending
                                     // is used.
    size_t last_space_code_points_net = 0; // The number of Unicode characters
                                     // that precede the last space, if ending
                                     // is used.
    size_t last_space_bytes_n = 0;   // Number of bytes needed to include the
                                     // last valid Unicode character which is
                                     // before the last known space, if ending
                                     // is not used.
    size_t last_space_code_points_n = 0; // The number of Unicode characters
                                     // that precede the last space, if ending
                                     // is not used.
    bool need_ending;       // Do we know we need an ending already?
    bool last_was_space;    // Was the previous character a space?

    foreach ( size_t i, dchar c; src )
    {
        bool curr_is_space = isSpace(c);

        // Keep Unicode characters that will be returned if the ending is used.
        if ( code_point_count <= net_length )
        {
            // We still need more Unicode characters so we update the counters.
            // In the edge case (code_point_count == net_length), the
            // current Unicode character is not needed. However, we need its "i"
            // in order to find the bytes of the string which includes the
            // previous Unicode character.
            if ( ! last_was_space )
            {
                bytes_needed = i;

                if ( curr_is_space )
                {
                    // If the current Unicode character is a space, the previous
                    // is not a space and we are not at the end, keep its
                    // position.
                    last_space_bytes_net = i;
                    last_space_code_points_net = code_point_count;
                }
            }
        }

        // Keep Unicode characters that will be returned if the ending is not
        // used.
        if ( code_point_count <= n
            && ! last_was_space
            && curr_is_space )
        {
            // Use "n" instead of "net_length".
            last_space_bytes_n = i;
            last_space_code_points_n = code_point_count;
        }

        last_was_space = curr_is_space;

        // This Unicode character will be truncated, but we need to check if it
        // is a space character. If the Unicode characters that we omit are
        // spaces, we will not append the ending, we will just remove the spaces.
        if ( code_point_count >= n )
        {
            if ( ! curr_is_space )
            {
                // This is a non-space Unicode character so we are truncating.
                need_ending = true;
                break;
            }
        }

        // Track which Unicode character we are up to (as opposed to byte)
        ++code_point_count;
    }

    // We may have fallen off the end of src before we had time to set up all
    // our variables. If need_ending is true though we know that isn't the case.
    if ( need_ending )
    {
        // Check if there is a long enough string before the last space.
        if ( last_space_bytes_net
            && (last_space_code_points_net / (cast(float)n) > fill_ratio) )
        {
            bytes_needed = last_space_bytes_net;
        }
        // Copy up to the prev position, which may be the 2nd last Unicode
        // character or the Unicode character before the last space.
        enableStomping(buffer);
        buffer.length = bytes_needed + ending.length;
        enableStomping(buffer);
        buffer[0 .. bytes_needed] = src[0 .. bytes_needed];
        // And append an ending
        buffer[bytes_needed .. bytes_needed + ending.length] = ending[];
    }
    else
    {
        // We need to check if we finished one or more iterations short
        if ( code_point_count <= n )
        {
            // We did so src is short and if there is no trailing space
            // we can just use it as is. If there was trailing space then
            // "last_space_bytes_n" will have already been set correctly on the
            // iteration caused by the space
            if ( ! last_was_space )
            {
                last_space_bytes_n = src.length;
            }
        }
        // No need to append the ending so use the full string we found
        enableStomping(buffer);
        buffer.length = last_space_bytes_n;
        enableStomping(buffer);
        buffer[] = src[0 .. last_space_bytes_n];
    }
    return(buffer);
}

unittest
{
    auto t = new NamedTest(
        "truncateAtN"
    );

    mstring buffer;

    // Old test
    // The `ellipsis` constant must match the bytes of the literal "…"
    foreach (i, char c; "…")
    {
        t.test!("==")(ellipsis[i], c);
    }

    istring str = "Hello World!";
    t.test!("==")(str.truncateAtN(str.length, buffer), "Hello World!");
    t.test!("==")(str.truncateAtN(str.length + 5, buffer), "Hello World!");
    t.test!("==")(str.truncateAtN(10, buffer), "Hello Wor" ~ ellipsis);

    t.test!("==")("Hällö World!"c.truncateAtN(10, buffer),
        "Hällö Wor"c ~ ellipsis);
    t.test!("==")("äöü"c.truncateAtN(3, buffer), "äöü"c);
    // NOTE(review): this input reads the same as `str` above but a different
    // result is expected; presumably the literal originally contained
    // consecutive spaces - confirm against the upstream file.
    t.test!("==")("Hello World!".dup.truncateAtN(10, buffer),
        "Hello Wo" ~ ellipsis);
    t.test!("==")("HelloWörld!"c.truncateAtN(10, buffer, "+"), "HelloWörl+"c);
    t.test!("==")(
        "Designstarker Couchtisch in hochwertiger Holznachbildung. Mit praktischem Ablagebogen in Kernnussbaumfarben oder Schwarz. Winkelfüße mit Alukante. B"c.truncateAtN(100, buffer),
        "Designstarker Couchtisch in hochwertiger Holznachbildung. Mit praktischem Ablagebogen in"c ~ ellipsis
    );

    // Andrew's tests

    t.test!("==")(("This should be the longest string of all the unit tests.\n"
        ~ "We do this so that the buffer never needs expanding again.\n"
        ~ "This way we can check for unnecessary allocations.")
        .truncateAtN(160, buffer),
        "This should be the longest string of all the unit tests.\n"
        ~ "We do this so that the buffer never needs expanding again.\n"
        ~ "This way we can check for unnecessary…"
    );

    // Remember where the buffer lives: the remaining tests must reuse it
    typeof(buffer.ptr) orig_ptr = buffer.ptr;

    t.test!("==")(" ".truncateAtN(2, buffer), "");
    t.test!("==")("12 ".truncateAtN(4, buffer), "12");
    t.test!("==")("12 ".truncateAtN(6, buffer), "12");
    t.test!("==")("hello".truncateAtN(2, buffer), "h…");
    t.test!("==")("hello".truncateAtN(4, buffer), "hel…");
    t.test!("==")("hello".truncateAtN(5, buffer), "hello");
    t.test!("==")("hello".truncateAtN(6, buffer), "hello");
    t.test!("==")("hello".truncateAtN(10, buffer), "hello");
    t.test!("==")("h l o".truncateAtN(5, buffer), "h l o");
    t.test!("==")("hello ".truncateAtN(5, buffer), "hello");
    t.test!("==")("hello ".truncateAtN(6, buffer), "hello");
    t.test!("==")("hello ".truncateAtN(7, buffer), "hello");
    t.test!("==")("hello ".truncateAtN(10, buffer), "hello");
    t.test!("==")("hello world".truncateAtN(8, buffer), "hello…");
    t.test!("==")("hello | world".truncateAtN(7, buffer), "hello…");
    t.test!("==")("hello | world".truncateAtN(8, buffer), "hello |…");
    t.test!("==")("hello | world".truncateAtN(32, buffer), "hello | world");
    t.test!("==")("h llo world".truncateAtN(3, buffer), "h…");
    t.test!("==")("he ll o world".truncateAtN(9, buffer), "he ll…");
    t.test!("==")("he ll o world".truncateAtN(10, buffer), "he ll o…");
    t.test!("==")("he ll o world".truncateAtN(32, buffer),
        "he ll o world");

    t.test!("==")("a".truncateAtN(4, buffer), "a");
    t.test!("==")("ab".truncateAtN(4, buffer), "ab");
    t.test!("==")("a|".truncateAtN(4, buffer), "a|");
    t.test!("==")("ab|".truncateAtN(4, buffer), "ab|");
    t.test!("==")("ab|d".truncateAtN(4, buffer), "ab|d");
    t.test!("==")("abc|".truncateAtN(4, buffer), "abc|");
    t.test!("==")("abcd| ".truncateAtN(4, buffer), "abc…");
    t.test!("==")("a| d".truncateAtN(4, buffer), "a| d");

    t.test!("==")("По оживлённым берегам"c.truncateAtN(2, buffer), "П…"c);
    t.test!("==")("По оживлённым берегам"c.truncateAtN(3, buffer), "По…"c);
    t.test!("==")("По оживлённым берегам"c.truncateAtN(4, buffer), "По…"c);
    t.test!("==")("По оживлённым берегам"c.truncateAtN(5, buffer), "По о…"c);
    t.test!("==")("Ἰοὺ ἰού· τὰ πάντʼ ἂν ἐξήκοι σαφῆ."c.truncateAtN(2, buffer),
        "Ἰ…"c);
    t.test!("==")("Ἰοὺ ἰού· τὰ πάντʼ ἂν ἐξήκοι σαφῆ."c.truncateAtN(3, buffer),
        "Ἰο…"c);
    t.test!("==")("Ἰοὺ ἰού· τὰ πάντʼ ἂν ἐξήκοι σαφῆ."c.truncateAtN(4, buffer),
        "Ἰοὺ…"c);
    t.test!("==")("Ἰοὺ ἰού· τὰ πάντʼ ἂν ἐξήκοι σαφῆ."c.truncateAtN(5, buffer),
        "Ἰοὺ…"c);
    t.test!("==")("Ἰοὺ ἰού· τὰ πάντʼ ἂν ἐξήκοι σαφῆ."c.truncateAtN(6, buffer),
        "Ἰοὺ ἰ…"c);
    t.test!("==")("Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία"c.truncateAtN(256, buffer),
        "Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία"c);
    t.test!("==")("पशुपतिरपि तान्यहानि कृच्छ्राद्"c.truncateAtN(6,buffer), "पशुपत…"c); // NB शु is 2 chars
    t.test!("==")("पशुपतिरपि तान्यहानि कृच्छ्राद्"c.truncateAtN(8, buffer), "पशुपतिर…"c);
    t.test!("==")("子曰:「學而時習之,不亦說乎?有朋自遠方來,不亦樂乎?"c.truncateAtN(5, buffer), "子曰:「…"c);

    // we don't yet support R-To-L languages so don't test Arabic
    //test(truncate_at_n("بِسْمِ ٱللّٰهِ ٱلرَّحْمـَبنِ ٱلرَّحِيمِ", 5c, buffer) = "…رَّحِيمِ"c);

    // Use some other ending that is not one character.
    t.test!("==")("a| d".truncateAtN(4, buffer, "..."), "a| d");
    t.test!("==")("a| d1".truncateAtN(4, buffer, "..."), "a...");
    t.test!("==")("1234567890".truncateAtN(7, buffer, "..."), "1234...");
    t.test!("==")("1234567890".truncateAtN(70, buffer, "..."), "1234567890");
    t.test!("==")("1234 6789 1234 6789 1234 6789".truncateAtN(25, buffer, "..."),
        "1234 6789 1234 6789...");

    // check nothing has allocated
    t.test!("==")(orig_ptr, buffer.ptr);
}