1 /******************************************************************************* 2 3 Collection of utilities to create new data based on arrays. 4 5 All functions in this module must never mutate their arguments. Some of 6 them allocate new GC memory to store return data, some request explicit 7 buffer argument for result. 8 9 Based on `tango.core.Array` module from Tango library. 10 11 Copyright: 12 Copyright (C) 2005-2006 Sean Kelly. 13 Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH. 14 All rights reserved. 15 16 License: 17 Tango Dual License: 3-Clause BSD License / Academic Free License v3.0. 18 See LICENSE_TANGO.txt for details. 19 20 *******************************************************************************/ 21 22 module ocean.core.array.Transformation; 23 24 25 import ocean.meta.traits.Basic; 26 import ocean.meta.types.Function; 27 28 import ocean.core.array.DefaultPredicates; 29 import ocean.core.Buffer; 30 31 import ocean.core.array.Search; 32 33 version (unittest) 34 { 35 import ocean.core.Test; 36 import ocean.meta.types.Qualifiers; 37 } 38 39 /******************************************************************************* 40 41 Computes the union of setA and setB as a set operation and returns the 42 retult in a new sorted array. Both setA and setB are required to be 43 sorted. If either setA or setB contain duplicates, the result will 44 contain the larger number of duplicates from setA and setB. When an 45 overlap occurs, entries will be copied from setA. Comparisons will be 46 performed using the supplied predicate or '<' if none is supplied. 47 48 Params: 49 setA = The first sorted array to evaluate. 50 setB = The second sorted array to evaluate. 51 pred = The evaluation predicate, which should return true if e1 is 52 less than e2 and false if not. This predicate may be any 53 callable type. 54 55 Returns: 56 A new array containing the union of setA and setB. 57 58 *******************************************************************************/ 59 60 T[] unionOf ( T, Pred = DefaultPredicates.IsLess!(T) ) 61 ( in T[] setA, in T[] setB, Pred pred = Pred.init ) 62 { 63 static assert( isCallableType!(Pred ) ); 64 65 size_t posA = 0, 66 posB = 0; 67 T[] setU; 68 69 while( posA < setA.length && posB < setB.length ) 70 { 71 if( pred( setA[posA], setB[posB] ) ) 72 setU ~= setA[posA++]; 73 else if( pred( setB[posB], setA[posA] ) ) 74 setU ~= setB[posB++]; 75 else 76 setU ~= setA[posA++], posB++; 77 } 78 setU ~= setA[posA .. $]; 79 setU ~= setB[posB .. $]; 80 return setU; 81 } 82 83 /// 84 unittest 85 { 86 test!("==")(unionOf( "", "" ), ""); 87 test!("==")(unionOf( "abc", "def" ), "abcdef"); 88 test!("==")(unionOf( "abbbcd", "aadeefg" ), "aabbbcdeefg"); 89 } 90 91 /******************************************************************************* 92 93 Computes the intersection of setA and setB as a set operation and 94 returns the retult in a new sorted array. Both setA and setB are 95 required to be sorted. If either setA or setB contain duplicates, the 96 result will contain the smaller number of duplicates from setA and setB. 97 All entries will be copied from setA. Comparisons will be performed 98 using the supplied predicate or '<' if none is supplied. 99 100 Params: 101 setA = The first sorted array to evaluate. 102 setB = The second sorted array to evaluate. 103 pred = The evaluation predicate, which should return true if e1 is 104 less than e2 and false if not. This predicate may be any 105 callable type. 106 107 Returns: 108 A new array containing the intersection of setA and setB. 109 110 *******************************************************************************/ 111 112 T[] intersectionOf ( T, Pred = DefaultPredicates.IsLess!(T) ) 113 ( in T[] setA, in T[] setB, Pred pred = Pred.init ) 114 { 115 static assert( isCallableType!(Pred ) ); 116 117 size_t posA = 0, 118 posB = 0; 119 T[] setI; 120 121 while( posA < setA.length && posB < setB.length ) 122 { 123 if( pred( setA[posA], setB[posB] ) ) 124 ++posA; 125 else if( pred( setB[posB], setA[posA] ) ) 126 ++posB; 127 else 128 setI ~= setA[posA++], posB++; 129 } 130 return setI; 131 } 132 133 /// 134 unittest 135 { 136 test!("==")(intersectionOf( ""[], ""[] ), ""); 137 test!("==")(intersectionOf( "abc"[], "def"[] ), ""); 138 test!("==")(intersectionOf( "abbbcd"[], "aabdddeefg"[] ), "abd"); 139 } 140 141 /******************************************************************************* 142 143 Returns a new array containing all elements in setA which are not 144 present in setB. Both setA and setB are required to be sorted. 145 Comparisons will be performed using the supplied predicate or '<' 146 if none is supplied. 147 148 Params: 149 setA = The first sorted array to evaluate. 150 setB = The second sorted array to evaluate. 151 pred = The evaluation predicate, which should return true if e1 is 152 less than e2 and false if not. This predicate may be any 153 callable type. 154 155 Returns: 156 A new array containing the elements in setA that are not in setB. 157 158 *******************************************************************************/ 159 160 T[] missingFrom ( T, Pred = DefaultPredicates.IsLess!(T) ) 161 ( in T[] setA, in T[] setB, Pred pred = Pred.init ) 162 { 163 static assert( isCallableType!(Pred ) ); 164 165 size_t posA = 0, 166 posB = 0; 167 T[] setM; 168 169 while( posA < setA.length && posB < setB.length ) 170 { 171 if( pred( setA[posA], setB[posB] ) ) 172 setM ~= setA[posA++]; 173 else if( pred( setB[posB], setA[posA] ) ) 174 ++posB; 175 else 176 ++posA, ++posB; 177 } 178 setM ~= setA[posA .. $]; 179 return setM; 180 } 181 182 /// 183 unittest 184 { 185 test!("==")(missingFrom("abbbcd", "abd"), "bbc" ); 186 } 187 188 unittest 189 { 190 test!("==")(missingFrom( ""[], ""[] ), "" ); 191 test!("==")(missingFrom( ""[], "abc"[] ), "" ); 192 test!("==")(missingFrom( "abc"[], ""[] ), "abc" ); 193 test!("==")(missingFrom( "abc"[], "abc"[] ), "" ); 194 test!("==")(missingFrom( "abc"[], "def"[] ), "abc" ); 195 test!("==")(missingFrom( "abced"[], "dedf"[] ), "abc" ); 196 test!("==")(missingFrom( "abbbcd"[], "abd"[] ), "bbc" ); 197 test!("==")(missingFrom( "abcdef"[], "bc"[] ), "adef" ); 198 } 199 200 /******************************************************************************* 201 202 Returns a new array containing all elements in setA which are not 203 present in setB and the elements in setB which are not present in 204 setA. Both setA and setB are required to be sorted. Comparisons 205 will be performed using the supplied predicate or '<' if none is 206 supplied. 207 208 Params: 209 setA = The first sorted array to evaluate. 210 setB = The second sorted array to evaluate. 211 pred = The evaluation predicate, which should return true if e1 is 212 less than e2 and false if not. This predicate may be any 213 callable type. 214 215 Returns: 216 A new array containing the elements in setA that are not in setB 217 and the elements in setB that are not in setA. 218 219 *******************************************************************************/ 220 221 T[] differenceOf ( T, Pred = DefaultPredicates.IsLess!(T) ) 222 ( in T[] setA, in T[] setB, Pred pred = Pred.init ) 223 { 224 static assert( isCallableType!(Pred ) ); 225 226 size_t posA = 0, 227 posB = 0; 228 T[] setD; 229 230 while( posA < setA.length && posB < setB.length ) 231 { 232 if( pred( setA[posA], setB[posB] ) ) 233 setD ~= setA[posA++]; 234 else if( pred( setB[posB], setA[posA] ) ) 235 setD ~= setB[posB++]; 236 else 237 ++posA, ++posB; 238 } 239 setD ~= setA[posA .. $]; 240 setD ~= setB[posB .. $]; 241 return setD; 242 } 243 244 unittest 245 { 246 test!("==")( differenceOf( ""[], ""[] ), "" ); 247 test!("==")( differenceOf( ""[], "abc"[] ), "abc" ); 248 test!("==")( differenceOf( "abc"[], ""[] ), "abc" ); 249 test!("==")( differenceOf( "abc"[], "abc"[] ), "" ); 250 test!("==")( differenceOf( "abc"[], "def"[] ), "abcdef" ); 251 test!("==")( differenceOf( "abbbcd"[], "abd"[] ), "bbc" ); 252 test!("==")( differenceOf( "abd"[], "abbbcd"[] ), "bbc" ); 253 } 254 255 /******************************************************************************* 256 257 Apply a function to each element an array. The function's 258 return values are stored in another array. 259 260 Params: 261 array = the array. 262 func = the function to apply. 263 buf = a buffer in which to store the results. This will be 264 resized if it does not have sufficient space. 265 266 Returns: 267 an array (the same as the buffer passed in, if possible) where the 268 ith element is the result of applying func to the ith element of the 269 input array 270 271 *******************************************************************************/ 272 273 U[] map ( T, U, MapFunc ) 274 ( in T[] array, MapFunc func, ref Buffer!(U) buf ) 275 { 276 static assert (is(U == ReturnTypeOf!(MapFunc))); 277 278 if (buf.length < array.length) 279 buf.length = array.length; 280 281 foreach (i, a; array) 282 buf[i] = func(a); 283 284 return buf[]; 285 } 286 287 /// 288 unittest 289 { 290 Buffer!(long) buffer; 291 auto mapping = map( 292 [1, 17, 8, 12][], 293 (int i) { return i * 2L; }, 294 buffer 295 ); 296 test!("==")(mapping[], [2L, 34L, 16L, 24L]); 297 } 298 299 U[] map ( T, MapFunc, U = ReturnTypeOf!(MapFunc) ) 300 ( in T[] array, MapFunc func, U[] pseudo_buff = null ) 301 { 302 auto tmp_buffer = Buffer!(U)(pseudo_buff); 303 return map(array, func, tmp_buffer); 304 } 305 306 unittest 307 { 308 auto arr = map([1, 17, 8, 12][], (int i) { return i * 2L; }); 309 test(arr == [2L, 34L, 16L, 24L]); 310 } 311 312 /******************************************************************************* 313 314 Reduce an array of elements to a single element, using a user-supplied 315 reductor function. 316 317 If the array is empty, return the default value for the element type. 318 319 If the array contains only one element, return that element. 320 321 Otherwise, the reductor function will be called on every member of the 322 array and on every resulting element until there is only one element, 323 which is then returned. 324 325 Params: 326 array = the array to reduce 327 func = the reductor function 328 329 Returns: the single element reduction 330 331 *******************************************************************************/ 332 333 ReturnTypeOf!(ReduceFunc) reduce (T, ReduceFunc) 334 ( in T[] array, ReduceFunc func ) 335 { 336 static assert(isCallableType!(ReduceFunc)); 337 338 if (array.length == 0) 339 return ReturnTypeOf!(ReduceFunc).init; 340 T e = array[0]; 341 342 foreach (i, a; array) 343 { 344 if (i == 0) continue; 345 e = func(e, a); 346 } 347 348 return e; 349 } 350 351 /// 352 unittest 353 { 354 auto result = reduce([1, 17, 8, 12][], (int i, int j){ return i * j; }); 355 test!("==")(result, 1632); 356 357 result = reduce("", (char c1, char c2) { return 'X'; }); 358 test!("==")(result, char.init); 359 } 360 361 /******************************************************************************* 362 363 Performs a linear scan of buf from [0 .. buf.length$(RP), creating a new 364 array with just the elements that satisfy pred. The relative order of 365 elements will be preserved. 366 367 Params: 368 array = The array to scan. 369 pred = The evaluation predicate, which should return true if the 370 element satisfies the condition and false if not. This 371 predicate may be any callable type. 372 buf = an optional buffer into which elements are filtered. This 373 is the array that gets returned to you. 374 375 Returns: 376 A new array with just the elements from buf that satisfy pred. 377 378 Notes: 379 While most Array functions that take an output buffer size that buffer 380 optimally, in this case, there is no way of knowing whether the output 381 will be empty or the entire input array. If you have special knowledge 382 in this regard, preallocating the output buffer will be advantageous. 383 384 *******************************************************************************/ 385 386 T[] filter ( T, Pred ) 387 ( in T[] array, Pred pred, ref Buffer!(T) buf) 388 { 389 static assert(isCallableType!(Pred)); 390 391 // Unfortunately, we don't know our output size -- it could be empty or 392 // the length of the input array. So we won't try to do anything fancy 393 // with preallocation. 394 buf.length = 0; 395 396 foreach (i, e; array) 397 { 398 if (pred(e)) 399 { 400 buf ~= e; 401 } 402 } 403 404 return buf[]; 405 } 406 407 /// 408 unittest 409 { 410 Buffer!(char) result; 411 test!("==")(filter("aabbaab", 412 (char c) { return c == 'a'; }, result), "aaaa"); 413 } 414 415 T[] filter ( T, Pred ) 416 ( in T[] array, Pred pred, T[] pseudo_buf = null ) 417 { 418 auto buffer = Buffer!(T)(pseudo_buf); 419 return filter(array, pred, buffer)[]; 420 } 421 422 unittest 423 { 424 test!("==")(filter("aabbaab", 425 (char c) { return c == 'a'; }), "aaaa"); 426 } 427 428 unittest 429 { 430 void test( cstring array, scope bool delegate( char ) dg, size_t num ) 431 { 432 Buffer!(char) buf; 433 auto r = filter( array, dg, buf ); 434 .test( r.length == num ); 435 size_t rpos = 0; 436 foreach( pos, cur; buf ) 437 { 438 if ( dg( cur ) ) 439 { 440 .test( r[rpos] == cur ); 441 rpos++; 442 } 443 } 444 445 .test( rpos == num ); 446 } 447 448 test( "abcdefghij", ( char c ) { return c == 'x'; }, 0 ); 449 test( "xabcdefghi", ( char c ) { return c == 'x'; }, 1 ); 450 test( "abcdefghix", ( char c ) { return c == 'x'; }, 1 ); 451 test( "abxxcdefgh", ( char c ) { return c == 'x'; }, 2 ); 452 test( "xaxbcdxxex", ( char c ) { return c == 'x'; }, 5 ); 453 } 454 455 /******************************************************************************* 456 457 Removes all instances of match from source. 458 459 TODO: merge with `filter` 460 461 Template params: 462 T = type of array element 463 464 Params: 465 src = source array to search 466 match = pattern to remove from source array 467 result = buffer to write resulting array to 468 469 Returns: 470 result 471 472 *******************************************************************************/ 473 474 public T[] remove ( T ) ( in T[] source, in T[] match, ref Buffer!(T) result ) 475 { 476 T[] replacement = null; 477 return substitute(source, match, replacement, result); 478 } 479 480 /// 481 unittest 482 { 483 Buffer!(char) result; 484 remove("aaabbbaaa"[], "bbb"[], result); 485 test!("==")(result[], "aaaaaa"[]); 486 } 487 488 public T[] remove ( T ) ( in T[] source, in T[] match, ref T[] result ) 489 { 490 return remove(source, match, *cast(Buffer!(T)*) &result); 491 } 492 493 unittest 494 { 495 mstring result; 496 remove("aaabbbaaa"[], "bbb"[], result); 497 test!("==")(result, "aaaaaa"[]); 498 } 499 /******************************************************************************* 500 501 Split the provided array wherever a pattern instance is found, and return 502 the resultant segments. The pattern is excluded from each of the segments. 503 504 Note that the src content is not duplicated by this function, but is sliced 505 instead. 506 507 (Adapted from ocean.text.Util : split, which isn't memory safe.) 508 509 Template params: 510 T = type of array element 511 512 Params: 513 src = source array to split 514 pattern = pattern to split array by 515 result = receives split array segments (slices into src) 516 517 Returns: 518 result 519 520 *******************************************************************************/ 521 522 public T3[] split ( T1, T2, T3 ) ( T1[] src, T2[] pattern, 523 ref Buffer!(T3) result ) 524 { 525 result.length = 0; 526 527 while (true) 528 { 529 auto index = find(src, pattern); 530 result ~= src[0 .. index]; 531 if (index < src.length) 532 { 533 index += pattern.length; 534 src = src[index .. $]; 535 } 536 else 537 break; 538 } 539 540 return result[]; 541 } 542 543 /// 544 unittest 545 { 546 Buffer!(cstring) result; 547 split("aaa..bbb..ccc", "..", result); 548 test!("==")(result[], [ "aaa", "bbb", "ccc" ]); 549 } 550 551 unittest 552 { 553 Buffer!(cstring) result; 554 555 split(`abc"def"`, `"`, result); 556 test!("==")(result[], [ "abc", "def", "" ]); 557 558 split(`abc"def"`.dup, `"`, result); 559 test!("==")(result[], [ "abc", "def", "" ]); 560 } 561 562 public T3[][] split ( T1, T2, T3 ) ( T1[] src, T2[] pattern, ref T3[][] result ) 563 { 564 auto buffer = cast(Buffer!(T3[])*) &result; 565 return split(src, pattern, *buffer)[]; 566 } 567 568 unittest 569 { 570 istring[] result; 571 split("aaa..bbb..ccc", "..", result); 572 test!("==")(result, [ "aaa", "bbb", "ccc" ]); 573 } 574 575 unittest 576 { 577 mstring[] result; 578 split("aaa.bbb.".dup, ".", result); 579 test!("==")(result, [ "aaa", "bbb", "" ]); 580 } 581 582 /******************************************************************************* 583 584 Substitute all instances of match from source. Set replacement to null in 585 order to remove instead of replace (or use the remove() function, below). 586 587 Template params: 588 T = type of array element 589 590 Params: 591 src = source array to search 592 match = pattern to match in source array 593 replacement = pattern to replace matched sub-arrays 594 result = buffer to write resulting array to 595 596 Returns: 597 result 598 599 *******************************************************************************/ 600 601 public T[] substitute ( T ) ( in T[] source, in T[] match, 602 in T[] replacement, ref Buffer!(T) result ) 603 { 604 result.length = 0; 605 const(T)[] src = source; 606 607 do 608 { 609 auto index = find(src, match); 610 result ~= src[0 .. index]; 611 if (index < src.length) 612 { 613 result ~= replacement; 614 index += match.length; 615 } 616 src = src[index .. $]; 617 } 618 while (src.length); 619 620 return result[]; 621 } 622 623 /// 624 unittest 625 { 626 Buffer!(char) result; 627 substitute("some string", "ring", "oops", result); 628 test!("==")(result[], "some stoops"); 629 } 630 631 public T[] substitute ( T ) ( in T[] source, in T[] match, 632 in T[] replacement, ref T[] result ) 633 { 634 return substitute(source, match, replacement, 635 * cast(Buffer!(char)*) &result); 636 } 637 638 unittest 639 { 640 mstring result; 641 substitute("some string", "ring", "oops", result); 642 test!("==")(result[], "some stoops"); 643 } 644 645 /******************************************************************************* 646 647 Creates a single element dynamic array that slices val. This will not 648 allocate memory in contrast to the '[val]' expression. 649 650 Params: 651 val = value to slice 652 653 Returns: 654 single element dynamic array that slices val. 655 656 *******************************************************************************/ 657 658 public T[] toArray ( T ) ( ref T val ) 659 { 660 return (&val)[0 .. 1]; 661 } 662 663 /// 664 unittest 665 { 666 int x; 667 int[] arr = toArray(x); 668 } 669 670 /******************************************************************************* 671 672 Assigns elements to an array. 673 674 The function is a replacement for declaring an array literal, it does 675 not allocate GC memory and ensures the length of the array matches the 676 number of variadic arguments. 677 678 Params: 679 T = the type of array element 680 array = the array to assign the elements to 681 elements = the elements to add to the array 682 683 *******************************************************************************/ 684 685 public void fill (T) (T[] array, T[] elements...) 686 { 687 array[] = elements[]; 688 } 689 690 /// 691 unittest 692 { 693 int[10] array; 694 695 auto previous_ptr = array.ptr; 696 testNoAlloc(array.fill(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); 697 test!("is")(previous_ptr, array.ptr); 698 699 foreach (i, item; array) 700 test!("==")(array[i], item); 701 } 702 703 /// 704 unittest 705 { 706 int[] array; 707 708 array.length = 10; 709 assumeSafeAppend(array); 710 711 auto previous_ptr = array.ptr; 712 testNoAlloc(array.fill(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); 713 test!("is")(previous_ptr, array.ptr); 714 715 foreach (i, item; array) 716 test!("==")(array[i], item); 717 }