ocean.core.array.Transformation source code

1 /*******************************************************************************
2 
3     Collection of utilities to create new data based on arrays.
4 
5     All functions in this module must never mutate their arguments. Some of
6     them allocate new GC memory to store return data, some request explicit
7     buffer argument for result.
8 
9     Based on `tango.core.Array` module from Tango library.
10 
11     Copyright:
12         Copyright (C) 2005-2006 Sean Kelly.
13         Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
14         All rights reserved.
15 
16     License:
17         Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
18         See LICENSE_TANGO.txt for details.
19 
20 *******************************************************************************/
21 
22 module ocean.core.array.Transformation;
23 
24 
25 import ocean.meta.traits.Basic;
26 import ocean.meta.types.Function;
27 
28 import ocean.core.array.DefaultPredicates;
29 import ocean.core.Buffer;
30 
31 import ocean.core.array.Search;
32 
33 version (unittest)
34 {
35     import ocean.core.Test;
36     import ocean.meta.types.Qualifiers;
37 }
38 
39 /*******************************************************************************
40 
41     Computes the union of setA and setB as a set operation and returns the
42     retult in a new sorted array.  Both setA and setB are required to be
43     sorted.  If either setA or setB contain duplicates, the result will
44     contain the larger number of duplicates from setA and setB.  When an
45     overlap occurs, entries will be copied from setA.  Comparisons will be
46     performed using the supplied predicate or '<' if none is supplied.
47 
48     Params:
49         setA = The first sorted array to evaluate.
50         setB = The second sorted array to evaluate.
51         pred = The evaluation predicate, which should return true if e1 is
52             less than e2 and false if not.  This predicate may be any
53             callable type.
54 
55     Returns:
56         A new array containing the union of setA and setB.
57 
58 *******************************************************************************/
59 
60 T[] unionOf ( T, Pred = DefaultPredicates.IsLess!(T) )
61     ( in T[] setA, in T[] setB, Pred pred = Pred.init )
62 {
63     static assert( isCallableType!(Pred ) );
64 
65     size_t  posA = 0,
66             posB = 0;
67     T[] setU;
68 
69     while( posA < setA.length && posB < setB.length )
70     {
71         if( pred( setA[posA], setB[posB] ) )
72             setU ~= setA[posA++];
73         else if( pred( setB[posB], setA[posA] ) )
74             setU ~= setB[posB++];
75         else
76             setU ~= setA[posA++], posB++;
77     }
78     setU ~= setA[posA .. $];
79     setU ~= setB[posB .. $];
80     return setU;
81 }
82 
83 ///
84 unittest
85 {
86     test!("==")(unionOf( "", "" ), "");
87     test!("==")(unionOf( "abc", "def" ), "abcdef");
88     test!("==")(unionOf( "abbbcd", "aadeefg" ), "aabbbcdeefg");
89 }
90 
91 /*******************************************************************************
92 
93     Computes the intersection of setA and setB as a set operation and
94     returns the retult in a new sorted array.  Both setA and setB are
95     required to be sorted.  If either setA or setB contain duplicates, the
96     result will contain the smaller number of duplicates from setA and setB.
97     All entries will be copied from setA.  Comparisons will be performed
98     using the supplied predicate or '<' if none is supplied.
99 
100     Params:
101         setA = The first sorted array to evaluate.
102         setB = The second sorted array to evaluate.
103         pred = The evaluation predicate, which should return true if e1 is
104              less than e2 and false if not.  This predicate may be any
105              callable type.
106 
107     Returns:
108         A new array containing the intersection of setA and setB.
109 
110 *******************************************************************************/
111 
112 T[] intersectionOf ( T, Pred = DefaultPredicates.IsLess!(T) )
113     ( in T[] setA, in T[] setB, Pred pred = Pred.init )
114 {
115     static assert( isCallableType!(Pred ) );
116 
117     size_t  posA = 0,
118             posB = 0;
119     T[]  setI;
120 
121     while( posA < setA.length && posB < setB.length )
122     {
123         if( pred( setA[posA], setB[posB] ) )
124             ++posA;
125         else if( pred( setB[posB], setA[posA] ) )
126             ++posB;
127         else
128             setI ~= setA[posA++], posB++;
129     }
130     return setI;
131 }
132 
133 ///
134 unittest
135 {
136     test!("==")(intersectionOf( ""[], ""[] ), "");
137     test!("==")(intersectionOf( "abc"[], "def"[] ), "");
138     test!("==")(intersectionOf( "abbbcd"[], "aabdddeefg"[] ), "abd");
139 }
140 
141 /*******************************************************************************
142 
143     Returns a new array containing all elements in setA which are not
144     present in setB.  Both setA and setB are required to be sorted.
145     Comparisons will be performed using the supplied predicate or '<'
146     if none is supplied.
147 
148     Params:
149     setA = The first sorted array to evaluate.
150     setB = The second sorted array to evaluate.
151     pred = The evaluation predicate, which should return true if e1 is
152         less than e2 and false if not.  This predicate may be any
153         callable type.
154 
155     Returns:
156         A new array containing the elements in setA that are not in setB.
157 
158 *******************************************************************************/
159 
160 T[] missingFrom ( T, Pred = DefaultPredicates.IsLess!(T) )
161     ( in T[] setA, in T[] setB, Pred pred = Pred.init )
162 {
163     static assert( isCallableType!(Pred ) );
164 
165     size_t  posA = 0,
166             posB = 0;
167     T[]  setM;
168 
169     while( posA < setA.length && posB < setB.length )
170     {
171         if( pred( setA[posA], setB[posB] ) )
172             setM ~= setA[posA++];
173         else if( pred( setB[posB], setA[posA] ) )
174             ++posB;
175         else
176             ++posA, ++posB;
177     }
178     setM ~= setA[posA .. $];
179     return setM;
180 }
181 
182 ///
183 unittest
184 {
185     test!("==")(missingFrom("abbbcd", "abd"), "bbc" );
186 }
187 
188 unittest
189 {
190     test!("==")(missingFrom( ""[], ""[] ), "" );
191     test!("==")(missingFrom( ""[], "abc"[] ), "" );
192     test!("==")(missingFrom( "abc"[], ""[] ), "abc" );
193     test!("==")(missingFrom( "abc"[], "abc"[] ), "" );
194     test!("==")(missingFrom( "abc"[], "def"[] ), "abc" );
195     test!("==")(missingFrom( "abced"[], "dedf"[] ), "abc" );
196     test!("==")(missingFrom( "abbbcd"[], "abd"[] ), "bbc" );
197     test!("==")(missingFrom( "abcdef"[], "bc"[] ), "adef" );
198 }
199 
200 /*******************************************************************************
201 
202     Returns a new array containing all elements in setA which are not
203     present in setB and the elements in setB which are not present in
204     setA.  Both setA and setB are required to be sorted.  Comparisons
205     will be performed using the supplied predicate or '<' if none is
206     supplied.
207 
208     Params:
209         setA = The first sorted array to evaluate.
210         setB = The second sorted array to evaluate.
211         pred = The evaluation predicate, which should return true if e1 is
212             less than e2 and false if not.  This predicate may be any
213             callable type.
214 
215     Returns:
216         A new array containing the elements in setA that are not in setB
217         and the elements in setB that are not in setA.
218 
219 *******************************************************************************/
220 
221 T[] differenceOf ( T, Pred = DefaultPredicates.IsLess!(T) )
222     ( in T[] setA, in T[] setB, Pred pred = Pred.init )
223 {
224     static assert( isCallableType!(Pred ) );
225 
226     size_t  posA = 0,
227             posB = 0;
228     T[] setD;
229 
230     while( posA < setA.length && posB < setB.length )
231     {
232         if( pred( setA[posA], setB[posB] ) )
233             setD ~= setA[posA++];
234         else if( pred( setB[posB], setA[posA] ) )
235             setD ~= setB[posB++];
236         else
237             ++posA, ++posB;
238     }
239     setD ~= setA[posA .. $];
240     setD ~= setB[posB .. $];
241     return setD;
242 }
243 
244 unittest
245 {
246     test!("==")( differenceOf( ""[], ""[] ), "" );
247     test!("==")( differenceOf( ""[], "abc"[] ), "abc" );
248     test!("==")( differenceOf( "abc"[], ""[] ), "abc" );
249     test!("==")( differenceOf( "abc"[], "abc"[] ), "" );
250     test!("==")( differenceOf( "abc"[], "def"[] ), "abcdef" );
251     test!("==")( differenceOf( "abbbcd"[], "abd"[] ), "bbc" );
252     test!("==")( differenceOf( "abd"[], "abbbcd"[] ), "bbc" );
253 }
254 
255 /*******************************************************************************
256 
257     Apply a function to each element an array. The function's
258     return values are stored in another array.
259 
260     Params:
261         array = the array.
262         func  = the function to apply.
263         buf   = a buffer in which to store the results. This will be
264             resized if it does not have sufficient space.
265 
266     Returns:
267         an array (the same as the buffer passed in, if possible) where the
268         ith element is the result of applying func to the ith element of the
269         input array
270 
271 *******************************************************************************/
272 
273 U[] map ( T, U, MapFunc )
274     ( in T[] array, MapFunc func, ref Buffer!(U) buf )
275 {
276     static assert (is(U == ReturnTypeOf!(MapFunc)));
277 
278     if (buf.length < array.length)
279         buf.length = array.length;
280 
281     foreach (i, a; array)
282         buf[i] = func(a);
283 
284     return buf[];
285 }
286 
287 ///
288 unittest
289 {
290     Buffer!(long) buffer;
291     auto mapping = map(
292         [1, 17, 8, 12][],
293         (int i) { return i * 2L; },
294         buffer
295     );
296     test!("==")(mapping[], [2L, 34L, 16L, 24L]);
297 }
298 
299 U[] map ( T, MapFunc, U = ReturnTypeOf!(MapFunc) )
300     ( in T[] array, MapFunc func, U[] pseudo_buff = null )
301 {
302     auto tmp_buffer = Buffer!(U)(pseudo_buff);
303     return map(array, func, tmp_buffer);
304 }
305 
306 unittest
307 {
308     auto arr = map([1, 17, 8, 12][], (int i) { return i * 2L; });
309     test(arr == [2L, 34L, 16L, 24L]);
310 }
311 
312 /*******************************************************************************
313 
314     Reduce an array of elements to a single element, using a user-supplied
315     reductor function.
316 
317     If the array is empty, return the default value for the element type.
318 
319     If the array contains only one element, return that element.
320 
321     Otherwise, the reductor function will be called on every member of the
322     array and on every resulting element until there is only one element,
323     which is then returned.
324 
325     Params:
326         array = the array to reduce
327         func = the reductor function
328 
329     Returns: the single element reduction
330 
331 *******************************************************************************/
332 
333 ReturnTypeOf!(ReduceFunc) reduce (T, ReduceFunc)
334     ( in T[] array, ReduceFunc func )
335 {
336     static assert(isCallableType!(ReduceFunc));
337 
338     if (array.length == 0)
339         return ReturnTypeOf!(ReduceFunc).init;
340     T e = array[0];
341 
342     foreach (i, a; array)
343     {
344         if (i == 0) continue;
345         e = func(e, a);
346     }
347 
348     return e;
349 }
350 
351 ///
352 unittest
353 {
354     auto result = reduce([1, 17, 8, 12][], (int i, int j){ return i * j; });
355     test!("==")(result, 1632);
356 
357     result = reduce("", (char c1, char c2) { return 'X'; });
358     test!("==")(result, char.init);
359 }
360 
361 /*******************************************************************************
362 
363     Performs a linear scan of buf from [0 .. buf.length$(RP), creating a new
364     array with just the elements that satisfy pred.  The relative order of
365     elements will be preserved.
366 
367     Params:
368         array = The array to scan.
369         pred  = The evaluation predicate, which should return true if the
370           element satisfies the condition and false if not.  This
371           predicate may be any callable type.
372         buf   = an optional buffer into which elements are filtered. This
373           is the array that gets returned to you.
374 
375     Returns:
376         A new array with just the elements from buf that satisfy pred.
377 
378     Notes:
379         While most Array functions that take an output buffer size that buffer
380         optimally, in this case, there is no way of knowing whether the output
381         will be empty or the entire input array. If you have special knowledge
382         in this regard, preallocating the output buffer will be advantageous.
383 
384 *******************************************************************************/
385 
386 T[] filter ( T, Pred )
387     ( in T[] array, Pred pred, ref Buffer!(T) buf)
388 {
389     static assert(isCallableType!(Pred));
390 
391     // Unfortunately, we don't know our output size -- it could be empty or
392     // the length of the input array. So we won't try to do anything fancy
393     // with preallocation.
394     buf.length = 0;
395 
396     foreach (i, e; array)
397     {
398         if (pred(e))
399         {
400             buf ~= e;
401         }
402     }
403 
404     return buf[];
405 }
406 
407 ///
408 unittest
409 {
410     Buffer!(char) result;
411     test!("==")(filter("aabbaab",
412         (char c) { return c == 'a'; }, result), "aaaa");
413 }
414 
415 T[] filter ( T, Pred )
416     ( in T[] array, Pred pred, T[] pseudo_buf = null )
417 {
418     auto buffer = Buffer!(T)(pseudo_buf);
419     return filter(array, pred, buffer)[];
420 }
421 
422 unittest
423 {
424     test!("==")(filter("aabbaab",
425         (char c) { return c == 'a'; }), "aaaa");
426 }
427 
428 unittest
429 {
430     void test( cstring array, scope bool delegate( char ) dg, size_t num )
431     {
432         Buffer!(char) buf;
433         auto r = filter( array, dg, buf );
434         .test( r.length == num );
435         size_t rpos = 0;
436         foreach( pos, cur; buf )
437         {
438             if ( dg( cur ) )
439             {
440                 .test( r[rpos] == cur );
441                 rpos++;
442             }
443         }
444 
445         .test( rpos == num );
446     }
447 
448     test( "abcdefghij", ( char c ) { return c == 'x'; },  0 );
449     test( "xabcdefghi", ( char c ) { return c == 'x'; },  1 );
450     test( "abcdefghix", ( char c ) { return c == 'x'; },  1 );
451     test( "abxxcdefgh", ( char c ) { return c == 'x'; },  2 );
452     test( "xaxbcdxxex", ( char c ) { return c == 'x'; },  5 );
453 }
454 
455 /*******************************************************************************
456 
457     Removes all instances of match from source.
458 
459     TODO: merge with `filter`
460 
461     Template params:
462         T = type of array element
463 
464     Params:
465         src = source array to search
466         match = pattern to remove from source array
467         result = buffer to write resulting array to
468 
469     Returns:
470         result
471 
472 *******************************************************************************/
473 
474 public T[] remove ( T ) ( in T[] source, in T[] match, ref Buffer!(T) result )
475 {
476     T[] replacement = null;
477     return substitute(source, match, replacement, result);
478 }
479 
480 ///
481 unittest
482 {
483     Buffer!(char) result;
484     remove("aaabbbaaa"[], "bbb"[], result);
485     test!("==")(result[], "aaaaaa"[]);
486 }
487 
488 public T[] remove ( T ) ( in T[] source, in T[] match, ref T[] result )
489 {
490     return remove(source, match, *cast(Buffer!(T)*) &result);
491 }
492 
493 unittest
494 {
495     mstring result;
496     remove("aaabbbaaa"[], "bbb"[], result);
497     test!("==")(result, "aaaaaa"[]);
498 }
499 /*******************************************************************************
500 
501     Split the provided array wherever a pattern instance is found, and return
502     the resultant segments. The pattern is excluded from each of the segments.
503 
504     Note that the src content is not duplicated by this function, but is sliced
505     instead.
506 
507     (Adapted from ocean.text.Util : split, which isn't memory safe.)
508 
509     Template params:
510         T = type of array element
511 
512     Params:
513         src = source array to split
514         pattern = pattern to split array by
515         result = receives split array segments (slices into src)
516 
517     Returns:
518         result
519 
520 *******************************************************************************/
521 
522 public T3[] split ( T1, T2, T3 ) ( T1[] src, T2[] pattern,
523     ref Buffer!(T3) result )
524 {
525     result.length = 0;
526 
527     while (true)
528     {
529         auto index = find(src, pattern);
530         result ~= src[0 .. index];
531         if (index < src.length)
532         {
533             index += pattern.length;
534             src = src[index .. $];
535         }
536         else
537             break;
538     }
539 
540     return result[];
541 }
542 
543 ///
544 unittest
545 {
546     Buffer!(cstring) result;
547     split("aaa..bbb..ccc", "..", result);
548     test!("==")(result[], [ "aaa", "bbb", "ccc" ]);
549 }
550 
551 unittest
552 {
553     Buffer!(cstring) result;
554 
555     split(`abc"def"`, `"`, result);
556     test!("==")(result[], [ "abc", "def", "" ]);
557 
558     split(`abc"def"`.dup, `"`, result);
559     test!("==")(result[], [ "abc", "def", "" ]);
560 }
561 
562 public T3[][] split ( T1, T2, T3 ) ( T1[] src, T2[] pattern, ref T3[][] result )
563 {
564     auto buffer = cast(Buffer!(T3[])*) &result;
565     return split(src, pattern, *buffer)[];
566 }
567 
568 unittest
569 {
570     istring[] result;
571     split("aaa..bbb..ccc", "..", result);
572     test!("==")(result, [ "aaa", "bbb", "ccc" ]);
573 }
574 
575 unittest
576 {
577     mstring[] result;
578     split("aaa.bbb.".dup, ".", result);
579     test!("==")(result, [ "aaa", "bbb", "" ]);
580 }
581 
582 /*******************************************************************************
583 
584     Substitute all instances of match from source. Set replacement to null in
585     order to remove instead of replace (or use the remove() function, below).
586 
587     Template params:
588         T = type of array element
589 
590     Params:
591         src = source array to search
592         match = pattern to match in source array
593         replacement = pattern to replace matched sub-arrays
594         result = buffer to write resulting array to
595 
596     Returns:
597         result
598 
599 *******************************************************************************/
600 
601 public T[] substitute ( T ) ( in T[] source, in T[] match,
602     in T[] replacement, ref Buffer!(T) result )
603 {
604     result.length = 0;
605     const(T)[] src = source;
606 
607     do
608     {
609         auto index = find(src, match);
610         result ~= src[0 .. index];
611         if (index < src.length)
612         {
613             result ~= replacement;
614             index += match.length;
615         }
616         src = src[index .. $];
617     }
618     while (src.length);
619 
620     return result[];
621 }
622 
623 ///
624 unittest
625 {
626     Buffer!(char) result;
627     substitute("some string", "ring", "oops", result);
628     test!("==")(result[], "some stoops");
629 }
630 
631 public T[] substitute ( T ) ( in T[] source, in T[] match,
632     in T[] replacement, ref T[] result )
633 {
634     return substitute(source, match, replacement,
635         * cast(Buffer!(char)*) &result);
636 }
637 
638 unittest
639 {
640     mstring result;
641     substitute("some string", "ring", "oops", result);
642     test!("==")(result[], "some stoops");
643 }
644 
645 /*******************************************************************************
646 
647     Creates a single element dynamic array that slices val. This will not
648     allocate memory in contrast to the '[val]' expression.
649 
650     Params:
651         val = value to slice
652 
653     Returns:
654         single element dynamic array that slices val.
655 
656 *******************************************************************************/
657 
658 public T[] toArray ( T ) ( ref T val )
659 {
660     return (&val)[0 .. 1];
661 }
662 
663 ///
664 unittest
665 {
666     int x;
667     int[] arr = toArray(x);
668 }
669 
670 /*******************************************************************************
671 
672     Assigns elements to an array.
673 
674     The function is a replacement for declaring an array literal, it does
675     not allocate GC memory and ensures the length of the array matches the
676     number of variadic arguments.
677 
678     Params:
679         T = the type of array element
680         array = the array to assign the elements to
681         elements = the elements to add to the array
682 
683 *******************************************************************************/
684 
685 public void fill (T) (T[] array, T[] elements...)
686 {
687     array[] = elements[];
688 }
689 
690 ///
691 unittest
692 {
693     int[10] array;
694 
695     auto previous_ptr = array.ptr;
696     testNoAlloc(array.fill(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
697     test!("is")(previous_ptr, array.ptr);
698 
699     foreach (i, item; array)
700         test!("==")(array[i], item);
701 }
702 
703 ///
704 unittest
705 {
706     int[] array;
707 
708     array.length = 10;
709     assumeSafeAppend(array);
710 
711     auto previous_ptr = array.ptr;
712     testNoAlloc(array.fill(0, 1, 2, 3, 4, 5, 6, 7, 8, 9));
713     test!("is")(previous_ptr, array.ptr);
714 
715     foreach (i, item; array)
716         test!("==")(array[i], item);
717 }