1 /*******************************************************************************
2 
3     Template for a class implementing a set of buckets containing elements
4     indexed by unique keys. The bucket set contains both a set of buckets and a
5     pool of bucket elements. The bucket elements are structured as linked lists,
6     thus each bucket simply contains a pointer to its first element.
7 
8     The number of buckets in the set is always a power of 2. In this way the
9     getBucket() method, which determines which bucket is responsible for a key,
10     can use a simple bit mask instead of a modulo operation, leading to greater
11     efficiency.
12 
13     The method of bucket element allocation and pool management can be
14     customised by passing a custom IAllocator implementation to the constructor.
15     The default implementation--the BucketSet.FreeBuckets class--uses
16     'new Bucket.Element' for allocation and manages the pool as a linked list of
17     bucket elements. Possible alternative implementations include leaving the
18     pool management up to an external memory manager such as the D or C runtime
19     library using 'new'/'delete' or malloc()/free(), respectively. Also, if the
20     maximum number of elements in the map is known in advance, all elements can
21     be preallocated in a row.
22 
23     Usage:
24         See ocean.util.container.map.HashMap & ocean.util.container.map.HashSet
25 
26     Copyright:
27         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
28         All rights reserved.
29 
30     License:
31         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
32         Alternatively, this file may be distributed under the terms of the Tango
33         3-Clause BSD License (see LICENSE_BSD.txt for details).
34 
35 *******************************************************************************/
36 
37 module ocean.util.container.map.model.BucketSet;
38 
39 import ocean.meta.types.Qualifiers;
40 
41 import ocean.util.container.map.model.Bucket,
42        ocean.util.container.map.model.BucketInfo,
43        ocean.util.container.map.model.IAllocator;
44 
45 import ocean.core.Array: clear, isClearable;
46 
47 import ocean.core.BitManip: bsr;
48 
49 import ocean.util.container.map.model.BucketElementGCAllocator;
50 
51 version (unittest) import ocean.core.Test;
52 
53 /******************************************************************************
54 
55     Generic BucketSet base class
56 
57  ******************************************************************************/
58 
59 public abstract class IBucketSet
60 {
61     import ocean.core.Verify;
62 
63     /**************************************************************************
64 
65         Convenience type alias for subclasses.
66 
67      **************************************************************************/
68 
69     alias .IAllocator IAllocator;
70 
71     /**************************************************************************
72 
73         Map and and bucket statistics like the map length or the number of
74         buckets.
75 
76      **************************************************************************/
77 
78     public BucketInfo bucket_info;
79 
80     /**************************************************************************
81 
82         Bucket element allocator.
83 
84      **************************************************************************/
85 
86     protected IAllocator bucket_element_allocator;
87 
88     /**************************************************************************
89 
90         Bit mask used by the getBucket() method to determine which bucket is
91         responsible for a key.
92 
93      **************************************************************************/
94 
95     private size_t bucket_mask;
96 
97     /**************************************************************************
98 
99         Constructor.
100 
101         Params:
102             bucket_element_allocator = bucket element allocator
103             n = expected number of elements in mapping
104             load_factor = ratio of n to the number of internal buckets. The
105                 desired (approximate) number of elements per bucket. For
106                 example, 0.5 sets the number of buckets to double n; for 2 the
107                 number of buckets is the half of n. load_factor must be greater
108                 than 0. The load factor is basically a trade-off between memory
109                 usage (number of buckets) and search time (number of elements
110                 per bucket).
111 
112      **************************************************************************/
113 
114     protected this ( IAllocator bucket_element_allocator, size_t n, float load_factor = 0.75 )
115     {
116         size_t num_buckets = 1 << this.calcNumBucketsExp2(n, load_factor);
117 
118         this.bucket_mask = num_buckets - 1;
119 
120         this.bucket_info          = new BucketInfo(num_buckets);
121         this.bucket_element_allocator = bucket_element_allocator;
122     }
123 
124     /**************************************************************************
125 
126         Get the length of the buckets.
127 
128         Note: In the D2 port we should use subtyping via 'alias this' and
129         avoid these forwarding functions.
130 
131         Returns:
132             the length of the buckets.
133 
134      **************************************************************************/
135 
136     public final size_t length ( )
137     {
138         return this.bucket_info.length;
139     }
140 
141     /***************************************************************************
142 
143         Removes all elements from all buckets.
144 
145         Note:
146             Beware that calling this method is known to sometimes cause
147             unexpected behaviour when the bucket-set is reused afterwards (where
148             cyclic links are introduced).
149             If you are using one of the of the children *Map classes then
150             call clearErase() instead as it has been reported to properly clear
151             the map.
152 
153         Returns:
154             this instance
155 
156      **************************************************************************/
157 
158     public typeof(this) clear ( )
159     {
160         this.clear_();
161 
162         return this;
163     }
164 
165     /***************************************************************************
166 
167         Changes the number of buckets to 2 ^ exp2.
168 
169         Params:
170             exp2 = exponent of 2 of the new number of buckets
171 
172         Returns:
173             this instance.
174 
175         In:
176             2 ^ exp2 must fit into size_t.
177 
178     ***************************************************************************/
179 
180     abstract typeof (this) setNumBuckets ( uint exp2 );
181 
182     /***************************************************************************
183 
184         Changes the number of buckets to the lowest power of 2 that results in a
185         load factor of at least load_factor with the current number of elements.
186 
187         Params:
188             load_factor = factor of n / number of buckets
189 
190         Returns:
191             this instance.
192 
193         In:
194             load_factor must be greater than 0.
195 
196     ***************************************************************************/
197 
198     public typeof (this) redistribute ( float load_factor = 0.75 )
199     {
200         verify (load_factor > 0);
201 
202         return this.setNumBuckets(this.calcNumBucketsExp2(this.bucket_info.length, load_factor));
203     }
204 
205     /***************************************************************************
206 
207         Removes all elements from all buckets and sets the values to val_init if
208         val_init is not empty.
209 
210         Params:
211             val_init = initial element value
212 
213         Returns:
214             this instance
215 
216      **************************************************************************/
217 
218     protected typeof(this) clear_ ( void[] val_init = null )
219     {
220         this.clearBuckets(val_init);
221 
222         this.bucket_info.clear();
223 
224         return this;
225     }
226 
227     /***************************************************************************
228 
229         Removes all elements from all buckets.
230 
231         Returns:
232             this instance
233 
234      **************************************************************************/
235 
236     abstract protected void clearBuckets ( void[] val_init );
237 
238     /***************************************************************************
239 
240         Calculates the lowest exponent of 2 so that a power of 2 with this
241         exponent is at least n / load_factor.
242 
243         Params:
244             n           = number of expected elements in the set
245             load_factor = desired maximum load factor
246 
247         Returns:
248             exponent of 2.
249 
250         In:
251             load_factor must be greater than 0.
252 
253     ***************************************************************************/
254 
255     public static uint calcNumBucketsExp2 ( size_t n, float load_factor = 0.75 )
256     {
257         verify (load_factor > 0);
258 
259         return n? bsr(cast(size_t)(n / load_factor)) + 1 : 0;
260     }
261 }
262 
263 /******************************************************************************
264 
265     Bucket set class template.
266 
267     Params:
268         V = value size (.sizeof of the value type), may be 0 to store no value
269         K = key type
270 
271  ******************************************************************************/
272 
273 public abstract class BucketSet ( size_t V, K = hash_t ) : IBucketSet
274 {
275     import ocean.core.Verify;
276 
277     /**************************************************************************
278 
279         Bucket type
280 
281     **************************************************************************/
282 
283     public alias .Bucket!(V, K) Bucket;
284 
285 
286     /***************************************************************************
287 
288         List of buckets
289 
290     ***************************************************************************/
291 
292     private Bucket[] buckets;
293 
294     /***************************************************************************
295 
296         Constructor, uses the default implementation for the bucket element
297         allocator: Elements are allocated by 'new' and stored in a free list.
298 
299         Sets the number of buckets to n / load_factor, rounded up to the nearest
300         power or 2.
301 
302         Params:
303             n = expected number of elements in bucket set
304             load_factor = ratio of n to the number of buckets. The desired
305                 (approximate) number of elements per bucket. For example, 0.5
306                 sets the number of buckets to double n; for 2 the number of
307                 buckets is the half of n. load_factor must be greater than 0
308                 (this is asserted in IBucketSet.calcNumBucketsExp2()). The load
309                 factor is basically a trade-off between memory usage (number of
310                 buckets) and search time (number of elements per bucket).
311 
312     ***************************************************************************/
313 
314     protected this ( size_t n, float load_factor = 0.75 )
315     {
316         auto allocator = new BucketElementGCAllocator!(Bucket)();
317         this(allocator, n, load_factor);
318     }
319 
320     /***************************************************************************
321 
322         Constructor.
323 
324         Sets the number of buckets to n / load_factor, rounded up to the nearest
325         power or 2.
326 
327         Params:
328             allocator = allocator to use to allocate with
329             n = expected number of elements in bucket set
330             load_factor = ratio of n to the number of buckets. The desired
331                 (approximate) number of elements per bucket. For example, 0.5
332                 sets the number of buckets to double n; for 2 the number of
333                 buckets is the half of n. load_factor must be greater than 0
334                 (this is asserted in IBucketSet.calcNumBucketsExp2()). The load
335                 factor is basically a trade-off between memory usage (number of
336                 buckets) and search time (number of elements per bucket).
337 
338     ***************************************************************************/
339 
340     protected this ( IAllocator allocator, size_t n, float load_factor = 0.75 )
341     {
342         super(allocator, n, load_factor);
343 
344         this.buckets = new Bucket[this.bucket_info.num_buckets];
345     }
346 
347     /**************************************************************************
348 
349         Ensures that Bucket.init consists only of zero bytes so that the
350         memset() method in clear() will work.
351 
352      **************************************************************************/
353 
354     unittest
355     {
356         test(isClearable!(Bucket),
357                Bucket.stringof ~ ".init contains non-zero byte: " ~
358                typeof (this).stringof ~ ".clear_() will not work");
359     }
360 
361     /***************************************************************************
362 
363         Looks up a mapping from the specified key.
364 
365         Params:
366             key        = key to look up mapping for
367             must_exist = true: verify that the mapping exists, false: the
368                          mapping may or may not exist
369 
370         Returns:
371             a pointer to the element mapped to by the specified key or null if
372             not found and must_exist is false.
373             The caller should make sure that the key is not changed.
374 
375         Out:
376             - The returned array can only be null if must_exist is false.
377             - The length of the returned array is V unless the array is null.
378 
379      ***************************************************************************/
380 
381     final protected Bucket.Element* get_ ( in K key, bool must_exist = false )
382     out (element)
383     {
384         // FIXME: Disabled due to DMD bug 6417, the method parameter argument
385         // values are junk inside this contract.
386 
387         version (none)
388         {
389             if (element)
390             {
391                 assert (element.key == key, "key mismatch");
392             }
393             else
394             {
395                 assert (!must_exist, "element not found");
396             }
397         }
398     }
399     do
400     {
401         auto element = this
402             .buckets[this.toHash(key) & this.bucket_mask]
403             .find(key);
404 
405         if (element)
406         {
407             // cast(bool) to handle Key==Object: opEquals returns int
408             verify (cast(bool)(element.key == key), "key mismatch");
409         }
410         else
411         {
412             verify (!must_exist, "element not found");
413         }
414 
415         return element;
416     }
417 
418     /***************************************************************************
419 
420         Adds or updates a mapping from the specified key.
421 
422         Params:
423             key   = key to add/update mapping for
424             added = set to true if the record did not exist but was added
425 
426         Returns:
427             a pointer to the element mapped to by the specified key. The caller
428             should set the value as desired and make sure that the key is not
429             changed.
430 
431      ***************************************************************************/
432 
433     final protected Bucket.Element* put_ ( K key, out bool added )
434     out (element)
435     {
436         // FIXME: Disabled due to DMD bug 6417, the method parameter argument
437         // values are junk inside this contract.
438 
439         version (none)
440         {
441             assert (element !is null);
442 
443             assert (element.key == key, "key mismatch");
444         }
445     }
446     do
447     {
448         size_t bucket_index = this.toHash(key) & this.bucket_mask;
449 
450         with (this.buckets[bucket_index])
451         {
452             auto element = add(key,
453             {
454                 added = true;
455 
456                 if (has_element)
457                 {
458                     this.bucket_info.put(bucket_index);
459                 }
460                 else
461                 {
462                     this.bucket_info.create(bucket_index);
463                 }
464 
465                 return cast (Bucket.Element*) this.bucket_element_allocator.get();
466             }());
467 
468             verify (element !is null);
469 
470             // cast(bool) to handle Key==Object: opEquals returns int
471             verify (cast(bool)(element.key == key), "key mismatch");
472 
473             return element;
474         }
475     }
476 
477     /***************************************************************************
478 
479         Adds or updates a mapping from the specified key.
480 
481         Params:
482             key = key to add/update mapping for
483 
484         Returns:
485             the element mapped to by the specified key. The caller should set
486             the value as desired and make sure that the key is not changed.
487 
488     ***************************************************************************/
489 
490     final protected Bucket.Element* put_ ( K key )
491     {
492         bool added;
493 
494         return this.put_(key, added);
495     }
496 
497     /***************************************************************************
498 
499         Removes the mapping for the specified key and optionally invokes dg with
500         the value that is about to be removed.
501 
502         Note that, if references to GC-allocated objects (objects or dynamic
503         arrays), it is a good idea to set the value of the element referenced
504         by the element parameter of the callback delegate to null to avoid these
505         objects from being prevented from garbage collection. In general
506         pointers should be set to null for the same reason and to avoid dangling
507         pointers.
508 
509         If the default allocator is used (that is, no allocator instance was
510         passed to the constructor), the value of the element referenced
511         by the element parameter of the callback delegate dg is accessible and
512         remains unchanged after dg returned until the next call to put() or
513         clear().
514 
515         Params:
516             key = key to remove mapping for
517             dg  = optional delegate to call with the removed element value (not
518                   called if key was not found)
519 
520         Returns:
521             true if key was found in the map or false if not. In case of false
522             dg was not called.
523 
524     ***************************************************************************/
525 
526     final protected bool remove_ ( K key, scope void delegate ( ref Bucket.Element element ) dg = null )
527     {
528         size_t bucket_index = this.toHash(key) & this.bucket_mask;
529 
530         Bucket.Element* element = this.buckets[bucket_index].remove(key);
531 
532         scope (exit) if ( element )
533         {
534             this.bucket_info.remove(bucket_index);
535 
536             if (dg)
537             {
538                 dg(*element);
539             }
540 
541             this.bucket_element_allocator.recycle(element);
542         }
543 
544         return !!element;
545     }
546 
547     /***************************************************************************
548 
549         Removes the mapping for the specified key.
550 
551         Params:
552             key = key to remove mapping for
553 
554         Returns:
555             true if key was found and the mapping removed or false otherwise.
556 
557     ***************************************************************************/
558 
559     public bool remove ( K key )
560     {
561         return this.remove_(key);
562     }
563 
564     /***************************************************************************
565 
566         Calculates the hash value from key.
567 
568         Params:
569             key = key to hash
570 
571         Returns:
572             the hash value that corresponds to key.
573 
574     ***************************************************************************/
575 
576     abstract public hash_t toHash ( in K key );
577 
578     /***************************************************************************
579 
580         Changes the number of buckets to 2 ^ exp2.
581 
582         Params:
583             exp2 = exponent of 2 of the new number of buckets
584 
585         Returns:
586             this instance.
587 
588         In:
589             2 ^ exp2 must fit into size_t.
590 
591     ***************************************************************************/
592 
593     public override typeof (this) setNumBuckets ( uint exp2 )
594     {
595         verify (exp2 < size_t.sizeof * 8);
596 
597         size_t n_prev = this.buckets.length,
598         n_new  = 1 << exp2;
599 
600         if (n_prev != n_new)
601         {
602             // Park the bucket elements that are currently in the set.
603 
604             this.bucket_element_allocator.parkElements(this.bucket_info.length,
605             (IAllocator.IParkingStack parked_elements)
606             {
607                 scope Iterator it = this.new Iterator(true);
608 
609                 foreach (ref element; it)
610                 {
611                     parked_elements.push(&element);
612                 }
613 
614                 // Resize the array of buckets and the bucket_info and calculate
615                 // the new bucket_mask.
616 
617                 assumeSafeAppend(this.buckets);
618                 this.buckets.length = n_new;
619 
620                 .clear(this.buckets[0 .. (n_prev < $)? n_prev : $]);
621 
622                 this.bucket_info.clearResize(n_new);
623 
624                 this.bucket_mask = n_new - 1;
625 
626                 // Put the parked elements back into the buckets.
627 
628                 foreach (element_; parked_elements)
629                 {
630                     auto element = cast (Bucket.Element*) element_,
631                     bucket_index = this.toHash(element.key) & this.bucket_mask;
632 
633                     if (this.bucket_info[bucket_index])
634                     {
635                         verify (this.buckets[bucket_index].has_element,
636                                 "bucket with non-zero length has no element");
637                     }
638                     else
639                     {
640                         verify (!this.bucket_info[bucket_index],
641                                 "bucket with zero length has an element");
642                     }
643 
644                     this.bucket_info.put(bucket_index);
645 
646                     this.buckets[bucket_index].add(element);
647                 }
648             });
649         }
650 
651         return this;
652     }
653 
654     /***************************************************************************
655 
656         Removes all elements from all buckets and sets the values to val_init if
657         val_init is not empty.
658 
659         Params:
660             val_init = initial element value, the length must be V or 0
661 
662         In:
663             val_init.length must be V.
664 
665         Out:
666             all the buckets.first are set to null
667 
668     ***************************************************************************/
669 
670     protected override void clearBuckets ( void[] val_init = null )
671     out
672     {
673         foreach(bucket; this.buckets)
674         {
675             assert(bucket.first == null, "non-Null first bucket element found");
676         }
677     }
678     do
679     {
680         verify (!val_init.length || val_init.length == V);
681 
682         // Recycle all bucket elements.
683 
684         scope Iterator it = this.new Iterator(true);
685 
686         foreach (ref element; it)
687         {
688             static if (V) if (val_init.length)
689             {
690                 element.val[] = cast (ubyte[]) val_init[];
691             }
692 
693             this.bucket_element_allocator.recycle(&element);
694         }
695 
696         // Clear bucket contents.
697         .clear(this.buckets);
698     }
699 
700     /***************************************************************************
701 
702         'foreach' iteration over elements in the set.
703         DO NOT change the element keys during iteration because this will
704         corrupt the map (unless it is guaranteed that the element will go to the
705         same bucket).
706 
707     ***************************************************************************/
708 
709     protected class Iterator
710     {
711         /***********************************************************************
712 
713             Whether to reset the counter after each foreach
714 
715         ***********************************************************************/
716 
717         protected bool reset_after_foreach = true;
718 
719         /***********************************************************************
720 
721             Index of the last bucket that was iterated in the last call to
722             foreach
723 
724         ***********************************************************************/
725 
726         protected size_t last_bucket_index;
727 
728         /***********************************************************************
729 
730             Last element within the last bucket that was iterated
731 
732         ***********************************************************************/
733 
734         protected size_t last_bucket_element;
735 
736         /***********************************************************************
737 
738             Total count of the elements currently iterated
739 
740         ***********************************************************************/
741 
742         protected size_t counter;
743 
744         /***********************************************************************
745 
746             Ctor
747 
748             Params:
749                 reset_after_foreach = whether to reset iteration counters
750                                       after a foreach (true) or not (false)
751 
752         ***********************************************************************/
753 
754         public this ( bool reset_after_foreach = false )
755         {
756             this.reset_after_foreach = reset_after_foreach;
757         }
758 
759         /***********************************************************************
760 
761             Reset the counters, effectively forcing any interrupted iteration to
762             start from the beginning.
763 
764         ***********************************************************************/
765 
766         public void reset ( )
767         {
768             this.last_bucket_index = 0;
769             this.last_bucket_element = 0;
770             this.counter = 0;
771         }
772 
773         /***********************************************************************
774 
775             if reset_after_foreach is true:
776                 resets the counters after each foreach so the next iteration
777                 starts from the beginning
778 
779             if reset_after_foreach is false:
780                 resets the counters only when the whole map was iterated
781 
782             Params:
783                 interrupted = whether the last foreach call was interrupted
784                 using break (true) or not (false)
785 
786         ***********************************************************************/
787 
788         protected void resetIterator ( bool interrupted )
789         {
790             if ( reset_after_foreach || !interrupted )
791             {
792                 this.reset();
793             }
794         }
795 
796         /***********************************************************************
797 
798             Foreach support
799 
800         ***********************************************************************/
801 
802         final protected int opApply ( scope int delegate ( ref Bucket.Element element ) dg )
803         {
804             int tmpDg ( ref size_t i, ref Bucket.Element e )
805             {
806                 return dg(e);
807             }
808 
809             return this.opApply(&tmpDg);
810         }
811 
812         /***********************************************************************
813 
814             Foreach support with counter
815 
816             Instead of remembering the exact pointer we last iterated upon a
817             break, we remember the index within the linked list and re-iterate
818             to that index next time we're called. Using a pointer for this
819             problem would be problematic, probably (alliteration!), because the
820             linked list could change drastically in the mean time. Elements
821             could be removed, especially the one we were remembering. adding
822             checks to make that safe is a lot of hassle and not worth it.
823 
824             As buckets are supposed to be very small anyway, we just remember
825             the index and if the list finished before we reach that index, so be
826             it, we just use the next bucket then.
827 
828         ***********************************************************************/
829 
830         final protected int opApply ( scope int delegate ( ref size_t i,
831                                                      ref Bucket.Element element ) dg )
832         {
833             int result = 0;
834 
835             scope (exit)
836             {
837                 this.resetIterator(result != 0);
838             }
839 
840             if ( this.outer.bucket_info.num_filled_buckets < this.last_bucket_index )
841             {
842                 this.last_bucket_index = this.outer
843                                             .bucket_info
844                                             .num_filled_buckets;
845             }
846 
847             auto remaining_buckets = this.outer
848                                       .bucket_info
849                                       .filled_buckets[this.last_bucket_index .. $];
850 
851             top: foreach (info; remaining_buckets)
852             {
853                 with (this.outer.buckets[info.index])
854                 {
855                     size_t bucket_element_counter = 0;
856                     verify (has_element);
857 
858                     for ( auto element = first; element !is null; )
859                     {
860                         /* element.next needs to be stored before calling dg
861                          * because now dg will modifiy element.next if it
862                          * returns the element to the free list.  */
863                         auto next = element.next;
864 
865                         if ( bucket_element_counter == this.last_bucket_element )
866                         {
867                             result = dg(this.counter, *element);
868 
869                             this.counter++;
870                             this.last_bucket_element++;
871                         }
872 
873                         bucket_element_counter++;
874 
875                         element = next;
876 
877                         if (result) break top;
878                     }
879                 }
880 
881                 this.last_bucket_index++;
882                 this.last_bucket_element = 0;
883             }
884 
885             return result;
886         }
887     }
888 }