1 /*******************************************************************************
2 
3     Template for a class implementing a set of buckets containing elements
4     indexed by unique keys. The bucket set contains both a set of buckets and a
5     pool of bucket elements. The bucket elements are structured as linked lists,
6     thus each bucket simply contains a pointer to its first element.
7 
8     The number of buckets in the set is always a power of 2. In this way the
9     getBucket() method, which determines which bucket is responsible for a key,
10     can use a simple bit mask instead of a modulo operation, leading to greater
11     efficiency.
12 
13     The method of bucket element allocation and pool management can be
14     customised by passing a custom IAllocator implementation to the constructor.
15     The default implementation--the BucketSet.FreeBuckets class--uses
16     'new Bucket.Element' for allocation and manages the pool as a linked list of
17     bucket elements. Possible alternative implementations include leaving the
18     pool management up to an external memory manager such as the D or C runtime
19     library using 'new'/'delete' or malloc()/free(), respectively. Also, if the
20     maximum number of elements in the map is known in advance, all elements can
21     be preallocated in a row.
22 
23     Usage:
24         See ocean.util.container.map.HashMap & ocean.util.container.map.HashSet
25 
26     Copyright:
27         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
28         All rights reserved.
29 
30     License:
31         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
32         Alternatively, this file may be distributed under the terms of the Tango
33         3-Clause BSD License (see LICENSE_BSD.txt for details).
34 
35 *******************************************************************************/
36 
37 module ocean.util.container.map.model.BucketSet;
38 
39 
40 import ocean.util.container.map.model.Bucket,
41        ocean.util.container.map.model.BucketInfo,
42        ocean.util.container.map.model.IAllocator;
43 
44 import ocean.core.Array: clear, isClearable;
45 
46 import ocean.core.BitManip: bsr;
47 
48 import ocean.util.container.map.model.BucketElementGCAllocator;
49 
50 version(UnitTest) import ocean.core.Test;
51 
52 /******************************************************************************
53 
54     Generic BucketSet base class
55 
56  ******************************************************************************/
57 
58 public abstract class IBucketSet
59 {
60     import ocean.core.Verify;
61 
62     /**************************************************************************
63 
64         Convenience type alias for subclasses.
65 
66      **************************************************************************/
67 
68     alias .IAllocator IAllocator;
69 
70     /**************************************************************************
71 
72         Map and and bucket statistics like the map length or the number of
73         buckets.
74 
75      **************************************************************************/
76 
77     public BucketInfo bucket_info;
78 
79     /**************************************************************************
80 
81         Bucket element allocator.
82 
83      **************************************************************************/
84 
85     protected IAllocator bucket_element_allocator;
86 
87     /**************************************************************************
88 
89         Bit mask used by the getBucket() method to determine which bucket is
90         responsible for a key.
91 
92      **************************************************************************/
93 
94     private size_t bucket_mask;
95 
96     /**************************************************************************
97 
98         Constructor.
99 
100         Params:
101             bucket_element_allocator = bucket element allocator
102             n = expected number of elements in mapping
103             load_factor = ratio of n to the number of internal buckets. The
104                 desired (approximate) number of elements per bucket. For
105                 example, 0.5 sets the number of buckets to double n; for 2 the
106                 number of buckets is the half of n. load_factor must be greater
107                 than 0. The load factor is basically a trade-off between memory
108                 usage (number of buckets) and search time (number of elements
109                 per bucket).
110 
111      **************************************************************************/
112 
113     protected this ( IAllocator bucket_element_allocator, size_t n, float load_factor = 0.75 )
114     {
115         size_t num_buckets = 1 << this.calcNumBucketsExp2(n, load_factor);
116 
117         this.bucket_mask = num_buckets - 1;
118 
119         this.bucket_info          = new BucketInfo(num_buckets);
120         this.bucket_element_allocator = bucket_element_allocator;
121     }
122 
123     /**************************************************************************
124 
125         Get the length of the buckets.
126 
127         Note: In the D2 port we should use subtyping via 'alias this' and
128         avoid these forwarding functions.
129 
130         Returns:
131             the length of the buckets.
132 
133      **************************************************************************/
134 
135     public final size_t length ( )
136     {
137         return this.bucket_info.length;
138     }
139 
140     /***************************************************************************
141 
142         Removes all elements from all buckets.
143 
144         Note:
145             Beware that calling this method is known to sometimes cause
146             unexpected behaviour when the bucket-set is reused afterwards (where
147             cyclic links are introduced).
148             If you are using one of the of the children *Map classes then
149             call clearErase() instead as it has been reported to properly clear
150             the map.
151 
152         Returns:
153             this instance
154 
155      **************************************************************************/
156 
157     public typeof(this) clear ( )
158     {
159         this.clear_();
160 
161         return this;
162     }
163 
164     /***************************************************************************
165 
166         Changes the number of buckets to 2 ^ exp2.
167 
168         Params:
169             exp2 = exponent of 2 of the new number of buckets
170 
171         Returns:
172             this instance.
173 
174         In:
175             2 ^ exp2 must fit into size_t.
176 
177     ***************************************************************************/
178 
179     abstract typeof (this) setNumBuckets ( uint exp2 );
180 
181     /***************************************************************************
182 
183         Changes the number of buckets to the lowest power of 2 that results in a
184         load factor of at least load_factor with the current number of elements.
185 
186         Params:
187             load_factor = factor of n / number of buckets
188 
189         Returns:
190             this instance.
191 
192         In:
193             load_factor must be greater than 0.
194 
195     ***************************************************************************/
196 
197     public typeof (this) redistribute ( float load_factor = 0.75 )
198     {
199         verify (load_factor > 0);
200 
201         return this.setNumBuckets(this.calcNumBucketsExp2(this.bucket_info.length, load_factor));
202     }
203 
204     /***************************************************************************
205 
206         Removes all elements from all buckets and sets the values to val_init if
207         val_init is not empty.
208 
209         Params:
210             val_init = initial element value
211 
212         Returns:
213             this instance
214 
215      **************************************************************************/
216 
217     protected typeof(this) clear_ ( void[] val_init = null )
218     {
219         this.clearBuckets(val_init);
220 
221         this.bucket_info.clear();
222 
223         return this;
224     }
225 
226     /***************************************************************************
227 
228         Removes all elements from all buckets.
229 
230         Returns:
231             this instance
232 
233      **************************************************************************/
234 
235     abstract protected void clearBuckets ( void[] val_init );
236 
237     /***************************************************************************
238 
239         Calculates the lowest exponent of 2 so that a power of 2 with this
240         exponent is at least n / load_factor.
241 
242         Params:
243             n           = number of expected elements in the set
244             load_factor = desired maximum load factor
245 
246         Returns:
247             exponent of 2.
248 
249         In:
250             load_factor must be greater than 0.
251 
252     ***************************************************************************/
253 
254     public static uint calcNumBucketsExp2 ( size_t n, float load_factor = 0.75 )
255     {
256         verify (load_factor > 0);
257 
258         return n? bsr(cast(size_t)(n / load_factor)) + 1 : 0;
259     }
260 }
261 
262 /******************************************************************************
263 
264     Bucket set class template.
265 
266     Params:
267         V = value size (.sizeof of the value type), may be 0 to store no value
268         K = key type
269 
270  ******************************************************************************/
271 
272 public abstract class BucketSet ( size_t V, K = hash_t ) : IBucketSet
273 {
274     import ocean.core.Verify;
275 
276     /**************************************************************************
277 
278         Bucket type
279 
280     **************************************************************************/
281 
282     public alias .Bucket!(V, K) Bucket;
283 
284 
285     /***************************************************************************
286 
287         List of buckets
288 
289     ***************************************************************************/
290 
291     private Bucket[] buckets;
292 
293     /***************************************************************************
294 
295         Constructor, uses the default implementation for the bucket element
296         allocator: Elements are allocated by 'new' and stored in a free list.
297 
298         Sets the number of buckets to n / load_factor, rounded up to the nearest
299         power or 2.
300 
301         Params:
302             n = expected number of elements in bucket set
303             load_factor = ratio of n to the number of buckets. The desired
304                 (approximate) number of elements per bucket. For example, 0.5
305                 sets the number of buckets to double n; for 2 the number of
306                 buckets is the half of n. load_factor must be greater than 0
307                 (this is asserted in IBucketSet.calcNumBucketsExp2()). The load
308                 factor is basically a trade-off between memory usage (number of
309                 buckets) and search time (number of elements per bucket).
310 
311     ***************************************************************************/
312 
313     protected this ( size_t n, float load_factor = 0.75 )
314     {
315         auto allocator = new BucketElementGCAllocator!(Bucket)();
316         this(allocator, n, load_factor);
317     }
318 
319     /***************************************************************************
320 
321         Constructor.
322 
323         Sets the number of buckets to n / load_factor, rounded up to the nearest
324         power or 2.
325 
326         Params:
327             allocator = allocator to use to allocate with
328             n = expected number of elements in bucket set
329             load_factor = ratio of n to the number of buckets. The desired
330                 (approximate) number of elements per bucket. For example, 0.5
331                 sets the number of buckets to double n; for 2 the number of
332                 buckets is the half of n. load_factor must be greater than 0
333                 (this is asserted in IBucketSet.calcNumBucketsExp2()). The load
334                 factor is basically a trade-off between memory usage (number of
335                 buckets) and search time (number of elements per bucket).
336 
337     ***************************************************************************/
338 
339     protected this ( IAllocator allocator, size_t n, float load_factor = 0.75 )
340     {
341         super(allocator, n, load_factor);
342 
343         this.buckets = new Bucket[this.bucket_info.num_buckets];
344     }
345 
346     /**************************************************************************
347 
348         Ensures that Bucket.init consists only of zero bytes so that the
349         memset() method in clear() will work.
350 
351      **************************************************************************/
352 
353     unittest
354     {
355         test(isClearable!(Bucket),
356                Bucket.stringof ~ ".init contains non-zero byte: " ~
357                typeof (this).stringof ~ ".clear_() will not work");
358     }
359 
360     /***************************************************************************
361 
362         Looks up a mapping from the specified key.
363 
364         Params:
365             key        = key to look up mapping for
366             must_exist = true: verify that the mapping exists, false: the
367                          mapping may or may not exist
368 
369         Returns:
370             a pointer to the element mapped to by the specified key or null if
371             not found and must_exist is false.
372             The caller should make sure that the key is not changed.
373 
374         Out:
375             - The returned array can only be null if must_exist is false.
376             - The length of the returned array is V unless the array is null.
377 
378      ***************************************************************************/
379 
380     final protected Bucket.Element* get_ ( in K key, bool must_exist = false )
381     out (element)
382     {
383         // FIXME: Disabled due to DMD bug 6417, the method parameter argument
384         // values are junk inside this contract.
385 
386         version (none)
387         {
388             if (element)
389             {
390                 assert (element.key == key, "key mismatch");
391             }
392             else
393             {
394                 assert (!must_exist, "element not found");
395             }
396         }
397     }
398     body
399     {
400         auto element = this
401             .buckets[this.toHash(key) & this.bucket_mask]
402             .find(key);
403 
404         if (element)
405         {
406             // cast(bool) to handle Key==Object: opEquals returns int
407             verify (cast(bool)(element.key == key), "key mismatch");
408         }
409         else
410         {
411             verify (!must_exist, "element not found");
412         }
413 
414         return element;
415     }
416 
417     /***************************************************************************
418 
419         Adds or updates a mapping from the specified key.
420 
421         Params:
422             key   = key to add/update mapping for
423             added = set to true if the record did not exist but was added
424 
425         Returns:
426             a pointer to the element mapped to by the specified key. The caller
427             should set the value as desired and make sure that the key is not
428             changed.
429 
430      ***************************************************************************/
431 
432     final protected Bucket.Element* put_ ( K key, out bool added )
433     out (element)
434     {
435         // FIXME: Disabled due to DMD bug 6417, the method parameter argument
436         // values are junk inside this contract.
437 
438         version (none)
439         {
440             assert (element !is null);
441 
442             assert (element.key == key, "key mismatch");
443         }
444     }
445     body
446     {
447         size_t bucket_index = this.toHash(key) & this.bucket_mask;
448 
449         with (this.buckets[bucket_index])
450         {
451             auto element = add(key,
452             {
453                 added = true;
454 
455                 if (has_element)
456                 {
457                     this.bucket_info.put(bucket_index);
458                 }
459                 else
460                 {
461                     this.bucket_info.create(bucket_index);
462                 }
463 
464                 return cast (Bucket.Element*) this.bucket_element_allocator.get();
465             }());
466 
467             verify (element !is null);
468 
469             // cast(bool) to handle Key==Object: opEquals returns int
470             verify (cast(bool)(element.key == key), "key mismatch");
471 
472             return element;
473         }
474     }
475 
476     /***************************************************************************
477 
478         Adds or updates a mapping from the specified key.
479 
480         Params:
481             key = key to add/update mapping for
482 
483         Returns:
484             the element mapped to by the specified key. The caller should set
485             the value as desired and make sure that the key is not changed.
486 
487     ***************************************************************************/
488 
489     final protected Bucket.Element* put_ ( K key )
490     {
491         bool added;
492 
493         return this.put_(key, added);
494     }
495 
496     /***************************************************************************
497 
498         Removes the mapping for the specified key and optionally invokes dg with
499         the value that is about to be removed.
500 
501         Note that, if references to GC-allocated objects (objects or dynamic
502         arrays), it is a good idea to set the value of the element referenced
503         by the element parameter of the callback delegate to null to avoid these
504         objects from being prevented from garbage collection. In general
505         pointers should be set to null for the same reason and to avoid dangling
506         pointers.
507 
508         If the default allocator is used (that is, no allocator instance was
509         passed to the constructor), the value of the element referenced
510         by the element parameter of the callback delegate dg is accessible and
511         remains unchanged after dg returned until the next call to put() or
512         clear().
513 
514         Params:
515             key = key to remove mapping for
516             dg  = optional delegate to call with the removed element value (not
517                   called if key was not found)
518 
519         Returns:
520             true if key was found in the map or false if not. In case of false
521             dg was not called.
522 
523     ***************************************************************************/
524 
525     final protected bool remove_ ( K key, scope void delegate ( ref Bucket.Element element ) dg = null )
526     {
527         size_t bucket_index = this.toHash(key) & this.bucket_mask;
528 
529         Bucket.Element* element = this.buckets[bucket_index].remove(key);
530 
531         scope (exit) if ( element )
532         {
533             this.bucket_info.remove(bucket_index);
534 
535             if (dg)
536             {
537                 dg(*element);
538             }
539 
540             this.bucket_element_allocator.recycle(element);
541         }
542 
543         return !!element;
544     }
545 
546     /***************************************************************************
547 
548         Removes the mapping for the specified key.
549 
550         Params:
551             key = key to remove mapping for
552 
553         Returns:
554             true if key was found and the mapping removed or false otherwise.
555 
556     ***************************************************************************/
557 
558     public bool remove ( K key )
559     {
560         return this.remove_(key);
561     }
562 
563     /***************************************************************************
564 
565         Calculates the hash value from key.
566 
567         Params:
568             key = key to hash
569 
570         Returns:
571             the hash value that corresponds to key.
572 
573     ***************************************************************************/
574 
575     abstract public hash_t toHash ( in K key );
576 
577     /***************************************************************************
578 
579         Changes the number of buckets to 2 ^ exp2.
580 
581         Params:
582             exp2 = exponent of 2 of the new number of buckets
583 
584         Returns:
585             this instance.
586 
587         In:
588             2 ^ exp2 must fit into size_t.
589 
590     ***************************************************************************/
591 
592     public override typeof (this) setNumBuckets ( uint exp2 )
593     {
594         verify (exp2 < size_t.sizeof * 8);
595 
596         size_t n_prev = this.buckets.length,
597         n_new  = 1 << exp2;
598 
599         if (n_prev != n_new)
600         {
601             // Park the bucket elements that are currently in the set.
602 
603             this.bucket_element_allocator.parkElements(this.bucket_info.length,
604             (IAllocator.IParkingStack parked_elements)
605             {
606                 scope Iterator it = this.new Iterator(true);
607 
608                 foreach (ref element; it)
609                 {
610                     parked_elements.push(&element);
611                 }
612 
613                 // Resize the array of buckets and the bucket_info and calculate
614                 // the new bucket_mask.
615 
616                 this.buckets.length = n_new;
617 
618                 .clear(this.buckets[0 .. (n_prev < $)? n_prev : $]);
619 
620                 this.bucket_info.clearResize(n_new);
621 
622                 this.bucket_mask = n_new - 1;
623 
624                 // Put the parked elements back into the buckets.
625 
626                 foreach (element_; parked_elements)
627                 {
628                     auto element = cast (Bucket.Element*) element_,
629                     bucket_index = this.toHash(element.key) & this.bucket_mask;
630 
631                     if (this.bucket_info[bucket_index])
632                     {
633                         verify (this.buckets[bucket_index].has_element,
634                                 "bucket with non-zero length has no element");
635                     }
636                     else
637                     {
638                         verify (!this.bucket_info[bucket_index],
639                                 "bucket with zero length has an element");
640                     }
641 
642                     this.bucket_info.put(bucket_index);
643 
644                     this.buckets[bucket_index].add(element);
645                 }
646             });
647         }
648 
649         return this;
650     }
651 
652     /***************************************************************************
653 
654         Removes all elements from all buckets and sets the values to val_init if
655         val_init is not empty.
656 
657         Params:
658             val_init = initial element value, the length must be V or 0
659 
660         In:
661             val_init.length must be V.
662 
663         Out:
664             all the buckets.first are set to null
665 
666     ***************************************************************************/
667 
668     protected override void clearBuckets ( void[] val_init = null )
669     out
670     {
671         foreach(bucket; this.buckets)
672         {
673             assert(bucket.first == null, "non-Null first bucket element found");
674         }
675     }
676     body
677     {
678         verify (!val_init.length || val_init.length == V);
679 
680         // Recycle all bucket elements.
681 
682         scope Iterator it = this.new Iterator(true);
683 
684         foreach (ref element; it)
685         {
686             static if (V) if (val_init.length)
687             {
688                 element.val[] = cast (ubyte[]) val_init[];
689             }
690 
691             this.bucket_element_allocator.recycle(&element);
692         }
693 
694         // Clear bucket contents.
695         .clear(this.buckets);
696     }
697 
698     /***************************************************************************
699 
700         'foreach' iteration over elements in the set.
701         DO NOT change the element keys during iteration because this will
702         corrupt the map (unless it is guaranteed that the element will go to the
703         same bucket).
704 
705     ***************************************************************************/
706 
707     protected class Iterator
708     {
709         /***********************************************************************
710 
711             Whether to reset the counter after each foreach
712 
713         ***********************************************************************/
714 
715         protected bool reset_after_foreach = true;
716 
717         /***********************************************************************
718 
719             Index of the last bucket that was iterated in the last call to
720             foreach
721 
722         ***********************************************************************/
723 
724         protected size_t last_bucket_index;
725 
726         /***********************************************************************
727 
728             Last element within the last bucket that was iterated
729 
730         ***********************************************************************/
731 
732         protected size_t last_bucket_element;
733 
734         /***********************************************************************
735 
736             Total count of the elements currently iterated
737 
738         ***********************************************************************/
739 
740         protected size_t counter;
741 
742         /***********************************************************************
743 
744             Ctor
745 
746             Params:
747                 reset_after_foreach = whether to reset iteration counters
748                                       after a foreach (true) or not (false)
749 
750         ***********************************************************************/
751 
752         public this ( bool reset_after_foreach = false )
753         {
754             this.reset_after_foreach = reset_after_foreach;
755         }
756 
757         /***********************************************************************
758 
759             Reset the counters, effectively forcing any interrupted iteration to
760             start from the beginning.
761 
762         ***********************************************************************/
763 
764         public void reset ( )
765         {
766             this.last_bucket_index = 0;
767             this.last_bucket_element = 0;
768             this.counter = 0;
769         }
770 
771         /***********************************************************************
772 
773             if reset_after_foreach is true:
774                 resets the counters after each foreach so the next iteration
775                 starts from the beginning
776 
777             if reset_after_foreach is false:
778                 resets the counters only when the whole map was iterated
779 
780             Params:
781                 interrupted = whether the last foreach call was interrupted
782                 using break (true) or not (false)
783 
784         ***********************************************************************/
785 
786         protected void resetIterator ( bool interrupted )
787         {
788             if ( reset_after_foreach || !interrupted )
789             {
790                 this.reset();
791             }
792         }
793 
794         /***********************************************************************
795 
796             Foreach support
797 
798         ***********************************************************************/
799 
800         final protected int opApply ( scope int delegate ( ref Bucket.Element element ) dg )
801         {
802             int tmpDg ( ref size_t i, ref Bucket.Element e )
803             {
804                 return dg(e);
805             }
806 
807             return this.opApply(&tmpDg);
808         }
809 
810         /***********************************************************************
811 
812             Foreach support with counter
813 
814             Instead of remembering the exact pointer we last iterated upon a
815             break, we remember the index within the linked list and re-iterate
816             to that index next time we're called. Using a pointer for this
817             problem would be problematic, probably (alliteration!), because the
818             linked list could change drastically in the mean time. Elements
819             could be removed, especially the one we were remembering. adding
820             checks to make that safe is a lot of hassle and not worth it.
821 
822             As buckets are supposed to be very small anyway, we just remember
823             the index and if the list finished before we reach that index, so be
824             it, we just use the next bucket then.
825 
826         ***********************************************************************/
827 
828         final protected int opApply ( scope int delegate ( ref size_t i,
829                                                      ref Bucket.Element element ) dg )
830         {
831             int result = 0;
832 
833             scope (exit)
834             {
835                 this.resetIterator(result != 0);
836             }
837 
838             if ( this.outer.bucket_info.num_filled_buckets < this.last_bucket_index )
839             {
840                 this.last_bucket_index = this.outer
841                                             .bucket_info
842                                             .num_filled_buckets;
843             }
844 
845             auto remaining_buckets = this.outer
846                                       .bucket_info
847                                       .filled_buckets[this.last_bucket_index .. $];
848 
849             top: foreach (info; remaining_buckets)
850             {
851                 with (this.outer.buckets[info.index])
852                 {
853                     size_t bucket_element_counter = 0;
854                     verify (has_element);
855 
856                     for ( auto element = first; element !is null; )
857                     {
858                         /* element.next needs to be stored before calling dg
859                          * because now dg will modifiy element.next if it
860                          * returns the element to the free list.  */
861                         auto next = element.next;
862 
863                         if ( bucket_element_counter == this.last_bucket_element )
864                         {
865                             result = dg(this.counter, *element);
866 
867                             this.counter++;
868                             this.last_bucket_element++;
869                         }
870 
871                         bucket_element_counter++;
872 
873                         element = next;
874 
875                         if (result) break top;
876                     }
877                 }
878 
879                 this.last_bucket_index++;
880                 this.last_bucket_element = 0;
881             }
882 
883             return result;
884         }
885     }
886 }