1 /*******************************************************************************
2 
3         Copyright:
4             Copyright (c) 2008 Kris Bell.
5             Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
6             All rights reserved.
7 
8         License:
9             Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
10             See LICENSE_TANGO.txt for details.
11 
12         Version:
13             Apr 2008: Initial release
14             Jan 2009: Added GCChunk allocator
15 
16         Authors: Kris, schveiguy
17 
18 *******************************************************************************/
19 
20 module ocean.util.container.Container;
21 
22 import core.memory;
23 
24 import core.stdc.stdlib;
25 import core.stdc.string;
26 
27 /*******************************************************************************
28 
29         Utility functions and constants
30 
31 *******************************************************************************/
32 
33 struct Container
34 {
35         import ocean.core.Verify;
36 
37         /***********************************************************************
38 
39                default initial number of buckets of a non-empty hashmap
40 
41         ***********************************************************************/
42 
43         static size_t defaultInitialBuckets = 31;
44 
45         /***********************************************************************
46 
47                 default load factor for a non-empty hashmap. The hash
48                 table is resized when the proportion of elements per
49                 buckets exceeds this limit
50 
51         ***********************************************************************/
52 
53         static float defaultLoadFactor = 0.75f;
54 
55         /***********************************************************************
56 
57                 generic value reaper, which does nothing
58 
59         ***********************************************************************/
60 
61         static void reap(V) (V v) {}
62 
63         /***********************************************************************
64 
65                 generic key/value reaper, which does nothing
66 
67         ***********************************************************************/
68 
69         static void reap(K, V) (K k, V v) {}
70 
71         /***********************************************************************
72 
73                 generic hash function, using the default hashing. Thanks
74                 to 'mwarning' for the optimization suggestion
75 
76         ***********************************************************************/
77 
78         static size_t hash(K) (K k, size_t length)
79         {
80                 static if (is(K : int)   || is(K : uint)   ||
81                            is(K : long)  || is(K : ulong)  ||
82                            is(K : short) || is(K : ushort) ||
83                            is(K : byte)  || is(K : ubyte)  ||
84                            is(K : char)  || is(K : wchar)  || is (K : dchar))
85                            return cast(size_t) (k % length);
86                 else
87                    return (typeid(K).getHash(&k) & 0x7FFFFFFF) % length;
88         }
89 
90 
91         /***********************************************************************
92 
93                 GC Chunk allocator
94 
95                 Can save approximately 30% memory for small elements (tested
96                 with integer elements and a chunk size of 1000), and is at
97                 least twice as fast at adding elements when compared to the
98                 generic allocator (approximately 50x faster with LinkedList)
99 
100                 Operates safely with GC managed entities
101 
102         ***********************************************************************/
103 
104         struct ChunkGC(T)
105         {
106                 static assert (T.sizeof >= (T*).sizeof, "The ChunkGC allocator can only be used for data sizes of at least " ~ ((T*).sizeof).stringof[0..$-1] ~ " bytes!");
107 
108                 private struct Cache {Cache* next;}
109 
110                 private Cache*  cache;
111                 private T[][]   lists;
112                 private size_t  chunks = 256;
113 
114                 /***************************************************************
115 
116                         allocate a T-sized memory chunk
117 
118                 ***************************************************************/
119 
120                 T* allocate ()
121                 {
122                         if (cache is null)
123                             newlist;
124                         auto p = cache;
125                         cache = p.next;
126                         return cast(T*) p;
127                 }
128 
129                 /***************************************************************
130 
131                         allocate an array of T* sized memory chunks
132 
133                 ***************************************************************/
134 
135                 T*[] allocate (size_t count)
136                 {
137                         auto p = (cast(T**) calloc(count, (T*).sizeof)) [0 .. count];
138                         GC.addRange (cast(void*) p, count * (T*).sizeof);
139                         return p;
140                 }
141 
142                 /***************************************************************
143 
144                         Invoked when a specific T*[] is discarded
145 
146                 ***************************************************************/
147 
148                 void collect (T*[] t)
149                 {
150                         if (t.ptr)
151                            {
152                            GC.removeRange (t.ptr);
153                            free (t.ptr);
154                            }
155                 }
156 
157                 /***************************************************************
158 
159                         Invoked when a specific T is discarded
160 
161                 ***************************************************************/
162 
163                 void collect (T* p)
164                 {
165                         verify(p !is null);
166                         auto d = cast(Cache*) p;
167                         //*p = T.init;
168                         d.next = cache;
169                         cache = d;
170                 }
171 
172                 /***************************************************************
173 
174                         Invoked when clear/reset is called on the host.
175                         This is a shortcut to clear everything allocated.
176 
177                         Should return true if supported, or false otherwise.
178                         False return will cause a series of discrete collect
179                         calls
180 
181                 ***************************************************************/
182 
183                 bool collect (bool all = true)
184                 {
185                         if (all)
186                            {
187                            foreach (ref list; lists)
188                                    {
189                                    GC.removeRange (list.ptr);
190                                    free (list.ptr);
191                                    list = null;
192                                    }
193                            cache = null;
194                            lists = null;
195                            return true;
196                            }
197                         return false;
198                 }
199 
200                 /***************************************************************
201 
202                         set the chunk size and prepopulate with nodes
203 
204                 ***************************************************************/
205 
206                 void config (size_t chunks, size_t allocate=0)
207                 {
208                         this.chunks = chunks;
209                         if (allocate)
210                             for (ptrdiff_t i=allocate/chunks+1; i--;)
211                                  newlist;
212                 }
213 
214                 /***************************************************************
215 
216                         list manager
217 
218                 ***************************************************************/
219 
220                 private void newlist ()
221                 {
222                         lists.length = lists.length + 1;
223                         auto p = (cast(T*) calloc (chunks, T.sizeof)) [0 .. chunks];
224                         lists[$-1] = p;
225                         GC.addRange (p.ptr, T.sizeof * chunks);
226                         auto head = cache;
227                         foreach (ref node; p)
228                                 {
229                                 auto d = cast(Cache*) &node;
230                                 d.next = head;
231                                 head = d;
232                                 }
233                         cache = head;
234                 }
235         }
236 
237 
238         /***********************************************************************
239 
240                 Chunk allocator (non GC)
241 
242                 Can save approximately 30% memory for small elements (tested
243                 with integer elements and a chunk size of 1000), and is at
244                 least twice as fast at adding elements when compared to the
245                 default allocator (approximately 50x faster with LinkedList)
246 
247                 Note that, due to GC behaviour, you should not configure
248                 a custom allocator for containers holding anything managed
249                 by the GC. For example, you cannot use a MallocAllocator
250                 to manage a container of classes or strings where those
251                 were allocated by the GC. Once something is owned by a GC
252                 then it's lifetime must be managed by GC-managed entities
253                 (otherwise the GC may think there are no live references
254                 and prematurely collect container contents).
255 
256                 You can explicity manage the collection of keys and values
257                 yourself by providing a reaper delegate. For example, if
258                 you use a MallocAllocator to manage key/value pairs which
259                 are themselves allocated via malloc, then you should also
260                 provide a reaper delegate to collect those as required.
261 
262                 The primary benefit of this allocator is to avoid the GC
263                 scanning the date-structures involved. Use ChunkGC where
264                 that option is unwarranted, or if you have GC-managed data
265                 instead
266 
267         ***********************************************************************/
268 
269         struct Chunk(T)
270         {
271                 static assert (T.sizeof >= (T*).sizeof, "The Chunk allocator can only be used for data sizes of at least " ~ ((T*).sizeof).stringof[0..$-1] ~ " bytes!");
272 
273                 private struct Cache {Cache* next;}
274 
275                 private Cache*  cache;
276                 private T[][]   lists;
277                 private size_t  chunks = 256;
278 
279                 /***************************************************************
280 
281                         allocate a T-sized memory chunk
282 
283                 ***************************************************************/
284 
285                 T* allocate ()
286                 {
287                         if (cache is null)
288                             newlist;
289                         auto p = cache;
290                         cache = p.next;
291                         return cast(T*) p;
292                 }
293 
294                 /***************************************************************
295 
296                         allocate an array of T* sized memory chunks
297 
298                 ***************************************************************/
299 
300                 T*[] allocate (size_t count)
301                 {
302                         return (cast(T**) calloc(count, (T*).sizeof)) [0 .. count];
303                 }
304 
305                 /***************************************************************
306 
307                         Invoked when a specific T*[] is discarded
308 
309                 ***************************************************************/
310 
311                 void collect (T*[] t)
312                 {
313                         if (t.ptr)
314                             free (t.ptr);
315                 }
316 
317                 /***************************************************************
318 
319                         Invoked when a specific T is discarded
320 
321                 ***************************************************************/
322 
323                 void collect (T* p)
324                 {
325                         verify(p !is null);
326                         auto d = cast(Cache*) p;
327                         d.next = cache;
328                         cache = d;
329                 }
330 
331                 /***************************************************************
332 
333                         Invoked when clear/reset is called on the host.
334                         This is a shortcut to clear everything allocated.
335 
336                         Should return true if supported, or false otherwise.
337                         False return will cause a series of discrete collect
338                         calls
339 
340                 ***************************************************************/
341 
342                 bool collect (bool all = true)
343                 {
344                         if (all)
345                            {
346                            foreach (ref list; lists)
347                                    {
348                                    free (list.ptr);
349                                    list = null;
350                                    }
351                            cache = null;
352                            lists = null;
353                            return true;
354                            }
355                         return false;
356                 }
357 
358                 /***************************************************************
359 
360                         set the chunk size and prepopulate with nodes
361 
362                 ***************************************************************/
363 
364                 void config (size_t chunks, int allocate=0)
365                 {
366                         this.chunks = chunks;
367                         if (allocate)
368                             for (int i=allocate/chunks+1; i--;)
369                                  newlist;
370                 }
371 
372                 /***************************************************************
373 
374                         list manager
375 
376                 ***************************************************************/
377 
378                 private void newlist ()
379                 {
380                         lists.length = lists.length + 1;
381                         auto p = (cast(T*) calloc (chunks, T.sizeof)) [0 .. chunks];
382                         lists[$-1] = p;
383                         auto head = cache;
384                         foreach (ref node; p)
385                                 {
386                                 auto d = cast(Cache*) &node;
387                                 d.next = head;
388                                 head = d;
389                                 }
390                         cache = head;
391                 }
392         }
393 
394 
395         /***********************************************************************
396 
397                 generic GC allocation manager
398 
399                 Slow and expensive in memory costs
400 
401         ***********************************************************************/
402 
403         struct Collect(T)
404         {
405                 /***************************************************************
406 
407                         allocate a T sized memory chunk
408 
409                 ***************************************************************/
410 
411                 T* allocate ()
412                 {
413                         return cast(T*) GC.calloc (T.sizeof);
414                 }
415 
416                 /***************************************************************
417 
418                         allocate an array of T sized memory chunks
419 
420                 ***************************************************************/
421 
422                 T*[] allocate (size_t count)
423                 {
424                         return new T*[count];
425                 }
426 
427                 /***************************************************************
428 
429                         Invoked when a specific T[] is discarded
430 
431                 ***************************************************************/
432 
433                 void collect (T* p)
434                 {
435                         if (p)
436                             delete p;
437                 }
438 
439                 /***************************************************************
440 
441                         Invoked when a specific T[] is discarded
442 
443                 ***************************************************************/
444 
445                 void collect (T*[] t)
446                 {
447                         if (t)
448                             delete t;
449                 }
450 
451                 /***************************************************************
452 
453                         Invoked when clear/reset is called on the host.
454                         This is a shortcut to clear everything allocated.
455 
456                         Should return true if supported, or false otherwise.
457                         False return will cause a series of discrete collect
458                         calls
459 
460                 ***************************************************************/
461 
462                 bool collect (bool all = true)
463                 {
464                         return false;
465                 }
466 
467                 /***************************************************************
468 
469                         set the chunk size and prepopulate with nodes
470 
471                 ***************************************************************/
472 
473                 void config (size_t chunks, int allocate=0)
474                 {
475                 }
476         }
477 
478 
479         /***********************************************************************
480 
481                 Malloc allocation manager.
482 
483                 Note that, due to GC behaviour, you should not configure
484                 a custom allocator for containers holding anything managed
485                 by the GC. For example, you cannot use a MallocAllocator
486                 to manage a container of classes or strings where those
487                 were allocated by the GC. Once something is owned by a GC
488                 then it's lifetime must be managed by GC-managed entities
489                 (otherwise the GC may think there are no live references
490                 and prematurely collect container contents).
491 
492                 You can explicity manage the collection of keys and values
493                 yourself by providing a reaper delegate. For example, if
494                 you use a MallocAllocator to manage key/value pairs which
495                 are themselves allocated via malloc, then you should also
496                 provide a reaper delegate to collect those as required.
497 
498         ***********************************************************************/
499 
500         struct Malloc(T)
501         {
502                 /***************************************************************
503 
504                         allocate an array of T sized memory chunks
505 
506                 ***************************************************************/
507 
508                 T* allocate ()
509                 {
510                         return cast(T*) calloc (1, T.sizeof);
511                 }
512 
513                 /***************************************************************
514 
515                         allocate an array of T sized memory chunks
516 
517                 ***************************************************************/
518 
519                 T*[] allocate (size_t count)
520                 {
521                         return (cast(T**) calloc(count, (T*).sizeof)) [0 .. count];
522                 }
523 
524                 /***************************************************************
525 
526                         Invoked when a specific T[] is discarded
527 
528                 ***************************************************************/
529 
530                 void collect (T*[] t)
531                 {
532                         if (t.length)
533                             free (t.ptr);
534                 }
535 
536                 /***************************************************************
537 
538                         Invoked when a specific T[] is discarded
539 
540                 ***************************************************************/
541 
542                 void collect (T* p)
543                 {
544                         if (p)
545                             free (p);
546                 }
547 
548                 /***************************************************************
549 
550                         Invoked when clear/reset is called on the host.
551                         This is a shortcut to clear everything allocated.
552 
553                         Should return true if supported, or false otherwise.
554                         False return will cause a series of discrete collect
555                         calls
556 
557                 ***************************************************************/
558 
559                 bool collect (bool all = true)
560                 {
561                         return false;
562                 }
563 
564                 /***************************************************************
565 
566                         set the chunk size and prepopulate with nodes
567 
568                 ***************************************************************/
569 
570                 void config (size_t chunks, int allocate=0)
571                 {
572                 }
573         }
574 
575 
576 version (prior_allocator)
577 {
578         /***********************************************************************
579 
580                 GCChunk allocator
581 
582                 Like the Chunk allocator, this allocates elements in chunks,
583                 but allows you to allocate elements that can have GC pointers.
584 
585                 Tests have shown about a 60% speedup when using the GC chunk
586                 allocator for a Hashmap!(int, int).
587 
588         ***********************************************************************/
589 
590         struct GCChunk(T, uint chunkSize)
591         {
592             static if(T.sizeof < (void*).sizeof)
593             {
594                 static assert(false, "Error, allocator for " ~ T.stringof ~ " failed to instantiate");
595             }
596 
597             /**
598              * This is the form used to link recyclable elements together.
599              */
600             struct element
601             {
602                 element *next;
603             }
604 
605             /**
606              * A chunk of elements
607              */
608             struct chunk
609             {
610                 /**
611                  * The next chunk in the chain
612                  */
613                 chunk *next;
614 
615                 /**
616                  * The previous chunk in the chain.  Required for O(1) removal
617                  * from the chain.
618                  */
619                 chunk *prev;
620 
621                 /**
622                  * The linked list of free elements in the chunk.  This list is
623                  * amended each time an element in this chunk is freed.
624                  */
625                 element *freeList;
626 
627                 /**
628                  * The number of free elements in the freeList.  Used to determine
629                  * whether this chunk can be given back to the GC
630                  */
631                 uint numFree;
632 
633                 /**
634                  * The elements in the chunk.
635                  */
636                 T[chunkSize] elems;
637 
638                 /**
639                  * Allocate a T* from the free list.
640                  */
641                 T *allocateFromFree()
642                 {
643                     element *x = freeList;
644                     freeList = x.next;
645                     //
646                     // clear the pointer, this clears the element as if it was
647                     // newly allocated
648                     //
649                     x.next = null;
650                     numFree--;
651                     return cast(T*)x;
652                 }
653 
654                 /**
655                  * deallocate a T*, send it to the free list
656                  *
657                  * returns true if this chunk no longer has any used elements.
658                  */
659                 bool deallocate(T *t)
660                 {
661                     //
662                     // clear the element so the GC does not interpret the element
663                     // as pointing to anything else.
664                     //
665                     memset(t, 0, (T).sizeof);
666                     element *x = cast(element *)t;
667                     x.next = freeList;
668                     freeList = x;
669                     return (++numFree == chunkSize);
670                 }
671             }
672 
673             /**
674              * The chain of used chunks.  Used chunks have had all their elements
675              * allocated at least once.
676              */
677             chunk *used;
678 
679             /**
680              * The fresh chunk.  This is only used if no elements are available in
681              * the used chain.
682              */
683             chunk *fresh;
684 
685             /**
686              * The next element in the fresh chunk.  Because we don't worry about
687              * the free list in the fresh chunk, we need to keep track of the next
688              * fresh element to use.
689              */
690             uint nextFresh;
691 
692             /**
693              * Allocate a T*
694              */
695             T* allocate()
696             {
697                 if(used !is null && used.numFree > 0)
698                 {
699                     //
700                     // allocate one element of the used list
701                     //
702                     T* result = used.allocateFromFree();
703                     if(used.numFree == 0)
704                         //
705                         // move used to the end of the list
706                         //
707                         used = used.next;
708                     return result;
709                 }
710 
711                 //
712                 // no used elements are available, allocate out of the fresh
713                 // elements
714                 //
715                 if(fresh is null)
716                 {
717                     fresh = new chunk;
718                     nextFresh = 0;
719                 }
720 
721                 T* result = &fresh.elems[nextFresh];
722                 if(++nextFresh == chunkSize)
723                 {
724                     if(used is null)
725                     {
726                         used = fresh;
727                         fresh.next = fresh;
728                         fresh.prev = fresh;
729                     }
730                     else
731                     {
732                         //
733                         // insert fresh into the used chain
734                         //
735                         fresh.prev = used.prev;
736                         fresh.next = used;
737                         fresh.prev.next = fresh;
738                         fresh.next.prev = fresh;
739                         if(fresh.numFree != 0)
740                         {
741                             //
742                             // can recycle elements from fresh
743                             //
744                             used = fresh;
745                         }
746                     }
747                     fresh = null;
748                 }
749                 return result;
750             }
751 
752             T*[] allocate(uint count)
753             {
754                 return new T*[count];
755             }
756 
757 
758             /**
759              * free a T*
760              */
761             void collect(T* t)
762             {
763                 //
764                 // need to figure out which chunk t is in
765                 //
766                 chunk *cur = cast(chunk *)GC.addrOf(t);
767 
768                 if(cur !is fresh && cur.numFree == 0)
769                 {
770                     //
771                     // move cur to the front of the used list, it has free nodes
772                     // to be used.
773                     //
774                     if(cur !is used)
775                     {
776                         if(used.numFree != 0)
777                         {
778                             //
779                             // first, unlink cur from its current location
780                             //
781                             cur.prev.next = cur.next;
782                             cur.next.prev = cur.prev;
783 
784                             //
785                             // now, insert cur before used.
786                             //
787                             cur.prev = used.prev;
788                             cur.next = used;
789                             used.prev = cur;
790                             cur.prev.next = cur;
791                         }
792                         used = cur;
793                     }
794                 }
795 
796                 if(cur.deallocate(t))
797                 {
798                     //
799                     // cur no longer has any elements in use, it can be deleted.
800                     //
801                     if(cur.next is cur)
802                     {
803                         //
804                         // only one element, don't free it.
805                         //
806                     }
807                     else
808                     {
809                         //
810                         // remove cur from list
811                         //
812                         if(used is cur)
813                         {
814                             //
815                             // update used pointer
816                             //
817                             used = used.next;
818                         }
819                         cur.next.prev = cur.prev;
820                         cur.prev.next = cur.next;
821                         delete cur;
822                     }
823                 }
824             }
825 
826             void collect(T*[] t)
827             {
828                 if(t)
829                     delete t;
830             }
831 
832             /**
833              * Deallocate all chunks used by this allocator.  Depends on the GC to do
834              * the actual collection
835              */
836             bool collect(bool all = true)
837             {
838                 used = null;
839 
840                 //
841                 // keep fresh around
842                 //
843                 if(fresh !is null)
844                 {
845                     nextFresh = 0;
846                     fresh.freeList = null;
847                 }
848 
849                 return true;
850             }
851 
852             void config (size_t chunks, int allocate=0)
853             {
854             }
855         }
856 
857         /***********************************************************************
858 
859                 aliases to the correct Default allocator depending on how big
860                 the type is.  It makes less sense to use a GCChunk allocator
861                 if the type is going to be larger than a page (currently there
862                 is no way to get the page size from the GC, so we assume 4096
863                 bytes).  If not more than one unit can fit into a page, then
864                 we use the default GC allocator.
865 
866         ***********************************************************************/
867         template DefaultCollect(T)
868         {
869             static if((T).sizeof + ((void*).sizeof * 3) + uint.sizeof >= 4095 / 2)
870             {
871                 alias Collect!(T) DefaultCollect;
872             }
873             else
874             {
875                 alias GCChunk!(T, (4095 - ((void *).sizeof * 3) - uint.sizeof) / (T).sizeof) DefaultCollect;
876             }
877             // TODO: see if we can automatically figure out whether a type has
878             // any pointers in it, this would allow automatic usage of the
879             // Chunk allocator for added speed.
880         }
881 }
882 else
883    template DefaultCollect(T) {alias ChunkGC!(T) DefaultCollect;}
884 
885 }