/*******************************************************************************

    Copyright:
        Copyright (c) 2008 Kris Bell.
        Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
        See LICENSE_TANGO.txt for details.

    Version:
        Apr 2008: Initial release
        Jan 2009: Added GCChunk allocator

    Authors: Kris, schveiguy

*******************************************************************************/

module ocean.util.container.Container;

import core.memory;

import core.stdc.stdlib;
import core.stdc.string;

/*******************************************************************************

    Utility functions and constants

*******************************************************************************/

struct Container
{
    import ocean.core.Verify;

    /***********************************************************************

        default initial number of buckets of a non-empty hashmap

    ***********************************************************************/

    static size_t defaultInitialBuckets = 31;

    /***********************************************************************

        default load factor for a non-empty hashmap. The hash table is
        resized when the proportion of elements per bucket exceeds this
        limit

    ***********************************************************************/

    static float defaultLoadFactor = 0.75f;

    /***********************************************************************

        generic value reaper, which does nothing

    ***********************************************************************/

    static void reap(V) (V v) {}

    /***********************************************************************

        generic key/value reaper, which does nothing

    ***********************************************************************/

    static void reap(K, V) (K k, V v) {}

    /***********************************************************************

        generic hash function, using the default hashing. Thanks to
        'mwarning' for the optimization suggestion

    ***********************************************************************/

    static size_t hash(K) (K k, size_t length)
    {
        static if (is(K : int)   || is(K : uint)   ||
                   is(K : long)  || is(K : ulong)  ||
                   is(K : short) || is(K : ushort) ||
                   is(K : byte)  || is(K : ubyte)  ||
                   is(K : char)  || is(K : wchar)  || is(K : dchar))
            return cast(size_t) (k % length);
        else
            return (typeid(K).getHash(&k) & 0x7FFFFFFF) % length;
    }
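
    /***********************************************************************

        Usage sketch (added for illustration, not part of the original
        module): both the integral fast path and the typeid-based fallback
        of hash() return a bucket index smaller than the given length.

    ***********************************************************************/

    unittest
    {
        auto buckets = defaultInitialBuckets;

        // integral keys take the plain modulo fast path
        assert (hash(42, buckets) < buckets);

        // non-integral keys fall back to typeid-based hashing
        static struct Key { int a; int b; }
        assert (hash(Key(1, 2), buckets) < buckets);
    }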

    /***********************************************************************

        GC Chunk allocator

        Can save approximately 30% memory for small elements (tested
        with integer elements and a chunk size of 1000), and is at
        least twice as fast at adding elements when compared to the
        generic allocator (approximately 50x faster with LinkedList)

        Operates safely with GC managed entities

    ***********************************************************************/

    struct ChunkGC(T)
    {
        static assert (T.sizeof >= (T*).sizeof, "The ChunkGC allocator can only be used for data sizes of at least " ~ ((T*).sizeof).stringof[0..$-1] ~ " bytes!");

        private struct Cache { Cache* next; }

        private Cache* cache;
        private T[][]  lists;
        private size_t chunks = 256;

        /***************************************************************

            allocate a T-sized memory chunk

        ***************************************************************/

        T* allocate ()
        {
            if (cache is null)
                newlist;
            auto p = cache;
            cache = p.next;
            return cast(T*) p;
        }

        /***************************************************************

            allocate an array of T* sized memory chunks

        ***************************************************************/

        T*[] allocate (size_t count)
        {
            auto p = (cast(T**) calloc(count, (T*).sizeof)) [0 .. count];
            GC.addRange (cast(void*) p, count * (T*).sizeof);
            return p;
        }

        /***************************************************************

            Invoked when a specific T*[] is discarded

        ***************************************************************/

        void collect (T*[] t)
        {
            if (t.ptr)
            {
                GC.removeRange (t.ptr);
                free (t.ptr);
            }
        }

        /***************************************************************

            Invoked when a specific T is discarded

        ***************************************************************/

        void collect (T* p)
        {
            verify(p !is null);
            auto d = cast(Cache*) p;
            //*p = T.init;
            d.next = cache;
            cache = d;
        }

        /***************************************************************

            Invoked when clear/reset is called on the host.
            This is a shortcut to clear everything allocated.

            Should return true if supported, or false otherwise.
            False return will cause a series of discrete collect
            calls

        ***************************************************************/

        bool collect (bool all = true)
        {
            if (all)
            {
                foreach (ref list; lists)
                {
                    GC.removeRange (list.ptr);
                    free (list.ptr);
                    list = null;
                }
                cache = null;
                lists = null;
                return true;
            }
            return false;
        }

        /***************************************************************

            set the chunk size and prepopulate with nodes

        ***************************************************************/

        void config (size_t chunks, size_t allocate=0)
        {
            this.chunks = chunks;
            if (allocate)
                for (ptrdiff_t i=allocate/chunks+1; i--;)
                    newlist;
        }

        /***************************************************************

            list manager

        ***************************************************************/

        private void newlist ()
        {
            lists.length = lists.length + 1;
            auto p = (cast(T*) calloc (chunks, T.sizeof)) [0 .. chunks];
            lists[$-1] = p;
            GC.addRange (p.ptr, T.sizeof * chunks);
            auto head = cache;
            foreach (ref node; p)
            {
                auto d = cast(Cache*) &node;
                d.next = head;
                head = d;
            }
            cache = head;
        }
    }
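
    /***********************************************************************

        Usage sketch (added for illustration, with a hypothetical element
        type): a ChunkGC instance hands out T* nodes carved from chunked
        allocations and recycles them through collect().

    ***********************************************************************/

    unittest
    {
        static struct Node { Node* next; int value; }

        ChunkGC!(Node) alloc;
        alloc.config(16);           // use chunks of 16 nodes

        auto a = alloc.allocate();
        auto b = alloc.allocate();
        a.value = 1;
        b.value = 2;

        alloc.collect(a);           // returns the node to the free list
        auto c = alloc.allocate();  // may hand back the recycled node

        alloc.collect(true);        // drop every chunk in one call
    }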

    /***********************************************************************

        Chunk allocator (non GC)

        Can save approximately 30% memory for small elements (tested
        with integer elements and a chunk size of 1000), and is at
        least twice as fast at adding elements when compared to the
        default allocator (approximately 50x faster with LinkedList)

        Note that, due to GC behaviour, you should not configure
        a custom allocator for containers holding anything managed
        by the GC. For example, you cannot use a MallocAllocator
        to manage a container of classes or strings where those
        were allocated by the GC. Once something is owned by the GC,
        its lifetime must be managed by GC-managed entities
        (otherwise the GC may think there are no live references
        and prematurely collect container contents).

        You can explicitly manage the collection of keys and values
        yourself by providing a reaper delegate. For example, if
        you use a MallocAllocator to manage key/value pairs which
        are themselves allocated via malloc, then you should also
        provide a reaper delegate to collect those as required.

        The primary benefit of this allocator is to avoid having the
        GC scan the data structures involved. Use ChunkGC where that
        is not a concern, or if you have GC-managed data instead.

    ***********************************************************************/

    struct Chunk(T)
    {
        static assert (T.sizeof >= (T*).sizeof, "The Chunk allocator can only be used for data sizes of at least " ~ ((T*).sizeof).stringof[0..$-1] ~ " bytes!");

        private struct Cache { Cache* next; }

        private Cache* cache;
        private T[][]  lists;
        private size_t chunks = 256;

        /***************************************************************

            allocate a T-sized memory chunk

        ***************************************************************/

        T* allocate ()
        {
            if (cache is null)
                newlist;
            auto p = cache;
            cache = p.next;
            return cast(T*) p;
        }

        /***************************************************************

            allocate an array of T* sized memory chunks

        ***************************************************************/

        T*[] allocate (size_t count)
        {
            return (cast(T**) calloc(count, (T*).sizeof)) [0 .. count];
        }

        /***************************************************************

            Invoked when a specific T*[] is discarded

        ***************************************************************/

        void collect (T*[] t)
        {
            if (t.ptr)
                free (t.ptr);
        }

        /***************************************************************

            Invoked when a specific T is discarded

        ***************************************************************/

        void collect (T* p)
        {
            verify(p !is null);
            auto d = cast(Cache*) p;
            d.next = cache;
            cache = d;
        }

        /***************************************************************

            Invoked when clear/reset is called on the host.
            This is a shortcut to clear everything allocated.

            Should return true if supported, or false otherwise.
            False return will cause a series of discrete collect
            calls

        ***************************************************************/

        bool collect (bool all = true)
        {
            if (all)
            {
                foreach (ref list; lists)
                {
                    free (list.ptr);
                    list = null;
                }
                cache = null;
                lists = null;
                return true;
            }
            return false;
        }

        /***************************************************************

            set the chunk size and prepopulate with nodes

        ***************************************************************/

        void config (size_t chunks, int allocate=0)
        {
            this.chunks = chunks;
            if (allocate)
                for (ptrdiff_t i=allocate/chunks+1; i--;)
                    newlist;
        }

        /***************************************************************

            list manager

        ***************************************************************/

        private void newlist ()
        {
            lists.length = lists.length + 1;
            auto p = (cast(T*) calloc (chunks, T.sizeof)) [0 .. chunks];
            lists[$-1] = p;
            auto head = cache;
            foreach (ref node; p)
            {
                auto d = cast(Cache*) &node;
                d.next = head;
                head = d;
            }
            cache = head;
        }
    }
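
    /***********************************************************************

        Usage sketch (added for illustration, with a hypothetical element
        type): Chunk works like ChunkGC but never registers its memory with
        the GC, so the payload must not be the only reference to GC-managed
        data.

    ***********************************************************************/

    unittest
    {
        static struct IntNode { IntNode* next; int value; }

        Chunk!(IntNode) alloc;
        alloc.config(8, 32);        // chunk size 8, prepopulate at least 32 nodes

        auto n = alloc.allocate();
        n.value = 7;

        alloc.collect(n);           // recycle a single node
        alloc.collect(true);        // release all chunks at once
    }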

    /***********************************************************************

        generic GC allocation manager

        Slow and expensive in memory costs

    ***********************************************************************/

    struct Collect(T)
    {
        /***************************************************************

            allocate a T sized memory chunk

        ***************************************************************/

        T* allocate ()
        {
            return cast(T*) GC.calloc (T.sizeof);
        }

        /***************************************************************

            allocate an array of T sized memory chunks

        ***************************************************************/

        T*[] allocate (size_t count)
        {
            return new T*[count];
        }

        /***************************************************************

            Invoked when a specific T is discarded

        ***************************************************************/

        void collect (T* p)
        {
            if (p)
                delete p;
        }

        /***************************************************************

            Invoked when a specific T[] is discarded

        ***************************************************************/

        void collect (T*[] t)
        {
            if (t)
                delete t;
        }

        /***************************************************************

            Invoked when clear/reset is called on the host.
            This is a shortcut to clear everything allocated.

            Should return true if supported, or false otherwise.
            False return will cause a series of discrete collect
            calls

        ***************************************************************/

        bool collect (bool all = true)
        {
            return false;
        }

        /***************************************************************

            set the chunk size and prepopulate with nodes

        ***************************************************************/

        void config (size_t chunks, int allocate=0)
        {
        }
    }
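
    /***********************************************************************

        Usage sketch (added for illustration): Collect defers entirely to
        the GC, so recycling individual elements is optional and bulk
        clearing is reported as unsupported.

    ***********************************************************************/

    unittest
    {
        Collect!(int) alloc;

        auto p = alloc.allocate();
        *p = 5;

        alloc.collect(p);               // hand the element back explicitly
        assert (!alloc.collect(true));  // no bulk collection available
    }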

    /***********************************************************************

        Malloc allocation manager.

        Note that, due to GC behaviour, you should not configure
        a custom allocator for containers holding anything managed
        by the GC. For example, you cannot use a MallocAllocator
        to manage a container of classes or strings where those
        were allocated by the GC. Once something is owned by the GC,
        its lifetime must be managed by GC-managed entities
        (otherwise the GC may think there are no live references
        and prematurely collect container contents).

        You can explicitly manage the collection of keys and values
        yourself by providing a reaper delegate. For example, if
        you use a MallocAllocator to manage key/value pairs which
        are themselves allocated via malloc, then you should also
        provide a reaper delegate to collect those as required.

    ***********************************************************************/

    struct Malloc(T)
    {
        /***************************************************************

            allocate a T sized memory chunk

        ***************************************************************/

        T* allocate ()
        {
            return cast(T*) calloc (1, T.sizeof);
        }

        /***************************************************************

            allocate an array of T sized memory chunks

        ***************************************************************/

        T*[] allocate (size_t count)
        {
            return (cast(T**) calloc(count, (T*).sizeof)) [0 .. count];
        }

        /***************************************************************

            Invoked when a specific T*[] is discarded

        ***************************************************************/

        void collect (T*[] t)
        {
            if (t.length)
                free (t.ptr);
        }

        /***************************************************************

            Invoked when a specific T is discarded

        ***************************************************************/

        void collect (T* p)
        {
            if (p)
                free (p);
        }

        /***************************************************************

            Invoked when clear/reset is called on the host.
            This is a shortcut to clear everything allocated.

            Should return true if supported, or false otherwise.
            False return will cause a series of discrete collect
            calls

        ***************************************************************/

        bool collect (bool all = true)
        {
            return false;
        }

        /***************************************************************

            set the chunk size and prepopulate with nodes

        ***************************************************************/

        void config (size_t chunks, int allocate=0)
        {
        }
    }
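
    /***********************************************************************

        Usage sketch (added for illustration): Malloc hands out zeroed
        malloc'd elements and frees them on collect(); nothing is ever
        registered with the GC, so the reaper caveat above applies.

    ***********************************************************************/

    unittest
    {
        Malloc!(long) alloc;

        auto p = alloc.allocate();
        *p = 42;
        alloc.collect(p);               // free() the element again

        auto slots = alloc.allocate(4); // pointer array, also malloc'd
        alloc.collect(slots);

        assert (!alloc.collect(true));  // no bulk collection available
    }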

    version (prior_allocator)
    {
        /*******************************************************************

            GCChunk allocator

            Like the Chunk allocator, this allocates elements in chunks,
            but allows you to allocate elements that can have GC pointers.

            Tests have shown about a 60% speedup when using the GC chunk
            allocator for a Hashmap!(int, int).

        *******************************************************************/

        struct GCChunk(T, uint chunkSize)
        {
            static if(T.sizeof < (void*).sizeof)
            {
                static assert(false, "Error, allocator for " ~ T.stringof ~ " failed to instantiate");
            }

            /**
             * This is the form used to link recyclable elements together.
             */
            struct element
            {
                element *next;
            }

            /**
             * A chunk of elements
             */
            struct chunk
            {
                /**
                 * The next chunk in the chain
                 */
                chunk *next;

                /**
                 * The previous chunk in the chain. Required for O(1) removal
                 * from the chain.
                 */
                chunk *prev;

                /**
                 * The linked list of free elements in the chunk. This list is
                 * amended each time an element in this chunk is freed.
                 */
                element *freeList;

                /**
                 * The number of free elements in the freeList. Used to determine
                 * whether this chunk can be given back to the GC
                 */
                uint numFree;

                /**
                 * The elements in the chunk.
                 */
                T[chunkSize] elems;

                /**
                 * Allocate a T* from the free list.
                 */
                T *allocateFromFree()
                {
                    element *x = freeList;
                    freeList = x.next;
                    //
                    // clear the pointer, this clears the element as if it was
                    // newly allocated
                    //
                    x.next = null;
                    numFree--;
                    return cast(T*)x;
                }

                /**
                 * deallocate a T*, send it to the free list
                 *
                 * returns true if this chunk no longer has any used elements.
                 */
                bool deallocate(T *t)
                {
                    //
                    // clear the element so the GC does not interpret the element
                    // as pointing to anything else.
                    //
                    memset(t, 0, (T).sizeof);
                    element *x = cast(element *)t;
                    x.next = freeList;
                    freeList = x;
                    return (++numFree == chunkSize);
                }
            }

            /**
             * The chain of used chunks. Used chunks have had all their elements
             * allocated at least once.
             */
            chunk *used;

            /**
             * The fresh chunk. This is only used if no elements are available in
             * the used chain.
             */
            chunk *fresh;

            /**
             * The next element in the fresh chunk. Because we don't worry about
             * the free list in the fresh chunk, we need to keep track of the next
             * fresh element to use.
             */
            uint nextFresh;

            /**
             * Allocate a T*
             */
            T* allocate()
            {
                if(used !is null && used.numFree > 0)
                {
                    //
                    // allocate one element of the used list
                    //
                    T* result = used.allocateFromFree();
                    if(used.numFree == 0)
                        //
                        // move used to the end of the list
                        //
                        used = used.next;
                    return result;
                }

                //
                // no used elements are available, allocate out of the fresh
                // elements
                //
                if(fresh is null)
                {
                    fresh = new chunk;
                    nextFresh = 0;
                }

                T* result = &fresh.elems[nextFresh];
                if(++nextFresh == chunkSize)
                {
                    if(used is null)
                    {
                        used = fresh;
                        fresh.next = fresh;
                        fresh.prev = fresh;
                    }
                    else
                    {
                        //
                        // insert fresh into the used chain
                        //
                        fresh.prev = used.prev;
                        fresh.next = used;
                        fresh.prev.next = fresh;
                        fresh.next.prev = fresh;
                        if(fresh.numFree != 0)
                        {
                            //
                            // can recycle elements from fresh
                            //
                            used = fresh;
                        }
                    }
                    fresh = null;
                }
                return result;
            }

            T*[] allocate(uint count)
            {
                return new T*[count];
            }

            /**
             * free a T*
             */
            void collect(T* t)
            {
                //
                // need to figure out which chunk t is in
                //
                chunk *cur = cast(chunk *)GC.addrOf(t);

                if(cur !is fresh && cur.numFree == 0)
                {
                    //
                    // move cur to the front of the used list, it has free nodes
                    // to be used.
                    //
                    if(cur !is used)
                    {
                        if(used.numFree != 0)
                        {
                            //
                            // first, unlink cur from its current location
                            //
                            cur.prev.next = cur.next;
                            cur.next.prev = cur.prev;

                            //
                            // now, insert cur before used.
                            //
                            cur.prev = used.prev;
                            cur.next = used;
                            used.prev = cur;
                            cur.prev.next = cur;
                        }
                        used = cur;
                    }
                }

                if(cur.deallocate(t))
                {
                    //
                    // cur no longer has any elements in use, it can be deleted.
                    //
                    if(cur.next is cur)
                    {
                        //
                        // only one chunk left in the chain, don't free it.
                        //
                    }
                    else
                    {
                        //
                        // remove cur from list
                        //
                        if(used is cur)
                        {
                            //
                            // update used pointer
                            //
                            used = used.next;
                        }
                        cur.next.prev = cur.prev;
                        cur.prev.next = cur.next;
                        delete cur;
                    }
                }
            }

            void collect(T*[] t)
            {
                if(t)
                    delete t;
            }

            /**
             * Deallocate all chunks used by this allocator. Depends on the GC to do
             * the actual collection
             */
            bool collect(bool all = true)
            {
                used = null;

                //
                // keep fresh around
                //
                if(fresh !is null)
                {
                    nextFresh = 0;
                    fresh.freeList = null;
                }

                return true;
            }

            void config (size_t chunks, int allocate=0)
            {
            }
        }

        /*******************************************************************

            Aliases to the correct default allocator depending on how big
            the type is. It makes less sense to use a GCChunk allocator
            if the type is going to be larger than a page (currently there
            is no way to get the page size from the GC, so we assume 4096
            bytes). If not more than one unit can fit into a page, then
            we use the default GC allocator.

        *******************************************************************/

        template DefaultCollect(T)
        {
            static if((T).sizeof + ((void*).sizeof * 3) + uint.sizeof >= 4095 / 2)
            {
                alias Collect!(T) DefaultCollect;
            }
            else
            {
                alias GCChunk!(T, (4095 - ((void *).sizeof * 3) - uint.sizeof) / (T).sizeof) DefaultCollect;
            }
            // TODO: see if we can automatically figure out whether a type has
            // any pointers in it, this would allow automatic usage of the
            // Chunk allocator for added speed.
        }
    }
    else
        template DefaultCollect(T) {alias ChunkGC!(T) DefaultCollect;}

}
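
/*******************************************************************************

    Usage sketch (added for illustration, with a hypothetical node type):
    in the default build (no `prior_allocator` version), DefaultCollect is
    simply an alias for the ChunkGC allocator.

*******************************************************************************/

version (prior_allocator) { }
else
{
    unittest
    {
        static struct Node { Node* next; int value; }

        static assert (is(Container.DefaultCollect!(Node) == Container.ChunkGC!(Node)));

        Container.DefaultCollect!(Node) alloc;
        auto p = alloc.allocate();
        alloc.collect(p);
        alloc.collect(true);
    }
}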