1 /******************************************************************************* 2 3 Template for a class implementing a set of buckets containing elements 4 indexed by unique keys. The bucket set contains both a set of buckets and a 5 pool of bucket elements. The bucket elements are structured as linked lists, 6 thus each bucket simply contains a pointer to its first element. 7 8 The number of buckets in the set is always a power of 2. In this way the 9 getBucket() method, which determines which bucket is responsible for a key, 10 can use a simple bit mask instead of a modulo operation, leading to greater 11 efficiency. 12 13 The method of bucket element allocation and pool management can be 14 customised by passing a custom IAllocator implementation to the constructor. 15 The default implementation--the BucketSet.FreeBuckets class--uses 16 'new Bucket.Element' for allocation and manages the pool as a linked list of 17 bucket elements. Possible alternative implementations include leaving the 18 pool management up to an external memory manager such as the D or C runtime 19 library using 'new'/'delete' or malloc()/free(), respectively. Also, if the 20 maximum number of elements in the map is known in advance, all elements can 21 be preallocated in a row. 22 23 Usage: 24 See ocean.util.container.map.HashMap & ocean.util.container.map.HashSet 25 26 Copyright: 27 Copyright (c) 2009-2016 dunnhumby Germany GmbH. 28 All rights reserved. 29 30 License: 31 Boost Software License Version 1.0. See LICENSE_BOOST.txt for details. 32 Alternatively, this file may be distributed under the terms of the Tango 33 3-Clause BSD License (see LICENSE_BSD.txt for details). 34 35 *******************************************************************************/ 36 37 module ocean.util.container.map.model.BucketSet; 38 39 import ocean.meta.types.Qualifiers; 40 41 import ocean.util.container.map.model.Bucket, 42 ocean.util.container.map.model.BucketInfo, 43 ocean.util.container.map.model.IAllocator; 44 45 import ocean.core.Array: clear, isClearable; 46 47 import ocean.core.BitManip: bsr; 48 49 import ocean.util.container.map.model.BucketElementGCAllocator; 50 51 version (unittest) import ocean.core.Test; 52 53 /****************************************************************************** 54 55 Generic BucketSet base class 56 57 ******************************************************************************/ 58 59 public abstract class IBucketSet 60 { 61 import ocean.core.Verify; 62 63 /************************************************************************** 64 65 Convenience type alias for subclasses. 66 67 **************************************************************************/ 68 69 alias .IAllocator IAllocator; 70 71 /************************************************************************** 72 73 Map and and bucket statistics like the map length or the number of 74 buckets. 75 76 **************************************************************************/ 77 78 public BucketInfo bucket_info; 79 80 /************************************************************************** 81 82 Bucket element allocator. 83 84 **************************************************************************/ 85 86 protected IAllocator bucket_element_allocator; 87 88 /************************************************************************** 89 90 Bit mask used by the getBucket() method to determine which bucket is 91 responsible for a key. 92 93 **************************************************************************/ 94 95 private size_t bucket_mask; 96 97 /************************************************************************** 98 99 Constructor. 100 101 Params: 102 bucket_element_allocator = bucket element allocator 103 n = expected number of elements in mapping 104 load_factor = ratio of n to the number of internal buckets. The 105 desired (approximate) number of elements per bucket. For 106 example, 0.5 sets the number of buckets to double n; for 2 the 107 number of buckets is the half of n. load_factor must be greater 108 than 0. The load factor is basically a trade-off between memory 109 usage (number of buckets) and search time (number of elements 110 per bucket). 111 112 **************************************************************************/ 113 114 protected this ( IAllocator bucket_element_allocator, size_t n, float load_factor = 0.75 ) 115 { 116 size_t num_buckets = 1 << this.calcNumBucketsExp2(n, load_factor); 117 118 this.bucket_mask = num_buckets - 1; 119 120 this.bucket_info = new BucketInfo(num_buckets); 121 this.bucket_element_allocator = bucket_element_allocator; 122 } 123 124 /************************************************************************** 125 126 Get the length of the buckets. 127 128 Note: In the D2 port we should use subtyping via 'alias this' and 129 avoid these forwarding functions. 130 131 Returns: 132 the length of the buckets. 133 134 **************************************************************************/ 135 136 public final size_t length ( ) 137 { 138 return this.bucket_info.length; 139 } 140 141 /*************************************************************************** 142 143 Removes all elements from all buckets. 144 145 Note: 146 Beware that calling this method is known to sometimes cause 147 unexpected behaviour when the bucket-set is reused afterwards (where 148 cyclic links are introduced). 149 If you are using one of the of the children *Map classes then 150 call clearErase() instead as it has been reported to properly clear 151 the map. 152 153 Returns: 154 this instance 155 156 **************************************************************************/ 157 158 public typeof(this) clear ( ) 159 { 160 this.clear_(); 161 162 return this; 163 } 164 165 /*************************************************************************** 166 167 Changes the number of buckets to 2 ^ exp2. 168 169 Params: 170 exp2 = exponent of 2 of the new number of buckets 171 172 Returns: 173 this instance. 174 175 In: 176 2 ^ exp2 must fit into size_t. 177 178 ***************************************************************************/ 179 180 abstract typeof (this) setNumBuckets ( uint exp2 ); 181 182 /*************************************************************************** 183 184 Changes the number of buckets to the lowest power of 2 that results in a 185 load factor of at least load_factor with the current number of elements. 186 187 Params: 188 load_factor = factor of n / number of buckets 189 190 Returns: 191 this instance. 192 193 In: 194 load_factor must be greater than 0. 195 196 ***************************************************************************/ 197 198 public typeof (this) redistribute ( float load_factor = 0.75 ) 199 { 200 verify (load_factor > 0); 201 202 return this.setNumBuckets(this.calcNumBucketsExp2(this.bucket_info.length, load_factor)); 203 } 204 205 /*************************************************************************** 206 207 Removes all elements from all buckets and sets the values to val_init if 208 val_init is not empty. 209 210 Params: 211 val_init = initial element value 212 213 Returns: 214 this instance 215 216 **************************************************************************/ 217 218 protected typeof(this) clear_ ( void[] val_init = null ) 219 { 220 this.clearBuckets(val_init); 221 222 this.bucket_info.clear(); 223 224 return this; 225 } 226 227 /*************************************************************************** 228 229 Removes all elements from all buckets. 230 231 Returns: 232 this instance 233 234 **************************************************************************/ 235 236 abstract protected void clearBuckets ( void[] val_init ); 237 238 /*************************************************************************** 239 240 Calculates the lowest exponent of 2 so that a power of 2 with this 241 exponent is at least n / load_factor. 242 243 Params: 244 n = number of expected elements in the set 245 load_factor = desired maximum load factor 246 247 Returns: 248 exponent of 2. 249 250 In: 251 load_factor must be greater than 0. 252 253 ***************************************************************************/ 254 255 public static uint calcNumBucketsExp2 ( size_t n, float load_factor = 0.75 ) 256 { 257 verify (load_factor > 0); 258 259 return n? bsr(cast(size_t)(n / load_factor)) + 1 : 0; 260 } 261 } 262 263 /****************************************************************************** 264 265 Bucket set class template. 266 267 Params: 268 V = value size (.sizeof of the value type), may be 0 to store no value 269 K = key type 270 271 ******************************************************************************/ 272 273 public abstract class BucketSet ( size_t V, K = hash_t ) : IBucketSet 274 { 275 import ocean.core.Verify; 276 277 /************************************************************************** 278 279 Bucket type 280 281 **************************************************************************/ 282 283 public alias .Bucket!(V, K) Bucket; 284 285 286 /*************************************************************************** 287 288 List of buckets 289 290 ***************************************************************************/ 291 292 private Bucket[] buckets; 293 294 /*************************************************************************** 295 296 Constructor, uses the default implementation for the bucket element 297 allocator: Elements are allocated by 'new' and stored in a free list. 298 299 Sets the number of buckets to n / load_factor, rounded up to the nearest 300 power or 2. 301 302 Params: 303 n = expected number of elements in bucket set 304 load_factor = ratio of n to the number of buckets. The desired 305 (approximate) number of elements per bucket. For example, 0.5 306 sets the number of buckets to double n; for 2 the number of 307 buckets is the half of n. load_factor must be greater than 0 308 (this is asserted in IBucketSet.calcNumBucketsExp2()). The load 309 factor is basically a trade-off between memory usage (number of 310 buckets) and search time (number of elements per bucket). 311 312 ***************************************************************************/ 313 314 protected this ( size_t n, float load_factor = 0.75 ) 315 { 316 auto allocator = new BucketElementGCAllocator!(Bucket)(); 317 this(allocator, n, load_factor); 318 } 319 320 /*************************************************************************** 321 322 Constructor. 323 324 Sets the number of buckets to n / load_factor, rounded up to the nearest 325 power or 2. 326 327 Params: 328 allocator = allocator to use to allocate with 329 n = expected number of elements in bucket set 330 load_factor = ratio of n to the number of buckets. The desired 331 (approximate) number of elements per bucket. For example, 0.5 332 sets the number of buckets to double n; for 2 the number of 333 buckets is the half of n. load_factor must be greater than 0 334 (this is asserted in IBucketSet.calcNumBucketsExp2()). The load 335 factor is basically a trade-off between memory usage (number of 336 buckets) and search time (number of elements per bucket). 337 338 ***************************************************************************/ 339 340 protected this ( IAllocator allocator, size_t n, float load_factor = 0.75 ) 341 { 342 super(allocator, n, load_factor); 343 344 this.buckets = new Bucket[this.bucket_info.num_buckets]; 345 } 346 347 /************************************************************************** 348 349 Ensures that Bucket.init consists only of zero bytes so that the 350 memset() method in clear() will work. 351 352 **************************************************************************/ 353 354 unittest 355 { 356 test(isClearable!(Bucket), 357 Bucket.stringof ~ ".init contains non-zero byte: " ~ 358 typeof (this).stringof ~ ".clear_() will not work"); 359 } 360 361 /*************************************************************************** 362 363 Looks up a mapping from the specified key. 364 365 Params: 366 key = key to look up mapping for 367 must_exist = true: verify that the mapping exists, false: the 368 mapping may or may not exist 369 370 Returns: 371 a pointer to the element mapped to by the specified key or null if 372 not found and must_exist is false. 373 The caller should make sure that the key is not changed. 374 375 Out: 376 - The returned array can only be null if must_exist is false. 377 - The length of the returned array is V unless the array is null. 378 379 ***************************************************************************/ 380 381 final protected Bucket.Element* get_ ( in K key, bool must_exist = false ) 382 out (element) 383 { 384 // FIXME: Disabled due to DMD bug 6417, the method parameter argument 385 // values are junk inside this contract. 386 387 version (none) 388 { 389 if (element) 390 { 391 assert (element.key == key, "key mismatch"); 392 } 393 else 394 { 395 assert (!must_exist, "element not found"); 396 } 397 } 398 } 399 do 400 { 401 auto element = this 402 .buckets[this.toHash(key) & this.bucket_mask] 403 .find(key); 404 405 if (element) 406 { 407 // cast(bool) to handle Key==Object: opEquals returns int 408 verify (cast(bool)(element.key == key), "key mismatch"); 409 } 410 else 411 { 412 verify (!must_exist, "element not found"); 413 } 414 415 return element; 416 } 417 418 /*************************************************************************** 419 420 Adds or updates a mapping from the specified key. 421 422 Params: 423 key = key to add/update mapping for 424 added = set to true if the record did not exist but was added 425 426 Returns: 427 a pointer to the element mapped to by the specified key. The caller 428 should set the value as desired and make sure that the key is not 429 changed. 430 431 ***************************************************************************/ 432 433 final protected Bucket.Element* put_ ( K key, out bool added ) 434 out (element) 435 { 436 // FIXME: Disabled due to DMD bug 6417, the method parameter argument 437 // values are junk inside this contract. 438 439 version (none) 440 { 441 assert (element !is null); 442 443 assert (element.key == key, "key mismatch"); 444 } 445 } 446 do 447 { 448 size_t bucket_index = this.toHash(key) & this.bucket_mask; 449 450 with (this.buckets[bucket_index]) 451 { 452 auto element = add(key, 453 { 454 added = true; 455 456 if (has_element) 457 { 458 this.bucket_info.put(bucket_index); 459 } 460 else 461 { 462 this.bucket_info.create(bucket_index); 463 } 464 465 return cast (Bucket.Element*) this.bucket_element_allocator.get(); 466 }()); 467 468 verify (element !is null); 469 470 // cast(bool) to handle Key==Object: opEquals returns int 471 verify (cast(bool)(element.key == key), "key mismatch"); 472 473 return element; 474 } 475 } 476 477 /*************************************************************************** 478 479 Adds or updates a mapping from the specified key. 480 481 Params: 482 key = key to add/update mapping for 483 484 Returns: 485 the element mapped to by the specified key. The caller should set 486 the value as desired and make sure that the key is not changed. 487 488 ***************************************************************************/ 489 490 final protected Bucket.Element* put_ ( K key ) 491 { 492 bool added; 493 494 return this.put_(key, added); 495 } 496 497 /*************************************************************************** 498 499 Removes the mapping for the specified key and optionally invokes dg with 500 the value that is about to be removed. 501 502 Note that, if references to GC-allocated objects (objects or dynamic 503 arrays), it is a good idea to set the value of the element referenced 504 by the element parameter of the callback delegate to null to avoid these 505 objects from being prevented from garbage collection. In general 506 pointers should be set to null for the same reason and to avoid dangling 507 pointers. 508 509 If the default allocator is used (that is, no allocator instance was 510 passed to the constructor), the value of the element referenced 511 by the element parameter of the callback delegate dg is accessible and 512 remains unchanged after dg returned until the next call to put() or 513 clear(). 514 515 Params: 516 key = key to remove mapping for 517 dg = optional delegate to call with the removed element value (not 518 called if key was not found) 519 520 Returns: 521 true if key was found in the map or false if not. In case of false 522 dg was not called. 523 524 ***************************************************************************/ 525 526 final protected bool remove_ ( K key, scope void delegate ( ref Bucket.Element element ) dg = null ) 527 { 528 size_t bucket_index = this.toHash(key) & this.bucket_mask; 529 530 Bucket.Element* element = this.buckets[bucket_index].remove(key); 531 532 scope (exit) if ( element ) 533 { 534 this.bucket_info.remove(bucket_index); 535 536 if (dg) 537 { 538 dg(*element); 539 } 540 541 this.bucket_element_allocator.recycle(element); 542 } 543 544 return !!element; 545 } 546 547 /*************************************************************************** 548 549 Removes the mapping for the specified key. 550 551 Params: 552 key = key to remove mapping for 553 554 Returns: 555 true if key was found and the mapping removed or false otherwise. 556 557 ***************************************************************************/ 558 559 public bool remove ( K key ) 560 { 561 return this.remove_(key); 562 } 563 564 /*************************************************************************** 565 566 Calculates the hash value from key. 567 568 Params: 569 key = key to hash 570 571 Returns: 572 the hash value that corresponds to key. 573 574 ***************************************************************************/ 575 576 abstract public hash_t toHash ( in K key ); 577 578 /*************************************************************************** 579 580 Changes the number of buckets to 2 ^ exp2. 581 582 Params: 583 exp2 = exponent of 2 of the new number of buckets 584 585 Returns: 586 this instance. 587 588 In: 589 2 ^ exp2 must fit into size_t. 590 591 ***************************************************************************/ 592 593 public override typeof (this) setNumBuckets ( uint exp2 ) 594 { 595 verify (exp2 < size_t.sizeof * 8); 596 597 size_t n_prev = this.buckets.length, 598 n_new = 1 << exp2; 599 600 if (n_prev != n_new) 601 { 602 // Park the bucket elements that are currently in the set. 603 604 this.bucket_element_allocator.parkElements(this.bucket_info.length, 605 (IAllocator.IParkingStack parked_elements) 606 { 607 scope Iterator it = this.new Iterator(true); 608 609 foreach (ref element; it) 610 { 611 parked_elements.push(&element); 612 } 613 614 // Resize the array of buckets and the bucket_info and calculate 615 // the new bucket_mask. 616 617 assumeSafeAppend(this.buckets); 618 this.buckets.length = n_new; 619 620 .clear(this.buckets[0 .. (n_prev < $)? n_prev : $]); 621 622 this.bucket_info.clearResize(n_new); 623 624 this.bucket_mask = n_new - 1; 625 626 // Put the parked elements back into the buckets. 627 628 foreach (element_; parked_elements) 629 { 630 auto element = cast (Bucket.Element*) element_, 631 bucket_index = this.toHash(element.key) & this.bucket_mask; 632 633 if (this.bucket_info[bucket_index]) 634 { 635 verify (this.buckets[bucket_index].has_element, 636 "bucket with non-zero length has no element"); 637 } 638 else 639 { 640 verify (!this.bucket_info[bucket_index], 641 "bucket with zero length has an element"); 642 } 643 644 this.bucket_info.put(bucket_index); 645 646 this.buckets[bucket_index].add(element); 647 } 648 }); 649 } 650 651 return this; 652 } 653 654 /*************************************************************************** 655 656 Removes all elements from all buckets and sets the values to val_init if 657 val_init is not empty. 658 659 Params: 660 val_init = initial element value, the length must be V or 0 661 662 In: 663 val_init.length must be V. 664 665 Out: 666 all the buckets.first are set to null 667 668 ***************************************************************************/ 669 670 protected override void clearBuckets ( void[] val_init = null ) 671 out 672 { 673 foreach(bucket; this.buckets) 674 { 675 assert(bucket.first == null, "non-Null first bucket element found"); 676 } 677 } 678 do 679 { 680 verify (!val_init.length || val_init.length == V); 681 682 // Recycle all bucket elements. 683 684 scope Iterator it = this.new Iterator(true); 685 686 foreach (ref element; it) 687 { 688 static if (V) if (val_init.length) 689 { 690 element.val[] = cast (ubyte[]) val_init[]; 691 } 692 693 this.bucket_element_allocator.recycle(&element); 694 } 695 696 // Clear bucket contents. 697 .clear(this.buckets); 698 } 699 700 /*************************************************************************** 701 702 'foreach' iteration over elements in the set. 703 DO NOT change the element keys during iteration because this will 704 corrupt the map (unless it is guaranteed that the element will go to the 705 same bucket). 706 707 ***************************************************************************/ 708 709 protected class Iterator 710 { 711 /*********************************************************************** 712 713 Whether to reset the counter after each foreach 714 715 ***********************************************************************/ 716 717 protected bool reset_after_foreach = true; 718 719 /*********************************************************************** 720 721 Index of the last bucket that was iterated in the last call to 722 foreach 723 724 ***********************************************************************/ 725 726 protected size_t last_bucket_index; 727 728 /*********************************************************************** 729 730 Last element within the last bucket that was iterated 731 732 ***********************************************************************/ 733 734 protected size_t last_bucket_element; 735 736 /*********************************************************************** 737 738 Total count of the elements currently iterated 739 740 ***********************************************************************/ 741 742 protected size_t counter; 743 744 /*********************************************************************** 745 746 Ctor 747 748 Params: 749 reset_after_foreach = whether to reset iteration counters 750 after a foreach (true) or not (false) 751 752 ***********************************************************************/ 753 754 public this ( bool reset_after_foreach = false ) 755 { 756 this.reset_after_foreach = reset_after_foreach; 757 } 758 759 /*********************************************************************** 760 761 Reset the counters, effectively forcing any interrupted iteration to 762 start from the beginning. 763 764 ***********************************************************************/ 765 766 public void reset ( ) 767 { 768 this.last_bucket_index = 0; 769 this.last_bucket_element = 0; 770 this.counter = 0; 771 } 772 773 /*********************************************************************** 774 775 if reset_after_foreach is true: 776 resets the counters after each foreach so the next iteration 777 starts from the beginning 778 779 if reset_after_foreach is false: 780 resets the counters only when the whole map was iterated 781 782 Params: 783 interrupted = whether the last foreach call was interrupted 784 using break (true) or not (false) 785 786 ***********************************************************************/ 787 788 protected void resetIterator ( bool interrupted ) 789 { 790 if ( reset_after_foreach || !interrupted ) 791 { 792 this.reset(); 793 } 794 } 795 796 /*********************************************************************** 797 798 Foreach support 799 800 ***********************************************************************/ 801 802 final protected int opApply ( scope int delegate ( ref Bucket.Element element ) dg ) 803 { 804 int tmpDg ( ref size_t i, ref Bucket.Element e ) 805 { 806 return dg(e); 807 } 808 809 return this.opApply(&tmpDg); 810 } 811 812 /*********************************************************************** 813 814 Foreach support with counter 815 816 Instead of remembering the exact pointer we last iterated upon a 817 break, we remember the index within the linked list and re-iterate 818 to that index next time we're called. Using a pointer for this 819 problem would be problematic, probably (alliteration!), because the 820 linked list could change drastically in the mean time. Elements 821 could be removed, especially the one we were remembering. adding 822 checks to make that safe is a lot of hassle and not worth it. 823 824 As buckets are supposed to be very small anyway, we just remember 825 the index and if the list finished before we reach that index, so be 826 it, we just use the next bucket then. 827 828 ***********************************************************************/ 829 830 final protected int opApply ( scope int delegate ( ref size_t i, 831 ref Bucket.Element element ) dg ) 832 { 833 int result = 0; 834 835 scope (exit) 836 { 837 this.resetIterator(result != 0); 838 } 839 840 if ( this.outer.bucket_info.num_filled_buckets < this.last_bucket_index ) 841 { 842 this.last_bucket_index = this.outer 843 .bucket_info 844 .num_filled_buckets; 845 } 846 847 auto remaining_buckets = this.outer 848 .bucket_info 849 .filled_buckets[this.last_bucket_index .. $]; 850 851 top: foreach (info; remaining_buckets) 852 { 853 with (this.outer.buckets[info.index]) 854 { 855 size_t bucket_element_counter = 0; 856 verify (has_element); 857 858 for ( auto element = first; element !is null; ) 859 { 860 /* element.next needs to be stored before calling dg 861 * because now dg will modifiy element.next if it 862 * returns the element to the free list. */ 863 auto next = element.next; 864 865 if ( bucket_element_counter == this.last_bucket_element ) 866 { 867 result = dg(this.counter, *element); 868 869 this.counter++; 870 this.last_bucket_element++; 871 } 872 873 bucket_element_counter++; 874 875 element = next; 876 877 if (result) break top; 878 } 879 } 880 881 this.last_bucket_index++; 882 this.last_bucket_element = 0; 883 } 884 885 return result; 886 } 887 } 888 }