1 /******************************************************************************* 2 3 Based upon Doug Lea's Java collection package 4 5 Copyright: 6 Copyright (c) 2008 Kris Bell. 7 Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH. 8 All rights reserved. 9 10 License: 11 Tango Dual License: 3-Clause BSD License / Academic Free License v3.0. 12 See LICENSE_TANGO.txt for details. 13 14 Version: Apr 2008: Initial release 15 16 Authors: Kris 17 18 *******************************************************************************/ 19 20 module ocean.util.container.HashSet; 21 22 import ocean.util.container.Slink; 23 24 public import ocean.util.container.Container; 25 26 import ocean.util.container.model.IContainer; 27 28 /******************************************************************************* 29 30 Hash table implementation of a Set 31 32 --- 33 Iterator iterator () 34 int opApply (int delegate(ref V value) dg) 35 36 bool add (V element) 37 bool contains (V element) 38 bool take (ref V element) 39 bool remove (V element) 40 size_t remove (IContainer!(V) e) 41 bool replace (V oldElement, V newElement) 42 43 size_t size () 44 bool isEmpty () 45 V[] toArray (V[] dst) 46 HashSet dup () 47 HashSet clear () 48 HashSet reset () 49 50 size_t buckets () 51 void buckets (size_t cap) 52 float threshold () 53 void threshold (float desired) 54 --- 55 56 *******************************************************************************/ 57 58 class HashSet (V, alias Hash = Container.hash, 59 alias Reap = Container.reap, 60 alias Heap = Container.DefaultCollect) 61 : IContainer!(V) 62 { 63 import ocean.core.Verify; 64 65 // use this type for Allocator configuration 66 public alias Slink!(V) Type; 67 68 private alias Type *Ref; 69 70 private alias Heap!(Type) Alloc; 71 72 // Each table entry is a list - null if no table allocated 73 private Ref[] table; 74 75 // number of elements contained 76 private size_t count; 77 78 // the threshold load factor 79 private float loadFactor; 80 81 // configured heap manager 82 private Alloc heap; 83 84 // mutation tag updates on each change 85 private size_t mutation; 86 87 /*********************************************************************** 88 89 Construct a HashSet instance 90 91 ***********************************************************************/ 92 93 this (float f = Container.defaultLoadFactor) 94 { 95 loadFactor = f; 96 } 97 98 /*********************************************************************** 99 100 Clean up when deleted 101 102 ***********************************************************************/ 103 104 ~this () 105 { 106 reset; 107 } 108 109 /*********************************************************************** 110 111 Return a generic iterator for contained elements 112 113 ***********************************************************************/ 114 115 final Iterator iterator () 116 { 117 Iterator i = void; 118 i.mutation = mutation; 119 i.table = table; 120 i.owner = this; 121 i.cell = null; 122 i.row = 0; 123 return i; 124 } 125 126 /*********************************************************************** 127 128 129 ***********************************************************************/ 130 131 final int opApply (scope int delegate(ref V value) dg) 132 { 133 return iterator.opApply (dg); 134 } 135 136 /*********************************************************************** 137 138 Return the number of elements contained 139 140 ***********************************************************************/ 141 142 final size_t size () 143 { 144 return count; 145 } 146 147 /*********************************************************************** 148 149 Add a new element to the set. Does not add if there is an 150 equivalent already present. Returns true where an element 151 is added, false where it already exists 152 153 Time complexity: O(1) average; O(n) worst. 154 155 ***********************************************************************/ 156 157 final bool add (V element) 158 { 159 if (table is null) 160 resize (Container.defaultInitialBuckets); 161 162 auto h = Hash (element, table.length); 163 auto hd = table[h]; 164 165 if (hd && hd.find (element)) 166 return false; 167 168 table[h] = allocate.set (element, hd); 169 increment; 170 171 // only check if bin was nonempty 172 if (hd) 173 checkLoad; 174 return true; 175 } 176 177 /*********************************************************************** 178 179 Does this set contain the given element? 180 181 Time complexity: O(1) average; O(n) worst 182 183 ***********************************************************************/ 184 185 final bool contains (V element) 186 { 187 if (count) 188 { 189 auto p = table[Hash (element, table.length)]; 190 if (p && p.find (element)) 191 return true; 192 } 193 return false; 194 } 195 196 /*********************************************************************** 197 198 Make an independent copy of the container. Does not clone 199 elements 200 201 Time complexity: O(n) 202 203 ***********************************************************************/ 204 205 final HashSet dup () 206 { 207 auto clone = new HashSet!(V, Hash, Reap, Heap) (loadFactor); 208 209 if (count) 210 { 211 clone.buckets (buckets); 212 213 foreach (value; iterator) 214 clone.add (value); 215 } 216 return clone; 217 } 218 219 /*********************************************************************** 220 221 Remove the provided element. Returns true if found, false 222 otherwise 223 224 Time complexity: O(1) average; O(n) worst 225 226 ***********************************************************************/ 227 228 final size_t remove (V element, bool all) 229 { 230 return remove(element) ? 1 : 0; 231 } 232 233 /*********************************************************************** 234 235 Remove the provided element. Returns true if found, false 236 otherwise 237 238 Time complexity: O(1) average; O(n) worst 239 240 ***********************************************************************/ 241 242 final bool remove (V element) 243 { 244 if (count) 245 { 246 auto h = Hash (element, table.length); 247 auto hd = table[h]; 248 auto trail = hd; 249 auto p = hd; 250 251 while (p) 252 { 253 auto n = p.next; 254 if (element == p.value) 255 { 256 decrement (p); 257 if (p is table[h]) 258 { 259 table[h] = n; 260 trail = n; 261 } 262 else 263 trail.next = n; 264 return true; 265 } 266 else 267 { 268 trail = p; 269 p = n; 270 } 271 } 272 } 273 return false; 274 } 275 276 /*********************************************************************** 277 278 Replace the first instance of oldElement with newElement. 279 Returns true if oldElement was found and replaced, false 280 otherwise. 281 282 ***********************************************************************/ 283 284 final size_t replace (V oldElement, V newElement, bool all) 285 { 286 return replace (oldElement, newElement) ? 1 : 0; 287 } 288 289 /*********************************************************************** 290 291 Replace the first instance of oldElement with newElement. 292 Returns true if oldElement was found and replaced, false 293 otherwise. 294 295 ***********************************************************************/ 296 297 final bool replace (V oldElement, V newElement) 298 { 299 300 if (count && oldElement != newElement) 301 if (contains (oldElement)) 302 { 303 remove (oldElement); 304 add (newElement); 305 return true; 306 } 307 return false; 308 } 309 310 /*********************************************************************** 311 312 Remove and expose the first element. Returns false when no 313 more elements are contained 314 315 Time complexity: O(n) 316 317 ***********************************************************************/ 318 319 final bool take (ref V element) 320 { 321 if (count) 322 foreach (ref list; table) 323 if (list) 324 { 325 auto p = list; 326 element = p.value; 327 list = p.next; 328 decrement (p); 329 return true; 330 } 331 return false; 332 } 333 334 /*********************************************************************** 335 336 ************************************************************************/ 337 338 public void add (IContainer!(V) e) 339 { 340 foreach (value; e) 341 add (value); 342 } 343 344 /*********************************************************************** 345 346 ************************************************************************/ 347 348 public size_t remove (IContainer!(V) e) 349 { 350 size_t c; 351 foreach (value; e) 352 if (remove (value)) 353 ++c; 354 return c; 355 } 356 357 /*********************************************************************** 358 359 Clears the HashMap contents. Various attributes are 360 retained, such as the internal table itself. Invoke 361 reset() to drop everything. 362 363 Time complexity: O(n) 364 365 ***********************************************************************/ 366 367 final HashSet clear () 368 { 369 return clear (false); 370 } 371 372 /*********************************************************************** 373 374 Reset the HashSet contents and optionally configure a new 375 heap manager. This releases more memory than clear() does 376 377 Time complexity: O(1) 378 379 ***********************************************************************/ 380 381 final HashSet reset () 382 { 383 clear (true); 384 heap.collect (table); 385 table = null; 386 return this; 387 } 388 389 /*********************************************************************** 390 391 Return the number of buckets 392 393 Time complexity: O(1) 394 395 ***********************************************************************/ 396 397 final size_t buckets () 398 { 399 return table ? table.length : 0; 400 } 401 402 /*********************************************************************** 403 404 Set the number of buckets and resize as required 405 406 Time complexity: O(n) 407 408 ***********************************************************************/ 409 410 final HashSet buckets (size_t cap) 411 { 412 if (cap < Container.defaultInitialBuckets) 413 cap = Container.defaultInitialBuckets; 414 415 if (cap !is buckets) 416 resize (cap); 417 return this; 418 } 419 420 /*********************************************************************** 421 422 Return the resize threshold 423 424 Time complexity: O(1) 425 426 ***********************************************************************/ 427 428 final float threshold () 429 { 430 return loadFactor; 431 } 432 433 /*********************************************************************** 434 435 Set the resize threshold, and resize as required 436 437 Time complexity: O(n) 438 439 ***********************************************************************/ 440 441 final void threshold (float desired) 442 { 443 verify (desired > 0.0); 444 loadFactor = desired; 445 if (table) 446 checkLoad; 447 } 448 449 /*********************************************************************** 450 451 Configure the assigned allocator with the size of each 452 allocation block (number of nodes allocated at one time) 453 and the number of nodes to pre-populate the cache with. 454 455 Time complexity: O(n) 456 457 ***********************************************************************/ 458 459 final HashSet cache (size_t chunk, size_t count=0) 460 { 461 heap.config (chunk, count); 462 return this; 463 } 464 465 /*********************************************************************** 466 467 Copy and return the contained set of values in an array, 468 using the optional dst as a recipient (which is resized 469 as necessary). 470 471 Returns a slice of dst representing the container values. 472 473 Time complexity: O(n) 474 475 ***********************************************************************/ 476 477 final V[] toArray (V[] dst = null) 478 { 479 if (dst.length < count) 480 dst.length = count; 481 482 size_t i = 0; 483 foreach (v; this) 484 dst[i++] = v; 485 return dst [0 .. count]; 486 } 487 488 /*********************************************************************** 489 490 Is this container empty? 491 492 Time complexity: O(1) 493 494 ***********************************************************************/ 495 496 final bool isEmpty () 497 { 498 return count is 0; 499 } 500 501 /*********************************************************************** 502 503 Sanity check 504 505 ***********************************************************************/ 506 507 final HashSet check() 508 { 509 verify(!(table is null && count !is 0)); 510 verify((table is null || table.length > 0)); 511 verify(loadFactor > 0.0f); 512 513 if (table) 514 { 515 size_t c = 0; 516 for (size_t i = 0; i < table.length; ++i) 517 { 518 for (auto p = table[i]; p; p = p.next) 519 { 520 ++c; 521 verify(contains(p.value)); 522 verify(Hash (p.value, table.length) is i); 523 } 524 } 525 verify(c is count); 526 } 527 return this; 528 } 529 530 /*********************************************************************** 531 532 Allocate a node instance. This is used as the default allocator 533 534 ***********************************************************************/ 535 536 private Ref allocate () 537 { 538 return heap.allocate; 539 } 540 541 /*********************************************************************** 542 543 Check to see if we are past load factor threshold. If so, 544 resize so that we are at half of the desired threshold. 545 546 ***********************************************************************/ 547 548 private void checkLoad () 549 { 550 float fc = count; 551 float ft = table.length; 552 if (fc / ft > loadFactor) 553 resize (2 * cast(size_t)(fc / loadFactor) + 1); 554 } 555 556 /*********************************************************************** 557 558 resize table to new capacity, rehashing all elements 559 560 ***********************************************************************/ 561 562 private void resize (size_t newCap) 563 { 564 //Stdout.formatln ("resize {}", newCap); 565 auto newtab = heap.allocate (newCap); 566 mutate; 567 568 foreach (bucket; table) 569 while (bucket) 570 { 571 auto n = bucket.next; 572 auto h = Hash (bucket.value, newCap); 573 bucket.next = newtab[h]; 574 newtab[h] = bucket; 575 bucket = n; 576 } 577 578 // release the prior table and assign new one 579 heap.collect (table); 580 table = newtab; 581 } 582 583 /*********************************************************************** 584 585 Remove the indicated node. We need to traverse buckets 586 for this, since we're singly-linked only. Better to save 587 the per-node memory than to gain a little on each remove 588 589 Used by iterators only 590 591 ***********************************************************************/ 592 593 private bool remove (Ref node, size_t row) 594 { 595 auto hd = table[row]; 596 auto trail = hd; 597 auto p = hd; 598 599 while (p) 600 { 601 auto n = p.next; 602 if (p is node) 603 { 604 decrement (p); 605 if (p is hd) 606 table[row] = n; 607 else 608 trail.next = n; 609 return true; 610 } 611 else 612 { 613 trail = p; 614 p = n; 615 } 616 } 617 return false; 618 } 619 620 /*********************************************************************** 621 622 Clears the HashSet contents. Various attributes are 623 retained, such as the internal table itself. Invoke 624 reset() to drop everything. 625 626 Time complexity: O(n) 627 628 ***********************************************************************/ 629 630 private HashSet clear (bool all) 631 { 632 mutate; 633 634 // collect each node if we can't collect all at once 635 if (heap.collect(all) is false) 636 foreach (ref v; table) 637 while (v) 638 { 639 auto n = v.next; 640 decrement (v); 641 v = n; 642 } 643 644 // retain table, but remove bucket chains 645 foreach (ref v; table) 646 v = null; 647 648 count = 0; 649 return this; 650 } 651 652 /*********************************************************************** 653 654 new element was added 655 656 ***********************************************************************/ 657 658 private void increment() 659 { 660 ++mutation; 661 ++count; 662 } 663 664 /*********************************************************************** 665 666 element was removed 667 668 ***********************************************************************/ 669 670 private void decrement (Ref p) 671 { 672 Reap (p.value); 673 heap.collect (p); 674 ++mutation; 675 --count; 676 } 677 678 /*********************************************************************** 679 680 set was changed 681 682 ***********************************************************************/ 683 684 private void mutate() 685 { 686 ++mutation; 687 } 688 689 /*********************************************************************** 690 691 Iterator with no filtering 692 693 ***********************************************************************/ 694 695 private struct Iterator 696 { 697 size_t row; 698 Ref cell, 699 prior; 700 Ref[] table; 701 HashSet owner; 702 size_t mutation; 703 704 /*************************************************************** 705 706 Did the container change underneath us? 707 708 ***************************************************************/ 709 710 bool valid () 711 { 712 return owner.mutation is mutation; 713 } 714 715 /*************************************************************** 716 717 Accesses the next value, and returns false when 718 there are no further values to traverse 719 720 ***************************************************************/ 721 722 bool next (ref V v) 723 { 724 auto n = next; 725 return (n) ? v = *n, true : false; 726 } 727 728 /*************************************************************** 729 730 Return a pointer to the next value, or null when 731 there are no further values to traverse 732 733 ***************************************************************/ 734 735 V* next () 736 { 737 while (cell is null) 738 if (row < table.length) 739 cell = table [row++]; 740 else 741 return null; 742 743 prior = cell; 744 cell = cell.next; 745 return &prior.value; 746 } 747 748 /*************************************************************** 749 750 Foreach support 751 752 ***************************************************************/ 753 754 int opApply (scope int delegate(ref V value) dg) 755 { 756 int result; 757 758 auto c = cell; 759 loop: while (true) 760 { 761 while (c is null) 762 if (row < table.length) 763 c = table [row++]; 764 else 765 break loop; 766 767 prior = c; 768 c = c.next; 769 if ((result = dg(prior.value)) != 0) 770 break loop; 771 } 772 773 cell = c; 774 return result; 775 } 776 777 /*************************************************************** 778 779 Remove value at the current iterator location 780 781 ***************************************************************/ 782 783 bool remove () 784 { 785 if (prior) 786 if (owner.remove (prior, row-1)) 787 { 788 // ignore this change 789 ++mutation; 790 return true; 791 } 792 793 prior = null; 794 return false; 795 } 796 } 797 } 798 799 800 801 /******************************************************************************* 802 803 *******************************************************************************/ 804 805 debug (HashSet) 806 { 807 import ocean.io.Stdout; 808 import ocean.core.Thread; 809 import ocean.time.StopWatch; 810 811 void main() 812 { 813 // usage examples ... 814 auto set = new HashSet!(char[]); 815 set.add ("foo"); 816 set.add ("bar"); 817 set.add ("wumpus"); 818 819 // implicit generic iteration 820 foreach (value; set) 821 Stdout (value).newline; 822 823 // explicit generic iteration 824 foreach (value; set.iterator) 825 Stdout (value).newline; 826 827 // generic iteration with optional remove 828 auto s = set.iterator; 829 foreach (value; s) 830 {} // s.remove; 831 832 // incremental iteration, with optional remove 833 char[] v; 834 auto iterator = set.iterator; 835 while (iterator.next(v)) 836 {} //iterator.remove; 837 838 // incremental iteration, with optional failfast 839 auto it = set.iterator; 840 while (it.valid && it.next(v)) 841 {} 842 843 // remove specific element 844 set.remove ("wumpus"); 845 846 // remove first element ... 847 while (set.take(v)) 848 Stdout.formatln ("taking {}, {} left", v, set.size); 849 850 851 // setup for benchmark, with a set of integers. We 852 // use a chunk allocator, and presize the bucket[] 853 auto test = new HashSet!(int, Container.hash, Container.reap, Container.Chunk); 854 test.cache (1000, 1_000_000); 855 test.buckets = 1_500_000; 856 static immutable count = 1_000_000; 857 StopWatch w; 858 859 // benchmark adding 860 w.start; 861 for (int i=count; i--;) 862 test.add(i); 863 Stdout.formatln ("{} adds: {}/s", test.size, test.size/w.stop); 864 865 // benchmark reading 866 w.start; 867 for (int i=count; i--;) 868 test.contains(i); 869 Stdout.formatln ("{} lookups: {}/s", test.size, test.size/w.stop); 870 871 // benchmark adding without allocation overhead 872 test.clear; 873 w.start; 874 for (int i=count; i--;) 875 test.add(i); 876 Stdout.formatln ("{} adds (after clear): {}/s", test.size, test.size/w.stop); 877 878 // benchmark duplication 879 w.start; 880 auto dup = test.dup; 881 Stdout.formatln ("{} element dup: {}/s", dup.size, dup.size/w.stop); 882 883 // benchmark iteration 884 w.start; 885 foreach (value; test) {} 886 Stdout.formatln ("{} element iteration: {}/s", test.size, test.size/w.stop); 887 888 test.check; 889 } 890 }