1 /*******************************************************************************
2
3 Based upon Doug Lea's Java collection package
4
5 Copyright:
6 Copyright (c) 2008 Kris Bell.
7 Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
8 All rights reserved.
9
10 License:
11 Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
12 See LICENSE_TANGO.txt for details.
13
14 Version: Apr 2008: Initial release
15
16 Authors: Kris
17
18 *******************************************************************************/
19
20 module ocean.util.container.HashSet;
21
22 import ocean.util.container.Slink;
23
24 public import ocean.util.container.Container;
25
26 import ocean.util.container.model.IContainer;
27
28 /*******************************************************************************
29
30 Hash table implementation of a Set
31
32 ---
33 Iterator iterator ()
34 int opApply (int delegate(ref V value) dg)
35
36 bool add (V element)
37 bool contains (V element)
38 bool take (ref V element)
39 bool remove (V element)
40 size_t remove (IContainer!(V) e)
41 bool replace (V oldElement, V newElement)
42
43 size_t size ()
44 bool isEmpty ()
45 V[] toArray (V[] dst)
46 HashSet dup ()
47 HashSet clear ()
48 HashSet reset ()
49
50 size_t buckets ()
51 void buckets (size_t cap)
52 float threshold ()
53 void threshold (float desired)
54 ---
55
56 *******************************************************************************/
57
58 class HashSet (V, alias Hash = Container.hash,
59 alias Reap = Container.reap,
60 alias Heap = Container.DefaultCollect)
61 : IContainer!(V)
62 {
63 import ocean.core.Verify;
64
65 // use this type for Allocator configuration
66 public alias Slink!(V) Type;
67
68 private alias Type *Ref;
69
70 private alias Heap!(Type) Alloc;
71
72 // Each table entry is a list - null if no table allocated
73 private Ref[] table;
74
75 // number of elements contained
76 private size_t count;
77
78 // the threshold load factor
79 private float loadFactor;
80
81 // configured heap manager
82 private Alloc heap;
83
84 // mutation tag updates on each change
85 private size_t mutation;
86
87 /***********************************************************************
88
89 Construct a HashSet instance
90
91 ***********************************************************************/
92
93 this (float f = Container.defaultLoadFactor)
94 {
95 loadFactor = f;
96 }
97
98 /***********************************************************************
99
100 Clean up when deleted
101
102 ***********************************************************************/
103
104 ~this ()
105 {
106 reset;
107 }
108
109 /***********************************************************************
110
111 Return a generic iterator for contained elements
112
113 ***********************************************************************/
114
115 final Iterator iterator ()
116 {
117 Iterator i = void;
118 i.mutation = mutation;
119 i.table = table;
120 i.owner = this;
121 i.cell = null;
122 i.row = 0;
123 return i;
124 }
125
126 /***********************************************************************
127
128
129 ***********************************************************************/
130
131 final int opApply (scope int delegate(ref V value) dg)
132 {
133 return iterator.opApply (dg);
134 }
135
136 /***********************************************************************
137
138 Return the number of elements contained
139
140 ***********************************************************************/
141
142 final size_t size ()
143 {
144 return count;
145 }
146
147 /***********************************************************************
148
149 Add a new element to the set. Does not add if there is an
150 equivalent already present. Returns true where an element
151 is added, false where it already exists
152
153 Time complexity: O(1) average; O(n) worst.
154
155 ***********************************************************************/
156
157 final bool add (V element)
158 {
159 if (table is null)
160 resize (Container.defaultInitialBuckets);
161
162 auto h = Hash (element, table.length);
163 auto hd = table[h];
164
165 if (hd && hd.find (element))
166 return false;
167
168 table[h] = allocate.set (element, hd);
169 increment;
170
171 // only check if bin was nonempty
172 if (hd)
173 checkLoad;
174 return true;
175 }
176
177 /***********************************************************************
178
179 Does this set contain the given element?
180
181 Time complexity: O(1) average; O(n) worst
182
183 ***********************************************************************/
184
185 final bool contains (V element)
186 {
187 if (count)
188 {
189 auto p = table[Hash (element, table.length)];
190 if (p && p.find (element))
191 return true;
192 }
193 return false;
194 }
195
196 /***********************************************************************
197
198 Make an independent copy of the container. Does not clone
199 elements
200
201 Time complexity: O(n)
202
203 ***********************************************************************/
204
205 final HashSet dup ()
206 {
207 auto clone = new HashSet!(V, Hash, Reap, Heap) (loadFactor);
208
209 if (count)
210 {
211 clone.buckets (buckets);
212
213 foreach (value; iterator)
214 clone.add (value);
215 }
216 return clone;
217 }
218
219 /***********************************************************************
220
221 Remove the provided element. Returns true if found, false
222 otherwise
223
224 Time complexity: O(1) average; O(n) worst
225
226 ***********************************************************************/
227
228 final size_t remove (V element, bool all)
229 {
230 return remove(element) ? 1 : 0;
231 }
232
233 /***********************************************************************
234
235 Remove the provided element. Returns true if found, false
236 otherwise
237
238 Time complexity: O(1) average; O(n) worst
239
240 ***********************************************************************/
241
242 final bool remove (V element)
243 {
244 if (count)
245 {
246 auto h = Hash (element, table.length);
247 auto hd = table[h];
248 auto trail = hd;
249 auto p = hd;
250
251 while (p)
252 {
253 auto n = p.next;
254 if (element == p.value)
255 {
256 decrement (p);
257 if (p is table[h])
258 {
259 table[h] = n;
260 trail = n;
261 }
262 else
263 trail.next = n;
264 return true;
265 }
266 else
267 {
268 trail = p;
269 p = n;
270 }
271 }
272 }
273 return false;
274 }
275
276 /***********************************************************************
277
278 Replace the first instance of oldElement with newElement.
279 Returns true if oldElement was found and replaced, false
280 otherwise.
281
282 ***********************************************************************/
283
284 final size_t replace (V oldElement, V newElement, bool all)
285 {
286 return replace (oldElement, newElement) ? 1 : 0;
287 }
288
289 /***********************************************************************
290
291 Replace the first instance of oldElement with newElement.
292 Returns true if oldElement was found and replaced, false
293 otherwise.
294
295 ***********************************************************************/
296
297 final bool replace (V oldElement, V newElement)
298 {
299
300 if (count && oldElement != newElement)
301 if (contains (oldElement))
302 {
303 remove (oldElement);
304 add (newElement);
305 return true;
306 }
307 return false;
308 }
309
310 /***********************************************************************
311
312 Remove and expose the first element. Returns false when no
313 more elements are contained
314
315 Time complexity: O(n)
316
317 ***********************************************************************/
318
319 final bool take (ref V element)
320 {
321 if (count)
322 foreach (ref list; table)
323 if (list)
324 {
325 auto p = list;
326 element = p.value;
327 list = p.next;
328 decrement (p);
329 return true;
330 }
331 return false;
332 }
333
334 /***********************************************************************
335
336 ************************************************************************/
337
338 public void add (IContainer!(V) e)
339 {
340 foreach (value; e)
341 add (value);
342 }
343
344 /***********************************************************************
345
346 ************************************************************************/
347
348 public size_t remove (IContainer!(V) e)
349 {
350 size_t c;
351 foreach (value; e)
352 if (remove (value))
353 ++c;
354 return c;
355 }
356
357 /***********************************************************************
358
359 Clears the HashMap contents. Various attributes are
360 retained, such as the internal table itself. Invoke
361 reset() to drop everything.
362
363 Time complexity: O(n)
364
365 ***********************************************************************/
366
367 final HashSet clear ()
368 {
369 return clear (false);
370 }
371
372 /***********************************************************************
373
374 Reset the HashSet contents and optionally configure a new
375 heap manager. This releases more memory than clear() does
376
377 Time complexity: O(1)
378
379 ***********************************************************************/
380
381 final HashSet reset ()
382 {
383 clear (true);
384 heap.collect (table);
385 table = null;
386 return this;
387 }
388
389 /***********************************************************************
390
391 Return the number of buckets
392
393 Time complexity: O(1)
394
395 ***********************************************************************/
396
397 final size_t buckets ()
398 {
399 return table ? table.length : 0;
400 }
401
402 /***********************************************************************
403
404 Set the number of buckets and resize as required
405
406 Time complexity: O(n)
407
408 ***********************************************************************/
409
410 final HashSet buckets (size_t cap)
411 {
412 if (cap < Container.defaultInitialBuckets)
413 cap = Container.defaultInitialBuckets;
414
415 if (cap !is buckets)
416 resize (cap);
417 return this;
418 }
419
420 /***********************************************************************
421
422 Return the resize threshold
423
424 Time complexity: O(1)
425
426 ***********************************************************************/
427
428 final float threshold ()
429 {
430 return loadFactor;
431 }
432
433 /***********************************************************************
434
435 Set the resize threshold, and resize as required
436
437 Time complexity: O(n)
438
439 ***********************************************************************/
440
441 final void threshold (float desired)
442 {
443 verify (desired > 0.0);
444 loadFactor = desired;
445 if (table)
446 checkLoad;
447 }
448
449 /***********************************************************************
450
451 Configure the assigned allocator with the size of each
452 allocation block (number of nodes allocated at one time)
453 and the number of nodes to pre-populate the cache with.
454
455 Time complexity: O(n)
456
457 ***********************************************************************/
458
459 final HashSet cache (size_t chunk, size_t count=0)
460 {
461 heap.config (chunk, count);
462 return this;
463 }
464
465 /***********************************************************************
466
467 Copy and return the contained set of values in an array,
468 using the optional dst as a recipient (which is resized
469 as necessary).
470
471 Returns a slice of dst representing the container values.
472
473 Time complexity: O(n)
474
475 ***********************************************************************/
476
477 final V[] toArray (V[] dst = null)
478 {
479 if (dst.length < count)
480 dst.length = count;
481
482 size_t i = 0;
483 foreach (v; this)
484 dst[i++] = v;
485 return dst [0 .. count];
486 }
487
488 /***********************************************************************
489
490 Is this container empty?
491
492 Time complexity: O(1)
493
494 ***********************************************************************/
495
496 final bool isEmpty ()
497 {
498 return count is 0;
499 }
500
501 /***********************************************************************
502
503 Sanity check
504
505 ***********************************************************************/
506
507 final HashSet check()
508 {
509 verify(!(table is null && count !is 0));
510 verify((table is null || table.length > 0));
511 verify(loadFactor > 0.0f);
512
513 if (table)
514 {
515 size_t c = 0;
516 for (size_t i = 0; i < table.length; ++i)
517 {
518 for (auto p = table[i]; p; p = p.next)
519 {
520 ++c;
521 verify(contains(p.value));
522 verify(Hash (p.value, table.length) is i);
523 }
524 }
525 verify(c is count);
526 }
527 return this;
528 }
529
530 /***********************************************************************
531
532 Allocate a node instance. This is used as the default allocator
533
534 ***********************************************************************/
535
536 private Ref allocate ()
537 {
538 return heap.allocate;
539 }
540
541 /***********************************************************************
542
543 Check to see if we are past load factor threshold. If so,
544 resize so that we are at half of the desired threshold.
545
546 ***********************************************************************/
547
548 private void checkLoad ()
549 {
550 float fc = count;
551 float ft = table.length;
552 if (fc / ft > loadFactor)
553 resize (2 * cast(size_t)(fc / loadFactor) + 1);
554 }
555
556 /***********************************************************************
557
558 resize table to new capacity, rehashing all elements
559
560 ***********************************************************************/
561
562 private void resize (size_t newCap)
563 {
564 //Stdout.formatln ("resize {}", newCap);
565 auto newtab = heap.allocate (newCap);
566 mutate;
567
568 foreach (bucket; table)
569 while (bucket)
570 {
571 auto n = bucket.next;
572 auto h = Hash (bucket.value, newCap);
573 bucket.next = newtab[h];
574 newtab[h] = bucket;
575 bucket = n;
576 }
577
578 // release the prior table and assign new one
579 heap.collect (table);
580 table = newtab;
581 }
582
583 /***********************************************************************
584
585 Remove the indicated node. We need to traverse buckets
586 for this, since we're singly-linked only. Better to save
587 the per-node memory than to gain a little on each remove
588
589 Used by iterators only
590
591 ***********************************************************************/
592
593 private bool remove (Ref node, size_t row)
594 {
595 auto hd = table[row];
596 auto trail = hd;
597 auto p = hd;
598
599 while (p)
600 {
601 auto n = p.next;
602 if (p is node)
603 {
604 decrement (p);
605 if (p is hd)
606 table[row] = n;
607 else
608 trail.next = n;
609 return true;
610 }
611 else
612 {
613 trail = p;
614 p = n;
615 }
616 }
617 return false;
618 }
619
620 /***********************************************************************
621
622 Clears the HashSet contents. Various attributes are
623 retained, such as the internal table itself. Invoke
624 reset() to drop everything.
625
626 Time complexity: O(n)
627
628 ***********************************************************************/
629
630 private HashSet clear (bool all)
631 {
632 mutate;
633
634 // collect each node if we can't collect all at once
635 if (heap.collect(all) is false)
636 foreach (ref v; table)
637 while (v)
638 {
639 auto n = v.next;
640 decrement (v);
641 v = n;
642 }
643
644 // retain table, but remove bucket chains
645 foreach (ref v; table)
646 v = null;
647
648 count = 0;
649 return this;
650 }
651
652 /***********************************************************************
653
654 new element was added
655
656 ***********************************************************************/
657
658 private void increment()
659 {
660 ++mutation;
661 ++count;
662 }
663
664 /***********************************************************************
665
666 element was removed
667
668 ***********************************************************************/
669
670 private void decrement (Ref p)
671 {
672 Reap (p.value);
673 heap.collect (p);
674 ++mutation;
675 --count;
676 }
677
678 /***********************************************************************
679
680 set was changed
681
682 ***********************************************************************/
683
684 private void mutate()
685 {
686 ++mutation;
687 }
688
689 /***********************************************************************
690
691 Iterator with no filtering
692
693 ***********************************************************************/
694
695 private struct Iterator
696 {
697 size_t row;
698 Ref cell,
699 prior;
700 Ref[] table;
701 HashSet owner;
702 size_t mutation;
703
704 /***************************************************************
705
706 Did the container change underneath us?
707
708 ***************************************************************/
709
710 bool valid ()
711 {
712 return owner.mutation is mutation;
713 }
714
715 /***************************************************************
716
717 Accesses the next value, and returns false when
718 there are no further values to traverse
719
720 ***************************************************************/
721
722 bool next (ref V v)
723 {
724 auto n = next;
725 return (n) ? v = *n, true : false;
726 }
727
728 /***************************************************************
729
730 Return a pointer to the next value, or null when
731 there are no further values to traverse
732
733 ***************************************************************/
734
735 V* next ()
736 {
737 while (cell is null)
738 if (row < table.length)
739 cell = table [row++];
740 else
741 return null;
742
743 prior = cell;
744 cell = cell.next;
745 return &prior.value;
746 }
747
748 /***************************************************************
749
750 Foreach support
751
752 ***************************************************************/
753
754 int opApply (scope int delegate(ref V value) dg)
755 {
756 int result;
757
758 auto c = cell;
759 loop: while (true)
760 {
761 while (c is null)
762 if (row < table.length)
763 c = table [row++];
764 else
765 break loop;
766
767 prior = c;
768 c = c.next;
769 if ((result = dg(prior.value)) != 0)
770 break loop;
771 }
772
773 cell = c;
774 return result;
775 }
776
777 /***************************************************************
778
779 Remove value at the current iterator location
780
781 ***************************************************************/
782
783 bool remove ()
784 {
785 if (prior)
786 if (owner.remove (prior, row-1))
787 {
788 // ignore this change
789 ++mutation;
790 return true;
791 }
792
793 prior = null;
794 return false;
795 }
796 }
797 }
798
799
800
801 /*******************************************************************************
802
803 *******************************************************************************/
804
805 debug (HashSet)
806 {
807 import ocean.io.Stdout;
808 import ocean.core.Thread;
809 import ocean.time.StopWatch;
810
811 void main()
812 {
813 // usage examples ...
814 auto set = new HashSet!(char[]);
815 set.add ("foo");
816 set.add ("bar");
817 set.add ("wumpus");
818
819 // implicit generic iteration
820 foreach (value; set)
821 Stdout (value).newline;
822
823 // explicit generic iteration
824 foreach (value; set.iterator)
825 Stdout (value).newline;
826
827 // generic iteration with optional remove
828 auto s = set.iterator;
829 foreach (value; s)
830 {} // s.remove;
831
832 // incremental iteration, with optional remove
833 char[] v;
834 auto iterator = set.iterator;
835 while (iterator.next(v))
836 {} //iterator.remove;
837
838 // incremental iteration, with optional failfast
839 auto it = set.iterator;
840 while (it.valid && it.next(v))
841 {}
842
843 // remove specific element
844 set.remove ("wumpus");
845
846 // remove first element ...
847 while (set.take(v))
848 Stdout.formatln ("taking {}, {} left", v, set.size);
849
850
851 // setup for benchmark, with a set of integers. We
852 // use a chunk allocator, and presize the bucket[]
853 auto test = new HashSet!(int, Container.hash, Container.reap, Container.Chunk);
854 test.cache (1000, 1_000_000);
855 test.buckets = 1_500_000;
856 static immutable count = 1_000_000;
857 StopWatch w;
858
859 // benchmark adding
860 w.start;
861 for (int i=count; i--;)
862 test.add(i);
863 Stdout.formatln ("{} adds: {}/s", test.size, test.size/w.stop);
864
865 // benchmark reading
866 w.start;
867 for (int i=count; i--;)
868 test.contains(i);
869 Stdout.formatln ("{} lookups: {}/s", test.size, test.size/w.stop);
870
871 // benchmark adding without allocation overhead
872 test.clear;
873 w.start;
874 for (int i=count; i--;)
875 test.add(i);
876 Stdout.formatln ("{} adds (after clear): {}/s", test.size, test.size/w.stop);
877
878 // benchmark duplication
879 w.start;
880 auto dup = test.dup;
881 Stdout.formatln ("{} element dup: {}/s", dup.size, dup.size/w.stop);
882
883 // benchmark iteration
884 w.start;
885 foreach (value; test) {}
886 Stdout.formatln ("{} element iteration: {}/s", test.size, test.size/w.stop);
887
888 test.check;
889 }
890 }