1 /*******************************************************************************
2 
3         Based upon Doug Lea's Java collection package
4 
5         Copyright:
6             Copyright (c) 2008 Kris Bell.
7             Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
8             All rights reserved.
9 
10         License:
11             Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
12             See LICENSE_TANGO.txt for details.
13 
14         Version: Apr 2008: Initial release
15 
16         Authors: Kris
17 
18 *******************************************************************************/
19 
20 module ocean.util.container.HashSet;
21 
22 import ocean.util.container.Slink;
23 
24 public  import ocean.util.container.Container;
25 
26 import ocean.util.container.model.IContainer;
27 
28 /*******************************************************************************
29 
30         Hash table implementation of a Set
31 
32         ---
33         Iterator iterator ()
34         int opApply (int delegate(ref V value) dg)
35 
36         bool add (V element)
37         bool contains (V element)
38         bool take (ref V element)
39         bool remove (V element)
40         size_t remove (IContainer!(V) e)
41         bool replace (V oldElement, V newElement)
42 
43         size_t size ()
44         bool isEmpty ()
45         V[] toArray (V[] dst)
46         HashSet dup ()
47         HashSet clear ()
48         HashSet reset ()
49 
50         size_t buckets ()
51         void buckets (size_t cap)
52         float threshold ()
53         void threshold (float desired)
54         ---
55 
56 *******************************************************************************/
57 
58 class HashSet (V, alias Hash = Container.hash,
59                   alias Reap = Container.reap,
60                   alias Heap = Container.DefaultCollect)
61                   : IContainer!(V)
62 {
63         import ocean.core.Verify;
64 
65         // use this type for Allocator configuration
66         public alias Slink!(V)  Type;
67 
68         private alias Type      *Ref;
69 
70         private alias Heap!(Type) Alloc;
71 
72         // Each table entry is a list - null if no table allocated
73         private Ref[]            table;
74 
75         // number of elements contained
76         private size_t          count;
77 
78         // the threshold load factor
79         private float           loadFactor;
80 
81         // configured heap manager
82         private Alloc           heap;
83 
84         // mutation tag updates on each change
85         private size_t            mutation;
86 
87         /***********************************************************************
88 
89                 Construct a HashSet instance
90 
91         ***********************************************************************/
92 
93         this (float f = Container.defaultLoadFactor)
94         {
95                 loadFactor = f;
96         }
97 
98         /***********************************************************************
99 
100                 Clean up when deleted
101 
102         ***********************************************************************/
103 
104         ~this ()
105         {
106                 reset;
107         }
108 
109         /***********************************************************************
110 
111                 Return a generic iterator for contained elements
112 
113         ***********************************************************************/
114 
115         final Iterator iterator ()
116         {
117                 Iterator i = void;
118                 i.mutation = mutation;
119                 i.table = table;
120                 i.owner = this;
121                 i.cell = null;
122                 i.row = 0;
123                 return i;
124         }
125 
126         /***********************************************************************
127 
128 
129         ***********************************************************************/
130 
131         final int opApply (scope int delegate(ref V value) dg)
132         {
133                 return iterator.opApply (dg);
134         }
135 
136         /***********************************************************************
137 
138                 Return the number of elements contained
139 
140         ***********************************************************************/
141 
142         final size_t size ()
143         {
144                 return count;
145         }
146 
147         /***********************************************************************
148 
149                 Add a new element to the set. Does not add if there is an
150                 equivalent already present. Returns true where an element
151                 is added, false where it already exists
152 
153                 Time complexity: O(1) average; O(n) worst.
154 
155         ***********************************************************************/
156 
157         final bool add (V element)
158         {
159                 if (table is null)
160                     resize (Container.defaultInitialBuckets);
161 
162                 auto h = Hash  (element, table.length);
163                 auto hd = table[h];
164 
165                 if (hd && hd.find (element))
166                     return false;
167 
168                 table[h] = allocate.set (element, hd);
169                 increment;
170 
171                 // only check if bin was nonempty
172                 if (hd)
173                     checkLoad;
174                 return true;
175         }
176 
177         /***********************************************************************
178 
179                 Does this set contain the given element?
180 
181                 Time complexity: O(1) average; O(n) worst
182 
183         ***********************************************************************/
184 
185         final bool contains (V element)
186         {
187                 if (count)
188                    {
189                    auto p = table[Hash (element, table.length)];
190                    if (p && p.find (element))
191                        return true;
192                    }
193                 return false;
194         }
195 
196         /***********************************************************************
197 
198                 Make an independent copy of the container. Does not clone
199                 elements
200 
201                 Time complexity: O(n)
202 
203         ***********************************************************************/
204 
205         final HashSet dup ()
206         {
207                 auto clone = new HashSet!(V, Hash, Reap, Heap) (loadFactor);
208 
209                 if (count)
210                    {
211                    clone.buckets (buckets);
212 
213                    foreach (value; iterator)
214                             clone.add (value);
215                    }
216                 return clone;
217         }
218 
219         /***********************************************************************
220 
221                 Remove the provided element. Returns true if found, false
222                 otherwise
223 
224                 Time complexity: O(1) average; O(n) worst
225 
226         ***********************************************************************/
227 
228         final size_t remove (V element, bool all)
229         {
230                 return remove(element) ? 1 : 0;
231         }
232 
233         /***********************************************************************
234 
235                 Remove the provided element. Returns true if found, false
236                 otherwise
237 
238                 Time complexity: O(1) average; O(n) worst
239 
240         ***********************************************************************/
241 
242         final bool remove (V element)
243         {
244                 if (count)
245                    {
246                    auto h = Hash (element, table.length);
247                    auto hd = table[h];
248                    auto trail = hd;
249                    auto p = hd;
250 
251                    while (p)
252                          {
253                          auto n = p.next;
254                          if (element == p.value)
255                             {
256                             decrement (p);
257                             if (p is table[h])
258                                {
259                                table[h] = n;
260                                trail = n;
261                                }
262                             else
263                                trail.next = n;
264                             return true;
265                             }
266                          else
267                             {
268                             trail = p;
269                             p = n;
270                             }
271                          }
272                    }
273                 return false;
274         }
275 
276         /***********************************************************************
277 
278                 Replace the first instance of oldElement with newElement.
279                 Returns true if oldElement was found and replaced, false
280                 otherwise.
281 
282         ***********************************************************************/
283 
284         final size_t replace (V oldElement, V newElement, bool all)
285         {
286                 return replace (oldElement, newElement) ? 1 : 0;
287         }
288 
289         /***********************************************************************
290 
291                 Replace the first instance of oldElement with newElement.
292                 Returns true if oldElement was found and replaced, false
293                 otherwise.
294 
295         ***********************************************************************/
296 
297         final bool replace (V oldElement, V newElement)
298         {
299 
300                 if (count && oldElement != newElement)
301                    if (contains (oldElement))
302                       {
303                       remove (oldElement);
304                       add (newElement);
305                       return true;
306                       }
307                 return false;
308         }
309 
310         /***********************************************************************
311 
312                 Remove and expose the first element. Returns false when no
313                 more elements are contained
314 
315                 Time complexity: O(n)
316 
317         ***********************************************************************/
318 
319         final bool take (ref V element)
320         {
321                 if (count)
322                     foreach (ref list; table)
323                              if (list)
324                                 {
325                                 auto p = list;
326                                 element = p.value;
327                                 list = p.next;
328                                 decrement (p);
329                                 return true;
330                                 }
331                 return false;
332         }
333 
334         /***********************************************************************
335 
336         ************************************************************************/
337 
338         public void add (IContainer!(V) e)
339         {
340                 foreach (value; e)
341                          add (value);
342         }
343 
344         /***********************************************************************
345 
346         ************************************************************************/
347 
348         public size_t remove (IContainer!(V) e)
349         {
350                 size_t c;
351                 foreach (value; e)
352                          if (remove (value))
353                              ++c;
354                 return c;
355         }
356 
357         /***********************************************************************
358 
359                 Clears the HashMap contents. Various attributes are
360                 retained, such as the internal table itself. Invoke
361                 reset() to drop everything.
362 
363                 Time complexity: O(n)
364 
365         ***********************************************************************/
366 
367         final HashSet clear ()
368         {
369                 return clear (false);
370         }
371 
372         /***********************************************************************
373 
374                 Reset the HashSet contents and optionally configure a new
375                 heap manager. This releases more memory than clear() does
376 
377                 Time complexity: O(1)
378 
379         ***********************************************************************/
380 
381         final HashSet reset ()
382         {
383                 clear (true);
384                 heap.collect (table);
385                 table = null;
386                 return this;
387         }
388 
389         /***********************************************************************
390 
391                 Return the number of buckets
392 
393                 Time complexity: O(1)
394 
395         ***********************************************************************/
396 
397         final size_t buckets ()
398         {
399                 return table ? table.length : 0;
400         }
401 
402         /***********************************************************************
403 
404                 Set the number of buckets and resize as required
405 
406                 Time complexity: O(n)
407 
408         ***********************************************************************/
409 
410         final HashSet buckets (size_t cap)
411         {
412                 if (cap < Container.defaultInitialBuckets)
413                     cap = Container.defaultInitialBuckets;
414 
415                 if (cap !is buckets)
416                     resize (cap);
417                 return this;
418         }
419 
420         /***********************************************************************
421 
422                 Return the resize threshold
423 
424                 Time complexity: O(1)
425 
426         ***********************************************************************/
427 
428         final float threshold ()
429         {
430                 return loadFactor;
431         }
432 
433         /***********************************************************************
434 
435                 Set the resize threshold, and resize as required
436 
437                 Time complexity: O(n)
438 
439         ***********************************************************************/
440 
441         final void threshold (float desired)
442         {
443                 verify (desired > 0.0);
444                 loadFactor = desired;
445                 if (table)
446                     checkLoad;
447         }
448 
449         /***********************************************************************
450 
451                 Configure the assigned allocator with the size of each
452                 allocation block (number of nodes allocated at one time)
453                 and the number of nodes to pre-populate the cache with.
454 
455                 Time complexity: O(n)
456 
457         ***********************************************************************/
458 
459         final HashSet cache (size_t chunk, size_t count=0)
460         {
461                 heap.config (chunk, count);
462                 return this;
463         }
464 
465         /***********************************************************************
466 
467                 Copy and return the contained set of values in an array,
468                 using the optional dst as a recipient (which is resized
469                 as necessary).
470 
471                 Returns a slice of dst representing the container values.
472 
473                 Time complexity: O(n)
474 
475         ***********************************************************************/
476 
477         final V[] toArray (V[] dst = null)
478         {
479                 if (dst.length < count)
480                     dst.length = count;
481 
482                 size_t i = 0;
483                 foreach (v; this)
484                          dst[i++] = v;
485                 return dst [0 .. count];
486         }
487 
488         /***********************************************************************
489 
490                 Is this container empty?
491 
492                 Time complexity: O(1)
493 
494         ***********************************************************************/
495 
496         final bool isEmpty ()
497         {
498                 return count is 0;
499         }
500 
501         /***********************************************************************
502 
503                 Sanity check
504 
505         ***********************************************************************/
506 
507         final HashSet check()
508         {
509                 verify(!(table is null && count !is 0));
510                 verify((table is null || table.length > 0));
511                 verify(loadFactor > 0.0f);
512 
513                 if (table)
514                    {
515                    size_t c = 0;
516                    for (size_t i = 0; i < table.length; ++i)
517                        {
518                        for (auto p = table[i]; p; p = p.next)
519                            {
520                            ++c;
521                            verify(contains(p.value));
522                            verify(Hash (p.value, table.length) is i);
523                            }
524                        }
525                    verify(c is count);
526                    }
527                 return this;
528         }
529 
530         /***********************************************************************
531 
532                 Allocate a node instance. This is used as the default allocator
533 
534         ***********************************************************************/
535 
536         private Ref allocate ()
537         {
538                 return heap.allocate;
539         }
540 
541         /***********************************************************************
542 
543                  Check to see if we are past load factor threshold. If so,
544                  resize so that we are at half of the desired threshold.
545 
546         ***********************************************************************/
547 
548         private void checkLoad ()
549         {
550                 float fc = count;
551                 float ft = table.length;
552                 if (fc / ft > loadFactor)
553                     resize (2 * cast(size_t)(fc / loadFactor) + 1);
554         }
555 
556         /***********************************************************************
557 
558                 resize table to new capacity, rehashing all elements
559 
560         ***********************************************************************/
561 
562         private void resize (size_t newCap)
563         {
564                 //Stdout.formatln ("resize {}", newCap);
565                 auto newtab = heap.allocate (newCap);
566                 mutate;
567 
568                 foreach (bucket; table)
569                          while (bucket)
570                                {
571                                auto n = bucket.next;
572                                auto h = Hash (bucket.value, newCap);
573                                bucket.next = newtab[h];
574                                newtab[h] = bucket;
575                                bucket = n;
576                                }
577 
578                 // release the prior table and assign new one
579                 heap.collect (table);
580                 table = newtab;
581         }
582 
583         /***********************************************************************
584 
585                 Remove the indicated node. We need to traverse buckets
586                 for this, since we're singly-linked only. Better to save
587                 the per-node memory than to gain a little on each remove
588 
589                 Used by iterators only
590 
591         ***********************************************************************/
592 
593         private bool remove (Ref node, size_t row)
594         {
595                 auto hd = table[row];
596                 auto trail = hd;
597                 auto p = hd;
598 
599                 while (p)
600                       {
601                       auto n = p.next;
602                       if (p is node)
603                          {
604                          decrement (p);
605                          if (p is hd)
606                              table[row] = n;
607                          else
608                             trail.next = n;
609                          return true;
610                          }
611                       else
612                          {
613                          trail = p;
614                          p = n;
615                          }
616                       }
617                 return false;
618         }
619 
620         /***********************************************************************
621 
622                 Clears the HashSet contents. Various attributes are
623                 retained, such as the internal table itself. Invoke
624                 reset() to drop everything.
625 
626                 Time complexity: O(n)
627 
628         ***********************************************************************/
629 
630         private HashSet clear (bool all)
631         {
632                 mutate;
633 
634                 // collect each node if we can't collect all at once
635                 if (heap.collect(all) is false)
636                     foreach (ref v; table)
637                              while (v)
638                                    {
639                                    auto n = v.next;
640                                    decrement (v);
641                                    v = n;
642                                    }
643 
644                 // retain table, but remove bucket chains
645                 foreach (ref v; table)
646                          v = null;
647 
648                 count = 0;
649                 return this;
650         }
651 
652         /***********************************************************************
653 
654                 new element was added
655 
656         ***********************************************************************/
657 
658         private void increment()
659         {
660                 ++mutation;
661                 ++count;
662         }
663 
664         /***********************************************************************
665 
666                 element was removed
667 
668         ***********************************************************************/
669 
670         private void decrement (Ref p)
671         {
672                 Reap (p.value);
673                 heap.collect (p);
674                 ++mutation;
675                 --count;
676         }
677 
678         /***********************************************************************
679 
680                 set was changed
681 
682         ***********************************************************************/
683 
684         private void mutate()
685         {
686                 ++mutation;
687         }
688 
689         /***********************************************************************
690 
691                 Iterator with no filtering
692 
693         ***********************************************************************/
694 
695         private struct Iterator
696         {
697                 size_t  row;
698                 Ref     cell,
699                         prior;
700                 Ref[]   table;
701                 HashSet owner;
702                 size_t  mutation;
703 
704                 /***************************************************************
705 
706                         Did the container change underneath us?
707 
708                 ***************************************************************/
709 
710                 bool valid ()
711                 {
712                         return owner.mutation is mutation;
713                 }
714 
715                 /***************************************************************
716 
717                         Accesses the next value, and returns false when
718                         there are no further values to traverse
719 
720                 ***************************************************************/
721 
722                 bool next (ref V v)
723                 {
724                         auto n = next;
725                         return (n) ? v = *n, true : false;
726                 }
727 
728                 /***************************************************************
729 
730                         Return a pointer to the next value, or null when
731                         there are no further values to traverse
732 
733                 ***************************************************************/
734 
735                 V* next ()
736                 {
737                         while (cell is null)
738                                if (row < table.length)
739                                    cell = table [row++];
740                                else
741                                   return null;
742 
743                         prior = cell;
744                         cell = cell.next;
745                         return &prior.value;
746                 }
747 
748                 /***************************************************************
749 
750                         Foreach support
751 
752                 ***************************************************************/
753 
754                 int opApply (scope int delegate(ref V value) dg)
755                 {
756                         int result;
757 
758                         auto c = cell;
759                         loop: while (true)
760                               {
761                               while (c is null)
762                                      if (row < table.length)
763                                          c = table [row++];
764                                      else
765                                         break loop;
766 
767                               prior = c;
768                               c = c.next;
769                               if ((result = dg(prior.value)) != 0)
770                                    break loop;
771                               }
772 
773                         cell = c;
774                         return result;
775                 }
776 
777                 /***************************************************************
778 
779                         Remove value at the current iterator location
780 
781                 ***************************************************************/
782 
783                 bool remove ()
784                 {
785                         if (prior)
786                             if (owner.remove (prior, row-1))
787                                {
788                                // ignore this change
789                                ++mutation;
790                                return true;
791                                }
792 
793                         prior = null;
794                         return false;
795                 }
796         }
797 }
798 
799 
800 
801 /*******************************************************************************
802 
803 *******************************************************************************/
804 
805 debug (HashSet)
806 {
807         import ocean.io.Stdout;
808         import ocean.core.Thread;
809         import ocean.time.StopWatch;
810 
811         void main()
812         {
813                 // usage examples ...
814                 auto set = new HashSet!(char[]);
815                 set.add ("foo");
816                 set.add ("bar");
817                 set.add ("wumpus");
818 
819                 // implicit generic iteration
820                 foreach (value; set)
821                          Stdout (value).newline;
822 
823                 // explicit generic iteration
824                 foreach (value; set.iterator)
825                          Stdout (value).newline;
826 
827                 // generic iteration with optional remove
828                 auto s = set.iterator;
829                 foreach (value; s)
830                         {} // s.remove;
831 
832                 // incremental iteration, with optional remove
833                 char[] v;
834                 auto iterator = set.iterator;
835                 while (iterator.next(v))
836                       {} //iterator.remove;
837 
838                 // incremental iteration, with optional failfast
839                 auto it = set.iterator;
840                 while (it.valid && it.next(v))
841                       {}
842 
843                 // remove specific element
844                 set.remove ("wumpus");
845 
846                 // remove first element ...
847                 while (set.take(v))
848                        Stdout.formatln ("taking {}, {} left", v, set.size);
849 
850 
851                 // setup for benchmark, with a set of integers. We
852                 // use a chunk allocator, and presize the bucket[]
853                 auto test = new HashSet!(int, Container.hash, Container.reap, Container.Chunk);
854                 test.cache (1000, 1_000_000);
855                 test.buckets = 1_500_000;
856                 static immutable count = 1_000_000;
857                 StopWatch w;
858 
859                 // benchmark adding
860                 w.start;
861                 for (int i=count; i--;)
862                      test.add(i);
863                 Stdout.formatln ("{} adds: {}/s", test.size, test.size/w.stop);
864 
865                 // benchmark reading
866                 w.start;
867                 for (int i=count; i--;)
868                      test.contains(i);
869                 Stdout.formatln ("{} lookups: {}/s", test.size, test.size/w.stop);
870 
871                 // benchmark adding without allocation overhead
872                 test.clear;
873                 w.start;
874                 for (int i=count; i--;)
875                      test.add(i);
876                 Stdout.formatln ("{} adds (after clear): {}/s", test.size, test.size/w.stop);
877 
878                 // benchmark duplication
879                 w.start;
880                 auto dup = test.dup;
881                 Stdout.formatln ("{} element dup: {}/s", dup.size, dup.size/w.stop);
882 
883                 // benchmark iteration
884                 w.start;
885                 foreach (value; test) {}
886                 Stdout.formatln ("{} element iteration: {}/s", test.size, test.size/w.stop);
887 
888                 test.check;
889         }
890 }