/*******************************************************************************

    Copyright:
        Copyright (c) 2004-2009 Tango contributors.
        Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.
    License:
        Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
        See LICENSE_TANGO.txt for details.
*******************************************************************************/


module ocean.util.container.more.HashFile;

import ocean.io.device.FileMap : MappedFile;

/******************************************************************************

        HashFile implements a simple mechanism to store and recover a
        large quantity of data for the duration of the hosting process.
        It is intended to act as a local cache for a remote data-source,
        or as a spillover area for large in-memory cache instances.

        Note that any and all stored data is rendered invalid the moment
        a HashFile object is garbage-collected.

        The implementation follows a fixed-capacity record scheme, where
        content can be rewritten in-place until said capacity is reached.
        At that point the altered content is moved to a larger-capacity
        record at end-of-file, and a hole remains at the prior location.
        These holes are not collected, since the lifespan of a HashFile
        is limited to that of the host process.

        All index keys must be unique. Writing to the HashFile with an
        existing key will overwrite any previous content. What follows
        is a contrived example:

        ---
        alias HashFile!(char[], char[]) Bucket;

        auto bucket = new Bucket ("bucket.bin", Bucket.HalfK);

        // insert some data, and retrieve it again
        auto text = "this is a test";
        bucket.put ("a key", text);
        auto b = cast(char[]) bucket.get ("a key");

        assert (b == text);
        bucket.close;
        ---

******************************************************************************/

class HashFile(K, V)
{
        /**********************************************************************

                Define the capacity (block-size) of each record

        **********************************************************************/

        struct BlockSize
        {
                int capacity;
        }

        // backing storage
        private MappedFile              file;

        // memory-mapped content
        private ubyte[]                 heap;

        // basic capacity for each record
        private BlockSize               block;

        // pointers to file records
        private Record[K]               map;

        // current file size
        private ulong                   fileSize;

        // current file usage
        private ulong                   waterLine;

        // supported block sizes
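        // (each capacity is one less than a power of two, so that it can
        // double as an alignment mask when a record's slot size is rounded
        // up; see Record.createBucket)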
        public static const(BlockSize) EighthK  = {128-1},
                                       QuarterK = {256-1},
                                       HalfK    = {512-1},
                                       OneK     = {1024*1-1},
                                       TwoK     = {1024*2-1},
                                       FourK    = {1024*4-1},
                                       EightK   = {1024*8-1},
                                       SixteenK = {1024*16-1},
                                       ThirtyTwoK = {1024*32-1},
                                       SixtyFourK = {1024*64-1};


        /**********************************************************************

                Construct a HashFile with the provided path, record-size,
                and initial record count. The latter causes records to be
                pre-allocated, saving a certain amount of growth activity.
                Selecting a record size that roughly matches the serialized
                content will limit 'thrashing'.
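
                A minimal construction sketch (file name and sizing chosen
                purely for illustration):

                ---
                alias HashFile!(char[], char[]) Cache;

                // room for 1_000 records of up to a quarter-KB each
                auto cache = new Cache ("cache.bin", Cache.QuarterK, 1_000);
                ---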

        **********************************************************************/

        this (char[] path, BlockSize block, uint initialRecords = 100)
        {
                this.block = block;

                // open a storage file
                file = new MappedFile (path);

                // set initial file size (cannot be zero)
                fileSize = initialRecords * (block.capacity + 1);

                // map the file content
                heap = file.resize (fileSize);
        }

        /**********************************************************************

                Return where the HashFile is located

        **********************************************************************/

        final char[] path ()
        {
                return file.path;
        }

        /**********************************************************************

                Return the currently populated size of this HashFile

        **********************************************************************/

        final ulong length ()
        {
                return waterLine;
        }

        /**********************************************************************

                Return the serialized data for the provided key, or V.init
                (null, for array types) if the key was not found. Passing
                'clear' as true marks the record as empty once it has been
                read; its file slot remains allocated for reuse.

                Be sure to synchronize access by multiple threads
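
                A sketch of a plain read followed by a destructive read,
                using the Bucket alias from the class documentation (key
                and content chosen for illustration):

                ---
                bucket.put ("key", "payload".dup);

                auto a = bucket.get ("key");          // plain read
                auto b = bucket.get ("key", true);    // read, then mark the record empty
                assert (bucket.get ("key") is null);  // empty until rewritten
                ---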

        **********************************************************************/

        final V get (K key, bool clear = false)
        {
                auto p = key in map;

                if (p)
                    return p.read (this, clear);
                return V.init;
        }

        /**********************************************************************

                Remove the provided key from this HashFile. Leaves a
                hole in the backing file

                Be sure to synchronize access by multiple threads

        **********************************************************************/

        final void remove (K key)
        {
                map.remove (key);
        }

        /**********************************************************************

                Write a serialized block of data, and associate it with
                the provided key. All keys must be unique, and it is the
                responsibility of the programmer to ensure this. Reusing
                an existing key will overwrite previous data.

                Note that data is allowed to grow within the occupied
                bucket until it becomes larger than the allocated space.
                When this happens, the data is moved to a larger bucket
                at the file tail.

                Be sure to synchronize access by multiple threads
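
                The optional 'retain' callback is applied to a new key just
                before it is stored in the internal map; a typical use, as
                sketched below, is duplicating a key that is held in a
                transient buffer (names are illustrative):

                ---
                char[] keyBuffer = "some key".dup;

                bucket.put (keyBuffer, "payload".dup,
                            function (char[] k) { return k.dup; });
                ---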

        **********************************************************************/

        final void put (K key, V data, K function(K) retain = null)
        {
                auto r = key in map;

                if (r)
                    r.write (this, data, block);
                else
                   {
                   Record rr;
                   rr.write (this, data, block);
                   if (retain)
                       key = retain (key);
                   map [key] = rr;
                   }
        }

        /**********************************************************************

                Close this HashFile -- all content is lost.

        **********************************************************************/

        final void close ()
        {
                if (file)
                   {
                   file.close;
                   file = null;
                   map = null;
                   }
        }

        /**********************************************************************

                Each Record takes up a number of 'pages' within the file.
                The size of these pages is determined by the BlockSize
                provided during HashFile construction. Additional space
                at the end of each block is potentially wasted, but enables
                content to grow in size without creating a myriad of holes.
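
                For example (figures are illustrative): with the HalfK block
                size the capacity mask is 511, so a 600-byte value is placed
                in a slot of (600 + 511) & ~511 = 1024 bytes, leaving 424
                bytes of headroom for in-place growth.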

        **********************************************************************/

        private struct Record
        {
                private ulong           offset;
                private int             used,
                                        capacity = -1;

                /**************************************************************

                        This should be protected from thread-contention at
                        a higher level.

                **************************************************************/

                V read (HashFile bucket, bool clear)
                {
                        if (used)
                           {
                           auto ret = cast(V) bucket.heap [offset .. offset + used];
                           if (clear)
                               used = 0;
                           return ret;
                           }
                        return V.init;
                }

                /**************************************************************

                        This should be protected from thread-contention at
                        a higher level.

                **************************************************************/

                void write (HashFile bucket, V data, BlockSize block)
                {
                        this.used = cast(int) data.length;

                        // create new slot if we exceed capacity
                        if (this.used > this.capacity)
                            createBucket (bucket, this.used, block);

                        bucket.heap [offset .. offset+used] = cast(ubyte[]) data;
                }

                /**************************************************************

                **************************************************************/

                void createBucket (HashFile bucket, int bytes, BlockSize block)
                {
                        this.offset = bucket.waterLine;
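                        // round the requested size up to the next multiple of
                        // the block size: block.capacity is 2^n - 1, so its
                        // complement is the alignment mask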
                        this.capacity = (bytes + block.capacity) & ~block.capacity;

                        bucket.waterLine += this.capacity;
                        if (bucket.waterLine > bucket.fileSize)
                           {
                           auto target = bucket.waterLine * 2;
                           debug(HashFile)
                                 printf ("growing file from %lld, %lld, to %lld\n",
                                          bucket.fileSize, bucket.waterLine, target);

                           // expand the physical file size and remap the heap
                           bucket.heap = bucket.file.resize (bucket.fileSize = target);
                           }
                }
        }
}


/******************************************************************************

******************************************************************************/

debug (HashFile)
{
        import core.stdc.stdio;

        import ocean.io.Path;
        import ocean.io.Stdout;
        import ocean.text.convert.Integer_tango;

        void main()
        {
                alias HashFile!(char[], char[]) Bucket;

                auto file = new Bucket ("foo.map", Bucket.QuarterK, 1);

                char[16] tmp;
                for (int i=1; i < 1024; ++i)
                     file.put (format(tmp, i).dup, "blah");

                auto s = file.get ("1", true);
                if (s.length)
                    Stdout.formatln ("result '{}'", s);
                s = file.get ("1");
                if (s.length)
                    Stdout.formatln ("result '{}'", s);
                file.close;
                remove ("foo.map");
        }
}