/*******************************************************************************

    Copyright:
        Copyright (c) 2004-2009 Tango contributors.
        Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
        See LICENSE_TANGO.txt for details.

*******************************************************************************/


module ocean.util.container.more.HashFile;

import ocean.io.device.FileMap : MappedFile;

/******************************************************************************

    HashFile implements a simple mechanism to store and recover a
    large quantity of data for the duration of the hosting process.
    It is intended to act as a local-cache for a remote data-source,
    or as a spillover area for large in-memory cache instances.

    Note that any and all stored data is rendered invalid the moment
    a HashFile object is garbage-collected.

    The implementation follows a fixed-capacity record scheme, where
    content can be rewritten in-place until said capacity is reached.
    At such time, the altered content is moved to a larger capacity
    record at end-of-file, and a hole remains at the prior location.
    These holes are not collected, since the lifespan of a HashFile
    is limited to that of the host process.

    All index keys must be unique. Writing to the HashFile with an
    existing key will overwrite any previous content. What follows
    is a contrived example:

    ---
    alias HashFile!(char[], char[]) Bucket;

    auto bucket = new Bucket ("bucket.bin", HashFile.HalfK);

    // insert some data, and retrieve it again
    auto text = "this is a test";
    bucket.put ("a key", text);
    auto b = cast(char[]) bucket.get ("a key");

    assert (b == text);
    bucket.close;
    ---

******************************************************************************/

class HashFile(K, V)
{
    /**********************************************************************

        Define the capacity (block-size) of each record

    **********************************************************************/

    struct BlockSize
    {
        int capacity;
    }

    // backing storage
    private MappedFile file;

    // memory-mapped content
    private ubyte[] heap;

    // basic capacity for each record
    private BlockSize block;

    // pointers to file records
    private Record[K] map;

    // current file size
    private ulong fileSize;

    // current file usage
    private ulong waterLine;

    // supported block sizes; each capacity is (2^n - 1) so that
    // Record.createBucket can round sizes up with a simple mask
    public static const(BlockSize) EighthK     = {128-1},
                                   QuarterK    = {256-1},
                                   HalfK       = {512-1},
                                   OneK        = {1024*1-1},
                                   TwoK        = {1024*2-1},
                                   FourK       = {1024*4-1},
                                   EightK      = {1024*8-1},
                                   SixteenK    = {1024*16-1},
                                   ThirtyTwoK  = {1024*32-1},
                                   SixtyFourK  = {1024*64-1};


    /**********************************************************************

        Construct a HashFile with the provided path, record-size,
        and inital record count. The latter causes records to be
        pre-allocated, saving a certain amount of growth activity.
        Selecting a record size that roughly matches the serialized
        content will limit 'thrashing'.

    **********************************************************************/

    this (char[] path, BlockSize block, uint initialRecords = 100)
    {
        this.block = block;

        // open a storage file
        file = new MappedFile (path);

        // set initial file size (cannot be zero)
        fileSize = initialRecords * (block.capacity + 1);

        // map the file content
        heap = file.resize (fileSize);
    }

    /**********************************************************************

        Return where the HashFile is located

    **********************************************************************/

    final char[] path ()
    {
        return file.path;
    }

    /**********************************************************************

        Return the currently populated size of this HashFile

    **********************************************************************/

    final ulong length ()
    {
        return waterLine;
    }

    /**********************************************************************

        Return the serialized data for the provided key. Returns
        null if the key was not found.

        Note that the returned value is a slice into the mapped
        heap; it is invalidated if the file is subsequently grown
        and remapped (see Record.createBucket).

        Be sure to synchronize access by multiple threads

    **********************************************************************/

    final V get (K key, bool clear = false)
    {
        auto p = key in map;

        if (p)
            return p.read (this, clear);
        return V.init;
    }

    /**********************************************************************

        Remove the provided key from this HashFile. Leaves a
        hole in the backing file

        Be sure to synchronize access by multiple threads

    **********************************************************************/

    final void remove (K key)
    {
        map.remove (key);
    }

    /**********************************************************************

        Write a serialized block of data, and associate it with
        the provided key. All keys must be unique, and it is the
        responsibility of the programmer to ensure this. Reusing
        an existing key will overwrite previous data.

        Note that data is allowed to grow within the occupied
        bucket until it becomes larger than the allocated space.
        When this happens, the data is moved to a larger bucket
        at the file tail.

        Be sure to synchronize access by multiple threads

    **********************************************************************/

    final void put (K key, V data, K function(K) retain = null)
    {
        auto r = key in map;

        if (r)
            r.write (this, data, block);
        else
        {
            Record rr;
            rr.write (this, data, block);
            if (retain)
                key = retain (key);
            map [key] = rr;
        }
    }

    /**********************************************************************

        Close this HashFile -- all content is lost.

    **********************************************************************/

    final void close ()
    {
        if (file)
        {
            file.close;
            file = null;
            map = null;
        }
    }

    /**********************************************************************

        Each Record takes up a number of 'pages' within the file.
        The size of these pages is determined by the BlockSize
        provided during HashFile construction. Additional space
        at the end of each block is potentially wasted, but enables
        content to grow in size without creating a myriad of holes.

    **********************************************************************/

    private struct Record
    {
        private ulong offset;
        private int   used,
                      capacity = -1;

        /**************************************************************

            Return the stored content, optionally marking the
            record as empty afterwards. Returns V.init when the
            record holds no data.

            This should be protected from thread-contention at
            a higher level.

        **************************************************************/

        V read (HashFile bucket, bool clear)
        {
            if (used)
            {
                auto ret = cast(V) bucket.heap [offset .. offset + used];
                if (clear)
                    used = 0;
                return ret;
            }
            return V.init;
        }

        /**************************************************************

            Store the given content, relocating this record to a
            fresh slot at the file tail when it outgrows the
            currently allocated capacity.

            This should be protected from thread-contention at
            a higher level.

        **************************************************************/

        void write (HashFile bucket, V data, BlockSize block)
        {
            // cast required: array .length is size_t, which does
            // not implicitly narrow to int
            this.used = cast(int) data.length;

            // create new slot if we exceed capacity
            if (this.used > this.capacity)
                createBucket (bucket, this.used, block);

            bucket.heap [offset .. offset+used] = cast(ubyte[]) data;
        }

        /**************************************************************

            Allocate a fresh slot at the water-line, rounding the
            requested byte count up to a whole number of blocks
            (block.capacity is 2^n - 1, so the mask rounds up to a
            multiple of 2^n). Grows and remaps the backing file
            when the water-line passes the current file size.

        **************************************************************/

        void createBucket (HashFile bucket, int bytes, BlockSize block)
        {
            this.offset = bucket.waterLine;
            this.capacity = (bytes + block.capacity) & ~block.capacity;

            bucket.waterLine += this.capacity;
            if (bucket.waterLine > bucket.fileSize)
            {
                auto target = bucket.waterLine * 2;
                debug(HashFile)
                    // %llu: the arguments are unsigned (ulong)
                    printf ("growing file from %llu, %llu, to %llu\n",
                            bucket.fileSize, bucket.waterLine, target);

                // expand the physical file size and remap the heap
                bucket.heap = bucket.file.resize (bucket.fileSize = target);
            }
        }
    }
}


/******************************************************************************

******************************************************************************/

debug (HashFile)
{
    import core.stdc.stdio;

    import ocean.io.Path;
    import ocean.io.Stdout;
    import ocean.text.convert.Integer_tango;

    void main()
    {
        alias HashFile!(char[], char[]) Bucket;

        auto file = new Bucket ("foo.map", Bucket.QuarterK, 1);

        char[16] tmp;
        for (int i=1; i < 1024; ++i)
            file.put (format(tmp, i).dup, "blah");

        auto s = file.get ("1", true);
        if (s.length)
            Stdout.formatln ("result '{}'", s);
        s = file.get ("1");
        if (s.length)
            Stdout.formatln ("result '{}'", s);
        file.close;
        remove ("foo.map");
    }
}