1 /******************************************************************************* 2 3 An integer, binary logarithmic statistics counter 4 5 Collects the following statistical information: 6 - a binary logarithmic histogram with bins from ≥1 to <2^MaxPow2 7 (see template documentation, below), one bin per power of two, plus one 8 bin for each 0 and ≥2^MaxPow2, 9 - the total number of entries and the aggregated total amount. 10 11 To reset all counters to zero use `BinaryHistogram.init`. 12 13 Copyright: 14 Copyright (c) 2017-2018 dunnhumby Germany GmbH. All rights reserved. 15 16 License: 17 Boost Software License Version 1.0. See LICENSE.txt for details. 18 19 *******************************************************************************/ 20 21 module ocean.math.BinaryHistogram; 22 23 import ocean.meta.types.Qualifiers; 24 import ocean.core.Verify; 25 26 /******************************************************************************* 27 28 An integer, binary logarithmic statistics counter 29 30 Params: 31 MaxPow2 = the maximum power of two that is tracked in the lower bins of 32 the counter. Any values >= 2^MaxPow2 will be aggregated in the upper 33 bin. 34 Suffix = suffix for the names of the fields of the Bins accessor struct 35 36 *******************************************************************************/ 37 38 public struct BinaryHistogram ( uint MaxPow2, istring Suffix = "" ) 39 { 40 // Values above the range of ulong are not supported. 41 static assert(MaxPow2 < 64, 42 "Buckets for 2^64-bit values or larger unsupported"); 43 44 import core.bitop: bsr; 45 46 /// The number of bins. (The specified number of powers of two, plus one bin 47 /// for 0B and one for ≥1<<MaxPow2.) 48 private enum NumBins = MaxPow2 + 2; 49 50 /// The aggregated total from all calls to `add()`. 51 public ulong total; 52 53 /// The total number of calls to `add()`. 54 public uint count; 55 56 /*************************************************************************** 57 58 The bins of the byte count histogram. Given a number, the index i of the 59 bin to increment is calculated as follows: 60 61 - If 1 ≤ n and n < 2^MaxPow2: i = floor(log_2(n)) + 1, 62 - otherwise, if t < 1: i = 0, 63 - otherwise, t ≥ 2^MaxPow2: i = NumBins - 1. 64 65 For example, for MaxPow2 == 16, the following bins exist: 66 0: 0, 67 1: 1, 2: 2 - 3, 3: 4 - 7, 4: 8 - 15, 5: 16 - 31, 68 6: 32 - 63, 7: 64 - 127, 8: 128 - 255, 9: 256 - 511, 10: 512 - 1023, 69 11: 1ki - (2ki-1), 12: 2ki - (4ki-1), 13: 4ki - (8ki-1), 70 14: 8ki - (16ki-1), 15: 16ki - (32ki-1), 16: 32ki - (64ki-1), 71 17: 64ki - ∞ 72 73 ***************************************************************************/ 74 75 private uint[NumBins] bins; 76 77 /*************************************************************************** 78 79 Struct with one uint field per bin (see this.bins), named as follows 80 (assuming Suffix == ""): 81 from_0, from_1, from_2, from_4, from_8, 82 from_16, ..., 83 from_1Ki, from_2Ki, ..., 84 etc 85 86 Useful, for example, for logging the whole histogram. An instance of 87 this struct is returned by `stats()`. 88 89 ***************************************************************************/ 90 91 private struct Bins 92 { 93 import CTFE = ocean.meta.codegen.CTFE : toString; 94 95 /*********************************************************************** 96 97 Interprets the passed bins array as a Bins instance. 98 99 Params: 100 array = bins array to reinterpret 101 102 Returns: 103 the passed bins array as a Bins instance 104 105 ***********************************************************************/ 106 107 public static Bins fromArray ( typeof(BinaryHistogram.bins) array ) 108 { 109 return *(cast(Bins*)array.ptr); 110 } 111 112 /*********************************************************************** 113 114 Sanity check that the offset of the fields of this struct match the 115 offsets of the elements of a BinaryHistogram.bins array. (i.e. 116 that the fromArray() function can work as intended.) 117 118 ***********************************************************************/ 119 120 static assert(fieldOffsetsCorrect()); 121 122 /*********************************************************************** 123 124 Returns: 125 true if the offset of the fields of this struct match the 126 offsets of the elements of a BinaryHistogram.bins array 127 128 ***********************************************************************/ 129 130 private static bool fieldOffsetsCorrect ( ) 131 { 132 foreach ( i, field; typeof(Bins.tupleof) ) 133 if ( Bins.tupleof[i].offsetof != i * BinaryHistogram.init.bins[i].sizeof ) 134 return false; 135 return true; 136 } 137 138 /*********************************************************************** 139 140 CTFE generator of the fields of this struct. 141 142 Returns: 143 code for the series of fields for bins up to the specified 144 maximum power of 2. (See unittest for examples.) 145 146 ***********************************************************************/ 147 148 private static istring divisionBinVariables ( ) 149 { 150 enum type = typeof(BinaryHistogram.bins[0]).stringof; 151 152 istring res; 153 154 istring formatCount ( ulong count ) 155 { 156 enum prefixes = [""[], "Ki", "Mi", "Gi", "Ti", "Pi", "Ei"]; 157 158 uint prefix; 159 while ( (prefix < prefixes.length - 1) && count >= 1024 ) 160 { 161 count /= 1024; 162 prefix++; 163 } 164 return CTFE.toString(count) ~ prefixes[prefix] ~ Suffix; 165 } 166 167 res ~= type ~ " from_" ~ formatCount(0) ~ ";"; 168 169 for ( size_t power = 0; power <= MaxPow2; power++ ) 170 res ~= type ~ " from_" ~ formatCount(1UL << power) ~ ";"; 171 172 return res; 173 } 174 175 /// Fields. 176 mixin(divisionBinVariables()); 177 } 178 179 /// The number of fields of Bins must equal the length of the fixed-length 180 /// array this.bins. 181 static assert(Bins.tupleof.length == bins.length); 182 183 /*************************************************************************** 184 185 Adds the specified value to the histogram, incrementing the 186 corresponding bin and the total number of transactions and adding `n` to 187 the total count. 188 189 Params: 190 n = the value to add to the histogram 191 192 Returns: 193 n 194 195 ***************************************************************************/ 196 197 public ulong add ( ulong n ) 198 { 199 this.count++; 200 this.total += n; 201 this.bins[n? (n < (1UL << MaxPow2))? bsr(n) + 1 : $ - 1 : 0]++; 202 return n; 203 } 204 205 /*************************************************************************** 206 207 Returns: 208 the mean value over all calls to `add`. 209 May be NaN if this.count == 0. 210 211 ***************************************************************************/ 212 213 public double mean ( ) 214 { 215 verify(this.count || !this.total); 216 217 return this.total / cast(double)this.count; 218 } 219 220 /*************************************************************************** 221 222 Gets the count of transactions in the specified bin. 223 224 Params: 225 bin_name = string name of the bin to get the count for. Must match 226 the name of one of the fields of Bins 227 228 Returns: 229 the number of transactions in the specified bin 230 231 ***************************************************************************/ 232 233 public ulong countFor ( istring bin_name ) ( ) 234 { 235 mixin("static assert(is(typeof(Bins.init." ~ bin_name ~ ")));"); 236 237 mixin("const offset = Bins.init." ~ bin_name ~ ".offsetof;"); 238 enum index = offset / this.bins[0].sizeof; 239 return this.bins[index]; 240 } 241 242 /*************************************************************************** 243 244 Returns: 245 the complete histogram as a Bins struct 246 247 ***************************************************************************/ 248 249 public Bins stats ( ) 250 { 251 return Bins.fromArray(this.bins); 252 } 253 } 254 255 version (unittest) 256 { 257 import ocean.core.Test; 258 import ocean.util.log.Stats; 259 } 260 261 /// 262 unittest 263 { 264 // Histogram of bytes (suffix B) with buckets up to 1MiB. 265 BinaryHistogram!(20, "B") hist; 266 267 // Count some transactions. 268 hist.add(12); 269 hist.add(0); 270 hist.add(0); 271 hist.add(1_000_000); 272 273 // Get the stats from the histogram. 274 auto stats = hist.stats(); 275 276 // Extract counts from individual bins. 277 auto zeros = stats.from_0B; 278 auto too_big = stats.from_1MiB; 279 280 // Log compete histogram stats. 281 void logHistogram ( StatsLog stats_log ) 282 { 283 stats_log.add(stats); 284 } 285 } 286 287 // Tests for divisionBinVariables function. 288 unittest 289 { 290 test!("==")(BinaryHistogram!(0, "B").Bins.divisionBinVariables(), 291 "uint from_0B;uint from_1B;"); 292 test!("==")(BinaryHistogram!(16, "B").Bins.divisionBinVariables(), 293 "uint from_0B;uint from_1B;uint from_2B;uint from_4B;uint from_8B;uint from_16B;uint from_32B;uint from_64B;uint from_128B;uint from_256B;uint from_512B;uint from_1KiB;uint from_2KiB;uint from_4KiB;uint from_8KiB;uint from_16KiB;uint from_32KiB;uint from_64KiB;"); 294 } 295 296 // Corner-case test: MaxPow2 == 0 297 unittest 298 { 299 BinaryHistogram!(0) hist; 300 hist.add(ulong.max); 301 test!("==")(hist.stats.from_0, 0); 302 test!("==")(hist.stats.from_1, 1); 303 } 304 305 // Corner-case test: MaxPow2 == 63 306 unittest 307 { 308 BinaryHistogram!(63) hist; 309 hist.add(ulong.max); 310 test!("==")(hist.stats.from_0, 0); 311 test!("==")(hist.stats.from_8Ei, 1); 312 } 313 314 // Corner-case test: MaxPow2 > 63 does not compile 315 unittest 316 { 317 static assert(!is(typeof({ BinaryHistogram!(64) hist; }))); 318 } 319 320 unittest 321 { 322 BinaryHistogram!(16, "B") hist; 323 324 // Tests if hist.count is `expected` and matches the sum of all bin 325 // counters. 326 void checkBinSum ( uint expected, istring f = __FILE__, int ln = __LINE__ ) 327 { 328 test!("==")(hist.count, expected, f, ln); 329 uint sum = 0; 330 foreach (bin; hist.bins) 331 sum += bin; 332 test!("==")(sum, hist.count, f, ln); 333 } 334 335 // 0 Bytes: Should increment bins[0] and `count` to 1 and leave 336 // `total == 0`. All other bins should remain 0. 337 hist.add(0); 338 test!("==")(hist.total, 0); 339 checkBinSum(1); 340 test!("==")(hist.bins[0], 1); 341 test!("==")(hist.stats.from_0B, 1); 342 test!("==")(hist.countFor!("from_0B"), 1); 343 foreach (bin; hist.bins[1 .. $]) 344 test!("==")(bin, 0); 345 346 // 1500 Bytes: Should increment bins[11] to 1, `count` to 2 and `total` to 347 // 1500. bins[0] should stay at 1. All other bins should remain 0. 348 hist.add(1500); 349 test!("==")(hist.total, 1500); 350 checkBinSum(2); 351 test!("==")(hist.bins[0], 1); 352 test!("==")(hist.stats.from_0B, 1); 353 test!("==")(hist.countFor!("from_0B"), 1); 354 test!("==")(hist.bins[11], 1); 355 test!("==")(hist.stats.from_1KiB, 1); 356 test!("==")(hist.countFor!("from_1KiB"), 1); 357 foreach (i, bin; hist.bins) 358 { 359 switch (i) 360 { 361 default: 362 test!("==")(hist.bins[i], 0); 363 goto case; 364 case 0, 11: 365 } 366 } 367 368 // 1,234,567,890 (more than 65,535) Bytes: Should increment bins[$ - 1] 369 // to 1, `count` to 3 and `total` to 1500 + 1,234,567,890. bins[0] and 370 // bins[11] should stay at 1. All other bins should remain 0. 371 hist.add(1_234_567_890); 372 test!("==")(hist.total, 1_234_567_890 + 1500); 373 checkBinSum(3); 374 test!("==")(hist.bins[0], 1); 375 test!("==")(hist.stats.from_0B, 1); 376 test!("==")(hist.countFor!("from_0B"), 1); 377 test!("==")(hist.bins[11], 1); 378 test!("==")(hist.stats.from_1KiB, 1); 379 test!("==")(hist.countFor!("from_1KiB"), 1); 380 test!("==")(hist.bins[$ - 1], 1); 381 test!("==")(hist.stats.from_64KiB, 1); 382 test!("==")(hist.countFor!("from_64KiB"), 1); 383 foreach (i, bin; hist.bins) 384 { 385 switch (i) 386 { 387 default: 388 test!("==")(hist.bins[i], 0); 389 goto case; 390 case 0, 11, hist.bins.length - 1: 391 } 392 } 393 }