1 /*******************************************************************************
2 
3     An integer, binary logarithmic statistics counter
4 
5     Collects the following statistical information:
6     - a binary logarithmic histogram with bins from ≥1 to <2^MaxPow2
7       (see template documentation, below), one bin per power of two, plus one
8       bin for each 0 and ≥2^MaxPow2,
9     - the total number of entries and the aggregated total amount.
10 
11     To reset all counters to zero use `BinaryHistogram.init`.
12 
13     Copyright:
14         Copyright (c) 2017-2018 dunnhumby Germany GmbH. All rights reserved.
15 
16     License:
17         Boost Software License Version 1.0. See LICENSE.txt for details.
18 
19 *******************************************************************************/
20 
21 module ocean.math.BinaryHistogram;
22 
23 import ocean.meta.types.Qualifiers;
24 import ocean.core.Verify;
25 
26 /*******************************************************************************
27 
28     An integer, binary logarithmic statistics counter
29 
30     Params:
31         MaxPow2 = the maximum power of two that is tracked in the lower bins of
32             the counter. Any values >= 2^MaxPow2 will be aggregated in the upper
33             bin.
34         Suffix = suffix for the names of the fields of the Bins accessor struct
35 
36 *******************************************************************************/
37 
38 public struct BinaryHistogram ( uint MaxPow2, istring Suffix = "" )
39 {
40     // Values above the range of ulong are not supported.
41     static assert(MaxPow2 < 64,
42         "Buckets for 2^64-bit values or larger unsupported");
43 
44     import core.bitop: bsr;
45 
46     /// The number of bins. (The specified number of powers of two, plus one bin
47     /// for 0B and one for ≥1<<MaxPow2.)
48     private enum NumBins = MaxPow2 + 2;
49 
50     /// The aggregated total from all calls to `add()`.
51     public ulong total;
52 
53     /// The total number of calls to `add()`.
54     public uint count;
55 
56     /***************************************************************************
57 
58         The bins of the byte count histogram. Given a number, the index i of the
59         bin to increment is calculated as follows:
60 
61         - If 1 ≤ n and n < 2^MaxPow2: i = floor(log_2(n)) + 1,
62         - otherwise, if t < 1: i = 0,
63         - otherwise, t ≥ 2^MaxPow2: i = NumBins - 1.
64 
65         For example, for MaxPow2 == 16, the following bins exist:
66         0: 0,
67         1:       1, 2:  2 -   3, 3:   4 -   7, 4:   8 -  15,  5:  16 -   31,
68         6: 32 - 63, 7: 64 - 127, 8: 128 - 255, 9: 256 - 511, 10: 512 - 1023,
69         11: 1ki -  (2ki-1), 12:  2ki -  (4ki-1), 13:  4ki -  (8ki-1),
70         14: 8ki - (16ki-1), 15: 16ki - (32ki-1), 16: 32ki - (64ki-1),
71         17: 64ki - ∞
72 
73     ***************************************************************************/
74 
75     private uint[NumBins] bins;
76 
77     /***************************************************************************
78 
79         Struct with one uint field per bin (see this.bins), named as follows
80         (assuming Suffix == ""):
81             from_0, from_1, from_2, from_4, from_8,
82             from_16, ...,
83             from_1Ki, from_2Ki, ...,
84             etc
85 
86         Useful, for example, for logging the whole histogram. An instance of
87         this struct is returned by `stats()`.
88 
89     ***************************************************************************/
90 
91     private struct Bins
92     {
93         import CTFE = ocean.meta.codegen.CTFE : toString;
94 
95         /***********************************************************************
96 
97             Interprets the passed bins array as a Bins instance.
98 
99             Params:
100                 array = bins array to reinterpret
101 
102             Returns:
103                 the passed bins array as a Bins instance
104 
105         ***********************************************************************/
106 
107         public static Bins fromArray ( typeof(BinaryHistogram.bins) array )
108         {
109             return *(cast(Bins*)array.ptr);
110         }
111 
112         /***********************************************************************
113 
114             Sanity check that the offset of the fields of this struct match the
115             offsets of the elements of a BinaryHistogram.bins array. (i.e.
116             that the fromArray() function can work as intended.)
117 
118         ***********************************************************************/
119 
120         static assert(fieldOffsetsCorrect());
121 
122         /***********************************************************************
123 
124             Returns:
125                 true if the offset of the fields of this struct match the
126                 offsets of the elements of a BinaryHistogram.bins array
127 
128         ***********************************************************************/
129 
130         private static bool fieldOffsetsCorrect ( )
131         {
132             foreach ( i, field; typeof(Bins.tupleof) )
133                 if ( Bins.tupleof[i].offsetof != i * BinaryHistogram.init.bins[i].sizeof )
134                     return false;
135             return true;
136         }
137 
138         /***********************************************************************
139 
140             CTFE generator of the fields of this struct.
141 
142             Returns:
143                 code for the series of fields for bins up to the specified
144                 maximum power of 2. (See unittest for examples.)
145 
146         ***********************************************************************/
147 
148         private static istring divisionBinVariables ( )
149         {
150             enum type = typeof(BinaryHistogram.bins[0]).stringof;
151 
152             istring res;
153 
154             istring formatCount ( ulong count )
155             {
156                 enum prefixes = [""[], "Ki", "Mi", "Gi", "Ti", "Pi", "Ei"];
157 
158                 uint prefix;
159                 while ( (prefix < prefixes.length - 1) && count >= 1024 )
160                 {
161                     count /= 1024;
162                     prefix++;
163                 }
164                 return CTFE.toString(count) ~ prefixes[prefix] ~ Suffix;
165             }
166 
167             res ~= type ~ " from_" ~ formatCount(0) ~ ";";
168 
169             for ( size_t power = 0; power <= MaxPow2; power++ )
170                 res ~= type ~ " from_" ~ formatCount(1UL << power) ~ ";";
171 
172             return res;
173         }
174 
175         /// Fields.
176         mixin(divisionBinVariables());
177     }
178 
179     /// The number of fields of Bins must equal the length of the fixed-length
180     /// array this.bins.
181     static assert(Bins.tupleof.length == bins.length);
182 
183     /***************************************************************************
184 
185         Adds the specified value to the histogram, incrementing the
186         corresponding bin and the total number of transactions and adding `n` to
187         the total count.
188 
189         Params:
190             n = the value to add to the histogram
191 
192         Returns:
193             n
194 
195     ***************************************************************************/
196 
197     public ulong add ( ulong n )
198     {
199         this.count++;
200         this.total += n;
201         this.bins[n? (n < (1UL << MaxPow2))? bsr(n) + 1 : $ - 1 : 0]++;
202         return n;
203     }
204 
205     /***************************************************************************
206 
207         Returns:
208             the mean value over all calls to `add`.
209             May be NaN if this.count == 0.
210 
211     ***************************************************************************/
212 
213     public double mean ( )
214     {
215         verify(this.count || !this.total);
216 
217         return this.total / cast(double)this.count;
218     }
219 
220     /***************************************************************************
221 
222         Gets the count of transactions in the specified bin.
223 
224         Params:
225             bin_name = string name of the bin to get the count for. Must match
226                 the name of one of the fields of Bins
227 
228         Returns:
229             the number of transactions in the specified bin
230 
231     ***************************************************************************/
232 
233     public ulong countFor ( istring bin_name ) ( )
234     {
235         mixin("static assert(is(typeof(Bins.init." ~ bin_name ~ ")));");
236 
237         mixin("const offset = Bins.init." ~ bin_name ~ ".offsetof;");
238         enum index = offset / this.bins[0].sizeof;
239         return this.bins[index];
240     }
241 
242     /***************************************************************************
243 
244         Returns:
245             the complete histogram as a Bins struct
246 
247     ***************************************************************************/
248 
249     public Bins stats ( )
250     {
251         return Bins.fromArray(this.bins);
252     }
253 }
254 
255 version (unittest)
256 {
257     import ocean.core.Test;
258     import ocean.util.log.Stats;
259 }
260 
261 ///
262 unittest
263 {
264     // Histogram of bytes (suffix B) with buckets up to 1MiB.
265     BinaryHistogram!(20, "B") hist;
266 
267     // Count some transactions.
268     hist.add(12);
269     hist.add(0);
270     hist.add(0);
271     hist.add(1_000_000);
272 
273     // Get the stats from the histogram.
274     auto stats = hist.stats();
275 
276     // Extract counts from individual bins.
277     auto zeros = stats.from_0B;
278     auto too_big = stats.from_1MiB;
279 
280     // Log compete histogram stats.
281     void logHistogram ( StatsLog stats_log )
282     {
283         stats_log.add(stats);
284     }
285 }
286 
287 // Tests for divisionBinVariables function.
288 unittest
289 {
290     test!("==")(BinaryHistogram!(0, "B").Bins.divisionBinVariables(),
291         "uint from_0B;uint from_1B;");
292     test!("==")(BinaryHistogram!(16, "B").Bins.divisionBinVariables(),
293         "uint from_0B;uint from_1B;uint from_2B;uint from_4B;uint from_8B;uint from_16B;uint from_32B;uint from_64B;uint from_128B;uint from_256B;uint from_512B;uint from_1KiB;uint from_2KiB;uint from_4KiB;uint from_8KiB;uint from_16KiB;uint from_32KiB;uint from_64KiB;");
294 }
295 
296 // Corner-case test: MaxPow2 == 0
297 unittest
298 {
299     BinaryHistogram!(0) hist;
300     hist.add(ulong.max);
301     test!("==")(hist.stats.from_0, 0);
302     test!("==")(hist.stats.from_1, 1);
303 }
304 
305 // Corner-case test: MaxPow2 == 63
306 unittest
307 {
308     BinaryHistogram!(63) hist;
309     hist.add(ulong.max);
310     test!("==")(hist.stats.from_0, 0);
311     test!("==")(hist.stats.from_8Ei, 1);
312 }
313 
314 // Corner-case test: MaxPow2 > 63 does not compile
315 unittest
316 {
317     static assert(!is(typeof({ BinaryHistogram!(64) hist; })));
318 }
319 
320 unittest
321 {
322     BinaryHistogram!(16, "B") hist;
323 
324     // Tests if hist.count is `expected` and matches the sum of all bin
325     // counters.
326     void checkBinSum ( uint expected, istring f = __FILE__, int ln = __LINE__ )
327     {
328         test!("==")(hist.count, expected, f, ln);
329         uint sum = 0;
330         foreach (bin; hist.bins)
331             sum += bin;
332         test!("==")(sum, hist.count, f, ln);
333     }
334 
335     // 0 Bytes: Should increment bins[0] and `count` to 1 and leave
336     // `total == 0`. All other bins should remain 0.
337     hist.add(0);
338     test!("==")(hist.total, 0);
339     checkBinSum(1);
340     test!("==")(hist.bins[0], 1);
341     test!("==")(hist.stats.from_0B, 1);
342     test!("==")(hist.countFor!("from_0B"), 1);
343     foreach (bin; hist.bins[1 .. $])
344         test!("==")(bin, 0);
345 
346     // 1500 Bytes: Should increment bins[11] to 1, `count` to 2 and `total` to
347     // 1500. bins[0] should stay at 1. All other bins should remain 0.
348     hist.add(1500);
349     test!("==")(hist.total, 1500);
350     checkBinSum(2);
351     test!("==")(hist.bins[0], 1);
352     test!("==")(hist.stats.from_0B, 1);
353     test!("==")(hist.countFor!("from_0B"), 1);
354     test!("==")(hist.bins[11], 1);
355     test!("==")(hist.stats.from_1KiB, 1);
356     test!("==")(hist.countFor!("from_1KiB"), 1);
357     foreach (i, bin; hist.bins)
358     {
359         switch (i)
360         {
361             default:
362                 test!("==")(hist.bins[i], 0);
363                 goto case;
364             case 0, 11:
365         }
366     }
367 
368     // 1,234,567,890 (more than 65,535) Bytes: Should increment bins[$ - 1]
369     // to 1, `count` to 3 and `total` to 1500 + 1,234,567,890. bins[0] and
370     // bins[11] should stay at 1. All other bins should remain 0.
371     hist.add(1_234_567_890);
372     test!("==")(hist.total, 1_234_567_890 + 1500);
373     checkBinSum(3);
374     test!("==")(hist.bins[0], 1);
375     test!("==")(hist.stats.from_0B, 1);
376     test!("==")(hist.countFor!("from_0B"), 1);
377     test!("==")(hist.bins[11], 1);
378     test!("==")(hist.stats.from_1KiB, 1);
379     test!("==")(hist.countFor!("from_1KiB"), 1);
380     test!("==")(hist.bins[$ - 1], 1);
381     test!("==")(hist.stats.from_64KiB, 1);
382     test!("==")(hist.countFor!("from_64KiB"), 1);
383     foreach (i, bin; hist.bins)
384     {
385         switch (i)
386         {
387             default:
388                 test!("==")(hist.bins[i], 0);
389                 goto case;
390             case 0, 11, hist.bins.length - 1:
391         }
392     }
393 }