1 /*******************************************************************************
2 
3     Helper class useful for producing apache bench style output about value
4     distributions. For example:
5 
6     ---
7 
8         Time distribution of 10000 requests:
9          50.0% <= 234μs
10          66.0% <= 413μs
11          75.0% <= 498μs
12          80.0% <= 575μs
13          90.0% <= 754μs
14          95.0% <= 787μs
15          98.0% <= 943μs
16          99.0% <= 1054μs
17          99.5% <= 1183μs
18          99.9% <= 7755μs
19         100.0% <= 8807μs (longest request)
20 
21         146 requests (1.5%) took longer than 1000μs
22 
23     ---
24 
25     Performance note: the lessThanCount(), greaterThanCount() and percentValue()
26     methods all sort the list of values stored in the Distribution instance. In
27     general it is thus best to add all the values you're interested in, then
28     call the results methods, so the list only needs to be sorted once.
29 
30     Usage example:
31 
32     ---
33 
34         import ocean.math.Distribution;
35 
36         import ocean.io.Stdout;
37 
38         import ocean.time.StopWatch;
39 
40         // Stopwatch instance.
41         StopWatch sw;
42 
43         // Create a distribution instance initialised to contain 10_000 values.
44         // (The size can be extended, but it's set initially for the sake of
45         // pre-allocation.)
46         const num_requests = 10_000;
47         auto dist = new Distribution!(ulong)(num_requests);
48 
49         // Perform a series of imaginary requests, timing each one and adding
50         // the time value to the distribution
51         for ( int i; i < num_requests; i++ )
52         {
53             sw.start;
54             doRequest();
55             auto time = sw.microsec;
56 
57             dist ~= time;
58         }
59 
60         // Display the times taken by 50%, 66% etc of the requests.
61         // (This produces output like apache bench.)
62         const percentages = [0.5, 0.66, 0.75, 0.8, 0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 1];
63 
64         foreach ( i, percentage; percentages )
65         {
66             auto value = dist.percentValue(percentage);
67 
68             Stdout.formatln("{,5:1}% <= {}μs", percentage * 100, value);
69         }
70 
71         // Display the number of requests which took longer than 1ms.
72         const timeout = 1_000; // 1ms
73         auto timed_out = dist.greaterThanCount(timeout);
74 
75         Stdout.formatln("{} requests ({,3:1}%) took longer than {}μs",
76                 timed_out,
77                 (cast(float)timed_out / cast(float)dist.length) * 100.0,
78                 timeout);
79 
80         // Clear distribution ready for next test.
81         dist.clear;
82 
83     ---
84 
85     Copyright:
86         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
87         All rights reserved.
88 
89     License:
90         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
91         Alternatively, this file may be distributed under the terms of the Tango
92         3-Clause BSD License (see LICENSE_BSD.txt for details).
93 
94 *******************************************************************************/
95 
96 module ocean.math.Distribution;
97 
98 
99 
100 
101 import ocean.meta.types.Qualifiers;
102 
103 import ocean.core.Enforce;
104 import ocean.core.Array : bsearch, sort;
105 
106 import ocean.util.container.AppendBuffer;
107 
108 version (unittest) import ocean.core.Test;
109 
110 
111 /*******************************************************************************
112 
113     Class to report on the distribution of a series of values.
114 
115     Template params:
116         T = The type of the values in the distribution
117 
118 *******************************************************************************/
119 
120 public class Distribution ( T )
121 {
122     /***************************************************************************
123 
124         List of values, appended to by the opOpAssign!"~" method.
125 
126     ***************************************************************************/
127 
128     private AppendBuffer!(T) values;
129 
130 
131     /***************************************************************************
132 
133         Indicates whether the list of values has been sorted (which is required
134         by the methods: lessThanCount, greaterThanCount, percentValue).
135         opOpAssign() and clear() reset the sorted flag to false.
136 
137         TODO: it might be better to always maintain the list in sorted order?
138 
139     ***************************************************************************/
140 
141     private bool sorted;
142 
143 
144     /***************************************************************************
145 
146         Constructor.
147 
148         Params:
149             num_values = initial size of list (for pre-allocation)
150 
151     ***************************************************************************/
152 
153     public this ( size_t num_values = 0 )
154     {
155         this.values = new AppendBuffer!(T)(num_values);
156     }
157 
158 
159     /***************************************************************************
160 
161         Adds a value to the list.
162 
163         Params:
164             op = operation to perform
165             value = value to add
166 
167     ***************************************************************************/
168 
169     public void opOpAssign ( string op ) ( T value ) if (op == "~")
170     {
171         this.values.append(value);
172         this.sorted = false;
173     }
174 
175 
176     /***************************************************************************
177 
178         Clears all values from the list.
179 
180     ***************************************************************************/
181 
182     public void clear ( )
183     {
184         this.values.length = 0;
185         this.sorted = false;
186     }
187 
188 
189     /***************************************************************************
190 
191         Returns:
192             the number of values in the list
193 
194         Note: aliased as length.
195 
196     ***************************************************************************/
197 
198     public ulong count ( )
199     {
200         return this.values.length;
201     }
202 
203     public alias count length;
204 
205 
206     /***************************************************************************
207 
208         Gets the count of values in the list which are less than the specified
209         value.
210 
211         Params:
212             max = value to count less than
213 
214         Returns:
215             number of values less than max
216 
217     ***************************************************************************/
218 
219     public size_t lessThanCount ( T max )
220     {
221         if ( this.values.length == 0 )
222         {
223             return 0;
224         }
225 
226         this.sort;
227 
228         size_t less;
229         bsearch(this.values[], max, less);
230         return less;
231     }
232 
233 
234     /***************************************************************************
235 
236         Gets the count of values in the list which are greater than the
237         specified value.
238 
239         Params:
240             min = value to count greater than
241 
242         Returns:
243             number of values greater than min
244 
245     ***************************************************************************/
246 
247     public size_t greaterThanCount ( T min )
248     {
249         auto less = this.lessThanCount(min);
250         return this.values.length - less;
251     }
252 
253 
254     /***************************************************************************
255 
256         Gets the value which X% of the values in the list are less than or equal
257         to.
258 
259         For example, if values contains [1, 2, 3, 4, 5, 6, 7, 8], then
260         percentValue(0.5) returns 4, while percentValue(0.25) returns 2, and
261         percentValue(1.0) returns 8.
262 
263         Throws an error if passed a value <= 0 or >= 1.
264 
265         Params:
266             fraction = percentage as a fraction
267 
268         Returns:
269             value which X% of the values in the list are less than or equal to
270 
271     ***************************************************************************/
272 
273     public T percentValue ( double fraction )
274     out ( result )
275     {
276         if ( this.values.length == 0 )
277         {
278             assert(result == 0, "percentValue should be 0 for empty distributions");
279         }
280     }
281     do
282     {
283         enforce(fraction >= 0.0 && fraction <= 1.0,
284             "fraction must be within [0.0 ... 1.0]");
285 
286         if ( this.values.length == 0 )
287         {
288             return 0;
289         }
290 
291         this.sort;
292 
293         auto index = cast(size_t)(fraction * (this.values.length - 1));
294 
295         return this.values[index];
296     }
297 
298 
299     unittest
300     {
301         // test with ulong
302         percentValueTests!(ulong)([1, 2, 3, 4, 5, 6, 7, 8], 4);
303 
304         // test with odd amount of numbers
305         percentValueTests!(ulong)([1, 2, 3, 4, 5, 6, 7, 8, 9], 5);
306 
307         // test with signed int
308         percentValueTests!(int)([-8, -7, -6, -5, -4, -3, -2, -1], -5);
309 
310         // test with double
311         percentValueTests!(double)([1.5, 2.5, 3.5, 4.5, 5.5, 7.5, 8.5], 4.5);
312     }
313 
314 
315     /***************************************************************************
316 
317         Calculates the mean (average) value of this distribution
318 
319         Returns:
320             The average of the values contained in this distribution
321 
322     ***************************************************************************/
323 
324     public double mean ( )
325     {
326         if ( this.values.length == 0 )
327         {
328             return 0;
329         }
330 
331         double total = 0.0;
332 
333         for ( int i = 0; i < this.values.length; i++ )
334         {
335             total += this.values[i];
336         }
337 
338         return total / this.values.length;
339     }
340 
341 
342     unittest
343     {
344         // test with ulong
345         meanTests!(ulong)([2, 3, 4, 5, 6], 4);
346 
347         // test with signed int
348         meanTests!(int)([-2, -3, -4, -5, -6], -4);
349 
350         // test with double
351         meanTests!(double)([2.4, 5.0, 7.6], 5.0);
352     }
353 
354 
355     /***************************************************************************
356 
357         Calculates the median value of this distribution
358         For an odd number of values, the middle value is returned
359         For an even number, the average of the 2 middle values is returned
360 
361         Returns:
362             The median of the values contained in this distribution
363 
364     ***************************************************************************/
365 
366     public double median ( )
367     {
368         if ( this.values.length == 0 )
369         {
370             return 0;
371         }
372 
373         this.sort;
374 
375         auto count = this.values.length;
376         double median;
377 
378         if ( count % 2 == 0 )
379         {
380             double lval = this.values[( count / 2 ) - 1];
381             double rval = this.values[count / 2];
382             median = ( lval + rval ) / 2;
383         }
384         else
385         {
386             median = this.values[count / 2];
387         }
388 
389         return median;
390     }
391 
392 
393     unittest
394     {
395         // test with ulong
396         medianTests!(ulong)([2, 3, 4, 5, 6], 4);
397 
398         // test with even amount of numbers
399         medianTests!(ulong)([2, 3, 4, 5, 6, 7], 4.5);
400 
401         // test with signed int
402         medianTests!(int)([-2, -3, -4, -5, -6], -4);
403 
404         // test with double
405         medianTests!(double)([2.4, 5.0, 7.6], 5.0);
406     }
407 
408 
409     /***************************************************************************
410 
411         Sorts the values in the list, if they are not already sorted.
412 
413     ***************************************************************************/
414 
415     private void sort ( )
416     {
417         if ( !this.sorted )
418         {
419             .sort(this.values[]);
420             this.sorted = true;
421         }
422     }
423 }
424 
425 unittest
426 {
427     alias Distribution!(size_t) Instance;
428 }
429 
430 
431 /*******************************************************************************
432 
433     Functions that are common to the unittests in this module
434     These functions are template functions, so they will not
435     generate any code unless compiled with -unittest.
436 
437 *******************************************************************************/
438 
439 
440 
441 /*******************************************************************************
442 
443     Appends the given list of values to the given distribution
444 
445     Template params:
446         T = the type used by the distribution
447 
448     Params:
449         dist = the distribution to append to
450         values = the values to put into the distribution
451 
452 *******************************************************************************/
453 
454 private void appendDist ( T ) ( Distribution!(T) dist, T[] values )
455 {
456     foreach ( val; values )
457     {
458         dist ~= val;
459     }
460 }
461 
462 
463 /*******************************************************************************
464 
465     Tests if an error was caught or not when calling the given delegate
466 
467     Template params:
468         dummy = dummy template parameter to avoid generating
469             code for this function if it is not used
470 
471     Params:
472         dg = a delegate that calls an error throwing function
473         error = whether an error should have been thrown or not
474         msg = the error message should the test fail
475 
476 *******************************************************************************/
477 
478 private void testForError ( bool dummy = false )
479                           ( scope void delegate ( ) dg, bool error, istring msg )
480 {
481     bool caught = false;
482 
483     try
484     {
485         dg();
486     }
487     catch ( Exception e )
488     {
489         caught = true;
490     }
491 
492     test(error == caught, "Error " ~ (!error ? "not " : "")  ~ "expected: " ~ msg);
493 }
494 
495 
496 /*******************************************************************************
497 
498     Runs a standard set of percentValue tests on the given distribution
499     Tests will be checked against the given expected middle value
500 
501     Template params:
502         T = the type used by the distribution
503 
504     Params:
505         values = the values to test a distribution of
506         middle_value = the expected middle value to check against
507 
508 *******************************************************************************/
509 
510 private void percentValueTests ( T ) ( T[] values, T middle_value )
511 {
512     auto dist = new Distribution!(T);
513 
514     // test that percentValue always returns 0 for empty distributions regardless of type
515     test!("==")(dist.percentValue(0.25), 0, "percentValue should always return 0 for an empty distribution");
516 
517     appendDist!(T)(dist, values);
518 
519     // test that exceeding the boundaries throws an error
520     testForError({ dist.percentValue(-1.0); }, true, "fraction < 0 is out of bounds");
521     testForError({ dist.percentValue(2.0); }, true, "fraction > 1 is out of bounds");
522 
523     // test middle value
524     test!("==")(dist.percentValue(0.5), middle_value);
525 }
526 
527 
528 /*******************************************************************************
529  *
530     Runs a standard set of mean tests on the given distribution
531     Tests will be checked against the given expected average value
532 
533     Template params:
534         T = the type used by the distribution
535 
536     Params:
537         values = the values to test a distribution of
538         average_value = the expected average value to check against
539 
540 *******************************************************************************/
541 
542 private void meanTests ( T ) ( T[] values, T average_value )
543 {
544     auto dist = new Distribution!(T);
545 
546     // test that mean always returns 0 for empty distributions regardless of type
547     test!("==")(dist.mean(), 0, "mean should always return 0 for an empty distribution");
548 
549     appendDist!(T)(dist, values);
550 
551     // test average value
552     test!("==")(dist.mean(), average_value, "mean returned the wrong average value");
553 }
554 
555 
556 /*******************************************************************************
557  *
558     Runs a standard set of median tests on the given distribution
559     Tests will be checked against the given expected median value
560 
561     Template params:
562         T = the type used by the distribution
563 
564     Params:
565         values = the values to test a distribution of
566         median_value = the expected median value to check against
567 
568 *******************************************************************************/
569 
570 private void medianTests ( T ) ( T[] values, double median_value )
571 {
572     auto dist = new Distribution!(T);
573 
574     // test that median always returns 0 for empty distributions regardless of type
575     test!("==")(dist.median(), 0, "median should always return 0 for an empty distribution");
576 
577     appendDist!(T)(dist, values);
578 
579     // test median value
580     test!("==")(dist.median(), median_value, "median returned the wrong median value");
581 }