1 /*******************************************************************************
2 
3     Helper class useful for producing apache bench style output about value
4     distributions. For example:
5 
6     ---
7 
8         Time distribution of 10000 requests:
9          50.0% <= 234μs
10          66.0% <= 413μs
11          75.0% <= 498μs
12          80.0% <= 575μs
13          90.0% <= 754μs
14          95.0% <= 787μs
15          98.0% <= 943μs
16          99.0% <= 1054μs
17          99.5% <= 1183μs
18          99.9% <= 7755μs
19         100.0% <= 8807μs (longest request)
20 
21         146 requests (1.5%) took longer than 1000μs
22 
23     ---
24 
25     Performance note: the lessThanCount(), greaterThanCount() and percentValue()
26     methods all sort the list of values stored in the Distribution instance. In
27     general it is thus best to add all the values you're interested in, then
28     call the results methods, so the list only needs to be sorted once.
29 
30     Usage example:
31 
32     ---
33 
34         import ocean.math.Distribution;
35 
36         import ocean.io.Stdout;
37 
38         import ocean.time.StopWatch;
39 
40         // Stopwatch instance.
41         StopWatch sw;
42 
43         // Create a distribution instance initialised to contain 10_000 values.
44         // (The size can be extended, but it's set initially for the sake of
45         // pre-allocation.)
46         const num_requests = 10_000;
47         auto dist = new Distribution!(ulong)(num_requests);
48 
49         // Perform a series of imaginary requests, timing each one and adding
50         // the time value to the distribution
51         for ( int i; i < num_requests; i++ )
52         {
53             sw.start;
54             doRequest();
55             auto time = sw.microsec;
56 
57             dist ~= time;
58         }
59 
60         // Display the times taken by 50%, 66% etc of the requests.
61         // (This produces output like apache bench.)
62         const percentages = [0.5, 0.66, 0.75, 0.8, 0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 1];
63 
64         foreach ( i, percentage; percentages )
65         {
66             auto value = dist.percentValue(percentage);
67 
68             Stdout.formatln("{,5:1}% <= {}μs", percentage * 100, value);
69         }
70 
71         // Display the number of requests which took longer than 1ms.
72         const timeout = 1_000; // 1ms
73         auto timed_out = dist.greaterThanCount(timeout);
74 
75         Stdout.formatln("{} requests ({,3:1}%) took longer than {}μs",
76                 timed_out,
77                 (cast(float)timed_out / cast(float)dist.length) * 100.0,
78                 timeout);
79 
80         // Clear distribution ready for next test.
81         dist.clear;
82 
83     ---
84 
85     Copyright:
86         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
87         All rights reserved.
88 
89     License:
90         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
91         Alternatively, this file may be distributed under the terms of the Tango
92         3-Clause BSD License (see LICENSE_BSD.txt for details).
93 
94 *******************************************************************************/
95 
96 module ocean.math.Distribution;
97 
98 
99 
100 
101 import ocean.transition;
102 
103 import ocean.core.Enforce;
104 import ocean.core.Array : bsearch, sort;
105 
106 import ocean.util.container.AppendBuffer;
107 
108 version(UnitTest) import ocean.core.Test;
109 
110 
111 /*******************************************************************************
112 
113     Class to report on the distribution of a series of values.
114 
115     Template params:
116         T = The type of the values in the distribution
117 
118 *******************************************************************************/
119 
120 public class Distribution ( T )
121 {
122     /***************************************************************************
123 
124         List of values, appended to by the opAddAssign method.
125 
126     ***************************************************************************/
127 
128     private AppendBuffer!(T) values;
129 
130 
131     /***************************************************************************
132 
133         Indicates whether the list of values has been sorted (which is required
134         by the methods: lessThanCount, greaterThanCount, percentValue).
135         opAddAssign() and clear() reset the sorted flag to false.
136 
137         TODO: it might be better to always maintain the list in sorted order?
138 
139     ***************************************************************************/
140 
141     private bool sorted;
142 
143 
144     /***************************************************************************
145 
146         Constructor.
147 
148         Params:
149             num_values = initial size of list (for pre-allocation)
150 
151     ***************************************************************************/
152 
153     public this ( size_t num_values = 0 )
154     {
155         this.values = new AppendBuffer!(T)(num_values);
156     }
157 
158 
159     /***************************************************************************
160 
161         Adds a value to the list.
162 
163         Params:
164             value = value to add
165 
166     ***************************************************************************/
167 
168     public void opCatAssign ( T value )
169     {
170         this.values.append(value);
171         this.sorted = false;
172     }
173 
174 
175     /***************************************************************************
176 
177         Clears all values from the list.
178 
179     ***************************************************************************/
180 
181     public void clear ( )
182     {
183         this.values.length = 0;
184         this.sorted = false;
185     }
186 
187 
188     /***************************************************************************
189 
190         Returns:
191             the number of values in the list
192 
193         Note: aliased as length.
194 
195     ***************************************************************************/
196 
197     public ulong count ( )
198     {
199         return this.values.length;
200     }
201 
202     public alias count length;
203 
204 
205     /***************************************************************************
206 
207         Gets the count of values in the list which are less than the specified
208         value.
209 
210         Params:
211             max = value to count less than
212 
213         Returns:
214             number of values less than max
215 
216     ***************************************************************************/
217 
218     public size_t lessThanCount ( T max )
219     {
220         if ( this.values.length == 0 )
221         {
222             return 0;
223         }
224 
225         this.sort;
226 
227         size_t less;
228         bsearch(this.values[], max, less);
229         return less;
230     }
231 
232 
233     /***************************************************************************
234 
235         Gets the count of values in the list which are greater than the
236         specified value.
237 
238         Params:
239             min = value to count greater than
240 
241         Returns:
242             number of values greater than min
243 
244     ***************************************************************************/
245 
246     public size_t greaterThanCount ( T min )
247     {
248         auto less = this.lessThanCount(min);
249         return this.values.length - less;
250     }
251 
252 
253     /***************************************************************************
254 
255         Gets the value which X% of the values in the list are less than or equal
256         to.
257 
258         For example, if values contains [1, 2, 3, 4, 5, 6, 7, 8], then
259         percentValue(0.5) returns 4, while percentValue(0.25) returns 2, and
260         percentValue(1.0) returns 8.
261 
262         Throws an error if passed a value <= 0 or >= 1.
263 
264         Params:
265             fraction = percentage as a fraction
266 
267         Returns:
268             value which X% of the values in the list are less than or equal to
269 
270     ***************************************************************************/
271 
272     public T percentValue ( double fraction )
273     out ( result )
274     {
275         if ( this.values.length == 0 )
276         {
277             assert(result == 0, "percentValue should be 0 for empty distributions");
278         }
279     }
280     body
281     {
282         enforce(fraction >= 0.0 && fraction <= 1.0,
283             "fraction must be within [0.0 ... 1.0]");
284 
285         if ( this.values.length == 0 )
286         {
287             return 0;
288         }
289 
290         this.sort;
291 
292         auto index = cast(size_t)(fraction * (this.values.length - 1));
293 
294         return this.values[index];
295     }
296 
297 
298     unittest
299     {
300         // test with ulong
301         percentValueTests!(ulong)([1, 2, 3, 4, 5, 6, 7, 8], 4);
302 
303         // test with odd amount of numbers
304         percentValueTests!(ulong)([1, 2, 3, 4, 5, 6, 7, 8, 9], 5);
305 
306         // test with signed int
307         percentValueTests!(int)([-8, -7, -6, -5, -4, -3, -2, -1], -5);
308 
309         // test with double
310         percentValueTests!(double)([1.5, 2.5, 3.5, 4.5, 5.5, 7.5, 8.5], 4.5);
311     }
312 
313 
314     /***************************************************************************
315 
316         Calculates the mean (average) value of this distribution
317 
318         Returns:
319             The average of the values contained in this distribution
320 
321     ***************************************************************************/
322 
323     public double mean ( )
324     {
325         if ( this.values.length == 0 )
326         {
327             return 0;
328         }
329 
330         double total = 0.0;
331 
332         for ( int i = 0; i < this.values.length; i++ )
333         {
334             total += this.values[i];
335         }
336 
337         return total / this.values.length;
338     }
339 
340 
341     unittest
342     {
343         // test with ulong
344         meanTests!(ulong)([2, 3, 4, 5, 6], 4);
345 
346         // test with signed int
347         meanTests!(int)([-2, -3, -4, -5, -6], -4);
348 
349         // test with double
350         meanTests!(double)([2.4, 5.0, 7.6], 5.0);
351     }
352 
353 
354     /***************************************************************************
355 
356         Calculates the median value of this distribution
357         For an odd number of values, the middle value is returned
358         For an even number, the average of the 2 middle values is returned
359 
360         Returns:
361             The median of the values contained in this distribution
362 
363     ***************************************************************************/
364 
365     public double median ( )
366     {
367         if ( this.values.length == 0 )
368         {
369             return 0;
370         }
371 
372         this.sort;
373 
374         auto count = this.values.length;
375         double median;
376 
377         if ( count % 2 == 0 )
378         {
379             double lval = this.values[( count / 2 ) - 1];
380             double rval = this.values[count / 2];
381             median = ( lval + rval ) / 2;
382         }
383         else
384         {
385             median = this.values[count / 2];
386         }
387 
388         return median;
389     }
390 
391 
392     unittest
393     {
394         // test with ulong
395         medianTests!(ulong)([2, 3, 4, 5, 6], 4);
396 
397         // test with even amount of numbers
398         medianTests!(ulong)([2, 3, 4, 5, 6, 7], 4.5);
399 
400         // test with signed int
401         medianTests!(int)([-2, -3, -4, -5, -6], -4);
402 
403         // test with double
404         medianTests!(double)([2.4, 5.0, 7.6], 5.0);
405     }
406 
407 
408     /***************************************************************************
409 
410         Sorts the values in the list, if they are not already sorted.
411 
412     ***************************************************************************/
413 
414     private void sort ( )
415     {
416         if ( !this.sorted )
417         {
418             .sort(this.values[]);
419             this.sorted = true;
420         }
421     }
422 }
423 
424 unittest
425 {
426     alias Distribution!(size_t) Instance;
427 }
428 
429 
430 /*******************************************************************************
431 
432     Functions that are common to the unittests in this module
433     These functions are template functions, so they will not
434     generate any code unless compiled with -unittest.
435 
436 *******************************************************************************/
437 
438 
439 
440 /*******************************************************************************
441 
442     Appends the given list of values to the given distribution
443 
444     Template params:
445         T = the type used by the distribution
446 
447     Params:
448         dist = the distribution to append to
449         values = the values to put into the distribution
450 
451 *******************************************************************************/
452 
453 private void appendDist ( T ) ( Distribution!(T) dist, T[] values )
454 {
455     foreach ( val; values )
456     {
457         dist ~= val;
458     }
459 }
460 
461 
462 /*******************************************************************************
463 
464     Tests if an error was caught or not when calling the given delegate
465 
466     Template params:
467         dummy = dummy template parameter to avoid generating
468             code for this function if it is not used
469 
470     Params:
471         dg = a delegate that calls an error throwing function
472         error = whether an error should have been thrown or not
473         msg = the error message should the test fail
474 
475 *******************************************************************************/
476 
477 private void testForError ( bool dummy = false )
478                           ( scope void delegate ( ) dg, bool error, istring msg )
479 {
480     bool caught = false;
481 
482     try
483     {
484         dg();
485     }
486     catch ( Exception e )
487     {
488         caught = true;
489     }
490 
491     test(error == caught, "Error " ~ (!error ? "not " : "")  ~ "expected: " ~ msg);
492 }
493 
494 
495 /*******************************************************************************
496 
497     Runs a standard set of percentValue tests on the given distribution
498     Tests will be checked against the given expected middle value
499 
500     Template params:
501         T = the type used by the distribution
502 
503     Params:
504         values = the values to test a distribution of
505         middle_value = the expected middle value to check against
506 
507 *******************************************************************************/
508 
509 private void percentValueTests ( T ) ( T[] values, T middle_value )
510 {
511     auto dist = new Distribution!(T);
512 
513     // test that percentValue always returns 0 for empty distributions regardless of type
514     test!("==")(dist.percentValue(0.25), 0, "percentValue should always return 0 for an empty distribution");
515 
516     appendDist!(T)(dist, values);
517 
518     // test that exceeding the boundaries throws an error
519     testForError({ dist.percentValue(-1.0); }, true, "fraction < 0 is out of bounds");
520     testForError({ dist.percentValue(2.0); }, true, "fraction > 1 is out of bounds");
521 
522     // test middle value
523     test!("==")(dist.percentValue(0.5), middle_value);
524 }
525 
526 
527 /*******************************************************************************
528  *
529     Runs a standard set of mean tests on the given distribution
530     Tests will be checked against the given expected average value
531 
532     Template params:
533         T = the type used by the distribution
534 
535     Params:
536         values = the values to test a distribution of
537         average_value = the expected average value to check against
538 
539 *******************************************************************************/
540 
541 private void meanTests ( T ) ( T[] values, T average_value )
542 {
543     auto dist = new Distribution!(T);
544 
545     // test that mean always returns 0 for empty distributions regardless of type
546     test!("==")(dist.mean(), 0, "mean should always return 0 for an empty distribution");
547 
548     appendDist!(T)(dist, values);
549 
550     // test average value
551     test!("==")(dist.mean(), average_value, "mean returned the wrong average value");
552 }
553 
554 
555 /*******************************************************************************
556  *
557     Runs a standard set of median tests on the given distribution
558     Tests will be checked against the given expected median value
559 
560     Template params:
561         T = the type used by the distribution
562 
563     Params:
564         values = the values to test a distribution of
565         median_value = the expected median value to check against
566 
567 *******************************************************************************/
568 
569 private void medianTests ( T ) ( T[] values, double median_value )
570 {
571     auto dist = new Distribution!(T);
572 
573     // test that median always returns 0 for empty distributions regardless of type
574     test!("==")(dist.median(), 0, "median should always return 0 for an empty distribution");
575 
576     appendDist!(T)(dist, values);
577 
578     // test median value
579     test!("==")(dist.median(), median_value, "median returned the wrong median value");
580 }