1 /*******************************************************************************
2 
3     Calculates the average using an accumulative technique (i.e, not all the
4     values are provided at once).
5     The struct doesn't store any previous values.
6 
7     Copyright:
8         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
9         All rights reserved.
10 
11     License:
12         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
13         Alternatively, this file may be distributed under the terms of the Tango
14         3-Clause BSD License (see LICENSE_BSD.txt for details).
15 
16 *******************************************************************************/
17 
18 module ocean.math.IncrementalAverage;
19 
20 import ocean.core.Verify;
21 
22 /*******************************************************************************
23 
24     Calculates the average using an accumulative technique (i.e, not all the
25     values are provided at once).
26     The struct doesn't store any previous values.
27 
28 *******************************************************************************/
29 
30 public struct IncrementalAverage
31 {
32     import ocean.math.Math : sqrt;
33 
34     /***************************************************************************
35 
36         Counts the number of averages that has been previously performed.
37         This helps in giving the correct weight to a new number being added
38         to the average in comparison to the previous numbers.
39 
40     ***************************************************************************/
41 
42     private ulong count_ = 0;
43 
44 
45     /***************************************************************************
46 
47         Holds the average value calculated so far.
48 
49     ***************************************************************************/
50 
51     private double average_ = 0;
52 
53 
54     /***************************************************************************
55 
56         Holds the mean of square of the added elements used by the variance()
57         and stdDeviation() methods.
58 
59     ***************************************************************************/
60 
61     private double mean_of_square = 0;
62 
63 
64     /***************************************************************************
65 
66         Adds a new number (giving it an appropriate weight) to the average.
67 
68         Note that if too many numbers were added (more than ulong.max) then the
69         the internal counter will overflow (and as a result the average value
70         would be corrupt).
71 
72         Params:
73             value = the new value to add to the current average.
74             count = if that value represent in itself the average of other
75                 numbers, then this param should define the number of elements
76                 that this average stands for. A count = 0 has no effect on the
77                 average and gets discarded.
78 
79     ***************************************************************************/
80 
81     public void addToAverage (T)(T value, ulong count = 1)
82     {
83         if (count == 0)
84             return;
85 
86         (&this).count_ += count;
87 
88         auto delta = (value - (&this).average_) * count;
89 
90         (&this).average_ += delta / (&this).count_;
91 
92         (&this).mean_of_square += delta * (value - (&this).average_);
93     }
94 
95 
96     /***************************************************************************
97 
98         Returns:
99             the average calculated so far.
100 
101     ***************************************************************************/
102 
103     public double average ()
104     {
105         return (&this).average_;
106     }
107 
108 
109     /***************************************************************************
110 
111         Returns:
112             the count of elements added.
113 
114     ***************************************************************************/
115 
116     public ulong count ()
117     {
118         return (&this).count_;
119     }
120 
121 
122     /***************************************************************************
123 
124         Resets the average incremental instance.
125 
126     ***************************************************************************/
127 
128     public void clear ()
129     {
130         (&this).average_ = 0;
131         (&this).count_ = 0;
132         (&this).mean_of_square = 0;
133     }
134 
135     /***************************************************************************
136 
137         Computes the variance, a measure of the spread of a distribution to
138         quantify how far a set of data values is spread out.
139 
140         Note that the Welford's algorithm implementation is used to compute
141         the spread of a distribution incrementally when a value is added
142         through addToAverage().
143         http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
144         #Online_algorithm
145 
146         In standard statistical practice, a correction factor of 0 provides a
147         maximum likelihood estimate of the variance for normally distributed
148         variables, while a correction factor of 1 provides an unbiased
149         estimator of the variance of a hypothetical infinite population.
150         The correction factor will be subtracted from the number of elements
151         added (sample points) when working out the divisor.
152         For more information about correction factors that could be set please
153         see http://en.wikipedia.org/wiki/Standard_deviation#Estimation
154 
155         Params:
156             correction_factor = correction factor to normalise the divisor used
157                                 in the calculation
158 
159         Returns:
160             The variance of the set of data values added
161 
162     ***************************************************************************/
163 
164     public double variance (double correction_factor = 0)
165     {
166         if ((&this).count_ < 2)
167             return 0;
168 
169         verify((&this).count_ > correction_factor, "Correction factor is same "
170                ~ "size or bigger than the number of elements added.");
171 
172         return (&this).mean_of_square / ((&this).count_ - correction_factor);
173     }
174 
175     /***************************************************************************
176 
177         Returns the standard deviation, a measure of the spread of a
178         distribution to quantify the amount of variation or dispersion of a
179         set of data values.
180 
181         See notes on variance()
182 
183         Params:
184             correction_factor = correction factor to normalise the divisor used
185                                 in the calculation
186 
187         Returns:
188             The standard deviation of the set of data values added
189 
190     ***************************************************************************/
191 
192     public double stdDeviation (double correction_factor = 0)
193     {
194         return sqrt((&this).variance(correction_factor));
195     }
196 }
197 
198 
199 version (UnitTest)
200 {
201     import ocean.core.Test : NamedTest;
202 
203     import ocean.math.IEEE;
204 
205     // Bit count to avoid deliberate loss of precision.
206     // Ensures only 8 bits of precision could be lost.
207     static immutable PRECISION = double.mant_dig - 8;
208 }
209 
210 unittest
211 {
212     auto t = new NamedTest("Incremental Average - basic unit tests");
213 
214 	IncrementalAverage inc_avg;
215 
216     t.test!("==")(inc_avg.count, 0);
217     t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION);
218 
219 	inc_avg.addToAverage(1);
220     t.test!(">=")(feqrel(inc_avg.average(), 1.0), PRECISION);
221 
222 	inc_avg.clear();
223 	t.test!("==")(inc_avg.count, 0);
224     t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION);
225 
226 	inc_avg.addToAverage(10);
227 	inc_avg.addToAverage(20);
228     t.test!("==")(inc_avg.count, 2);
229     t.test!(">=")(feqrel(inc_avg.average(), 15.0), PRECISION);
230 
231 	inc_avg.clear();
232 	inc_avg.addToAverage(-10);
233     t.test!(">=")(feqrel(inc_avg.average(), -10.0), PRECISION);
234 	inc_avg.addToAverage(-20);
235     t.test!(">=")(feqrel(inc_avg.average(), -15.0), PRECISION);
236 
237 	inc_avg.clear();
238 	inc_avg.addToAverage(-10, uint.max);
239 	inc_avg.addToAverage(10, uint.max);
240     t.test!("==")(inc_avg.count, 2UL * uint.max);
241     t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION);
242 
243 	inc_avg.clear();
244 	inc_avg.addToAverage(long.max);
245     t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.max), PRECISION);
246 	inc_avg.addToAverage(cast(ulong)long.max + 10);
247     t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.max) + 5,
248                   PRECISION);
249 
250 	inc_avg.clear();
251 	inc_avg.addToAverage(long.max / 2.0);
252 	inc_avg.addToAverage(long.max * 1.25);
253     // (0.5 + 1.25) / 2 = 0.875
254     t.test!(">=")(feqrel(inc_avg.average(), long.max * 0.875), PRECISION);
255 
256 	inc_avg.clear();
257 	inc_avg.addToAverage(long.min);
258     t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.min), PRECISION);
259 	inc_avg.addToAverage(cast(double)long.min - 10);
260     t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.min - 5),
261                   PRECISION);
262 
263 	inc_avg.clear();
264 	static immutable ADD = ulong.max/1_000_000;
265 	for (ulong i = 0; i < ulong.max; i += (ADD < ulong.max - i ? ADD : 1))
266 		inc_avg.addToAverage(i%2); // 1 or 0
267 	inc_avg.addToAverage(1); // One more add is missing
268     t.test!(">=")(feqrel(inc_avg.average(), 0.5), PRECISION);
269 }
270 
271 unittest
272 {
273     auto t = new NamedTest("Initialise to zero");
274 
275     IncrementalAverage distribution;
276     t.test!("==")(distribution.count(), 0);
277     t.test!(">=")(feqrel(distribution.average(), 0.0), PRECISION);
278     t.test!(">=")(feqrel(distribution.stdDeviation(), 0.0), PRECISION);
279     t.test!(">=")(feqrel(distribution.variance(), 0.0), PRECISION);
280 }
281 
282 unittest
283 {
284     auto t = new NamedTest("Mean (average)");
285 
286     IncrementalAverage distribution;
287 
288     // test with signed int
289     distribution.clear();
290     int[] int_values = [-2, -3, -4, -5, -6];
291     foreach (value; int_values)
292     {
293         distribution.addToAverage(value);
294     }
295     t.test!(">=")(feqrel(distribution.average(), -4.0), PRECISION);
296 
297     // test with unsigned long
298     distribution.clear();
299     ulong[] ulong_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
300     foreach (value; ulong_values)
301     {
302         distribution.addToAverage(value);
303     }
304     t.test!(">=")(feqrel(distribution.average(), 4.5), PRECISION);
305 
306     // test with double
307     distribution.clear();
308     double[] double_values = [2.4, 5.0, 7.6];
309     foreach (value; double_values)
310     {
311         distribution.addToAverage(value);
312     }
313     t.test!(">=")(feqrel(distribution.average(), 5.0), PRECISION);
314 }
315 
316 unittest
317 {
318     auto t = new NamedTest("Standard Deviation & Variance");
319 
320     IncrementalAverage distribution;
321 
322     int[] values = [1, 2, 3, 4];
323     distribution.clear();
324     foreach (value; values)
325     {
326         distribution.addToAverage(value);
327     }
328 
329     t.test!(">=")(feqrel(distribution.average(), 2.5), PRECISION);
330 
331     t.test!(">=")(feqrel(distribution.stdDeviation(0), 1.1180339887498949),
332                   PRECISION);
333     t.test!(">=")(feqrel(distribution.stdDeviation(1), 1.2909944487358056),
334                   PRECISION);
335 
336     t.test!(">=")(feqrel(distribution.variance(0), 1.25), PRECISION);
337     t.test!(">=")(feqrel(distribution.variance(1), 1.66666666666666666),
338                   PRECISION);
339 
340     values = [10, 20, 30, 40, 50, 60];
341     distribution.clear();
342     foreach (value; values)
343     {
344         distribution.addToAverage(value);
345     }
346 
347     t.test!(">=")(feqrel(distribution.average(), 35.0), PRECISION);
348 
349     t.test!(">=")(feqrel(distribution.stdDeviation(0), 17.0782512765993317),
350                   PRECISION);
351     t.test!(">=")(feqrel(distribution.stdDeviation(1), 18.708286933869708),
352                   PRECISION);
353 
354     t.test!(">=")(feqrel(distribution.variance(0), 291.6666666666666),
355                   PRECISION);
356     t.test!(">=")(feqrel(distribution.variance(1), 350.0), PRECISION);
357 }
358 
359 unittest
360 {
361     auto t = new NamedTest("Multiple insertion precision");
362 
363     static immutable value_1 = 3;
364     static immutable value_2 = 7;
365     static immutable value_3 = 11;
366     static immutable freq = 1001;
367 
368     IncrementalAverage single, multiple;
369 
370     multiple.addToAverage(value_1);
371     single.addToAverage(value_1);
372 
373     multiple.addToAverage(value_2, freq);
374     for (auto i = 0; i < freq; ++i)
375     {
376         single.addToAverage(value_2);
377     }
378 
379     multiple.addToAverage(value_3, freq);
380     for (auto i = 0; i < freq; ++i)
381     {
382         single.addToAverage(value_3);
383     }
384 
385     t.test!(">=")(feqrel(single.average(), multiple.average()), PRECISION);
386     t.test!(">=")(feqrel(single.stdDeviation(0), multiple.stdDeviation(0)),
387                   PRECISION);
388     t.test!(">=")(feqrel(single.stdDeviation(1), multiple.stdDeviation(1)),
389                   PRECISION);
390     t.test!(">=")(feqrel(single.variance(0), multiple.variance(0)), PRECISION);
391     t.test!(">=")(feqrel(single.variance(1), multiple.variance(1)), PRECISION);
392 }
393 
394 unittest
395 {
396     auto t = new NamedTest("Add only one element");
397 
398     IncrementalAverage distribution;
399 
400     static immutable value = 8.0;
401 
402     distribution.addToAverage(value);
403 
404     t.test!("==")(distribution.average(), value);
405 
406     t.test!("==")(distribution.stdDeviation(0), 0.0);
407     t.test!("==")(distribution.stdDeviation(1), 0.0);
408 
409     t.test!("==")(distribution.variance(0), 0.0);
410     t.test!("==")(distribution.variance(1), 0.0);
411 }