1 /*******************************************************************************
2 
3     Calculates the average using an accumulative technique (i.e, not all the
4     values are provided at once).
5     The struct doesn't store any previous values.
6 
7     Copyright:
8         Copyright (c) 2009-2016 dunnhumby Germany GmbH.
9         All rights reserved.
10 
11     License:
12         Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
13         Alternatively, this file may be distributed under the terms of the Tango
14         3-Clause BSD License (see LICENSE_BSD.txt for details).
15 
16 *******************************************************************************/
17 
18 module ocean.math.IncrementalAverage;
19 
20 import ocean.core.Verify;
21 
22 /*******************************************************************************
23 
24     Calculates the average using an accumulative technique (i.e, not all the
25     values are provided at once).
26     The struct doesn't store any previous values.
27 
28 *******************************************************************************/
29 
30 public struct IncrementalAverage
31 {
32     import ocean.math.Math : sqrt;
33 
34     /***************************************************************************
35 
36         Counts the number of averages that has been previously performed.
37         This helps in giving the correct weight to a new number being added
38         to the average in comparison to the previous numbers.
39 
40     ***************************************************************************/
41 
42     private ulong count_ = 0;
43 
44 
45     /***************************************************************************
46 
47         Holds the average value calculated so far.
48 
49     ***************************************************************************/
50 
51     private double average_ = 0;
52 
53 
54     /***************************************************************************
55 
56         Holds the mean of square of the added elements used by the variance()
57         and stdDeviation() methods.
58 
59     ***************************************************************************/
60 
61     private double mean_of_square = 0;
62 
63 
64     /***************************************************************************
65 
66         Adds a new number (giving it an appropriate weight) to the average.
67 
68         Note that if too many numbers were added (more than ulong.max) then the
69         the internal counter will overflow (and as a result the average value
70         would be corrupt).
71 
72         Params:
73             value = the new value to add to the current average.
74             count = if that value represent in itself the average of other
75                 numbers, then this param should define the number of elements
76                 that this average stands for. A count = 0 has no effect on the
77                 average and gets discarded.
78 
79     ***************************************************************************/
80 
81     public void addToAverage (T)(T value, ulong count = 1)
82     {
83         if (count == 0)
84             return;
85 
86         this.count_ += count;
87 
88         auto delta = (value - this.average_) * count;
89 
90         this.average_ += delta / this.count_;
91 
92         this.mean_of_square += delta * (value - this.average_);
93     }
94 
95 
96     /***************************************************************************
97 
98         Returns:
99             the average calculated so far.
100 
101     ***************************************************************************/
102 
103     public double average ()
104     {
105         return this.average_;
106     }
107 
108 
109     /***************************************************************************
110 
111         Returns:
112             the count of elements added.
113 
114     ***************************************************************************/
115 
116     public ulong count ()
117     {
118         return this.count_;
119     }
120 
121 
122     /***************************************************************************
123 
124         Resets the average incremental instance.
125 
126     ***************************************************************************/
127 
128     public void clear ()
129     {
130         this.average_ = 0;
131         this.count_ = 0;
132         this.mean_of_square = 0;
133     }
134 
135     /***************************************************************************
136 
137         Computes the variance, a measure of the spread of a distribution to
138         quantify how far a set of data values is spread out.
139 
140         Note that the Welford's algorithm implementation is used to compute
141         the spread of a distribution incrementally when a value is added
142         through addToAverage().
143         http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
144         #Online_algorithm
145 
146         In standard statistical practice, a correction factor of 0 provides a
147         maximum likelihood estimate of the variance for normally distributed
148         variables, while a correction factor of 1 provides an unbiased
149         estimator of the variance of a hypothetical infinite population.
150         The correction factor will be subtracted from the number of elements
151         added (sample points) when working out the divisor.
152         For more information about correction factors that could be set please
153         see http://en.wikipedia.org/wiki/Standard_deviation#Estimation
154 
155         Params:
156             correction_factor = correction factor to normalise the divisor used
157                                 in the calculation
158 
159         Returns:
160             The variance of the set of data values added
161 
162     ***************************************************************************/
163 
164     public double variance (double correction_factor = 0)
165     {
166         if (this.count_ < 2)
167             return 0;
168 
169         verify(this.count_ > correction_factor, "Correction factor is same "
170                ~ "size or bigger than the number of elements added.");
171 
172         return this.mean_of_square / (this.count_ - correction_factor);
173     }
174 
175     /***************************************************************************
176 
177         Returns the standard deviation, a measure of the spread of a
178         distribution to quantify the amount of variation or dispersion of a
179         set of data values.
180 
181         See notes on variance()
182 
183         Params:
184             correction_factor = correction factor to normalise the divisor used
185                                 in the calculation
186 
187         Returns:
188             The standard deviation of the set of data values added
189 
190     ***************************************************************************/
191 
192     public double stdDeviation (double correction_factor = 0)
193     {
194         return sqrt(this.variance(correction_factor));
195     }
196 }
197 
198 
199 version (unittest)
200 {
201     import ocean.core.Test : NamedTest;
202 
203     import ocean.math.IEEE;
204 
205     // Bit count to avoid deliberate loss of precision.
206     // Ensures only 8 bits of precision could be lost.
207     static immutable PRECISION = double.mant_dig - 8;
208 }
209 
210 unittest
211 {
212     auto t = new NamedTest("Incremental Average - basic unit tests");
213 
214     IncrementalAverage inc_avg;
215 
216     t.test!("==")(inc_avg.count, 0);
217     t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION);
218 
219     inc_avg.addToAverage(1);
220     t.test!(">=")(feqrel(inc_avg.average(), 1.0), PRECISION);
221 
222     inc_avg.clear();
223     t.test!("==")(inc_avg.count, 0);
224     t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION);
225 
226     inc_avg.addToAverage(10);
227     inc_avg.addToAverage(20);
228     t.test!("==")(inc_avg.count, 2);
229     t.test!(">=")(feqrel(inc_avg.average(), 15.0), PRECISION);
230 
231     inc_avg.clear();
232     inc_avg.addToAverage(-10);
233     t.test!(">=")(feqrel(inc_avg.average(), -10.0), PRECISION);
234     inc_avg.addToAverage(-20);
235     t.test!(">=")(feqrel(inc_avg.average(), -15.0), PRECISION);
236 
237     inc_avg.clear();
238     inc_avg.addToAverage(-10, uint.max);
239     inc_avg.addToAverage(10, uint.max);
240     t.test!("==")(inc_avg.count, 2UL * uint.max);
241     t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION);
242 
243     inc_avg.clear();
244     inc_avg.addToAverage(long.max);
245     t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.max), PRECISION);
246     inc_avg.addToAverage(cast(ulong)long.max + 10);
247     t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.max) + 5,
248                   PRECISION);
249 
250     inc_avg.clear();
251     inc_avg.addToAverage(long.max / 2.0);
252     inc_avg.addToAverage(long.max * 1.25);
253     // (0.5 + 1.25) / 2 = 0.875
254     t.test!(">=")(feqrel(inc_avg.average(), long.max * 0.875), PRECISION);
255 
256     inc_avg.clear();
257     inc_avg.addToAverage(long.min);
258     t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.min), PRECISION);
259     inc_avg.addToAverage(cast(double)long.min - 10);
260     t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.min - 5),
261                   PRECISION);
262 
263     inc_avg.clear();
264     static immutable ADD = ulong.max/1_000_000;
265     for (ulong i = 0; i < ulong.max;)
266     {
267         inc_avg.addToAverage(i%2); // 1 or 0
268 
269         if (ADD < (ulong.max - i))
270             i += ADD;
271         else
272             i++;
273     }
274     inc_avg.addToAverage(1); // One more add is missing
275     t.test!(">=")(feqrel(inc_avg.average(), 0.5), PRECISION);
276 }
277 
278 unittest
279 {
280     auto t = new NamedTest("Initialise to zero");
281 
282     IncrementalAverage distribution;
283     t.test!("==")(distribution.count(), 0);
284     t.test!(">=")(feqrel(distribution.average(), 0.0), PRECISION);
285     t.test!(">=")(feqrel(distribution.stdDeviation(), 0.0), PRECISION);
286     t.test!(">=")(feqrel(distribution.variance(), 0.0), PRECISION);
287 }
288 
289 unittest
290 {
291     auto t = new NamedTest("Mean (average)");
292 
293     IncrementalAverage distribution;
294 
295     // test with signed int
296     distribution.clear();
297     int[] int_values = [-2, -3, -4, -5, -6];
298     foreach (value; int_values)
299     {
300         distribution.addToAverage(value);
301     }
302     t.test!(">=")(feqrel(distribution.average(), -4.0), PRECISION);
303 
304     // test with unsigned long
305     distribution.clear();
306     ulong[] ulong_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
307     foreach (value; ulong_values)
308     {
309         distribution.addToAverage(value);
310     }
311     t.test!(">=")(feqrel(distribution.average(), 4.5), PRECISION);
312 
313     // test with double
314     distribution.clear();
315     double[] double_values = [2.4, 5.0, 7.6];
316     foreach (value; double_values)
317     {
318         distribution.addToAverage(value);
319     }
320     t.test!(">=")(feqrel(distribution.average(), 5.0), PRECISION);
321 }
322 
323 unittest
324 {
325     auto t = new NamedTest("Standard Deviation & Variance");
326 
327     IncrementalAverage distribution;
328 
329     int[] values = [1, 2, 3, 4];
330     distribution.clear();
331     foreach (value; values)
332     {
333         distribution.addToAverage(value);
334     }
335 
336     t.test!(">=")(feqrel(distribution.average(), 2.5), PRECISION);
337 
338     t.test!(">=")(feqrel(distribution.stdDeviation(0), 1.1180339887498949),
339                   PRECISION);
340     t.test!(">=")(feqrel(distribution.stdDeviation(1), 1.2909944487358056),
341                   PRECISION);
342 
343     t.test!(">=")(feqrel(distribution.variance(0), 1.25), PRECISION);
344     t.test!(">=")(feqrel(distribution.variance(1), 1.66666666666666666),
345                   PRECISION);
346 
347     values = [10, 20, 30, 40, 50, 60];
348     distribution.clear();
349     foreach (value; values)
350     {
351         distribution.addToAverage(value);
352     }
353 
354     t.test!(">=")(feqrel(distribution.average(), 35.0), PRECISION);
355 
356     t.test!(">=")(feqrel(distribution.stdDeviation(0), 17.0782512765993317),
357                   PRECISION);
358     t.test!(">=")(feqrel(distribution.stdDeviation(1), 18.708286933869708),
359                   PRECISION);
360 
361     t.test!(">=")(feqrel(distribution.variance(0), 291.6666666666666),
362                   PRECISION);
363     t.test!(">=")(feqrel(distribution.variance(1), 350.0), PRECISION);
364 }
365 
366 unittest
367 {
368     auto t = new NamedTest("Multiple insertion precision");
369 
370     static immutable value_1 = 3;
371     static immutable value_2 = 7;
372     static immutable value_3 = 11;
373     static immutable freq = 1001;
374 
375     IncrementalAverage single, multiple;
376 
377     multiple.addToAverage(value_1);
378     single.addToAverage(value_1);
379 
380     multiple.addToAverage(value_2, freq);
381     for (auto i = 0; i < freq; ++i)
382     {
383         single.addToAverage(value_2);
384     }
385 
386     multiple.addToAverage(value_3, freq);
387     for (auto i = 0; i < freq; ++i)
388     {
389         single.addToAverage(value_3);
390     }
391 
392     t.test!(">=")(feqrel(single.average(), multiple.average()), PRECISION);
393     t.test!(">=")(feqrel(single.stdDeviation(0), multiple.stdDeviation(0)),
394                   PRECISION);
395     t.test!(">=")(feqrel(single.stdDeviation(1), multiple.stdDeviation(1)),
396                   PRECISION);
397     t.test!(">=")(feqrel(single.variance(0), multiple.variance(0)), PRECISION);
398     t.test!(">=")(feqrel(single.variance(1), multiple.variance(1)), PRECISION);
399 }
400 
401 unittest
402 {
403     auto t = new NamedTest("Add only one element");
404 
405     IncrementalAverage distribution;
406 
407     static immutable value = 8.0;
408 
409     distribution.addToAverage(value);
410 
411     t.test!("==")(distribution.average(), value);
412 
413     t.test!("==")(distribution.stdDeviation(0), 0.0);
414     t.test!("==")(distribution.stdDeviation(1), 0.0);
415 
416     t.test!("==")(distribution.variance(0), 0.0);
417     t.test!("==")(distribution.variance(1), 0.0);
418 }