1 /*******************************************************************************
2 3 Calculates the average using an accumulative technique (i.e, not all the
4 values are provided at once).
5 The struct doesn't store any previous values.
6 7 Copyright:
8 Copyright (c) 2009-2016 dunnhumby Germany GmbH.
9 All rights reserved.
10 11 License:
12 Boost Software License Version 1.0. See LICENSE_BOOST.txt for details.
13 Alternatively, this file may be distributed under the terms of the Tango
14 3-Clause BSD License (see LICENSE_BSD.txt for details).
15 16 *******************************************************************************/17 18 moduleocean.math.IncrementalAverage;
19 20 importocean.core.Verify;
21 22 /*******************************************************************************
23 24 Calculates the average using an accumulative technique (i.e, not all the
25 values are provided at once).
26 The struct doesn't store any previous values.
27 28 *******************************************************************************/29 30 publicstructIncrementalAverage31 {
32 importocean.math.Math : sqrt;
33 34 /***************************************************************************
35 36 Counts the number of averages that has been previously performed.
37 This helps in giving the correct weight to a new number being added
38 to the average in comparison to the previous numbers.
39 40 ***************************************************************************/41 42 privateulongcount_ = 0;
43 44 45 /***************************************************************************
46 47 Holds the average value calculated so far.
48 49 ***************************************************************************/50 51 privatedoubleaverage_ = 0;
52 53 54 /***************************************************************************
55 56 Holds the mean of square of the added elements used by the variance()
57 and stdDeviation() methods.
58 59 ***************************************************************************/60 61 privatedoublemean_of_square = 0;
62 63 64 /***************************************************************************
65 66 Adds a new number (giving it an appropriate weight) to the average.
67 68 Note that if too many numbers were added (more than ulong.max) then the
69 the internal counter will overflow (and as a result the average value
70 would be corrupt).
71 72 Params:
73 value = the new value to add to the current average.
74 count = if that value represent in itself the average of other
75 numbers, then this param should define the number of elements
76 that this average stands for. A count = 0 has no effect on the
77 average and gets discarded.
78 79 ***************************************************************************/80 81 publicvoidaddToAverage (T)(Tvalue, ulongcount = 1)
82 {
83 if (count == 0)
84 return;
85 86 this.count_ += count;
87 88 autodelta = (value - this.average_) * count;
89 90 this.average_ += delta / this.count_;
91 92 this.mean_of_square += delta * (value - this.average_);
93 }
94 95 96 /***************************************************************************
97 98 Returns:
99 the average calculated so far.
100 101 ***************************************************************************/102 103 publicdoubleaverage ()
104 {
105 returnthis.average_;
106 }
107 108 109 /***************************************************************************
110 111 Returns:
112 the count of elements added.
113 114 ***************************************************************************/115 116 publiculongcount ()
117 {
118 returnthis.count_;
119 }
120 121 122 /***************************************************************************
123 124 Resets the average incremental instance.
125 126 ***************************************************************************/127 128 publicvoidclear ()
129 {
130 this.average_ = 0;
131 this.count_ = 0;
132 this.mean_of_square = 0;
133 }
134 135 /***************************************************************************
136 137 Computes the variance, a measure of the spread of a distribution to
138 quantify how far a set of data values is spread out.
139 140 Note that the Welford's algorithm implementation is used to compute
141 the spread of a distribution incrementally when a value is added
142 through addToAverage().
143 http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
144 #Online_algorithm
145 146 In standard statistical practice, a correction factor of 0 provides a
147 maximum likelihood estimate of the variance for normally distributed
148 variables, while a correction factor of 1 provides an unbiased
149 estimator of the variance of a hypothetical infinite population.
150 The correction factor will be subtracted from the number of elements
151 added (sample points) when working out the divisor.
152 For more information about correction factors that could be set please
153 see http://en.wikipedia.org/wiki/Standard_deviation#Estimation
154 155 Params:
156 correction_factor = correction factor to normalise the divisor used
157 in the calculation
158 159 Returns:
160 The variance of the set of data values added
161 162 ***************************************************************************/163 164 publicdoublevariance (doublecorrection_factor = 0)
165 {
166 if (this.count_ < 2)
167 return0;
168 169 verify(this.count_ > correction_factor, "Correction factor is same "170 ~ "size or bigger than the number of elements added.");
171 172 returnthis.mean_of_square / (this.count_ - correction_factor);
173 }
174 175 /***************************************************************************
176 177 Returns the standard deviation, a measure of the spread of a
178 distribution to quantify the amount of variation or dispersion of a
179 set of data values.
180 181 See notes on variance()
182 183 Params:
184 correction_factor = correction factor to normalise the divisor used
185 in the calculation
186 187 Returns:
188 The standard deviation of the set of data values added
189 190 ***************************************************************************/191 192 publicdoublestdDeviation (doublecorrection_factor = 0)
193 {
194 returnsqrt(this.variance(correction_factor));
195 }
196 }
197 198 199 version (unittest)
200 {
201 importocean.core.Test : NamedTest;
202 203 importocean.math.IEEE;
204 205 // Bit count to avoid deliberate loss of precision.206 // Ensures only 8 bits of precision could be lost.207 staticimmutablePRECISION = double.mant_dig - 8;
208 }
209 210 unittest211 {
212 autot = newNamedTest("Incremental Average - basic unit tests");
213 214 IncrementalAverageinc_avg;
215 216 t.test!("==")(inc_avg.count, 0);
217 t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION);
218 219 inc_avg.addToAverage(1);
220 t.test!(">=")(feqrel(inc_avg.average(), 1.0), PRECISION);
221 222 inc_avg.clear();
223 t.test!("==")(inc_avg.count, 0);
224 t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION);
225 226 inc_avg.addToAverage(10);
227 inc_avg.addToAverage(20);
228 t.test!("==")(inc_avg.count, 2);
229 t.test!(">=")(feqrel(inc_avg.average(), 15.0), PRECISION);
230 231 inc_avg.clear();
232 inc_avg.addToAverage(-10);
233 t.test!(">=")(feqrel(inc_avg.average(), -10.0), PRECISION);
234 inc_avg.addToAverage(-20);
235 t.test!(">=")(feqrel(inc_avg.average(), -15.0), PRECISION);
236 237 inc_avg.clear();
238 inc_avg.addToAverage(-10, uint.max);
239 inc_avg.addToAverage(10, uint.max);
240 t.test!("==")(inc_avg.count, 2UL * uint.max);
241 t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION);
242 243 inc_avg.clear();
244 inc_avg.addToAverage(long.max);
245 t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.max), PRECISION);
246 inc_avg.addToAverage(cast(ulong)long.max + 10);
247 t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.max) + 5,
248 PRECISION);
249 250 inc_avg.clear();
251 inc_avg.addToAverage(long.max / 2.0);
252 inc_avg.addToAverage(long.max * 1.25);
253 // (0.5 + 1.25) / 2 = 0.875254 t.test!(">=")(feqrel(inc_avg.average(), long.max * 0.875), PRECISION);
255 256 inc_avg.clear();
257 inc_avg.addToAverage(long.min);
258 t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.min), PRECISION);
259 inc_avg.addToAverage(cast(double)long.min - 10);
260 t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.min - 5),
261 PRECISION);
262 263 inc_avg.clear();
264 staticimmutableADD = ulong.max/1_000_000;
265 for (ulongi = 0; i < ulong.max;)
266 {
267 inc_avg.addToAverage(i%2); // 1 or 0268 269 if (ADD < (ulong.max - i))
270 i += ADD;
271 else272 i++;
273 }
274 inc_avg.addToAverage(1); // One more add is missing275 t.test!(">=")(feqrel(inc_avg.average(), 0.5), PRECISION);
276 }
277 278 unittest279 {
280 autot = newNamedTest("Initialise to zero");
281 282 IncrementalAveragedistribution;
283 t.test!("==")(distribution.count(), 0);
284 t.test!(">=")(feqrel(distribution.average(), 0.0), PRECISION);
285 t.test!(">=")(feqrel(distribution.stdDeviation(), 0.0), PRECISION);
286 t.test!(">=")(feqrel(distribution.variance(), 0.0), PRECISION);
287 }
288 289 unittest290 {
291 autot = newNamedTest("Mean (average)");
292 293 IncrementalAveragedistribution;
294 295 // test with signed int296 distribution.clear();
297 int[] int_values = [-2, -3, -4, -5, -6];
298 foreach (value; int_values)
299 {
300 distribution.addToAverage(value);
301 }
302 t.test!(">=")(feqrel(distribution.average(), -4.0), PRECISION);
303 304 // test with unsigned long305 distribution.clear();
306 ulong[] ulong_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
307 foreach (value; ulong_values)
308 {
309 distribution.addToAverage(value);
310 }
311 t.test!(">=")(feqrel(distribution.average(), 4.5), PRECISION);
312 313 // test with double314 distribution.clear();
315 double[] double_values = [2.4, 5.0, 7.6];
316 foreach (value; double_values)
317 {
318 distribution.addToAverage(value);
319 }
320 t.test!(">=")(feqrel(distribution.average(), 5.0), PRECISION);
321 }
322 323 unittest324 {
325 autot = newNamedTest("Standard Deviation & Variance");
326 327 IncrementalAveragedistribution;
328 329 int[] values = [1, 2, 3, 4];
330 distribution.clear();
331 foreach (value; values)
332 {
333 distribution.addToAverage(value);
334 }
335 336 t.test!(">=")(feqrel(distribution.average(), 2.5), PRECISION);
337 338 t.test!(">=")(feqrel(distribution.stdDeviation(0), 1.1180339887498949),
339 PRECISION);
340 t.test!(">=")(feqrel(distribution.stdDeviation(1), 1.2909944487358056),
341 PRECISION);
342 343 t.test!(">=")(feqrel(distribution.variance(0), 1.25), PRECISION);
344 t.test!(">=")(feqrel(distribution.variance(1), 1.66666666666666666),
345 PRECISION);
346 347 values = [10, 20, 30, 40, 50, 60];
348 distribution.clear();
349 foreach (value; values)
350 {
351 distribution.addToAverage(value);
352 }
353 354 t.test!(">=")(feqrel(distribution.average(), 35.0), PRECISION);
355 356 t.test!(">=")(feqrel(distribution.stdDeviation(0), 17.0782512765993317),
357 PRECISION);
358 t.test!(">=")(feqrel(distribution.stdDeviation(1), 18.708286933869708),
359 PRECISION);
360 361 t.test!(">=")(feqrel(distribution.variance(0), 291.6666666666666),
362 PRECISION);
363 t.test!(">=")(feqrel(distribution.variance(1), 350.0), PRECISION);
364 }
365 366 unittest367 {
368 autot = newNamedTest("Multiple insertion precision");
369 370 staticimmutablevalue_1 = 3;
371 staticimmutablevalue_2 = 7;
372 staticimmutablevalue_3 = 11;
373 staticimmutablefreq = 1001;
374 375 IncrementalAveragesingle, multiple;
376 377 multiple.addToAverage(value_1);
378 single.addToAverage(value_1);
379 380 multiple.addToAverage(value_2, freq);
381 for (autoi = 0; i < freq; ++i)
382 {
383 single.addToAverage(value_2);
384 }
385 386 multiple.addToAverage(value_3, freq);
387 for (autoi = 0; i < freq; ++i)
388 {
389 single.addToAverage(value_3);
390 }
391 392 t.test!(">=")(feqrel(single.average(), multiple.average()), PRECISION);
393 t.test!(">=")(feqrel(single.stdDeviation(0), multiple.stdDeviation(0)),
394 PRECISION);
395 t.test!(">=")(feqrel(single.stdDeviation(1), multiple.stdDeviation(1)),
396 PRECISION);
397 t.test!(">=")(feqrel(single.variance(0), multiple.variance(0)), PRECISION);
398 t.test!(">=")(feqrel(single.variance(1), multiple.variance(1)), PRECISION);
399 }
400 401 unittest402 {
403 autot = newNamedTest("Add only one element");
404 405 IncrementalAveragedistribution;
406 407 staticimmutablevalue = 8.0;
408 409 distribution.addToAverage(value);
410 411 t.test!("==")(distribution.average(), value);
412 413 t.test!("==")(distribution.stdDeviation(0), 0.0);
414 t.test!("==")(distribution.stdDeviation(1), 0.0);
415 416 t.test!("==")(distribution.variance(0), 0.0);
417 t.test!("==")(distribution.variance(1), 0.0);
418 }