1 /******************************************************************************* 2 3 Calculates the average using an accumulative technique (i.e, not all the 4 values are provided at once). 5 The struct doesn't store any previous values. 6 7 Copyright: 8 Copyright (c) 2009-2016 dunnhumby Germany GmbH. 9 All rights reserved. 10 11 License: 12 Boost Software License Version 1.0. See LICENSE_BOOST.txt for details. 13 Alternatively, this file may be distributed under the terms of the Tango 14 3-Clause BSD License (see LICENSE_BSD.txt for details). 15 16 *******************************************************************************/ 17 18 module ocean.math.IncrementalAverage; 19 20 import ocean.core.Verify; 21 22 /******************************************************************************* 23 24 Calculates the average using an accumulative technique (i.e, not all the 25 values are provided at once). 26 The struct doesn't store any previous values. 27 28 *******************************************************************************/ 29 30 public struct IncrementalAverage 31 { 32 import ocean.math.Math : sqrt; 33 34 /*************************************************************************** 35 36 Counts the number of averages that has been previously performed. 37 This helps in giving the correct weight to a new number being added 38 to the average in comparison to the previous numbers. 39 40 ***************************************************************************/ 41 42 private ulong count_ = 0; 43 44 45 /*************************************************************************** 46 47 Holds the average value calculated so far. 48 49 ***************************************************************************/ 50 51 private double average_ = 0; 52 53 54 /*************************************************************************** 55 56 Holds the mean of square of the added elements used by the variance() 57 and stdDeviation() methods. 58 59 ***************************************************************************/ 60 61 private double mean_of_square = 0; 62 63 64 /*************************************************************************** 65 66 Adds a new number (giving it an appropriate weight) to the average. 67 68 Note that if too many numbers were added (more than ulong.max) then the 69 the internal counter will overflow (and as a result the average value 70 would be corrupt). 71 72 Params: 73 value = the new value to add to the current average. 74 count = if that value represent in itself the average of other 75 numbers, then this param should define the number of elements 76 that this average stands for. A count = 0 has no effect on the 77 average and gets discarded. 78 79 ***************************************************************************/ 80 81 public void addToAverage (T)(T value, ulong count = 1) 82 { 83 if (count == 0) 84 return; 85 86 (&this).count_ += count; 87 88 auto delta = (value - (&this).average_) * count; 89 90 (&this).average_ += delta / (&this).count_; 91 92 (&this).mean_of_square += delta * (value - (&this).average_); 93 } 94 95 96 /*************************************************************************** 97 98 Returns: 99 the average calculated so far. 100 101 ***************************************************************************/ 102 103 public double average () 104 { 105 return (&this).average_; 106 } 107 108 109 /*************************************************************************** 110 111 Returns: 112 the count of elements added. 113 114 ***************************************************************************/ 115 116 public ulong count () 117 { 118 return (&this).count_; 119 } 120 121 122 /*************************************************************************** 123 124 Resets the average incremental instance. 125 126 ***************************************************************************/ 127 128 public void clear () 129 { 130 (&this).average_ = 0; 131 (&this).count_ = 0; 132 (&this).mean_of_square = 0; 133 } 134 135 /*************************************************************************** 136 137 Computes the variance, a measure of the spread of a distribution to 138 quantify how far a set of data values is spread out. 139 140 Note that the Welford's algorithm implementation is used to compute 141 the spread of a distribution incrementally when a value is added 142 through addToAverage(). 143 http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance 144 #Online_algorithm 145 146 In standard statistical practice, a correction factor of 0 provides a 147 maximum likelihood estimate of the variance for normally distributed 148 variables, while a correction factor of 1 provides an unbiased 149 estimator of the variance of a hypothetical infinite population. 150 The correction factor will be subtracted from the number of elements 151 added (sample points) when working out the divisor. 152 For more information about correction factors that could be set please 153 see http://en.wikipedia.org/wiki/Standard_deviation#Estimation 154 155 Params: 156 correction_factor = correction factor to normalise the divisor used 157 in the calculation 158 159 Returns: 160 The variance of the set of data values added 161 162 ***************************************************************************/ 163 164 public double variance (double correction_factor = 0) 165 { 166 if ((&this).count_ < 2) 167 return 0; 168 169 verify((&this).count_ > correction_factor, "Correction factor is same " 170 ~ "size or bigger than the number of elements added."); 171 172 return (&this).mean_of_square / ((&this).count_ - correction_factor); 173 } 174 175 /*************************************************************************** 176 177 Returns the standard deviation, a measure of the spread of a 178 distribution to quantify the amount of variation or dispersion of a 179 set of data values. 180 181 See notes on variance() 182 183 Params: 184 correction_factor = correction factor to normalise the divisor used 185 in the calculation 186 187 Returns: 188 The standard deviation of the set of data values added 189 190 ***************************************************************************/ 191 192 public double stdDeviation (double correction_factor = 0) 193 { 194 return sqrt((&this).variance(correction_factor)); 195 } 196 } 197 198 199 version (UnitTest) 200 { 201 import ocean.core.Test : NamedTest; 202 203 import ocean.math.IEEE; 204 205 // Bit count to avoid deliberate loss of precision. 206 // Ensures only 8 bits of precision could be lost. 207 static immutable PRECISION = double.mant_dig - 8; 208 } 209 210 unittest 211 { 212 auto t = new NamedTest("Incremental Average - basic unit tests"); 213 214 IncrementalAverage inc_avg; 215 216 t.test!("==")(inc_avg.count, 0); 217 t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION); 218 219 inc_avg.addToAverage(1); 220 t.test!(">=")(feqrel(inc_avg.average(), 1.0), PRECISION); 221 222 inc_avg.clear(); 223 t.test!("==")(inc_avg.count, 0); 224 t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION); 225 226 inc_avg.addToAverage(10); 227 inc_avg.addToAverage(20); 228 t.test!("==")(inc_avg.count, 2); 229 t.test!(">=")(feqrel(inc_avg.average(), 15.0), PRECISION); 230 231 inc_avg.clear(); 232 inc_avg.addToAverage(-10); 233 t.test!(">=")(feqrel(inc_avg.average(), -10.0), PRECISION); 234 inc_avg.addToAverage(-20); 235 t.test!(">=")(feqrel(inc_avg.average(), -15.0), PRECISION); 236 237 inc_avg.clear(); 238 inc_avg.addToAverage(-10, uint.max); 239 inc_avg.addToAverage(10, uint.max); 240 t.test!("==")(inc_avg.count, 2UL * uint.max); 241 t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION); 242 243 inc_avg.clear(); 244 inc_avg.addToAverage(long.max); 245 t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.max), PRECISION); 246 inc_avg.addToAverage(cast(ulong)long.max + 10); 247 t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.max) + 5, 248 PRECISION); 249 250 inc_avg.clear(); 251 inc_avg.addToAverage(long.max / 2.0); 252 inc_avg.addToAverage(long.max * 1.25); 253 // (0.5 + 1.25) / 2 = 0.875 254 t.test!(">=")(feqrel(inc_avg.average(), long.max * 0.875), PRECISION); 255 256 inc_avg.clear(); 257 inc_avg.addToAverage(long.min); 258 t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.min), PRECISION); 259 inc_avg.addToAverage(cast(double)long.min - 10); 260 t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.min - 5), 261 PRECISION); 262 263 inc_avg.clear(); 264 static immutable ADD = ulong.max/1_000_000; 265 for (ulong i = 0; i < ulong.max; i += (ADD < ulong.max - i ? ADD : 1)) 266 inc_avg.addToAverage(i%2); // 1 or 0 267 inc_avg.addToAverage(1); // One more add is missing 268 t.test!(">=")(feqrel(inc_avg.average(), 0.5), PRECISION); 269 } 270 271 unittest 272 { 273 auto t = new NamedTest("Initialise to zero"); 274 275 IncrementalAverage distribution; 276 t.test!("==")(distribution.count(), 0); 277 t.test!(">=")(feqrel(distribution.average(), 0.0), PRECISION); 278 t.test!(">=")(feqrel(distribution.stdDeviation(), 0.0), PRECISION); 279 t.test!(">=")(feqrel(distribution.variance(), 0.0), PRECISION); 280 } 281 282 unittest 283 { 284 auto t = new NamedTest("Mean (average)"); 285 286 IncrementalAverage distribution; 287 288 // test with signed int 289 distribution.clear(); 290 int[] int_values = [-2, -3, -4, -5, -6]; 291 foreach (value; int_values) 292 { 293 distribution.addToAverage(value); 294 } 295 t.test!(">=")(feqrel(distribution.average(), -4.0), PRECISION); 296 297 // test with unsigned long 298 distribution.clear(); 299 ulong[] ulong_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; 300 foreach (value; ulong_values) 301 { 302 distribution.addToAverage(value); 303 } 304 t.test!(">=")(feqrel(distribution.average(), 4.5), PRECISION); 305 306 // test with double 307 distribution.clear(); 308 double[] double_values = [2.4, 5.0, 7.6]; 309 foreach (value; double_values) 310 { 311 distribution.addToAverage(value); 312 } 313 t.test!(">=")(feqrel(distribution.average(), 5.0), PRECISION); 314 } 315 316 unittest 317 { 318 auto t = new NamedTest("Standard Deviation & Variance"); 319 320 IncrementalAverage distribution; 321 322 int[] values = [1, 2, 3, 4]; 323 distribution.clear(); 324 foreach (value; values) 325 { 326 distribution.addToAverage(value); 327 } 328 329 t.test!(">=")(feqrel(distribution.average(), 2.5), PRECISION); 330 331 t.test!(">=")(feqrel(distribution.stdDeviation(0), 1.1180339887498949), 332 PRECISION); 333 t.test!(">=")(feqrel(distribution.stdDeviation(1), 1.2909944487358056), 334 PRECISION); 335 336 t.test!(">=")(feqrel(distribution.variance(0), 1.25), PRECISION); 337 t.test!(">=")(feqrel(distribution.variance(1), 1.66666666666666666), 338 PRECISION); 339 340 values = [10, 20, 30, 40, 50, 60]; 341 distribution.clear(); 342 foreach (value; values) 343 { 344 distribution.addToAverage(value); 345 } 346 347 t.test!(">=")(feqrel(distribution.average(), 35.0), PRECISION); 348 349 t.test!(">=")(feqrel(distribution.stdDeviation(0), 17.0782512765993317), 350 PRECISION); 351 t.test!(">=")(feqrel(distribution.stdDeviation(1), 18.708286933869708), 352 PRECISION); 353 354 t.test!(">=")(feqrel(distribution.variance(0), 291.6666666666666), 355 PRECISION); 356 t.test!(">=")(feqrel(distribution.variance(1), 350.0), PRECISION); 357 } 358 359 unittest 360 { 361 auto t = new NamedTest("Multiple insertion precision"); 362 363 static immutable value_1 = 3; 364 static immutable value_2 = 7; 365 static immutable value_3 = 11; 366 static immutable freq = 1001; 367 368 IncrementalAverage single, multiple; 369 370 multiple.addToAverage(value_1); 371 single.addToAverage(value_1); 372 373 multiple.addToAverage(value_2, freq); 374 for (auto i = 0; i < freq; ++i) 375 { 376 single.addToAverage(value_2); 377 } 378 379 multiple.addToAverage(value_3, freq); 380 for (auto i = 0; i < freq; ++i) 381 { 382 single.addToAverage(value_3); 383 } 384 385 t.test!(">=")(feqrel(single.average(), multiple.average()), PRECISION); 386 t.test!(">=")(feqrel(single.stdDeviation(0), multiple.stdDeviation(0)), 387 PRECISION); 388 t.test!(">=")(feqrel(single.stdDeviation(1), multiple.stdDeviation(1)), 389 PRECISION); 390 t.test!(">=")(feqrel(single.variance(0), multiple.variance(0)), PRECISION); 391 t.test!(">=")(feqrel(single.variance(1), multiple.variance(1)), PRECISION); 392 } 393 394 unittest 395 { 396 auto t = new NamedTest("Add only one element"); 397 398 IncrementalAverage distribution; 399 400 static immutable value = 8.0; 401 402 distribution.addToAverage(value); 403 404 t.test!("==")(distribution.average(), value); 405 406 t.test!("==")(distribution.stdDeviation(0), 0.0); 407 t.test!("==")(distribution.stdDeviation(1), 0.0); 408 409 t.test!("==")(distribution.variance(0), 0.0); 410 t.test!("==")(distribution.variance(1), 0.0); 411 }