1 /******************************************************************************* 2 3 Calculates the average using an accumulative technique (i.e, not all the 4 values are provided at once). 5 The struct doesn't store any previous values. 6 7 Copyright: 8 Copyright (c) 2009-2016 dunnhumby Germany GmbH. 9 All rights reserved. 10 11 License: 12 Boost Software License Version 1.0. See LICENSE_BOOST.txt for details. 13 Alternatively, this file may be distributed under the terms of the Tango 14 3-Clause BSD License (see LICENSE_BSD.txt for details). 15 16 *******************************************************************************/ 17 18 module ocean.math.IncrementalAverage; 19 20 import ocean.core.Verify; 21 22 /******************************************************************************* 23 24 Calculates the average using an accumulative technique (i.e, not all the 25 values are provided at once). 26 The struct doesn't store any previous values. 27 28 *******************************************************************************/ 29 30 public struct IncrementalAverage 31 { 32 import ocean.math.Math : sqrt; 33 34 /*************************************************************************** 35 36 Counts the number of averages that has been previously performed. 37 This helps in giving the correct weight to a new number being added 38 to the average in comparison to the previous numbers. 39 40 ***************************************************************************/ 41 42 private ulong count_ = 0; 43 44 45 /*************************************************************************** 46 47 Holds the average value calculated so far. 48 49 ***************************************************************************/ 50 51 private double average_ = 0; 52 53 54 /*************************************************************************** 55 56 Holds the mean of square of the added elements used by the variance() 57 and stdDeviation() methods. 58 59 ***************************************************************************/ 60 61 private double mean_of_square = 0; 62 63 64 /*************************************************************************** 65 66 Adds a new number (giving it an appropriate weight) to the average. 67 68 Note that if too many numbers were added (more than ulong.max) then the 69 the internal counter will overflow (and as a result the average value 70 would be corrupt). 71 72 Params: 73 value = the new value to add to the current average. 74 count = if that value represent in itself the average of other 75 numbers, then this param should define the number of elements 76 that this average stands for. A count = 0 has no effect on the 77 average and gets discarded. 78 79 ***************************************************************************/ 80 81 public void addToAverage (T)(T value, ulong count = 1) 82 { 83 if (count == 0) 84 return; 85 86 this.count_ += count; 87 88 auto delta = (value - this.average_) * count; 89 90 this.average_ += delta / this.count_; 91 92 this.mean_of_square += delta * (value - this.average_); 93 } 94 95 96 /*************************************************************************** 97 98 Returns: 99 the average calculated so far. 100 101 ***************************************************************************/ 102 103 public double average () 104 { 105 return this.average_; 106 } 107 108 109 /*************************************************************************** 110 111 Returns: 112 the count of elements added. 113 114 ***************************************************************************/ 115 116 public ulong count () 117 { 118 return this.count_; 119 } 120 121 122 /*************************************************************************** 123 124 Resets the average incremental instance. 125 126 ***************************************************************************/ 127 128 public void clear () 129 { 130 this.average_ = 0; 131 this.count_ = 0; 132 this.mean_of_square = 0; 133 } 134 135 /*************************************************************************** 136 137 Computes the variance, a measure of the spread of a distribution to 138 quantify how far a set of data values is spread out. 139 140 Note that the Welford's algorithm implementation is used to compute 141 the spread of a distribution incrementally when a value is added 142 through addToAverage(). 143 http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance 144 #Online_algorithm 145 146 In standard statistical practice, a correction factor of 0 provides a 147 maximum likelihood estimate of the variance for normally distributed 148 variables, while a correction factor of 1 provides an unbiased 149 estimator of the variance of a hypothetical infinite population. 150 The correction factor will be subtracted from the number of elements 151 added (sample points) when working out the divisor. 152 For more information about correction factors that could be set please 153 see http://en.wikipedia.org/wiki/Standard_deviation#Estimation 154 155 Params: 156 correction_factor = correction factor to normalise the divisor used 157 in the calculation 158 159 Returns: 160 The variance of the set of data values added 161 162 ***************************************************************************/ 163 164 public double variance (double correction_factor = 0) 165 { 166 if (this.count_ < 2) 167 return 0; 168 169 verify(this.count_ > correction_factor, "Correction factor is same " 170 ~ "size or bigger than the number of elements added."); 171 172 return this.mean_of_square / (this.count_ - correction_factor); 173 } 174 175 /*************************************************************************** 176 177 Returns the standard deviation, a measure of the spread of a 178 distribution to quantify the amount of variation or dispersion of a 179 set of data values. 180 181 See notes on variance() 182 183 Params: 184 correction_factor = correction factor to normalise the divisor used 185 in the calculation 186 187 Returns: 188 The standard deviation of the set of data values added 189 190 ***************************************************************************/ 191 192 public double stdDeviation (double correction_factor = 0) 193 { 194 return sqrt(this.variance(correction_factor)); 195 } 196 } 197 198 199 version (unittest) 200 { 201 import ocean.core.Test : NamedTest; 202 203 import ocean.math.IEEE; 204 205 // Bit count to avoid deliberate loss of precision. 206 // Ensures only 8 bits of precision could be lost. 207 static immutable PRECISION = double.mant_dig - 8; 208 } 209 210 unittest 211 { 212 auto t = new NamedTest("Incremental Average - basic unit tests"); 213 214 IncrementalAverage inc_avg; 215 216 t.test!("==")(inc_avg.count, 0); 217 t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION); 218 219 inc_avg.addToAverage(1); 220 t.test!(">=")(feqrel(inc_avg.average(), 1.0), PRECISION); 221 222 inc_avg.clear(); 223 t.test!("==")(inc_avg.count, 0); 224 t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION); 225 226 inc_avg.addToAverage(10); 227 inc_avg.addToAverage(20); 228 t.test!("==")(inc_avg.count, 2); 229 t.test!(">=")(feqrel(inc_avg.average(), 15.0), PRECISION); 230 231 inc_avg.clear(); 232 inc_avg.addToAverage(-10); 233 t.test!(">=")(feqrel(inc_avg.average(), -10.0), PRECISION); 234 inc_avg.addToAverage(-20); 235 t.test!(">=")(feqrel(inc_avg.average(), -15.0), PRECISION); 236 237 inc_avg.clear(); 238 inc_avg.addToAverage(-10, uint.max); 239 inc_avg.addToAverage(10, uint.max); 240 t.test!("==")(inc_avg.count, 2UL * uint.max); 241 t.test!(">=")(feqrel(inc_avg.average(), 0.0), PRECISION); 242 243 inc_avg.clear(); 244 inc_avg.addToAverage(long.max); 245 t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.max), PRECISION); 246 inc_avg.addToAverage(cast(ulong)long.max + 10); 247 t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.max) + 5, 248 PRECISION); 249 250 inc_avg.clear(); 251 inc_avg.addToAverage(long.max / 2.0); 252 inc_avg.addToAverage(long.max * 1.25); 253 // (0.5 + 1.25) / 2 = 0.875 254 t.test!(">=")(feqrel(inc_avg.average(), long.max * 0.875), PRECISION); 255 256 inc_avg.clear(); 257 inc_avg.addToAverage(long.min); 258 t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.min), PRECISION); 259 inc_avg.addToAverage(cast(double)long.min - 10); 260 t.test!(">=")(feqrel(inc_avg.average(), cast(double)long.min - 5), 261 PRECISION); 262 263 inc_avg.clear(); 264 static immutable ADD = ulong.max/1_000_000; 265 for (ulong i = 0; i < ulong.max;) 266 { 267 inc_avg.addToAverage(i%2); // 1 or 0 268 269 if (ADD < (ulong.max - i)) 270 i += ADD; 271 else 272 i++; 273 } 274 inc_avg.addToAverage(1); // One more add is missing 275 t.test!(">=")(feqrel(inc_avg.average(), 0.5), PRECISION); 276 } 277 278 unittest 279 { 280 auto t = new NamedTest("Initialise to zero"); 281 282 IncrementalAverage distribution; 283 t.test!("==")(distribution.count(), 0); 284 t.test!(">=")(feqrel(distribution.average(), 0.0), PRECISION); 285 t.test!(">=")(feqrel(distribution.stdDeviation(), 0.0), PRECISION); 286 t.test!(">=")(feqrel(distribution.variance(), 0.0), PRECISION); 287 } 288 289 unittest 290 { 291 auto t = new NamedTest("Mean (average)"); 292 293 IncrementalAverage distribution; 294 295 // test with signed int 296 distribution.clear(); 297 int[] int_values = [-2, -3, -4, -5, -6]; 298 foreach (value; int_values) 299 { 300 distribution.addToAverage(value); 301 } 302 t.test!(">=")(feqrel(distribution.average(), -4.0), PRECISION); 303 304 // test with unsigned long 305 distribution.clear(); 306 ulong[] ulong_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; 307 foreach (value; ulong_values) 308 { 309 distribution.addToAverage(value); 310 } 311 t.test!(">=")(feqrel(distribution.average(), 4.5), PRECISION); 312 313 // test with double 314 distribution.clear(); 315 double[] double_values = [2.4, 5.0, 7.6]; 316 foreach (value; double_values) 317 { 318 distribution.addToAverage(value); 319 } 320 t.test!(">=")(feqrel(distribution.average(), 5.0), PRECISION); 321 } 322 323 unittest 324 { 325 auto t = new NamedTest("Standard Deviation & Variance"); 326 327 IncrementalAverage distribution; 328 329 int[] values = [1, 2, 3, 4]; 330 distribution.clear(); 331 foreach (value; values) 332 { 333 distribution.addToAverage(value); 334 } 335 336 t.test!(">=")(feqrel(distribution.average(), 2.5), PRECISION); 337 338 t.test!(">=")(feqrel(distribution.stdDeviation(0), 1.1180339887498949), 339 PRECISION); 340 t.test!(">=")(feqrel(distribution.stdDeviation(1), 1.2909944487358056), 341 PRECISION); 342 343 t.test!(">=")(feqrel(distribution.variance(0), 1.25), PRECISION); 344 t.test!(">=")(feqrel(distribution.variance(1), 1.66666666666666666), 345 PRECISION); 346 347 values = [10, 20, 30, 40, 50, 60]; 348 distribution.clear(); 349 foreach (value; values) 350 { 351 distribution.addToAverage(value); 352 } 353 354 t.test!(">=")(feqrel(distribution.average(), 35.0), PRECISION); 355 356 t.test!(">=")(feqrel(distribution.stdDeviation(0), 17.0782512765993317), 357 PRECISION); 358 t.test!(">=")(feqrel(distribution.stdDeviation(1), 18.708286933869708), 359 PRECISION); 360 361 t.test!(">=")(feqrel(distribution.variance(0), 291.6666666666666), 362 PRECISION); 363 t.test!(">=")(feqrel(distribution.variance(1), 350.0), PRECISION); 364 } 365 366 unittest 367 { 368 auto t = new NamedTest("Multiple insertion precision"); 369 370 static immutable value_1 = 3; 371 static immutable value_2 = 7; 372 static immutable value_3 = 11; 373 static immutable freq = 1001; 374 375 IncrementalAverage single, multiple; 376 377 multiple.addToAverage(value_1); 378 single.addToAverage(value_1); 379 380 multiple.addToAverage(value_2, freq); 381 for (auto i = 0; i < freq; ++i) 382 { 383 single.addToAverage(value_2); 384 } 385 386 multiple.addToAverage(value_3, freq); 387 for (auto i = 0; i < freq; ++i) 388 { 389 single.addToAverage(value_3); 390 } 391 392 t.test!(">=")(feqrel(single.average(), multiple.average()), PRECISION); 393 t.test!(">=")(feqrel(single.stdDeviation(0), multiple.stdDeviation(0)), 394 PRECISION); 395 t.test!(">=")(feqrel(single.stdDeviation(1), multiple.stdDeviation(1)), 396 PRECISION); 397 t.test!(">=")(feqrel(single.variance(0), multiple.variance(0)), PRECISION); 398 t.test!(">=")(feqrel(single.variance(1), multiple.variance(1)), PRECISION); 399 } 400 401 unittest 402 { 403 auto t = new NamedTest("Add only one element"); 404 405 IncrementalAverage distribution; 406 407 static immutable value = 8.0; 408 409 distribution.addToAverage(value); 410 411 t.test!("==")(distribution.average(), value); 412 413 t.test!("==")(distribution.stdDeviation(0), 0.0); 414 t.test!("==")(distribution.stdDeviation(1), 0.0); 415 416 t.test!("==")(distribution.variance(0), 0.0); 417 t.test!("==")(distribution.variance(1), 0.0); 418 }