1 /** 2 * Cumulative Probability Distribution Functions 3 * 4 * Copyright: 5 * Based on the CEPHES math library, which is 6 * Copyright (C) 1994 Stephen L. Moshier (moshier@world.std.com). 7 * Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH. 8 * All rights reserved. 9 * 10 * License: 11 * Boost Software License Version 1.0. See LICENSE_BOOST.txt for details. 12 * Alternatively, this file may be distributed under the terms of the Tango 13 * 3-Clause BSD License (see LICENSE_BSD.txt for details). 14 * 15 * Authors: Stephen L. Moshier (original C code), Don Clugston 16 * 17 */ 18 19 /** 20 * Macros: 21 * NAN = $(RED NAN) 22 * SUP = <span style="vertical-align:super;font-size:smaller">$0</span> 23 * GAMMA = Γ 24 * INTEGRAL = ∫ 25 * INTEGRATE = $(BIG ∫<sub>$(SMALL $1)</sub><sup>$2</sup>) 26 * POWER = $1<sup>$2</sup> 27 * BIGSUM = $(BIG Σ <sup>$2</sup><sub>$(SMALL $1)</sub>) 28 * CHOOSE = $(BIG () <sup>$(SMALL $1)</sup><sub>$(SMALL $2)</sub> $(BIG )) 29 * TABLE_SV = <table border=1 cellpadding=4 cellspacing=0> 30 * <caption>Special Values</caption> 31 * $0</table> 32 * SVH = $(TR $(TH $1) $(TH $2)) 33 * SV = $(TR $(TD $1) $(TD $2)) 34 */ 35 36 module ocean.math.Probability; 37 static import ocean.math.ErrorFunction; 38 import ocean.math.GammaFunction; 39 import ocean.math.Math; 40 import ocean.math.IEEE; 41 import ocean.core.Verify; 42 43 version (unittest) import ocean.core.Test; 44 45 /*** 46 Cumulative distribution function for the Normal distribution, and its complement. 47 48 The normal (or Gaussian, or bell-shaped) distribution is 49 defined as: 50 51 normalDist(x) = 1/$(SQRT) π $(INTEGRAL -$(INFINITY), x) exp( - $(POWER t, 2)/2) dt 52 = 0.5 + 0.5 * erf(x/sqrt(2)) 53 = 0.5 * erfc(- x/sqrt(2)) 54 55 Note that 56 normalDistribution(x) = 1 - normalDistribution(-x). 57 58 Accuracy: 59 Within a few bits of machine resolution over the entire 60 range. 61 62 References: 63 $(LINK http://www.netlib.org/cephes/ldoubdoc.html), 64 G. Marsaglia, "Evaluating the Normal Distribution", 65 Journal of Statistical Software <b>11</b>, (July 2004). 66 */ 67 real normalDistribution(real a) 68 { 69 return ocean.math.ErrorFunction.normalDistributionImpl(a); 70 } 71 72 /** ditto */ 73 real normalDistributionCompl(real a) 74 { 75 return -ocean.math.ErrorFunction.normalDistributionImpl(-a); 76 } 77 78 /****************************** 79 * Inverse of Normal distribution function 80 * 81 * Returns the argument, x, for which the area under the 82 * Normal probability density function (integrated from 83 * minus infinity to x) is equal to p. 84 * 85 * For small arguments 0 < p < exp(-2), the program computes 86 * z = sqrt( -2 log(p) ); then the approximation is 87 * x = z - log(z)/z - (1/z) P(1/z) / Q(1/z) . 88 * For larger arguments, x/sqrt(2 pi) = w + w^3 R(w^2)/S(w^2) , 89 * where w = p - 0.5 . 90 */ 91 real normalDistributionInv(real p) 92 { 93 return ocean.math.ErrorFunction.normalDistributionInvImpl(p); 94 } 95 96 /** ditto */ 97 real normalDistributionComplInv(real p) 98 { 99 return -ocean.math.ErrorFunction.normalDistributionInvImpl(-p); 100 } 101 102 unittest { 103 test(feqrel(normalDistributionInv(normalDistribution(0.1)),0.1L)>=real.mant_dig-4); 104 test(feqrel(normalDistributionComplInv(normalDistributionCompl(0.1)),0.1L)>=real.mant_dig-4); 105 } 106 107 /** Student's t cumulative distribution function 108 * 109 * Computes the integral from minus infinity to t of the Student 110 * t distribution with integer nu > 0 degrees of freedom: 111 * 112 * $(GAMMA)( (nu+1)/2) / ( sqrt(nu π) $(GAMMA)(nu/2) ) * 113 * $(INTEGRATE -∞, t) $(POWER (1+$(POWER x, 2)/nu), -(nu+1)/2) dx 114 * 115 * Can be used to test whether the means of two normally distributed populations 116 * are equal. 117 * 118 * It is related to the incomplete beta integral: 119 * 1 - studentsDistribution(nu,t) = 0.5 * betaDistribution( nu/2, 1/2, z ) 120 * where 121 * z = nu/(nu + t<sup>2</sup>). 122 * 123 * For t < -1.6, this is the method of computation. For higher t, 124 * a direct method is derived from integration by parts. 125 * Since the function is symmetric about t=0, the area under the 126 * right tail of the density is found by calling the function 127 * with -t instead of t. 128 */ 129 real studentsTDistribution(int nu, real t) 130 { 131 verify(nu>0); 132 133 /* Based on code from Cephes Math Library Release 2.3: January, 1995 134 Copyright 1984, 1995 by Stephen L. Moshier 135 */ 136 137 if ( nu <= 0 ) return NaN(TANGO_NAN.STUDENTSDDISTRIBUTION_DOMAIN); // domain error -- or should it return 0? 138 if ( t == 0.0 ) return 0.5; 139 140 real rk, z, p; 141 142 if ( t < -1.6 ) { 143 rk = nu; 144 z = rk / (rk + t * t); 145 return 0.5L * betaIncomplete( 0.5L*rk, 0.5L, z ); 146 } 147 148 /* compute integral from -t to + t */ 149 150 rk = nu; /* degrees of freedom */ 151 152 real x; 153 if (t < 0) x = -t; else x = t; 154 z = 1.0L + ( x * x )/rk; 155 156 real f, tz; 157 int j; 158 159 if ( nu & 1) { 160 /* computation for odd nu */ 161 real xsqk = x/sqrt(rk); 162 p = atan( xsqk ); 163 if ( nu > 1 ) { 164 f = 1.0L; 165 tz = 1.0L; 166 j = 3; 167 while( (j<=(nu-2)) && ( (tz/f) > real.epsilon ) ) { 168 tz *= (j-1)/( z * j ); 169 f += tz; 170 j += 2; 171 } 172 p += f * xsqk/z; 173 } 174 p *= 2.0L/PI; 175 } else { 176 /* computation for even nu */ 177 f = 1.0L; 178 tz = 1.0L; 179 j = 2; 180 181 while ( ( j <= (nu-2) ) && ( (tz/f) > real.epsilon ) ) { 182 tz *= (j - 1)/( z * j ); 183 f += tz; 184 j += 2; 185 } 186 p = f * x/sqrt(z*rk); 187 } 188 if ( t < 0.0L ) 189 p = -p; /* note destruction of relative accuracy */ 190 191 p = 0.5L + 0.5L * p; 192 return p; 193 } 194 195 /** Inverse of Student's t distribution 196 * 197 * Given probability p and degrees of freedom nu, 198 * finds the argument t such that the one-sided 199 * studentsDistribution(nu,t) is equal to p. 200 * 201 * Params: 202 * nu = degrees of freedom. Must be >1 203 * p = probability. 0 < p < 1 204 */ 205 real studentsTDistributionInv(int nu, real p ) 206 { 207 verify(nu>0); 208 verify(p>=0.0L && p<=1.0L); 209 210 if (p==0) return -real.infinity; 211 if (p==1) return real.infinity; 212 213 real rk, z; 214 rk = nu; 215 216 if ( p > 0.25L && p < 0.75L ) { 217 if ( p == 0.5L ) return 0; 218 z = 1.0L - 2.0L * p; 219 z = betaIncompleteInv( 0.5L, 0.5L*rk, fabs(z) ); 220 real t = sqrt( rk*z/(1.0L-z) ); 221 if( p < 0.5L ) 222 t = -t; 223 return t; 224 } 225 int rflg = -1; // sign of the result 226 if (p >= 0.5L) { 227 p = 1.0L - p; 228 rflg = 1; 229 } 230 z = betaIncompleteInv( 0.5L*rk, 0.5L, 2.0L*p ); 231 232 if (z<0) return rflg * real.infinity; 233 return rflg * sqrt( rk/z - rk ); 234 } 235 236 unittest { 237 238 // There are simple forms for nu = 1 and nu = 2. 239 240 // if (nu == 1), tDistribution(x) = 0.5 + atan(x)/PI 241 // so tDistributionInv(p) = tan( PI * (p-0.5) ); 242 // nu==2: tDistribution(x) = 0.5 * (1 + x/ sqrt(2+x*x) ) 243 244 test(studentsTDistribution(1, -0.4)== 0.5 + atan(-0.4)/PI); 245 test(studentsTDistribution(2, 0.9) == 0.5L * (1 + 0.9L/sqrt(2.0L + 0.9*0.9)) ); 246 test(studentsTDistribution(2, -5.4) == 0.5L * (1 - 5.4L/sqrt(2.0L + 5.4*5.4)) ); 247 248 // return true if a==b to given number of places. 249 bool isfeqabs(real a, real b, real diff) 250 { 251 return fabs(a-b) < diff; 252 } 253 254 // Check a few spot values with statsoft.com (Mathworld values are wrong!!) 255 // According to statsoft.com, studentsDistributionInv(10, 0.995)= 3.16927. 256 257 // The remaining values listed here are from Excel, and are unlikely to be accurate 258 // in the last decimal places. However, they are helpful as a sanity check. 259 260 // Microsoft Excel 2003 gives TINV(2*(1-0.995), 10) == 3.16927267160917 261 test(isfeqabs(studentsTDistributionInv(10, 0.995), 3.169_272_67L, 0.000_000_005L)); 262 263 264 test(isfeqabs(studentsTDistributionInv(8, 0.6), 0.261_921_096_769_043L,0.000_000_000_05L)); 265 // -TINV(2*0.4, 18) == -0.257123042655869 266 267 test(isfeqabs(studentsTDistributionInv(18, 0.4), -0.257_123_042_655_869L, 0.000_000_000_05L)); 268 test( feqrel(studentsTDistribution(18, studentsTDistributionInv(18, 0.4L)),0.4L) 269 > real.mant_dig-5 ); 270 test( feqrel(studentsTDistribution(11, studentsTDistributionInv(11, 0.9L)),0.9L) 271 > real.mant_dig-2); 272 } 273 274 /** The F distribution, its complement, and inverse. 275 * 276 * The F density function (also known as Snedcor's density or the 277 * variance ratio density) is the density 278 * of x = (u1/df1)/(u2/df2), where u1 and u2 are random 279 * variables having $(POWER χ,2) distributions with df1 280 * and df2 degrees of freedom, respectively. 281 * 282 * fDistribution returns the area from zero to x under the F density 283 * function. The complementary function, 284 * fDistributionCompl, returns the area from x to ∞ under the F density function. 285 * 286 * The inverse of the complemented F distribution, 287 * fDistributionComplInv, finds the argument x such that the integral 288 * from x to infinity of the F density is equal to the given probability y. 289 * 290 * Can be used to test whether the means of multiple normally distributed 291 * populations, all with the same standard deviation, are equal; 292 * or to test that the standard deviations of two normally distributed 293 * populations are equal. 294 * 295 * Params: 296 * df1 = Degrees of freedom of the first variable. Must be >= 1 297 * df2 = Degrees of freedom of the second variable. Must be >= 1 298 * x = Must be >= 0 299 */ 300 real fDistribution(int df1, int df2, real x) 301 { 302 verify(df1>=1 && df2>=1); 303 verify(x>=0); 304 305 real a = cast(real)(df1); 306 real b = cast(real)(df2); 307 real w = a * x; 308 w = w/(b + w); 309 return betaIncomplete(0.5L*a, 0.5L*b, w); 310 } 311 312 /** ditto */ 313 real fDistributionCompl(int df1, int df2, real x) 314 { 315 verify(df1>=1 && df2>=1); 316 verify(x>=0); 317 318 real a = cast(real)(df1); 319 real b = cast(real)(df2); 320 real w = b / (b + a * x); 321 return betaIncomplete( 0.5L*b, 0.5L*a, w ); 322 } 323 324 /* 325 * Inverse of complemented F distribution 326 * 327 * Finds the F density argument x such that the integral 328 * from x to infinity of the F density is equal to the 329 * given probability p. 330 * 331 * This is accomplished using the inverse beta integral 332 * function and the relations 333 * 334 * z = betaIncompleteInv( df2/2, df1/2, p ), 335 * x = df2 (1-z) / (df1 z). 336 * 337 * Note that the following relations hold for the inverse of 338 * the uncomplemented F distribution: 339 * 340 * z = betaIncompleteInv( df1/2, df2/2, p ), 341 * x = df2 z / (df1 (1-z)). 342 */ 343 344 /** ditto */ 345 real fDistributionComplInv(int df1, int df2, real p ) 346 { 347 verify(df1>=1 && df2>=1); 348 verify(p>=0 && p<=1.0); 349 350 real a = df1; 351 real b = df2; 352 /* Compute probability for x = 0.5. */ 353 real w = betaIncomplete( 0.5L*b, 0.5L*a, 0.5L ); 354 /* If that is greater than p, then the solution w < .5. 355 Otherwise, solve at 1-p to remove cancellation in (b - b*w). */ 356 if ( w > p || p < 0.001L) { 357 w = betaIncompleteInv( 0.5L*b, 0.5L*a, p ); 358 return (b - b*w)/(a*w); 359 } else { 360 w = betaIncompleteInv( 0.5L*a, 0.5L*b, 1.0L - p ); 361 return b*w/(a*(1.0L-w)); 362 } 363 } 364 365 unittest { 366 // fDistCompl(df1, df2, x) = Excel's FDIST(x, df1, df2) 367 test(fabs(fDistributionCompl(6, 4, 16.5) - 0.00858719177897249L)< 0.0000000000005L); 368 test(fabs((1-fDistribution(12, 23, 0.1)) - 0.99990562845505L)< 0.0000000000005L); 369 test(fabs(fDistributionComplInv(8, 34, 0.2) - 1.48267037661408L)< 0.0000000005L); 370 test(fabs(fDistributionComplInv(4, 16, 0.008) - 5.043_537_593_48596L)< 0.0000000005L); 371 // Regression test: This one used to fail because of a bug in the definition of MINLOG. 372 test(feqrel(fDistributionCompl(4, 16, fDistributionComplInv(4,16, 0.008)), 0.008L)>=real.mant_dig-3); 373 } 374 375 /** $(POWER χ,2) cumulative distribution function and its complement. 376 * 377 * Returns the area under the left hand tail (from 0 to x) 378 * of the Chi square probability density function with 379 * v degrees of freedom. The complement returns the area under 380 * the right hand tail (from x to ∞). 381 * 382 * chiSqrDistribution(x | v) = ($(INTEGRATE 0, x) 383 * $(POWER t, v/2-1) $(POWER e, -t/2) dt ) 384 * / $(POWER 2, v/2) $(GAMMA)(v/2) 385 * 386 * chiSqrDistributionCompl(x | v) = ($(INTEGRATE x, ∞) 387 * $(POWER t, v/2-1) $(POWER e, -t/2) dt ) 388 * / $(POWER 2, v/2) $(GAMMA)(v/2) 389 * 390 * Params: 391 * v = degrees of freedom. Must be positive. 392 * x = the $(POWER χ,2) variable. Must be positive. 393 * 394 */ 395 real chiSqrDistribution(real v, real x) 396 { 397 verify(x>=0); 398 verify(v>=1.0); 399 400 return gammaIncomplete( 0.5*v, 0.5*x); 401 } 402 403 /** ditto */ 404 real chiSqrDistributionCompl(real v, real x) 405 { 406 verify(x>=0); 407 verify(v>=1.0); 408 409 return gammaIncompleteCompl( 0.5L*v, 0.5L*x ); 410 } 411 412 /** 413 * Inverse of complemented $(POWER χ, 2) distribution 414 * 415 * Finds the $(POWER χ, 2) argument x such that the integral 416 * from x to ∞ of the $(POWER χ, 2) density is equal 417 * to the given cumulative probability p. 418 * 419 * Params: 420 * p = Cumulative probability. 0<= p <=1. 421 * v = Degrees of freedom. Must be positive. 422 * 423 */ 424 real chiSqrDistributionComplInv(real v, real p) 425 { 426 verify(p>=0 && p<=1.0L); 427 verify(v>=1.0L); 428 429 return 2.0 * gammaIncompleteComplInv( 0.5*v, p); 430 } 431 432 unittest { 433 test(feqrel(chiSqrDistributionCompl(3.5L, chiSqrDistributionComplInv(3.5L, 0.1L)), 0.1L)>=real.mant_dig-5); 434 test(chiSqrDistribution(19.02L, 0.4L) + chiSqrDistributionCompl(19.02L, 0.4L) ==1.0L); 435 } 436 437 /** 438 * The Γ distribution and its complement 439 * 440 * The Γ distribution is defined as the integral from 0 to x of the 441 * gamma probability density function. The complementary function returns the 442 * integral from x to ∞ 443 * 444 * gammaDistribution = ($(INTEGRATE 0, x) $(POWER t, b-1)$(POWER e, -at) dt) $(POWER a, b)/Γ(b) 445 * 446 * x must be greater than 0. 447 */ 448 real gammaDistribution(real a, real b, real x) 449 { 450 verify(x>=0); 451 452 return gammaIncomplete(b, a*x); 453 } 454 455 /** ditto */ 456 real gammaDistributionCompl(real a, real b, real x ) 457 { 458 verify(x>=0); 459 460 return gammaIncompleteCompl( b, a * x ); 461 } 462 463 unittest { 464 test(gammaDistribution(7,3,0.18)+gammaDistributionCompl(7,3,0.18)==1); 465 } 466 467 /********************** 468 * Beta distribution and its inverse 469 * 470 * Returns the incomplete beta integral of the arguments, evaluated 471 * from zero to x. The function is defined as 472 * 473 * betaDistribution = Γ(a+b)/(Γ(a) Γ(b)) * 474 * $(INTEGRATE 0, x) $(POWER t, a-1)$(POWER (1-t),b-1) dt 475 * 476 * The domain of definition is 0 <= x <= 1. In this 477 * implementation a and b are restricted to positive values. 478 * The integral from x to 1 may be obtained by the symmetry 479 * relation 480 * 481 * betaDistributionCompl(a, b, x ) = betaDistribution( b, a, 1-x ) 482 * 483 * The integral is evaluated by a continued fraction expansion 484 * or, when b*x is small, by a power series. 485 * 486 * The inverse finds the value of x for which betaDistribution(a,b,x) - y = 0 487 */ 488 real betaDistribution(real a, real b, real x ) 489 { 490 return betaIncomplete(a, b, x ); 491 } 492 493 /** ditto */ 494 real betaDistributionCompl(real a, real b, real x) 495 { 496 return betaIncomplete(b, a, 1-x); 497 } 498 499 /** ditto */ 500 real betaDistributionInv(real a, real b, real y) 501 { 502 return betaIncompleteInv(a, b, y); 503 } 504 505 /** ditto */ 506 real betaDistributionComplInv(real a, real b, real y) 507 { 508 return 1-betaIncompleteInv(b, a, y); 509 } 510 511 unittest { 512 test(feqrel(betaDistributionInv(2, 6, betaDistribution(2,6, 0.7L)),0.7L)>=real.mant_dig-3); 513 test(feqrel(betaDistributionComplInv(1.3, 8, betaDistributionCompl(1.3,8, 0.01L)),0.01L)>=real.mant_dig-4); 514 } 515 516 /** 517 * The Poisson distribution, its complement, and inverse 518 * 519 * k is the number of events. m is the mean. 520 * The Poisson distribution is defined as the sum of the first k terms of 521 * the Poisson density function. 522 * The complement returns the sum of the terms k+1 to ∞. 523 * 524 * poissonDistribution = $(BIGSUM j=0, k) $(POWER e, -m) $(POWER m, j)/j! 525 * 526 * poissonDistributionCompl = $(BIGSUM j=k+1, ∞) $(POWER e, -m) $(POWER m, j)/j! 527 * 528 * The terms are not summed directly; instead the incomplete 529 * gamma integral is employed, according to the relation 530 * 531 * y = poissonDistribution( k, m ) = gammaIncompleteCompl( k+1, m ). 532 * 533 * The arguments must both be positive. 534 */ 535 real poissonDistribution(int k, real m ) 536 { 537 verify(k>=0); 538 verify(m>0); 539 540 return gammaIncompleteCompl( k+1.0, m ); 541 } 542 543 /** ditto */ 544 real poissonDistributionCompl(int k, real m ) 545 { 546 verify(k>=0); 547 verify(m>0); 548 549 return gammaIncomplete( k+1.0, m ); 550 } 551 552 /** ditto */ 553 real poissonDistributionInv( int k, real p ) 554 { 555 verify(k>=0); 556 verify(p>=0.0 && p<=1.0); 557 558 return gammaIncompleteComplInv(k+1, p); 559 } 560 561 unittest { 562 // = Excel's POISSON(k, m, TRUE) 563 test( fabs(poissonDistribution(5, 6.3) 564 - 0.398771730072867L) < 0.000000000000005L); 565 test( feqrel(poissonDistributionInv(8, poissonDistribution(8, 2.7e3L)), 2.7e3L)>=real.mant_dig-2); 566 test( poissonDistribution(2, 8.4e-5) + poissonDistributionCompl(2, 8.4e-5) == 1.0L); 567 } 568 569 /*********************************** 570 * Binomial distribution and complemented binomial distribution 571 * 572 * The binomial distribution is defined as the sum of the terms 0 through k 573 * of the Binomial probability density. 574 * The complement returns the sum of the terms k+1 through n. 575 * 576 binomialDistribution = $(BIGSUM j=0, k) $(CHOOSE n, j) $(POWER p, j) $(POWER (1-p), n-j) 577 578 binomialDistributionCompl = $(BIGSUM j=k+1, n) $(CHOOSE n, j) $(POWER p, j) $(POWER (1-p), n-j) 579 * 580 * The terms are not summed directly; instead the incomplete 581 * beta integral is employed, according to the formula 582 * 583 * y = binomialDistribution( k, n, p ) = betaDistribution( n-k, k+1, 1-p ). 584 * 585 * The arguments must be positive, with p ranging from 0 to 1, and k<=n. 586 */ 587 real binomialDistribution(int k, int n, real p ) 588 { 589 verify(p>=0 && p<=1.0); // domain error 590 verify(k>=0 && k<=n); 591 592 real dk, dn, q; 593 if( k == n ) 594 return 1.0L; 595 596 q = 1.0L - p; 597 dn = n - k; 598 if ( k == 0 ) { 599 return pow( q, dn ); 600 } else { 601 return betaIncomplete( dn, k + 1, q ); 602 } 603 } 604 605 unittest { 606 // = Excel's BINOMDIST(k, n, p, TRUE) 607 test( fabs(binomialDistribution(8, 12, 0.5) 608 - 0.927001953125L) < 0.0000000000005L); 609 test( fabs(binomialDistribution(0, 3, 0.008L) 610 - 0.976191488L) < 0.00000000005L); 611 test(binomialDistribution(7,7, 0.3)==1.0); 612 } 613 614 /** ditto */ 615 real binomialDistributionCompl(int k, int n, real p ) 616 { 617 verify(p>=0 && p<=1.0); // domain error 618 verify(k>=0 && k<=n); 619 620 if ( k == n ) { 621 return 0; 622 } 623 real dn = n - k; 624 if ( k == 0 ) { 625 if ( p < .01L ) 626 return -expm1( dn * log1p(-p) ); 627 else 628 return 1.0L - pow( 1.0L-p, dn ); 629 } else { 630 return betaIncomplete( k+1, dn, p ); 631 } 632 } 633 634 unittest { 635 // = Excel's (1 - BINOMDIST(k, n, p, TRUE)) 636 test( fabs(1.0L-binomialDistributionCompl(0, 15, 0.003) 637 - 0.955932824838906L) < 0.0000000000000005L); 638 test( fabs(1.0L-binomialDistributionCompl(0, 25, 0.2) 639 - 0.00377789318629572L) < 0.000000000000000005L); 640 test( fabs(1.0L-binomialDistributionCompl(8, 12, 0.5) 641 - 0.927001953125L) < 0.00000000000005L); 642 test(binomialDistributionCompl(7,7, 0.3)==0.0); 643 } 644 645 /** Inverse binomial distribution 646 * 647 * Finds the event probability p such that the sum of the 648 * terms 0 through k of the Binomial probability density 649 * is equal to the given cumulative probability y. 650 * 651 * This is accomplished using the inverse beta integral 652 * function and the relation 653 * 654 * 1 - p = betaDistributionInv( n-k, k+1, y ). 655 * 656 * The arguments must be positive, with 0 <= y <= 1, and k <= n. 657 */ 658 real binomialDistributionInv( int k, int n, real y ) 659 { 660 verify(y>=0 && y<=1.0); // domain error 661 verify(k>=0 && k<=n); 662 663 real dk, p; 664 real dn = n - k; 665 if ( k == 0 ) { 666 if( y > 0.8L ) 667 p = -expm1( log1p(y-1.0L) / dn ); 668 else 669 p = 1.0L - pow( y, 1.0L/dn ); 670 } else { 671 dk = k + 1; 672 p = betaIncomplete( dn, dk, y ); 673 if( p > 0.5 ) 674 p = betaIncompleteInv( dk, dn, 1.0L-y ); 675 else 676 p = 1.0 - betaIncompleteInv( dn, dk, y ); 677 } 678 return p; 679 } 680 681 unittest { 682 real w = binomialDistribution(9, 15, 0.318L); 683 test(feqrel(binomialDistributionInv(9, 15, w), 0.318L)>=real.mant_dig-3); 684 w = binomialDistribution(5, 35, 0.718L); 685 test(feqrel(binomialDistributionInv(5, 35, w), 0.718L)>=real.mant_dig-3); 686 w = binomialDistribution(0, 24, 0.637L); 687 test(feqrel(binomialDistributionInv(0, 24, w), 0.637L)>=real.mant_dig-3); 688 w = binomialDistributionInv(0, 59, 0.962L); 689 test(feqrel(binomialDistribution(0, 59, w), 0.962L)>=real.mant_dig-5); 690 } 691 692 /** Negative binomial distribution and its inverse 693 * 694 * Returns the sum of the terms 0 through k of the negative 695 * binomial distribution: 696 * 697 * $(BIGSUM j=0, k) $(CHOOSE n+j-1, j-1) $(POWER p, n) $(POWER (1-p), j) 698 * 699 * In a sequence of Bernoulli trials, this is the probability 700 * that k or fewer failures precede the n-th success. 701 * 702 * The arguments must be positive, with 0 < p < 1 and r>0. 703 * 704 * The inverse finds the argument y such 705 * that negativeBinomialDistribution(k,n,y) is equal to p. 706 * 707 * The Geometric Distribution is a special case of the negative binomial 708 * distribution. 709 * ----------------------- 710 * geometricDistribution(k, p) = negativeBinomialDistribution(k, 1, p); 711 * ----------------------- 712 * References: 713 * $(LINK http://mathworld.wolfram.com/NegativeBinomialDistribution.html) 714 */ 715 716 real negativeBinomialDistribution(int k, int n, real p ) 717 { 718 verify(p>=0 && p<=1.0); // domain error 719 verify(k>=0); 720 721 if ( k == 0 ) return pow( p, n ); 722 return betaIncomplete( n, k + 1, p ); 723 } 724 725 /** ditto */ 726 real negativeBinomialDistributionInv(int k, int n, real p ) 727 { 728 verify(p>=0 && p<=1.0); // domain error 729 verify(k>=0); 730 731 return betaIncompleteInv(n, k + 1, p); 732 } 733 734 unittest { 735 // Value obtained by sum of terms of MS Excel 2003's NEGBINOMDIST. 736 test( fabs(negativeBinomialDistribution(10, 20, 0.2) - 3.830_52E-08)< 0.000_005e-08); 737 test(feqrel(negativeBinomialDistributionInv(14, 208, negativeBinomialDistribution(14, 208, 1e-4L)), 1e-4L)>=real.mant_dig-3); 738 }