1 /** Algorithms for finding roots and extrema of one-argument real functions 2 * using bracketing. 3 * 4 * Copyright: 5 * Copyright (C) 2008 Don Clugston. 6 * Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH. 7 * All rights reserved. 8 * 9 * License: 10 * Tango Dual License: 3-Clause BSD License / Academic Free License v3.0. 11 * See LICENSE_TANGO.txt for details. 12 * 13 * Authors: Don Clugston. 14 * 15 */ 16 module ocean.math.Bracket; 17 import ocean.meta.types.Qualifiers; 18 19 static import tsm = core.stdc.math; 20 import ocean.math.Math; 21 import ocean.math.IEEE; 22 import ocean.core.Verify; 23 24 version (unittest) import ocean.core.Test; 25 26 private: 27 28 // return true if a and b have opposite sign 29 bool oppositeSigns(T)(T a, T b) 30 { 31 return (signbit(a) ^ signbit(b))!=0; 32 } 33 34 // TODO: This should be exposed publically, but needs a better name. 35 struct BracketResult(T, R) 36 { 37 T xlo; 38 T xhi; 39 R fxlo; 40 R fxhi; 41 } 42 43 public: 44 45 /** Find a real root of the real function f(x) via bracketing. 46 * 47 * Given a range [a..b] such that f(a) and f(b) have opposite sign, 48 * returns the value of x in the range which is closest to a root of f(x). 49 * If f(x) has more than one root in the range, one will be chosen arbitrarily. 50 * If f(x) returns $(NAN), $(NAN) will be returned; otherwise, this algorithm 51 * is guaranteed to succeed. 52 * 53 * Uses an algorithm based on TOMS748, which uses inverse cubic interpolation 54 * whenever possible, otherwise reverting to parabolic or secant 55 * interpolation. Compared to TOMS748, this implementation improves worst-case 56 * performance by a factor of more than 100, and typical performance by a factor 57 * of 2. For 80-bit reals, most problems require 8 - 15 calls to f(x) to achieve 58 * full machine precision. The worst-case performance (pathological cases) is 59 * approximately twice the number of bits. 60 * 61 * References: 62 * "On Enclosing Simple Roots of Nonlinear Equations", G. Alefeld, F.A. Potra, 63 * Yixun Shi, Mathematics of Computation 61, pp733-744 (1993). 64 * Fortran code available from www.netlib.org as algorithm TOMS478. 65 * 66 */ 67 T findRoot(T, R)(scope R delegate(T) f, T ax, T bx) 68 { 69 auto r = findRoot(f, ax, bx, f(ax), f(bx), (BracketResult!(T,R) r){ 70 return r.xhi==nextUp(r.xlo); }); 71 return fabs(r.fxlo)<=fabs(r.fxhi) ? r.xlo : r.xhi; 72 } 73 74 private: 75 76 /** Find root by bracketing, allowing termination condition to be specified 77 * 78 * Params: 79 * tolerance Defines the termination condition. Return true when acceptable 80 * bounds have been obtained. 81 */ 82 BracketResult!(T, R) findRoot(T,R)(scope R delegate(T) f, T ax, T bx, R fax, R fbx, 83 scope bool delegate(BracketResult!(T,R) r) tolerance) 84 { 85 verify(ax<=bx, "Parameters ax and bx out of order."); 86 verify(!tsm.isnan(ax) && !tsm.isnan(bx), "Limits must not be NaN"); 87 verify(oppositeSigns(fax,fbx), "Parameters must bracket the root."); 88 89 // This code is (heavily) modified from TOMS748 (www.netlib.org). Some ideas 90 // were borrowed from the Boost Mathematics Library. 91 92 T a = ax, b = bx, d; // [a..b] is our current bracket. 93 R fa = fax, fb = fbx, fd; // d is the third best guess. 94 95 // Test the function at point c; update brackets accordingly 96 void bracket(T c) 97 { 98 T fc = f(c); 99 if (fc == 0) { // Exact solution 100 a = c; 101 fa = fc; 102 d = c; 103 fd = fc; 104 return; 105 } 106 // Determine new enclosing interval 107 if (oppositeSigns(fa, fc)) { 108 d = b; 109 fd = fb; 110 b = c; 111 fb = fc; 112 } else { 113 d = a; 114 fd = fa; 115 a = c; 116 fa = fc; 117 } 118 } 119 120 /* Perform a secant interpolation. If the result would lie on a or b, or if 121 a and b differ so wildly in magnitude that the result would be meaningless, 122 perform a bisection instead. 123 */ 124 T secant_interpolate(T a, T b, T fa, T fb) 125 { 126 if (( ((a - b) == a) && b!=0) || (a!=0 && ((b - a) == b))) { 127 // Catastrophic cancellation 128 if (a == 0) a = copysign(0.0L, b); 129 else if (b == 0) b = copysign(0.0L, a); 130 else if (oppositeSigns(a, b)) return 0; 131 T c = ieeeMean(a, b); 132 return c; 133 } 134 // avoid overflow 135 if (b - a > T.max) return b / 2.0 + a / 2.0; 136 if (fb - fa > T.max) return a - (b - a) / 2; 137 T c = a - (fa / (fb - fa)) * (b - a); 138 if (c == a || c == b) return (a + b) / 2; 139 return c; 140 } 141 142 /* Uses 'numsteps' newton steps to approximate the zero in [a..b] of the 143 quadratic polynomial interpolating f(x) at a, b, and d. 144 Returns: 145 The approximate zero in [a..b] of the quadratic polynomial. 146 */ 147 T newtonQuadratic(int numsteps) 148 { 149 // Find the coefficients of the quadratic polynomial. 150 T a0 = fa; 151 T a1 = (fb - fa)/(b - a); 152 T a2 = ((fd - fb)/(d - b) - a1)/(d - a); 153 154 // Determine the starting point of newton steps. 155 T c = oppositeSigns(a2, fa) ? a : b; 156 157 // start the safeguarded newton steps. 158 for (int i = 0; i<numsteps; ++i) { 159 T pc = a0 + (a1 + a2 * (c - b))*(c - a); 160 T pdc = a1 + a2*((2.0 * c) - (a + b)); 161 if (pdc == 0) return a - a0 / a1; 162 else c = c - pc / pdc; 163 } 164 return c; 165 } 166 167 // On the first iteration we take a secant step: 168 if(fa != 0) { 169 bracket(secant_interpolate(a, b, fa, fb)); 170 } 171 // Starting with the second iteration, higher-order interpolation can 172 // be used. 173 int itnum = 1; // Iteration number 174 int baditer = 1; // Num bisections to take if an iteration is bad. 175 T c, e; // e is our fourth best guess 176 R fe; 177 whileloop: 178 while((fa != 0) && !tolerance(BracketResult!(T,R)(a, b, fa, fb))) { 179 T a0 = a, b0 = b; // record the brackets 180 181 // Do two higher-order (cubic or parabolic) interpolation steps. 182 for (int QQ = 0; QQ < 2; ++QQ) { 183 // Cubic inverse interpolation requires that 184 // all four function values fa, fb, fd, and fe are distinct; 185 // otherwise use quadratic interpolation. 186 bool distinct = (fa != fb) && (fa != fd) && (fa != fe) 187 && (fb != fd) && (fb != fe) && (fd != fe); 188 // The first time, cubic interpolation is impossible. 189 if (itnum<2) distinct = false; 190 bool ok = distinct; 191 if (distinct) { 192 // Cubic inverse interpolation of f(x) at a, b, d, and e 193 real q11 = (d - e) * fd / (fe - fd); 194 real q21 = (b - d) * fb / (fd - fb); 195 real q31 = (a - b) * fa / (fb - fa); 196 real d21 = (b - d) * fd / (fd - fb); 197 real d31 = (a - b) * fb / (fb - fa); 198 199 real q22 = (d21 - q11) * fb / (fe - fb); 200 real q32 = (d31 - q21) * fa / (fd - fa); 201 real d32 = (d31 - q21) * fd / (fd - fa); 202 real q33 = (d32 - q22) * fa / (fe - fa); 203 c = a + (q31 + q32 + q33); 204 if (tsm.isnan(c) || (c <= a) || (c >= b)) { 205 // DAC: If the interpolation predicts a or b, it's 206 // probable that it's the actual root. Only allow this if 207 // we're already close to the root. 208 if (c == a && a - b != a) { 209 c = nextUp(a); 210 } 211 else if (c == b && a - b != -b) { 212 c = nextDown(b); 213 } else { 214 ok = false; 215 } 216 } 217 } 218 if (!ok) { 219 c = newtonQuadratic(distinct ? 3 : 2); 220 if(tsm.isnan(c) || (c <= a) || (c >= b)) { 221 // Failure, try a secant step: 222 c = secant_interpolate(a, b, fa, fb); 223 } 224 } 225 ++itnum; 226 e = d; 227 fe = fd; 228 bracket(c); 229 if((fa == 0) || tolerance(BracketResult!(T,R)(a, b, fa, fb))) 230 break whileloop; 231 if (itnum == 2) 232 continue whileloop; 233 } 234 // Now we take a double-length secant step: 235 T u; 236 R fu; 237 if(fabs(fa) < fabs(fb)) { 238 u = a; 239 fu = fa; 240 } else { 241 u = b; 242 fu = fb; 243 } 244 c = u - 2 * (fu / (fb - fa)) * (b - a); 245 // DAC: If the secant predicts a value equal to an endpoint, it's 246 // probably false. 247 if(c==a || c==b || tsm.isnan(c) || fabs(c - u) > (b - a) / 2) { 248 if ((a-b) == a || (b-a) == b) { 249 if ( (a>0 && b<0) || (a<0 && b>0) ) c = 0; 250 else { 251 if (a==0) c = ieeeMean(copysign(0.0L, b), b); 252 else if (b==0) c = ieeeMean(copysign(0.0L, a), a); 253 else c = ieeeMean(a, b); 254 } 255 } else { 256 c = a + (b - a) / 2; 257 } 258 } 259 e = d; 260 fe = fd; 261 bracket(c); 262 if((fa == 0) || tolerance(BracketResult!(T,R)(a, b, fa, fb))) 263 break; 264 265 // We must ensure that the bounds reduce by a factor of 2 266 // (DAC: in binary space!) every iteration. If we haven't achieved this 267 // yet (DAC: or if we don't yet know what the exponent is), 268 // perform a binary chop. 269 270 if( (a==0 || b==0 || 271 (fabs(a) >= 0.5 * fabs(b) && fabs(b) >= 0.5 * fabs(a))) 272 && (b - a) < 0.25 * (b0 - a0)) { 273 baditer = 1; 274 continue; 275 } 276 // DAC: If this happens on consecutive iterations, we probably have a 277 // pathological function. Perform a number of bisections equal to the 278 // total number of consecutive bad iterations. 279 280 if ((b - a) < 0.25 * (b0 - a0)) baditer=1; 281 for (int QQ = 0; QQ < baditer ;++QQ) { 282 e = d; 283 fe = fd; 284 285 T w; 286 if ((a>0 && b<0) ||(a<0 && b>0)) w = 0; 287 else { 288 T usea = a; 289 T useb = b; 290 if (a == 0) usea = copysign(0.0L, b); 291 else if (b == 0) useb = copysign(0.0L, a); 292 w = ieeeMean(usea, useb); 293 } 294 bracket(w); 295 } 296 ++baditer; 297 } 298 299 if (fa == 0) return BracketResult!(T, R)(a, a, fa, fa); 300 else if (fb == 0) return BracketResult!(T, R)(b, b, fb, fb); 301 else return BracketResult!(T, R)(a, b, fa, fb); 302 } 303 304 public: 305 /** 306 * Find the minimum value of the function func(). 307 * 308 * Returns the value of x such that func(x) is minimised. Uses Brent's method, 309 * which uses a parabolic fit to rapidly approach the minimum but reverts to a 310 * Golden Section search where necessary. 311 * 312 * The minimum is located to an accuracy of feqrel(min, truemin) < 313 * real.mant_dig/2. 314 * 315 * Parameters: 316 * func The function to be minimized 317 * xinitial Initial guess to be used. 318 * xlo, xhi Upper and lower bounds on x. 319 * func(xinitial) <= func(x1) and func(xinitial) <= func(x2) 320 * funcMin The minimum value of func(x). 321 */ 322 T findMinimum(T,R)(scope R delegate(T) func, T xlo, T xhi, T xinitial, 323 out R funcMin) 324 { 325 verify(xlo <= xhi); 326 verify(xinitial >= xlo); 327 verify(xinitial <= xhi); 328 verify(func(xinitial) <= func(xlo) && func(xinitial) <= func(xhi)); 329 330 // Based on the original Algol code by R.P. Brent. 331 static immutable real GOLDENRATIO = 0.3819660112501051; // (3 - sqrt(5))/2 = 1 - 1/phi 332 333 T stepBeforeLast = 0.0; 334 T lastStep; 335 T bestx = xinitial; // the best value so far (min value for f(x)). 336 R fbest = func(bestx); 337 T second = xinitial; // the point with the second best value of f(x) 338 R fsecond = fbest; 339 T third = xinitial; // the previous value of second. 340 R fthird = fbest; 341 int numiter = 0; 342 for (;;) { 343 ++numiter; 344 T xmid = 0.5 * (xlo + xhi); 345 static immutable real SQRTEPSILON = 3e-10L; // sqrt(real.epsilon) 346 T tol1 = SQRTEPSILON * fabs(bestx); 347 T tol2 = 2.0 * tol1; 348 if (fabs(bestx - xmid) <= (tol2 - 0.5*(xhi - xlo)) ) { 349 funcMin = fbest; 350 return bestx; 351 } 352 if (fabs(stepBeforeLast) > tol1) { 353 // trial parabolic fit 354 real r = (bestx - second) * (fbest - fthird); 355 // DAC: This can be infinite, in which case lastStep will be NaN. 356 real denom = (bestx - third) * (fbest - fsecond); 357 real numerator = (bestx - third) * denom - (bestx - second) * r; 358 denom = 2.0 * (denom-r); 359 if ( denom > 0) numerator = -numerator; 360 denom = fabs(denom); 361 // is the parabolic fit good enough? 362 // it must be a step that is less than half the movement 363 // of the step before last, AND it must fall 364 // into the bounding interval [xlo,xhi]. 365 if (fabs(numerator) >= fabs(0.5 * denom * stepBeforeLast) 366 || numerator <= denom*(xlo-bestx) 367 || numerator >= denom*(xhi-bestx)) { 368 // No, use a golden section search instead. 369 // Step into the larger of the two segments. 370 stepBeforeLast = (bestx >= xmid) ? xlo - bestx : xhi - bestx; 371 lastStep = GOLDENRATIO * stepBeforeLast; 372 } else { 373 // parabola is OK 374 stepBeforeLast = lastStep; 375 lastStep = numerator/denom; 376 real xtest = bestx + lastStep; 377 if (xtest-xlo < tol2 || xhi-xtest < tol2) { 378 if (xmid-bestx > 0) 379 lastStep = tol1; 380 else lastStep = -tol1; 381 } 382 } 383 } else { 384 // Use a golden section search instead 385 stepBeforeLast = bestx >= xmid ? xlo - bestx : xhi - bestx; 386 lastStep = GOLDENRATIO * stepBeforeLast; 387 } 388 T xtest; 389 if (fabs(lastStep) < tol1 || isNaN(lastStep)) { 390 if (lastStep > 0) lastStep = tol1; 391 else lastStep = - tol1; 392 } 393 xtest = bestx + lastStep; 394 // Evaluate the function at point xtest. 395 R ftest = func(xtest); 396 397 if (ftest <= fbest) { 398 // We have a new best point! 399 // The previous best point becomes a limit. 400 if (xtest >= bestx) xlo = bestx; else xhi = bestx; 401 third = second; fthird = fsecond; 402 second = bestx; fsecond = fbest; 403 bestx = xtest; fbest = ftest; 404 } else { 405 // This new point is now one of the limits. 406 if (xtest < bestx) xlo = xtest; else xhi = xtest; 407 // Is it a new second best point? 408 if (ftest < fsecond || second == bestx) { 409 third = second; fthird = fsecond; 410 second = xtest; fsecond = ftest; 411 } else if (ftest <= fthird || third == bestx || third == second) { 412 // At least it's our third best point! 413 third = xtest; fthird = ftest; 414 } 415 } 416 } 417 } 418 419 unittest { 420 421 int numProblems = 0; 422 int numCalls; 423 424 void testFindRoot(scope real delegate(real) f, real x1, real x2) { 425 numCalls=0; 426 ++numProblems; 427 test(!isNaN(x1) && !isNaN(x2)); 428 auto result = findRoot(f, x1, x2, f(x1), f(x2), 429 (BracketResult!(real, real) r){ return r.xhi==nextUp(r.xlo); }); 430 431 auto flo = f(result.xlo); 432 auto fhi = f(result.xhi); 433 if (flo!=0) { 434 test(oppositeSigns(flo, fhi)); 435 } 436 } 437 438 // Test functions 439 real cubicfn (real x) { 440 ++numCalls; 441 if (x>float.max) x = float.max; 442 if (x<-double.max) x = -double.max; 443 // This has a single real root at -59.286543284815 444 return 0.386*x*x*x + 23*x*x + 15.7*x + 525.2; 445 } 446 // Test a function with more than one root. 447 real multisine(real x) { ++numCalls; return sin(x); } 448 testFindRoot( &multisine, 6, 90); 449 testFindRoot(&cubicfn, -100, 100); 450 testFindRoot( &cubicfn, -double.max, real.max); 451 452 453 /* Tests from the paper: 454 * "On Enclosing Simple Roots of Nonlinear Equations", G. Alefeld, F.A. Potra, 455 * Yixun Shi, Mathematics of Computation 61, pp733-744 (1993). 456 */ 457 // Parameters common to many alefeld tests. 458 int n; 459 real ale_a, ale_b; 460 461 int powercalls = 0; 462 463 real power(real x) { 464 ++powercalls; 465 ++numCalls; 466 return pow(x, n) + double.min_normal; 467 } 468 int [] power_nvals = [3, 5, 7, 9, 19, 25]; 469 // Alefeld paper states that pow(x,n) is a very poor case, where bisection 470 // outperforms his method, and gives total numcalls = 471 // 921 for bisection (2.4 calls per bit), 1830 for Alefeld (4.76/bit), 472 // 2624 for brent (6.8/bit) 473 // ... but that is for double, not real80. 474 // This poor performance seems mainly due to catastrophic cancellation, 475 // which is avoided here by the use of ieeeMean(). 476 // I get: 231 (0.48/bit). 477 // IE this is 10X faster in Alefeld's worst case 478 numProblems=0; 479 foreach(k; power_nvals) { 480 n = k; 481 testFindRoot(&power, -1, 10); 482 } 483 484 int powerProblems = numProblems; 485 486 // Tests from Alefeld paper 487 488 int [9] alefeldSums; 489 real alefeld0(real x){ 490 ++alefeldSums[0]; 491 ++numCalls; 492 real q = sin(x) - x/2; 493 for (int i=1; i<20; ++i) 494 q+=(2*i-5.0)*(2*i-5.0)/((x-i*i)*(x-i*i)*(x-i*i)); 495 return q; 496 } 497 real alefeld1(real x) { 498 ++numCalls; 499 ++alefeldSums[1]; 500 return ale_a*x + exp(ale_b * x); 501 } 502 real alefeld2(real x) { 503 ++numCalls; 504 ++alefeldSums[2]; 505 return pow(x, n) - ale_a; 506 } 507 real alefeld3(real x) { 508 ++numCalls; 509 ++alefeldSums[3]; 510 return (1.0 +pow(1.0L-n, 2))*x - pow(1.0L-n*x, 2); 511 } 512 real alefeld4(real x) { 513 ++numCalls; 514 ++alefeldSums[4]; 515 return x*x - pow(1-x, n); 516 } 517 518 real alefeld5(real x) { 519 ++numCalls; 520 ++alefeldSums[5]; 521 return (1+pow(1.0L-n, 4))*x - pow(1.0L-n*x, 4); 522 } 523 524 real alefeld6(real x) { 525 ++numCalls; 526 ++alefeldSums[6]; 527 return exp(-n*x)*(x-1.01L) + pow(x, n); 528 } 529 530 real alefeld7(real x) { 531 ++numCalls; 532 ++alefeldSums[7]; 533 return (n*x-1)/((n-1)*x); 534 } 535 numProblems=0; 536 testFindRoot(&alefeld0, PI_2, PI); 537 for (n=1; n<=10; ++n) { 538 testFindRoot(&alefeld0, n*n+1e-9L, (n+1)*(n+1)-1e-9L); 539 } 540 ale_a = -40; ale_b = -1; 541 testFindRoot(&alefeld1, -9, 31); 542 ale_a = -100; ale_b = -2; 543 testFindRoot(&alefeld1, -9, 31); 544 ale_a = -200; ale_b = -3; 545 testFindRoot(&alefeld1, -9, 31); 546 int [] nvals_3 = [1, 2, 5, 10, 15, 20]; 547 int [] nvals_5 = [1, 2, 4, 5, 8, 15, 20]; 548 int [] nvals_6 = [1, 5, 10, 15, 20]; 549 int [] nvals_7 = [2, 5, 15, 20]; 550 551 for(int i=4; i<12; i+=2) { 552 n = i; 553 ale_a = 0.2; 554 testFindRoot(&alefeld2, 0, 5); 555 ale_a=1; 556 testFindRoot(&alefeld2, 0.95, 4.05); 557 testFindRoot(&alefeld2, 0, 1.5); 558 } 559 foreach(i; nvals_3) { 560 n=i; 561 testFindRoot(&alefeld3, 0, 1); 562 } 563 foreach(i; nvals_3) { 564 n=i; 565 testFindRoot(&alefeld4, 0, 1); 566 } 567 foreach(i; nvals_5) { 568 n=i; 569 testFindRoot(&alefeld5, 0, 1); 570 } 571 foreach(i; nvals_6) { 572 n=i; 573 testFindRoot(&alefeld6, 0, 1); 574 } 575 foreach(i; nvals_7) { 576 n=i; 577 testFindRoot(&alefeld7, 0.01L, 1); 578 } 579 real worstcase(real x) { ++numCalls; 580 return x<0.3*real.max? -0.999e-3 : 1.0; 581 } 582 testFindRoot(&worstcase, -real.max, real.max); 583 584 /* 585 int grandtotal=0; 586 foreach(calls; alefeldSums) { 587 grandtotal+=calls; 588 } 589 grandtotal-=2*numProblems; 590 printf("\nALEFELD TOTAL = %d avg = %f (alefeld avg=19.3 for double)\n", 591 grandtotal, (1.0*grandtotal)/numProblems); 592 powercalls -= 2*powerProblems; 593 printf("POWER TOTAL = %d avg = %f ", powercalls, 594 (1.0*powercalls)/powerProblems); 595 */ 596 } 597 598 unittest { 599 int numcalls=-4; 600 // Extremely well-behaved function. 601 real parab(real bestx) { 602 ++numcalls; 603 return 3 * (bestx-7.14L) * (bestx-7.14L) + 18; 604 } 605 real minval; 606 real minx; 607 // Note, performs extremely poorly if we have an overflow, so that the 608 // function returns infinity. It might be better to explicitly deal with 609 // that situation (all parabolic fits will fail whenever an infinity is 610 // present). 611 minx = findMinimum(¶b, -sqrt(real.max), sqrt(real.max), 612 cast(real)(float.max), minval); 613 test(minval==18); 614 test(feqrel(minx,7.14L)>=float.mant_dig); 615 616 // Problems from Jack Crenshaw's "World's Best Root Finder" 617 // http://www.embedded.com/columns/programmerstoolbox/9900609 618 // This has a minimum of cbrt(0.5). 619 real crenshawcos(real x) { return cos(2*PI*x*x*x); } 620 minx = findMinimum(&crenshawcos, 0.0L, 1.0L, 0.1L, minval); 621 test(feqrel(minx*minx*minx, 0.5L)<=real.mant_dig-4); 622 623 }