1 /** Algorithms for finding roots and extrema of one-argument real functions
2  * using bracketing.
3  *
4  * Copyright:
5  *     Copyright (C) 2008 Don Clugston.
6  *     Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
7  *     All rights reserved.
8  *
9  * License:
10  *     Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
11  *     See LICENSE_TANGO.txt for details.
12  *
13  * Authors: Don Clugston.
14  *
15  */
16 module ocean.math.Bracket;
17 import ocean.meta.types.Qualifiers;
18 
19 static import tsm = core.stdc.math;
20 import ocean.math.Math;
21 import ocean.math.IEEE;
22 import ocean.core.Verify;
23 
24 version (unittest) import ocean.core.Test;
25 
26 private:
27 
28 // return true if a and b have opposite sign
29 bool oppositeSigns(T)(T a, T b)
30 {
31     return (signbit(a) ^ signbit(b))!=0;
32 }
33 
34 // TODO: This should be exposed publically, but needs a better name.
35 struct BracketResult(T, R)
36 {
37     T xlo;
38     T xhi;
39     R fxlo;
40     R fxhi;
41 }
42 
43 public:
44 
45 /**  Find a real root of the real function f(x) via bracketing.
46  *
47  * Given a range [a..b] such that f(a) and f(b) have opposite sign,
48  * returns the value of x in the range which is closest to a root of f(x).
49  * If f(x) has more than one root in the range, one will be chosen arbitrarily.
50  * If f(x) returns $(NAN), $(NAN) will be returned; otherwise, this algorithm
51  * is guaranteed to succeed.
52  *
53  * Uses an algorithm based on TOMS748, which uses inverse cubic interpolation
54  * whenever possible, otherwise reverting to parabolic or secant
55  * interpolation. Compared to TOMS748, this implementation improves worst-case
56  * performance by a factor of more than 100, and typical performance by a factor
57  * of 2. For 80-bit reals, most problems require 8 - 15 calls to f(x) to achieve
58  * full machine precision. The worst-case performance (pathological cases) is
59  * approximately twice the number of bits.
60  *
61  * References:
62  * "On Enclosing Simple Roots of Nonlinear Equations", G. Alefeld, F.A. Potra,
63  *   Yixun Shi, Mathematics of Computation 61, pp733-744 (1993).
64  *   Fortran code available from www.netlib.org as algorithm TOMS478.
65  *
66  */
67 T findRoot(T, R)(scope R delegate(T) f, T ax, T bx)
68 {
69     auto r = findRoot(f, ax, bx, f(ax), f(bx), (BracketResult!(T,R) r){
70          return r.xhi==nextUp(r.xlo); });
71     return fabs(r.fxlo)<=fabs(r.fxhi) ? r.xlo : r.xhi;
72 }
73 
74 private:
75 
76 /** Find root by bracketing, allowing termination condition to be specified
77  *
78  * Params:
79  * tolerance   Defines the termination condition. Return true when acceptable
80  *             bounds have been obtained.
81  */
82 BracketResult!(T, R) findRoot(T,R)(scope R delegate(T) f, T ax, T bx, R fax, R fbx,
83     scope bool delegate(BracketResult!(T,R) r) tolerance)
84 {
85     verify(ax<=bx, "Parameters ax and bx out of order.");
86     verify(!tsm.isnan(ax) && !tsm.isnan(bx), "Limits must not be NaN");
87     verify(oppositeSigns(fax,fbx), "Parameters must bracket the root.");
88 
89 // This code is (heavily) modified from TOMS748 (www.netlib.org). Some ideas
90 // were borrowed from the Boost Mathematics Library.
91 
92     T a = ax, b = bx, d;  // [a..b] is our current bracket.
93     R fa = fax, fb = fbx, fd; // d is the third best guess.
94 
95     // Test the function at point c; update brackets accordingly
96     void bracket(T c)
97     {
98         T fc = f(c);
99         if (fc == 0) { // Exact solution
100             a = c;
101             fa = fc;
102             d = c;
103             fd = fc;
104             return;
105         }
106         // Determine new enclosing interval
107         if (oppositeSigns(fa, fc)) {
108             d = b;
109             fd = fb;
110             b = c;
111             fb = fc;
112         } else {
113             d = a;
114             fd = fa;
115             a = c;
116             fa = fc;
117         }
118     }
119 
120    /* Perform a secant interpolation. If the result would lie on a or b, or if
121      a and b differ so wildly in magnitude that the result would be meaningless,
122      perform a bisection instead.
123     */
124     T secant_interpolate(T a, T b, T fa, T fb)
125     {
126         if (( ((a - b) == a) && b!=0) || (a!=0 && ((b - a) == b))) {
127             // Catastrophic cancellation
128             if (a == 0) a = copysign(0.0L, b);
129             else if (b == 0) b = copysign(0.0L, a);
130             else if (oppositeSigns(a, b)) return 0;
131             T c = ieeeMean(a, b);
132             return c;
133         }
134        // avoid overflow
135        if (b - a > T.max)    return b / 2.0 + a / 2.0;
136        if (fb - fa > T.max)  return a - (b - a) / 2;
137        T c = a - (fa / (fb - fa)) * (b - a);
138        if (c == a || c == b) return (a + b) / 2;
139        return c;
140     }
141 
142     /* Uses 'numsteps' newton steps to approximate the zero in [a..b] of the
143        quadratic polynomial interpolating f(x) at a, b, and d.
144        Returns:
145          The approximate zero in [a..b] of the quadratic polynomial.
146     */
147     T newtonQuadratic(int numsteps)
148     {
149         // Find the coefficients of the quadratic polynomial.
150         T a0 = fa;
151         T a1 = (fb - fa)/(b - a);
152         T a2 = ((fd - fb)/(d - b) - a1)/(d - a);
153 
154         // Determine the starting point of newton steps.
155         T c = oppositeSigns(a2, fa) ? a  : b;
156 
157         // start the safeguarded newton steps.
158         for (int i = 0; i<numsteps; ++i) {
159             T pc = a0 + (a1 + a2 * (c - b))*(c - a);
160             T pdc = a1 + a2*((2.0 * c) - (a + b));
161             if (pdc == 0) return a - a0 / a1;
162             else c = c - pc / pdc;
163         }
164         return c;
165     }
166 
167     // On the first iteration we take a secant step:
168     if(fa != 0) {
169         bracket(secant_interpolate(a, b, fa, fb));
170     }
171     // Starting with the second iteration, higher-order interpolation can
172     // be used.
173     int itnum = 1;   // Iteration number
174     int baditer = 1; // Num bisections to take if an iteration is bad.
175     T c, e;  // e is our fourth best guess
176     R fe;
177 whileloop:
178     while((fa != 0) && !tolerance(BracketResult!(T,R)(a, b, fa, fb))) {
179         T a0 = a, b0 = b; // record the brackets
180 
181         // Do two higher-order (cubic or parabolic) interpolation steps.
182         for (int QQ = 0; QQ < 2; ++QQ) {
183             // Cubic inverse interpolation requires that
184             // all four function values fa, fb, fd, and fe are distinct;
185             // otherwise use quadratic interpolation.
186             bool distinct = (fa != fb) && (fa != fd) && (fa != fe)
187                          && (fb != fd) && (fb != fe) && (fd != fe);
188             // The first time, cubic interpolation is impossible.
189             if (itnum<2) distinct = false;
190             bool ok = distinct;
191             if (distinct) {
192                 // Cubic inverse interpolation of f(x) at a, b, d, and e
193                 real q11 = (d - e) * fd / (fe - fd);
194                 real q21 = (b - d) * fb / (fd - fb);
195                 real q31 = (a - b) * fa / (fb - fa);
196                 real d21 = (b - d) * fd / (fd - fb);
197                 real d31 = (a - b) * fb / (fb - fa);
198 
199                 real q22 = (d21 - q11) * fb / (fe - fb);
200                 real q32 = (d31 - q21) * fa / (fd - fa);
201                 real d32 = (d31 - q21) * fd / (fd - fa);
202                 real q33 = (d32 - q22) * fa / (fe - fa);
203                 c = a + (q31 + q32 + q33);
204                 if (tsm.isnan(c) || (c <= a) || (c >= b)) {
205                     // DAC: If the interpolation predicts a or b, it's
206                     // probable that it's the actual root. Only allow this if
207                     // we're already close to the root.
208                     if (c == a && a - b != a) {
209                         c = nextUp(a);
210                     }
211                     else if (c == b && a - b != -b) {
212                         c = nextDown(b);
213                     } else {
214                         ok = false;
215                     }
216                 }
217             }
218             if (!ok) {
219                c = newtonQuadratic(distinct ? 3 : 2);
220                if(tsm.isnan(c) || (c <= a) || (c >= b)) {
221                   // Failure, try a secant step:
222                   c = secant_interpolate(a, b, fa, fb);
223                }
224             }
225             ++itnum;
226             e = d;
227             fe = fd;
228             bracket(c);
229             if((fa == 0) || tolerance(BracketResult!(T,R)(a, b, fa, fb)))
230                 break whileloop;
231             if (itnum == 2)
232                 continue whileloop;
233         }
234         // Now we take a double-length secant step:
235         T u;
236         R fu;
237         if(fabs(fa) < fabs(fb)) {
238              u = a;
239              fu = fa;
240         } else {
241              u = b;
242              fu = fb;
243         }
244         c = u - 2 * (fu / (fb - fa)) * (b - a);
245         // DAC: If the secant predicts a value equal to an endpoint, it's
246         // probably false.
247         if(c==a || c==b || tsm.isnan(c) || fabs(c - u) > (b - a) / 2) {
248             if ((a-b) == a || (b-a) == b) {
249                 if ( (a>0 && b<0) || (a<0 && b>0) ) c = 0;
250                 else {
251                    if (a==0) c = ieeeMean(copysign(0.0L, b), b);
252                    else if (b==0) c = ieeeMean(copysign(0.0L, a), a);
253                    else c = ieeeMean(a, b);
254                 }
255             } else {
256                 c = a + (b - a) / 2;
257             }
258         }
259         e = d;
260         fe = fd;
261         bracket(c);
262         if((fa == 0) || tolerance(BracketResult!(T,R)(a, b, fa, fb)))
263             break;
264 
265         // We must ensure that the bounds reduce by a factor of 2
266         // (DAC: in binary space!) every iteration. If we haven't achieved this
267         // yet (DAC: or if we don't yet know what the exponent is),
268         // perform a binary chop.
269 
270         if( (a==0 || b==0 ||
271             (fabs(a) >= 0.5 * fabs(b) && fabs(b) >= 0.5 * fabs(a)))
272             &&  (b - a) < 0.25 * (b0 - a0))  {
273                 baditer = 1;
274                 continue;
275             }
276         // DAC: If this happens on consecutive iterations, we probably have a
277         // pathological function. Perform a number of bisections equal to the
278         // total number of consecutive bad iterations.
279 
280         if ((b - a) < 0.25 * (b0 - a0)) baditer=1;
281         for (int QQ = 0; QQ < baditer ;++QQ) {
282             e = d;
283             fe = fd;
284 
285             T w;
286             if ((a>0 && b<0) ||(a<0 && b>0)) w = 0;
287             else {
288                 T usea = a;
289                 T useb = b;
290                 if (a == 0) usea = copysign(0.0L, b);
291                 else if (b == 0) useb = copysign(0.0L, a);
292                 w = ieeeMean(usea, useb);
293             }
294             bracket(w);
295         }
296         ++baditer;
297     }
298 
299     if (fa == 0) return BracketResult!(T, R)(a, a, fa, fa);
300     else if (fb == 0) return BracketResult!(T, R)(b, b, fb, fb);
301     else return BracketResult!(T, R)(a, b, fa, fb);
302 }
303 
304 public:
305 /**
306  * Find the minimum value of the function func().
307  *
308  * Returns the value of x such that func(x) is minimised. Uses Brent's method,
309  * which uses a parabolic fit to rapidly approach the minimum but reverts to a
310  * Golden Section search where necessary.
311  *
312  * The minimum is located to an accuracy of feqrel(min, truemin) <
313  * real.mant_dig/2.
314  *
315  * Parameters:
316  *     func         The function to be minimized
317  *     xinitial     Initial guess to be used.
318  *     xlo, xhi     Upper and lower bounds on x.
319  *                  func(xinitial) <= func(x1) and func(xinitial) <= func(x2)
320  *     funcMin      The minimum value of func(x).
321  */
322 T findMinimum(T,R)(scope R delegate(T) func, T xlo, T xhi, T xinitial,
323      out R funcMin)
324 {
325     verify(xlo <= xhi);
326     verify(xinitial >= xlo);
327     verify(xinitial <= xhi);
328     verify(func(xinitial) <= func(xlo) && func(xinitial) <= func(xhi));
329 
330     // Based on the original Algol code by R.P. Brent.
331     static immutable real GOLDENRATIO = 0.3819660112501051; // (3 - sqrt(5))/2 = 1 - 1/phi
332 
333     T stepBeforeLast = 0.0;
334     T lastStep;
335     T bestx = xinitial; // the best value so far (min value for f(x)).
336     R fbest = func(bestx);
337     T second = xinitial;  // the point with the second best value of f(x)
338     R fsecond = fbest;
339     T third = xinitial;  // the previous value of second.
340     R fthird = fbest;
341     int numiter = 0;
342     for (;;) {
343         ++numiter;
344         T xmid = 0.5 * (xlo + xhi);
345         static immutable real SQRTEPSILON = 3e-10L; // sqrt(real.epsilon)
346         T tol1 = SQRTEPSILON * fabs(bestx);
347         T tol2 = 2.0 * tol1;
348         if (fabs(bestx - xmid) <= (tol2 - 0.5*(xhi - xlo)) ) {
349             funcMin = fbest;
350             return bestx;
351         }
352         if (fabs(stepBeforeLast) > tol1) {
353             // trial parabolic fit
354             real r = (bestx - second) * (fbest - fthird);
355             // DAC: This can be infinite, in which case lastStep will be NaN.
356             real denom = (bestx - third) * (fbest - fsecond);
357             real numerator = (bestx - third) * denom - (bestx - second) * r;
358             denom = 2.0 * (denom-r);
359             if ( denom > 0) numerator = -numerator;
360             denom = fabs(denom);
361             // is the parabolic fit good enough?
362             // it must be a step that is less than half the movement
363             // of the step before last, AND it must fall
364             // into the bounding interval [xlo,xhi].
365             if (fabs(numerator) >= fabs(0.5 * denom * stepBeforeLast)
366                 || numerator <= denom*(xlo-bestx)
367                 || numerator >= denom*(xhi-bestx)) {
368                 // No, use a golden section search instead.
369                 // Step into the larger of the two segments.
370                 stepBeforeLast = (bestx >= xmid) ? xlo - bestx : xhi - bestx;
371                 lastStep = GOLDENRATIO * stepBeforeLast;
372             } else {
373                 // parabola is OK
374                 stepBeforeLast = lastStep;
375                 lastStep = numerator/denom;
376                 real xtest = bestx + lastStep;
377                 if (xtest-xlo < tol2 || xhi-xtest < tol2) {
378                     if (xmid-bestx > 0)
379                         lastStep = tol1;
380                     else lastStep = -tol1;
381                 }
382             }
383         } else {
384             // Use a golden section search instead
385             stepBeforeLast = bestx >= xmid ? xlo - bestx : xhi - bestx;
386             lastStep = GOLDENRATIO * stepBeforeLast;
387         }
388         T xtest;
389         if (fabs(lastStep) < tol1 || isNaN(lastStep)) {
390             if (lastStep > 0) lastStep = tol1;
391             else lastStep = - tol1;
392         }
393         xtest = bestx + lastStep;
394         // Evaluate the function at point xtest.
395         R ftest = func(xtest);
396 
397         if (ftest <= fbest) {
398             // We have a new best point!
399             // The previous best point becomes a limit.
400             if (xtest >= bestx) xlo = bestx; else xhi = bestx;
401             third = second;  fthird = fsecond;
402             second = bestx;  fsecond = fbest;
403             bestx = xtest;  fbest = ftest;
404         } else {
405             // This new point is now one of the limits.
406             if (xtest < bestx)  xlo = xtest; else xhi = xtest;
407             // Is it a new second best point?
408             if (ftest < fsecond || second == bestx) {
409                 third = second;  fthird = fsecond;
410                 second = xtest;  fsecond = ftest;
411             } else if (ftest <= fthird || third == bestx || third == second) {
412                 // At least it's our third best point!
413                 third = xtest;  fthird = ftest;
414             }
415         }
416     }
417 }
418 
419 unittest {
420 
421     int numProblems = 0;
422     int numCalls;
423 
424     void testFindRoot(scope real delegate(real) f, real x1, real x2) {
425         numCalls=0;
426         ++numProblems;
427         test(!isNaN(x1) && !isNaN(x2));
428         auto result = findRoot(f, x1, x2, f(x1), f(x2),
429             (BracketResult!(real, real) r){ return r.xhi==nextUp(r.xlo); });
430 
431         auto flo = f(result.xlo);
432         auto fhi = f(result.xhi);
433         if (flo!=0) {
434             test(oppositeSigns(flo, fhi));
435         }
436     }
437 
438     // Test functions
439     real cubicfn (real x) {
440        ++numCalls;
441        if (x>float.max) x = float.max;
442        if (x<-double.max) x = -double.max;
443        // This has a single real root at -59.286543284815
444        return 0.386*x*x*x + 23*x*x + 15.7*x + 525.2;
445     }
446     // Test a function with more than one root.
447     real multisine(real x) { ++numCalls; return sin(x); }
448     testFindRoot( &multisine, 6, 90);
449     testFindRoot(&cubicfn, -100, 100);
450     testFindRoot( &cubicfn, -double.max, real.max);
451 
452 
453 /* Tests from the paper:
454  * "On Enclosing Simple Roots of Nonlinear Equations", G. Alefeld, F.A. Potra,
455  *   Yixun Shi, Mathematics of Computation 61, pp733-744 (1993).
456  */
457     // Parameters common to many alefeld tests.
458     int n;
459     real ale_a, ale_b;
460 
461     int powercalls = 0;
462 
463     real power(real x) {
464         ++powercalls;
465         ++numCalls;
466         return pow(x, n) + double.min_normal;
467     }
468     int [] power_nvals = [3, 5, 7, 9, 19, 25];
469     // Alefeld paper states that pow(x,n) is a very poor case, where bisection
470     // outperforms his method, and gives total numcalls =
471     // 921 for bisection (2.4 calls per bit), 1830 for Alefeld (4.76/bit),
472     // 2624 for brent (6.8/bit)
473     // ... but that is for double, not real80.
474     // This poor performance seems mainly due to catastrophic cancellation,
475     // which is avoided here by the use of ieeeMean().
476     // I get: 231 (0.48/bit).
477     // IE this is 10X faster in Alefeld's worst case
478     numProblems=0;
479     foreach(k; power_nvals) {
480         n = k;
481         testFindRoot(&power, -1, 10);
482     }
483 
484     int powerProblems = numProblems;
485 
486     // Tests from Alefeld paper
487 
488     int [9] alefeldSums;
489     real alefeld0(real x){
490         ++alefeldSums[0];
491         ++numCalls;
492         real q =  sin(x) - x/2;
493         for (int i=1; i<20; ++i)
494             q+=(2*i-5.0)*(2*i-5.0)/((x-i*i)*(x-i*i)*(x-i*i));
495         return q;
496     }
497    real alefeld1(real x) {
498         ++numCalls;
499        ++alefeldSums[1];
500        return ale_a*x + exp(ale_b * x);
501    }
502    real alefeld2(real x) {
503         ++numCalls;
504        ++alefeldSums[2];
505        return pow(x, n) - ale_a;
506    }
507    real alefeld3(real x) {
508         ++numCalls;
509        ++alefeldSums[3];
510        return (1.0 +pow(1.0L-n, 2))*x - pow(1.0L-n*x, 2);
511    }
512    real alefeld4(real x) {
513         ++numCalls;
514        ++alefeldSums[4];
515        return x*x - pow(1-x, n);
516    }
517 
518    real alefeld5(real x) {
519         ++numCalls;
520        ++alefeldSums[5];
521        return (1+pow(1.0L-n, 4))*x - pow(1.0L-n*x, 4);
522    }
523 
524    real alefeld6(real x) {
525         ++numCalls;
526        ++alefeldSums[6];
527        return exp(-n*x)*(x-1.01L) + pow(x, n);
528    }
529 
530    real alefeld7(real x) {
531         ++numCalls;
532        ++alefeldSums[7];
533        return (n*x-1)/((n-1)*x);
534    }
535    numProblems=0;
536    testFindRoot(&alefeld0, PI_2, PI);
537    for (n=1; n<=10; ++n) {
538     testFindRoot(&alefeld0, n*n+1e-9L, (n+1)*(n+1)-1e-9L);
539    }
540    ale_a = -40; ale_b = -1;
541    testFindRoot(&alefeld1, -9, 31);
542    ale_a = -100; ale_b = -2;
543    testFindRoot(&alefeld1, -9, 31);
544    ale_a = -200; ale_b = -3;
545    testFindRoot(&alefeld1, -9, 31);
546    int [] nvals_3 = [1, 2, 5, 10, 15, 20];
547    int [] nvals_5 = [1, 2, 4, 5, 8, 15, 20];
548    int [] nvals_6 = [1, 5, 10, 15, 20];
549    int [] nvals_7 = [2, 5, 15, 20];
550 
551     for(int i=4; i<12; i+=2) {
552        n = i;
553        ale_a = 0.2;
554        testFindRoot(&alefeld2, 0, 5);
555        ale_a=1;
556        testFindRoot(&alefeld2, 0.95, 4.05);
557        testFindRoot(&alefeld2, 0, 1.5);
558     }
559     foreach(i; nvals_3) {
560         n=i;
561         testFindRoot(&alefeld3, 0, 1);
562     }
563     foreach(i; nvals_3) {
564         n=i;
565         testFindRoot(&alefeld4, 0, 1);
566     }
567     foreach(i; nvals_5) {
568         n=i;
569         testFindRoot(&alefeld5, 0, 1);
570     }
571     foreach(i; nvals_6) {
572         n=i;
573         testFindRoot(&alefeld6, 0, 1);
574     }
575     foreach(i; nvals_7) {
576         n=i;
577         testFindRoot(&alefeld7, 0.01L, 1);
578     }
579     real worstcase(real x) { ++numCalls;
580         return x<0.3*real.max? -0.999e-3 : 1.0;
581     }
582     testFindRoot(&worstcase, -real.max, real.max);
583 
584 /*
585    int grandtotal=0;
586    foreach(calls; alefeldSums) {
587        grandtotal+=calls;
588    }
589    grandtotal-=2*numProblems;
590    printf("\nALEFELD TOTAL = %d avg = %f (alefeld avg=19.3 for double)\n",
591    grandtotal, (1.0*grandtotal)/numProblems);
592    powercalls -= 2*powerProblems;
593    printf("POWER TOTAL = %d avg = %f ", powercalls,
594         (1.0*powercalls)/powerProblems);
595 */
596 }
597 
598 unittest {
599     int numcalls=-4;
600     // Extremely well-behaved function.
601     real parab(real bestx) {
602         ++numcalls;
603         return 3 * (bestx-7.14L) * (bestx-7.14L) + 18;
604     }
605     real minval;
606     real minx;
607     // Note, performs extremely poorly if we have an overflow, so that the
608     // function returns infinity. It might be better to explicitly deal with
609     // that situation (all parabolic fits will fail whenever an infinity is
610     // present).
611     minx = findMinimum(&parab, -sqrt(real.max), sqrt(real.max),
612         cast(real)(float.max), minval);
613     test(minval==18);
614     test(feqrel(minx,7.14L)>=float.mant_dig);
615 
616      // Problems from Jack Crenshaw's "World's Best Root Finder"
617     // http://www.embedded.com/columns/programmerstoolbox/9900609
618    // This has a minimum of cbrt(0.5).
619    real crenshawcos(real x) { return cos(2*PI*x*x*x); }
620    minx = findMinimum(&crenshawcos, 0.0L, 1.0L, 0.1L, minval);
621    test(feqrel(minx*minx*minx, 0.5L)<=real.mant_dig-4);
622 
623 }